2 * parse-diff.c: functions for parsing diff files
4 * ====================================================================
5 * Licensed to the Apache Software Foundation (ASF) under one
6 * or more contributor license agreements. See the NOTICE file
7 * distributed with this work for additional information
8 * regarding copyright ownership. The ASF licenses this file
9 * to you under the Apache License, Version 2.0 (the
10 * "License"); you may not use this file except in compliance
11 * with the License. You may obtain a copy of the License at
13 * http://www.apache.org/licenses/LICENSE-2.0
15 * Unless required by applicable law or agreed to in writing,
16 * software distributed under the License is distributed on an
17 * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
18 * KIND, either express or implied. See the License for the
19 * specific language governing permissions and limitations
21 * ====================================================================
29 #include "svn_types.h"
30 #include "svn_error.h"
32 #include "svn_pools.h"
33 #include "svn_props.h"
34 #include "svn_string.h"
36 #include "svn_dirent_uri.h"
38 #include "svn_ctype.h"
39 #include "svn_mergeinfo.h"
41 #include "private/svn_eol_private.h"
42 #include "private/svn_dep_compat.h"
43 #include "private/svn_diff_private.h"
44 #include "private/svn_sorts_private.h"
48 #include "svn_private_config.h"
50 /* Helper macro for readability: true when STR begins with START.
 * START is expanded twice (strncmp() and strlen()), so it should be
 * free of side effects. */
51 #define starts_with(str, start) \
52 (strncmp((str), (start), strlen(start)) == 0)
54 /* Like strlen() but for string literals. Relies on sizeof(), so STR
 * must be a literal (or array), not a pointer to a string. */
55 #define STRLEN_LITERAL(str) (sizeof(str) - 1)
57 /* This struct describes a range within a file, as well as the
58 * current cursor position within the range. All numbers are in bytes.
 * The rest of this file accesses the members start, current and end. */
59 struct svn_diff__hunk_range {
/* A single hunk of a patch: where its text is located in the patch file,
 * plus the line ranges, context counts and fuzz recorded for it. */
65 struct svn_diff_hunk_t {
66 /* The patch this hunk belongs to. */
67 const svn_patch_t *patch;
69 /* APR file handle to the patch file this hunk came from. */
72 /* Ranges used to keep track of this hunk's text positions within
74 struct svn_diff__hunk_range diff_text_range;
75 struct svn_diff__hunk_range original_text_range;
76 struct svn_diff__hunk_range modified_text_range;
78 /* Hunk ranges as they appeared in the patch file.
79 * All numbers are lines, not bytes. */
80 svn_linenum_t original_start;
81 svn_linenum_t original_length;
82 svn_linenum_t modified_start;
83 svn_linenum_t modified_length;
85 /* Number of lines of leading and trailing hunk context. */
86 svn_linenum_t leading_context;
87 svn_linenum_t trailing_context;
89 /* Did we see a 'file does not end with eol' marker in this hunk? */
90 svn_boolean_t original_no_final_eol;
91 svn_boolean_t modified_no_final_eol;
93 /* Fuzz penalty, triggered by bad patch targets */
94 svn_linenum_t original_fuzz;
95 svn_linenum_t modified_fuzz;
/* Location and expanded sizes of the two base85-encoded payloads
 * (original "src" and result "dst") of a binary patch. */
98 struct svn_diff_binary_patch_t {
99 /* The patch this binary patch belongs to. */
100 const svn_patch_t *patch;
102 /* APR file handle to the patch file this binary patch came from. */
103 apr_file_t *apr_file;
105 /* Offsets inside APR_FILE representing the location of the original (src) data */
108 svn_filesize_t src_filesize; /* Expanded/final size */
110 /* Offsets inside APR_FILE representing the location of the result (dst) data */
113 svn_filesize_t dst_filesize; /* Expanded/final size */
116 /* Common guts of svn_diff_hunk__create_adds_single_line() and
117 * svn_diff_hunk__create_deletes_single_line().
119 * ADD is TRUE if adding and FALSE if deleting.
 *
 * The hunk is allocated in RESULT_POOL. Its text (hunk header, the
 * '+' or '-' prefixed LINE, and a "no newline" marker line) is written
 * to a unique temporary file that is removed on pool cleanup; the
 * hunk's byte ranges refer to offsets within that file.
122 add_or_delete_single_line(svn_diff_hunk_t **hunk_out,
124 const svn_patch_t *patch,
126 apr_pool_t *result_pool,
127 apr_pool_t *scratch_pool)
129 svn_diff_hunk_t *hunk = apr_pcalloc(result_pool, sizeof(*hunk));
130 static const char *hunk_header[] = { "@@ -1 +0,0 @@\n", "@@ -0,0 +1 @@\n" };
131 const apr_size_t header_len = strlen(hunk_header[add]);
132 const apr_size_t len = strlen(line);
133 const apr_size_t end = header_len + (1 + len); /* The +1 is for the leading '+' or '-'; END excludes the \n. */
134 svn_stringbuf_t *buf = svn_stringbuf_create_ensure(end + 1, scratch_pool);
138 /* hunk->apr_file is created below. */
140 hunk->diff_text_range.start = header_len;
141 hunk->diff_text_range.current = header_len;
145 hunk->original_text_range.start = 0; /* There's no "original" text. */
146 hunk->original_text_range.current = 0;
147 hunk->original_text_range.end = 0;
148 hunk->original_no_final_eol = FALSE;
150 hunk->modified_text_range.start = header_len;
151 hunk->modified_text_range.current = header_len;
152 hunk->modified_text_range.end = end;
153 hunk->modified_no_final_eol = TRUE;
155 hunk->original_start = 0;
156 hunk->original_length = 0;
158 hunk->modified_start = 1;
159 hunk->modified_length = 1;
163 hunk->original_text_range.start = header_len;
164 hunk->original_text_range.current = header_len;
165 hunk->original_text_range.end = end;
166 hunk->original_no_final_eol = TRUE;
168 hunk->modified_text_range.start = 0; /* There's no "modified" text. */
169 hunk->modified_text_range.current = 0;
170 hunk->modified_text_range.end = 0;
171 hunk->modified_no_final_eol = FALSE;
173 hunk->original_start = 1;
174 hunk->original_length = 1;
176 hunk->modified_start = 0;
177 hunk->modified_length = 0; /* setting to '1' works too */
180 hunk->leading_context = 0;
181 hunk->trailing_context = 0;
183 /* Create APR_FILE and put just a hunk in it (without a diff header).
184 * Save the offset of the last byte of the diff line. */
185 svn_stringbuf_appendbytes(buf, hunk_header[add], header_len);
186 svn_stringbuf_appendbyte(buf, add ? '+' : '-');
187 svn_stringbuf_appendbytes(buf, line, len);
188 svn_stringbuf_appendbyte(buf, '\n');
189 svn_stringbuf_appendcstr(buf, "\\ No newline at end of hunk\n");
191 hunk->diff_text_range.end = buf->len;
193 SVN_ERR(svn_io_open_unique_file3(&hunk->apr_file, NULL /* filename */,
194 NULL /* system tempdir */,
195 svn_io_file_del_on_pool_cleanup,
196 result_pool, scratch_pool));
197 SVN_ERR(svn_io_file_write_full(hunk->apr_file,
199 NULL, scratch_pool));
200 /* No need to seek. */
/* Create in *HUNK_OUT a hunk for adding the single line LINE; a thin
 * wrapper that forwards to add_or_delete_single_line() and propagates
 * any error it returns. */
207 svn_diff_hunk__create_adds_single_line(svn_diff_hunk_t **hunk_out,
209 const svn_patch_t *patch,
210 apr_pool_t *result_pool,
211 apr_pool_t *scratch_pool)
213 SVN_ERR(add_or_delete_single_line(hunk_out, line, patch,
215 result_pool, scratch_pool));
/* Create in *HUNK_OUT a hunk for deleting the single line LINE; a thin
 * wrapper that forwards to add_or_delete_single_line() and propagates
 * any error it returns. */
220 svn_diff_hunk__create_deletes_single_line(svn_diff_hunk_t **hunk_out,
222 const svn_patch_t *patch,
223 apr_pool_t *result_pool,
224 apr_pool_t *scratch_pool)
226 SVN_ERR(add_or_delete_single_line(hunk_out, line, patch,
228 result_pool, scratch_pool));
/* Rewind HUNK's diff text cursor to the start of its diff text range. */
233 svn_diff_hunk_reset_diff_text(svn_diff_hunk_t *hunk)
235 hunk->diff_text_range.current = hunk->diff_text_range.start;
/* Rewind the cursor used for reading HUNK's original text. For a
 * reversed patch the modified text range takes the role of the
 * original text. */
239 svn_diff_hunk_reset_original_text(svn_diff_hunk_t *hunk)
241 if (hunk->patch->reverse)
242 hunk->modified_text_range.current = hunk->modified_text_range.start;
244 hunk->original_text_range.current = hunk->original_text_range.start;
/* Rewind the cursor used for reading HUNK's modified text. For a
 * reversed patch the original text range takes the role of the
 * modified text. */
248 svn_diff_hunk_reset_modified_text(svn_diff_hunk_t *hunk)
250 if (hunk->patch->reverse)
251 hunk->original_text_range.current = hunk->original_text_range.start;
253 hunk->modified_text_range.current = hunk->modified_text_range.start;
/* Return the starting line of HUNK's original text; for a reversed
 * patch this is the modified start recorded in the hunk. */
257 svn_diff_hunk_get_original_start(const svn_diff_hunk_t *hunk)
259 return hunk->patch->reverse ? hunk->modified_start : hunk->original_start;
/* Return the number of lines of HUNK's original text; for a reversed
 * patch this is the modified length recorded in the hunk. */
263 svn_diff_hunk_get_original_length(const svn_diff_hunk_t *hunk)
265 return hunk->patch->reverse ? hunk->modified_length : hunk->original_length;
/* Return the starting line of HUNK's modified text; for a reversed
 * patch this is the original start recorded in the hunk. */
269 svn_diff_hunk_get_modified_start(const svn_diff_hunk_t *hunk)
271 return hunk->patch->reverse ? hunk->original_start : hunk->modified_start;
/* Return the number of lines of HUNK's modified text; for a reversed
 * patch this is the original length recorded in the hunk. */
275 svn_diff_hunk_get_modified_length(const svn_diff_hunk_t *hunk)
277 return hunk->patch->reverse ? hunk->original_length : hunk->modified_length;
/* Return the number of lines of leading context recorded for HUNK. */
281 svn_diff_hunk_get_leading_context(const svn_diff_hunk_t *hunk)
283 return hunk->leading_context;
/* Return the number of lines of trailing context recorded for HUNK. */
287 svn_diff_hunk_get_trailing_context(const svn_diff_hunk_t *hunk)
289 return hunk->trailing_context;
/* Return HUNK's fuzz penalty: the modified-side fuzz normally, the
 * original-side fuzz for a reversed patch. */
293 svn_diff_hunk__get_fuzz_penalty(const svn_diff_hunk_t *hunk)
295 return hunk->patch->reverse ? hunk->original_fuzz : hunk->modified_fuzz;
298 /* Baton for the base85 stream implementation */
299 struct base85_baton_t
302 apr_pool_t *iterpool; /* Scratch pool, cleared on each read call */
303 char buffer[52]; /* Decoded bytes of the current line */
304 apr_off_t next_pos; /* Start position of next line */
305 apr_off_t end_pos; /* Position after last line */
306 apr_size_t buf_size; /* Bytes available (52 unless at eof) */
307 apr_size_t buf_pos; /* Offset into BUFFER of the next unread byte */
308 svn_boolean_t done; /* At eof? */
311 /* Implements svn_read_fn_t for the base85 read stream.
 *
 * Serves decoded bytes from the baton's buffer and, once that is used up,
 * reads and decodes the next line of the file. The first character of
 * each line encodes the decoded length of that line: 'A'..'Z' mean 1..26
 * bytes and 'a'..'z' mean 27..52 bytes; anything else is reported as
 * SVN_ERR_DIFF_UNEXPECTED_DATA. A line carrying fewer than 52 bytes is
 * treated as the last one. */
313 read_handler_base85(void *baton, char *buffer, apr_size_t *len)
315 struct base85_baton_t *b85b = baton;
316 apr_pool_t *iterpool = b85b->iterpool;
317 apr_size_t remaining = *len;
320 svn_pool_clear(iterpool);
328 while (remaining && (b85b->buf_size > b85b->buf_pos
329 || b85b->next_pos < b85b->end_pos))
331 svn_stringbuf_t *line;
332 svn_boolean_t at_eof;
334 apr_size_t available = b85b->buf_size - b85b->buf_pos;
337 apr_size_t n = (remaining < available) ? remaining : available;
339 memcpy(dest, b85b->buffer + b85b->buf_pos, n);
345 return SVN_NO_ERROR; /* *len = OK */
348 if (b85b->next_pos >= b85b->end_pos)
350 SVN_ERR(svn_io_file_seek(b85b->file, APR_SET, &b85b->next_pos,
352 SVN_ERR(svn_io_file_readline(b85b->file, &line, NULL, &at_eof,
353 APR_SIZE_MAX, iterpool, iterpool));
355 b85b->next_pos = b85b->end_pos;
358 SVN_ERR(svn_io_file_get_offset(&b85b->next_pos, b85b->file,
362 if (line->len && line->data[0] >= 'A' && line->data[0] <= 'Z')
363 b85b->buf_size = line->data[0] - 'A' + 1;
364 else if (line->len && line->data[0] >= 'a' && line->data[0] <= 'z')
365 b85b->buf_size = line->data[0] - 'a' + 26 + 1;
367 return svn_error_create(SVN_ERR_DIFF_UNEXPECTED_DATA, NULL,
368 _("Unexpected data in base85 section"));
370 if (b85b->buf_size < 52)
371 b85b->next_pos = b85b->end_pos; /* Handle as EOF */
373 SVN_ERR(svn_diff__base85_decode_line(b85b->buffer, b85b->buf_size,
374 line->data + 1, line->len - 1,
385 /* Implements svn_close_fn_t for the base85 read stream: destroys the
 * baton's iteration pool. */
387 close_handler_base85(void *baton)
389 struct base85_baton_t *b85b = baton;
391 svn_pool_destroy(b85b->iterpool);
396 /* Gets a stream that reads decoded base85 data from a segment of a file.
397 The current implementation might assume that both start_pos and end_pos
398 are located at line boundaries.
   The stream and its baton are allocated in RESULT_POOL, with the
   baton's iteration pool as a subpool of it. Only a full-read handler
   and a close handler are installed. */
399 static svn_stream_t *
400 get_base85_data_stream(apr_file_t *file,
403 apr_pool_t *result_pool)
405 struct base85_baton_t *b85b = apr_pcalloc(result_pool, sizeof(*b85b));
406 svn_stream_t *base85s = svn_stream_create(b85b, result_pool);
409 b85b->iterpool = svn_pool_create(result_pool);
410 b85b->next_pos = start_pos;
411 b85b->end_pos = end_pos;
413 svn_stream_set_read2(base85s, NULL /* only full read support */,
414 read_handler_base85);
415 svn_stream_set_close(base85s, close_handler_base85);
419 /* Baton for the length verification stream functions */
420 struct length_verify_baton_t
423 svn_filesize_t remaining; /* Bytes still expected from the inner stream */
426 /* Implements svn_read_fn_t for the length verification stream.
 *
 * Reads from the inner stream and fails with SVN_ERR_DIFF_UNEXPECTED_DATA
 * when more bytes arrive than are still expected, or when a short read
 * ends before the expected number of bytes has been delivered. */
428 read_handler_length_verify(void *baton, char *buffer, apr_size_t *len)
430 struct length_verify_baton_t *lvb = baton;
431 apr_size_t requested_len = *len;
433 SVN_ERR(svn_stream_read_full(lvb->inner, buffer, len));
435 if (*len > lvb->remaining)
436 return svn_error_create(SVN_ERR_DIFF_UNEXPECTED_DATA, NULL,
437 _("Base85 data expands to longer than declared "
439 else if (requested_len > *len && *len != lvb->remaining)
440 return svn_error_create(SVN_ERR_DIFF_UNEXPECTED_DATA, NULL,
441 _("Base85 data expands to smaller than declared "
444 lvb->remaining -= *len;
449 /* Implements svn_close_fn_t for the length verification stream: closes
 * the inner stream and returns its result. */
451 close_handler_length_verify(void *baton)
453 struct length_verify_baton_t *lvb = baton;
455 return svn_error_trace(svn_stream_close(lvb->inner));
458 /* Gets a stream that verifies on reads that the inner stream is exactly
459 of the specified length. The stream and its baton are allocated in
   RESULT_POOL; closing the returned stream also closes INNER. */
460 static svn_stream_t *
461 get_verify_length_stream(svn_stream_t *inner,
462 svn_filesize_t expected_size,
463 apr_pool_t *result_pool)
465 struct length_verify_baton_t *lvb = apr_palloc(result_pool, sizeof(*lvb));
466 svn_stream_t *len_stream = svn_stream_create(lvb, result_pool);
469 lvb->remaining = expected_size;
471 svn_stream_set_read2(len_stream, NULL /* only full read support */,
472 read_handler_length_verify);
473 svn_stream_set_close(len_stream, close_handler_length_verify);
/* Return a stream, allocated in RESULT_POOL, yielding the original (src)
 * content of BPATCH: the base85 section is decoded, passed through
 * svn_stream_compressed(), and checked against the declared size. */
479 svn_diff_get_binary_diff_original_stream(const svn_diff_binary_patch_t *bpatch,
480 apr_pool_t *result_pool)
482 svn_stream_t *s = get_base85_data_stream(bpatch->apr_file, bpatch->src_start,
483 bpatch->src_end, result_pool);
485 s = svn_stream_compressed(s, result_pool);
487 /* ### If we (ever) want to support the DELTA format, then we should hook the
488 undelta handling here */
490 return get_verify_length_stream(s, bpatch->src_filesize, result_pool);
/* Return a stream, allocated in RESULT_POOL, yielding the result (dst)
 * content of BPATCH: the base85 section is decoded, passed through
 * svn_stream_compressed(), and checked against the declared size. */
494 svn_diff_get_binary_diff_result_stream(const svn_diff_binary_patch_t *bpatch,
495 apr_pool_t *result_pool)
497 svn_stream_t *s = get_base85_data_stream(bpatch->apr_file, bpatch->dst_start,
498 bpatch->dst_end, result_pool);
500 s = svn_stream_compressed(s, result_pool);
502 /* ### If we (ever) want to support the DELTA format, then we should hook the
503 undelta handling here */
505 return get_verify_length_stream(s, bpatch->dst_filesize, result_pool);
508 /* Try to parse a non-negative number from a decimal number encoded
509 * in the string NUMBER. Return parsed number in OFFSET, and return
510 * TRUE if parsing was successful. Values from 0 through
 * SVN_LINENUM_MAX_VALUE are accepted; a parse error is cleared, not
 * returned. */
512 parse_offset(svn_linenum_t *offset, const char *number)
517 err = svn_cstring_strtoui64(&val, number, 0, SVN_LINENUM_MAX_VALUE, 10);
520 svn_error_clear(err);
524 *offset = (svn_linenum_t)val;
529 /* Try to parse a hunk range specification from the string RANGE.
530 * Return parsed information in *START and *LENGTH, and return TRUE
531 * if the range parsed correctly. Note: This function may modify the
532 * input value RANGE. The text after a ',' (if present) is parsed as
 * the length; the text before it as the start line. */
534 parse_range(svn_linenum_t *start, svn_linenum_t *length, char *range)
541 comma = strstr(range, ",");
544 if (strlen(comma + 1) > 0)
546 /* Try to parse the length. */
547 if (! parse_offset(length, comma + 1))
550 /* Snip off the end of the string,
551 * so we can comfortably parse the line
552 * number the hunk starts at. */
556 /* A comma but no length? */
564 /* Try to parse the line number the hunk starts at. */
565 return parse_offset(start, range);
568 /* Try to parse a hunk header in string HEADER, putting parsed information
569 * into HUNK. Return TRUE if the header parsed correctly. ATAT is the
570 * character string used to delimit the hunk header.
571 * Do all allocations in POOL.
 *
 * The two space-delimited ranges are each copied into a scratch string
 * buffer and handed to parse_range(); the first fills the original
 * start/length of HUNK and the second the modified start/length. */
573 parse_hunk_header(const char *header, svn_diff_hunk_t *hunk,
574 const char *atat, apr_pool_t *pool)
578 svn_stringbuf_t *range;
580 p = header + strlen(atat);
588 /* OK, this may be worth allocating some memory for... */
589 range = svn_stringbuf_create_ensure(31, pool);
591 while (*p && *p != ' ')
600 svn_stringbuf_appendbytes(range, start, p - start);
602 /* Try to parse the first range. */
603 if (! parse_range(&hunk->original_start, &hunk->original_length, range->data))
606 /* Clear the stringbuf so we can reuse it for the second range. */
607 svn_stringbuf_setempty(range);
612 /* OK, this may be worth copying... */
614 while (*p && *p != ' ')
622 svn_stringbuf_appendbytes(range, start, p - start);
624 /* Check for trailing @@ */
626 if (! starts_with(p, atat))
629 /* There may be stuff like C-function names after the trailing @@,
630 * but we ignore that. */
632 /* Try to parse the second range. */
633 if (! parse_range(&hunk->modified_start, &hunk->modified_length, range->data))
636 /* Hunk header is good. */
640 /* Read a line of original or modified hunk text from the specified
641 * RANGE within FILE. FILE is expected to contain unidiff text.
642 * Leading unidiff symbols ('+', '-', and ' ') are removed from the line.
643 * Any lines commencing with the VERBOTEN character are discarded.
644 * VERBOTEN should be '+' or '-', depending on which form of hunk text
645 * is being read. NO_FINAL_EOL declares if the hunk contains a no final
648 * All other parameters are as in svn_diff_hunk_readline_original_text()
649 * and svn_diff_hunk_readline_modified_text().
 *
 * Lines starting with a backslash are discarded as well. FILE's seek
 * position on entry is saved and restored before returning, and
 * RANGE->current is advanced past the lines that were read.
652 hunk_readline_original_or_modified(apr_file_t *file,
653 struct svn_diff__hunk_range *range,
654 svn_stringbuf_t **stringbuf,
658 svn_boolean_t no_final_eol,
659 apr_pool_t *result_pool,
660 apr_pool_t *scratch_pool)
663 svn_boolean_t filtered;
665 svn_stringbuf_t *str;
667 apr_pool_t *last_pool;
672 if (range->current >= range->end)
674 /* We're past the range. Indicate that no bytes can be read. */
677 *stringbuf = svn_stringbuf_create_empty(result_pool);
681 SVN_ERR(svn_io_file_get_offset(&pos, file, scratch_pool));
682 SVN_ERR(svn_io_file_seek(file, APR_SET, &range->current, scratch_pool));
684 /* It's not ITERPOOL because we use data allocated in LAST_POOL out
686 last_pool = svn_pool_create(scratch_pool);
689 svn_pool_clear(last_pool);
691 max_len = range->end - range->current;
692 SVN_ERR(svn_io_file_readline(file, &str, eol, eof, max_len,
693 last_pool, last_pool));
694 SVN_ERR(svn_io_file_get_offset(&range->current, file, last_pool));
695 filtered = (str->data[0] == verboten || str->data[0] == '\\');
697 while (filtered && ! *eof);
701 /* EOF, return an empty string. */
702 *stringbuf = svn_stringbuf_create_ensure(0, result_pool);
705 else if (str->data[0] == '+' || str->data[0] == '-' || str->data[0] == ' ')
707 /* Shave off leading unidiff symbols. */
708 *stringbuf = svn_stringbuf_create(str->data + 1, result_pool);
712 /* Return the line as-is. Handle as a chopped leading spaces */
713 *stringbuf = svn_stringbuf_dup(str, result_pool);
716 if (!filtered && *eof && !*eol && *str->data)
718 /* Ok, we miss a final EOL in the patch file, but didn't see a
721 We should report that we had an EOL or the patch code will
722 misbehave (and it knows nothing about no eol markers) */
724 if (!no_final_eol && eol != &eol_p)
728 SVN_ERR(svn_io_file_seek(file, APR_SET, &start, scratch_pool));
730 SVN_ERR(svn_io_file_readline(file, &str, eol, NULL, APR_SIZE_MAX,
731 scratch_pool, scratch_pool));
733 /* Every patch file that has hunks has at least one EOL */
734 SVN_ERR_ASSERT(*eol != NULL);
738 /* Fall through to seek back to the right location */
740 SVN_ERR(svn_io_file_seek(file, APR_SET, &pos, scratch_pool));
742 svn_pool_destroy(last_pool);
/* Read the next line of HUNK's original text via
 * hunk_readline_original_or_modified(). Lines starting with '+' are
 * skipped; for a reversed patch the modified text range is read and
 * lines starting with '-' are skipped instead. */
747 svn_diff_hunk_readline_original_text(svn_diff_hunk_t *hunk,
748 svn_stringbuf_t **stringbuf,
751 apr_pool_t *result_pool,
752 apr_pool_t *scratch_pool)
754 return svn_error_trace(
755 hunk_readline_original_or_modified(hunk->apr_file,
756 hunk->patch->reverse ?
757 &hunk->modified_text_range :
758 &hunk->original_text_range,
760 hunk->patch->reverse ? '-' : '+',
762 ? hunk->modified_no_final_eol
763 : hunk->original_no_final_eol,
764 result_pool, scratch_pool));
/* Read the next line of HUNK's modified text via
 * hunk_readline_original_or_modified(). Lines starting with '-' are
 * skipped; for a reversed patch the original text range is read and
 * lines starting with '+' are skipped instead. */
768 svn_diff_hunk_readline_modified_text(svn_diff_hunk_t *hunk,
769 svn_stringbuf_t **stringbuf,
772 apr_pool_t *result_pool,
773 apr_pool_t *scratch_pool)
775 return svn_error_trace(
776 hunk_readline_original_or_modified(hunk->apr_file,
777 hunk->patch->reverse ?
778 &hunk->original_text_range :
779 &hunk->modified_text_range,
781 hunk->patch->reverse ? '+' : '-',
783 ? hunk->original_no_final_eol
784 : hunk->modified_no_final_eol,
785 result_pool, scratch_pool));
/* Read the next line of HUNK's raw diff text, advancing the hunk's diff
 * text cursor. The patch file's seek position on entry is saved and
 * restored before returning. For a reversed patch, lines starting with
 * '+' or '-' get special treatment (presumably the symbol is swapped --
 * TODO confirm). */
789 svn_diff_hunk_readline_diff_text(svn_diff_hunk_t *hunk,
790 svn_stringbuf_t **stringbuf,
793 apr_pool_t *result_pool,
794 apr_pool_t *scratch_pool)
796 svn_stringbuf_t *line;
804 if (hunk->diff_text_range.current >= hunk->diff_text_range.end)
806 /* We're past the range. Indicate that no bytes can be read. */
809 *stringbuf = svn_stringbuf_create_empty(result_pool);
813 SVN_ERR(svn_io_file_get_offset(&pos, hunk->apr_file, scratch_pool));
814 SVN_ERR(svn_io_file_seek(hunk->apr_file, APR_SET,
815 &hunk->diff_text_range.current, scratch_pool));
816 max_len = hunk->diff_text_range.end - hunk->diff_text_range.current;
817 SVN_ERR(svn_io_file_readline(hunk->apr_file, &line, eol, eof, max_len,
820 SVN_ERR(svn_io_file_get_offset(&hunk->diff_text_range.current,
821 hunk->apr_file, scratch_pool));
823 if (*eof && !*eol && *line->data)
825 /* Ok, we miss a final EOL in the patch file, but didn't see a
828 We should report that we had an EOL or the patch code will
829 misbehave (and it knows nothing about no eol markers) */
833 /* Let's pick the first eol we find in our patch file */
835 svn_stringbuf_t *str;
837 SVN_ERR(svn_io_file_seek(hunk->apr_file, APR_SET, &start,
840 SVN_ERR(svn_io_file_readline(hunk->apr_file, &str, eol, NULL,
842 scratch_pool, scratch_pool));
844 /* Every patch file that has hunks has at least one EOL */
845 SVN_ERR_ASSERT(*eol != NULL);
850 /* Fall through to seek back to the right location */
853 SVN_ERR(svn_io_file_seek(hunk->apr_file, APR_SET, &pos, scratch_pool));
855 if (hunk->patch->reverse)
857 if (line->data[0] == '+')
859 else if (line->data[0] == '-')
868 /* Parse *PROP_NAME from HEADER as the part after the INDICATOR line.
869 * Allocate *PROP_NAME in RESULT_POOL.
870 * Set *PROP_NAME to NULL if no valid property name was found.
 *
 * The text after INDICATOR is converted to UTF-8 first. If it is not a
 * valid property name as is, it is checked again after
 * svn_stringbuf_strip_whitespace() has been applied to a copy. */
872 parse_prop_name(const char **prop_name, const char *header,
873 const char *indicator, apr_pool_t *result_pool)
875 SVN_ERR(svn_utf_cstring_to_utf8(prop_name,
876 header + strlen(indicator),
878 if (**prop_name == '\0')
880 else if (! svn_prop_name_is_valid(*prop_name))
882 svn_stringbuf_t *buf = svn_stringbuf_create(*prop_name, result_pool);
883 svn_stringbuf_strip_whitespace(buf);
884 *prop_name = (svn_prop_name_is_valid(buf->data) ? buf->data : NULL);
891 /* A helper function to parse svn:mergeinfo diffs.
893 * These diffs use a special pretty-print format, for instance:
895 * Added: svn:mergeinfo
899 * The hunk header has the following format:
900 * ## -0,NUMBER_OF_REVERSE_MERGES +0,NUMBER_OF_FORWARD_MERGES ##
902 * At this point, the number of reverse merges has already been
903 * parsed into HUNK->ORIGINAL_LENGTH, and the number of forward
904 * merges has been parsed into HUNK->MODIFIED_LENGTH.
906 * The header is followed by a list of mergeinfo, one path per line.
907 * This function parses such lines. Lines describing reverse merges
908 * appear first, and then all lines describing forward merges appear.
910 * Parts of the line are affected by i18n. The words 'Merged'
911 * and 'Reverse-merged' can appear in any language and at any
912 * position within the line. We can only assume that a leading
913 * '/' starts the merge source path, the path is followed by
914 * ":r", which in turn is followed by a mergeinfo revision range,
915 * which is terminated by whitespace or end-of-string.
917 * If the current line meets the above criteria and we're able
918 * to parse valid mergeinfo from it, the resulting mergeinfo
919 * is added to patch->mergeinfo or patch->reverse_mergeinfo,
920 * and we proceed to the next line.
 *
 * The path is taken to start at the first '/' in LINE, and the ":r"
 * is looked for at the last ':' in LINE. A mergeinfo parse error
 * (SVN_ERR_MERGEINFO_PARSE_ERROR) is cleared rather than returned.
 * Each recorded line decrements HUNK's original length while that is
 * still positive, and its modified length after that.
 * *FOUND_MERGEINFO is initialised to FALSE and set to TRUE once
 * mergeinfo has been handled.
923 parse_mergeinfo(svn_boolean_t *found_mergeinfo,
924 svn_stringbuf_t *line,
925 svn_diff_hunk_t *hunk,
927 apr_pool_t *result_pool,
928 apr_pool_t *scratch_pool)
930 char *slash = strchr(line->data, '/');
931 char *colon = strrchr(line->data, ':');
933 *found_mergeinfo = FALSE;
935 if (slash && colon && colon[1] == 'r' && slash < colon)
937 svn_stringbuf_t *input;
938 svn_mergeinfo_t mergeinfo = NULL;
942 input = svn_stringbuf_create_ensure(line->len, scratch_pool);
944 /* Copy the merge source path + colon */
948 svn_stringbuf_appendbyte(input, *s);
952 /* skip 'r' after colon */
955 /* Copy the revision range. */
956 while (s < line->data + line->len)
958 if (svn_ctype_isspace(*s))
960 svn_stringbuf_appendbyte(input, *s);
964 err = svn_mergeinfo_parse(&mergeinfo, input->data, result_pool);
965 if (err && err->apr_err == SVN_ERR_MERGEINFO_PARSE_ERROR)
967 svn_error_clear(err);
975 if (hunk->original_length > 0) /* reverse merges */
979 if (patch->mergeinfo == NULL)
980 patch->mergeinfo = mergeinfo;
982 SVN_ERR(svn_mergeinfo_merge2(patch->mergeinfo,
989 if (patch->reverse_mergeinfo == NULL)
990 patch->reverse_mergeinfo = mergeinfo;
992 SVN_ERR(svn_mergeinfo_merge2(patch->reverse_mergeinfo,
997 hunk->original_length--;
999 else if (hunk->modified_length > 0) /* forward merges */
1003 if (patch->reverse_mergeinfo == NULL)
1004 patch->reverse_mergeinfo = mergeinfo;
1006 SVN_ERR(svn_mergeinfo_merge2(patch->reverse_mergeinfo,
1013 if (patch->mergeinfo == NULL)
1014 patch->mergeinfo = mergeinfo;
1016 SVN_ERR(svn_mergeinfo_merge2(patch->mergeinfo,
1021 hunk->modified_length--;
1024 *found_mergeinfo = TRUE;
1028 return SVN_NO_ERROR;
1031 /* Return the next *HUNK from a PATCH in APR_FILE.
1032 * If no hunk can be found, set *HUNK to NULL.
1033 * Set IS_PROPERTY to TRUE if we have a property hunk. If the returned HUNK
1034 * is the first belonging to a certain property, then PROP_NAME and
1035 * PROP_OPERATION will be set too. If we have a text hunk, PROP_NAME will be
1036 * NULL. If IGNORE_WHITESPACE is TRUE, lines without leading spaces will be
1037 * treated as context lines. Allocate results in RESULT_POOL.
1038 * Use SCRATCH_POOL for all other allocations.
 *
 * Outside a hunk, a line starting with "@@" is tried as a text hunk
 * header and one starting with "##" as a property hunk header. Lines
 * starting with "Added: ", "Deleted: " or "Modified: " set *PROP_NAME
 * and *PROP_OPERATION (added and deleted are swapped for a reversed
 * patch). A line starting with "--- " or "diff --git " is taken as a
 * possible header of another patch. After the scan loop, APR_FILE is
 * rewound to the start of the last line read. */
1039 static svn_error_t *
1040 parse_next_hunk(svn_diff_hunk_t **hunk,
1041 svn_boolean_t *is_property,
1042 const char **prop_name,
1043 svn_diff_operation_kind_t *prop_operation,
1045 apr_file_t *apr_file,
1046 svn_boolean_t ignore_whitespace,
1047 apr_pool_t *result_pool,
1048 apr_pool_t *scratch_pool)
1050 static const char * const minus = "--- ";
1051 static const char * const text_atat = "@@";
1052 static const char * const prop_atat = "##";
1053 svn_stringbuf_t *line;
1054 svn_boolean_t eof, in_hunk, hunk_seen;
1055 apr_off_t pos, last_line;
1056 apr_off_t start, end;
1057 apr_off_t original_end;
1058 apr_off_t modified_end;
1059 svn_boolean_t original_no_final_eol = FALSE;
1060 svn_boolean_t modified_no_final_eol = FALSE;
1061 svn_linenum_t original_lines;
1062 svn_linenum_t modified_lines;
1063 svn_linenum_t leading_context;
1064 svn_linenum_t trailing_context;
1065 svn_boolean_t changed_line_seen;
1072 apr_pool_t *iterpool;
1074 *prop_operation = svn_diff_op_unchanged;
1076 /* We only set this if we have a property hunk header. */
1078 *is_property = FALSE;
1080 if (apr_file_eof(apr_file) == APR_EOF)
1082 /* No more hunks here. */
1084 return SVN_NO_ERROR;
1089 leading_context = 0;
1090 trailing_context = 0;
1091 changed_line_seen = FALSE;
1094 *hunk = apr_pcalloc(result_pool, sizeof(**hunk));
1096 /* Get current seek position. */
1097 SVN_ERR(svn_io_file_get_offset(&pos, apr_file, scratch_pool));
1099 /* Start out assuming noise. */
1100 last_line_type = noise_line;
1102 iterpool = svn_pool_create(scratch_pool);
1106 svn_pool_clear(iterpool);
1108 /* Remember the current line's offset, and read the line. */
1110 SVN_ERR(svn_io_file_readline(apr_file, &line, NULL, &eof, APR_SIZE_MAX,
1111 iterpool, iterpool));
1113 /* Update line offset for next iteration. */
1114 SVN_ERR(svn_io_file_get_offset(&pos, apr_file, iterpool));
1116 /* Lines starting with a backslash indicate a missing EOL:
1117 * "\ No newline at end of file" or "end of property". */
1118 if (line->data[0] == '\\')
1125 apr_off_t hunk_text_end;
1127 /* Comment terminates the hunk text and says the hunk text
1128 * has no trailing EOL. Snip off trailing EOL which is part
1129 * of the patch file but not part of the hunk text. */
1130 off = last_line - 2;
1131 SVN_ERR(svn_io_file_seek(apr_file, APR_SET, &off, iterpool));
1132 len = sizeof(eolbuf);
1133 SVN_ERR(svn_io_file_read_full2(apr_file, eolbuf, len, &len,
1135 if (eolbuf[0] == '\r' && eolbuf[1] == '\n')
1136 hunk_text_end = last_line - 2;
1137 else if (eolbuf[1] == '\n' || eolbuf[1] == '\r')
1138 hunk_text_end = last_line - 1;
1140 hunk_text_end = last_line;
1142 if (last_line_type == original_line && original_end == 0)
1143 original_end = hunk_text_end;
1144 else if (last_line_type == modified_line && modified_end == 0)
1145 modified_end = hunk_text_end;
1146 else if (last_line_type == context_line)
1148 if (original_end == 0)
1149 original_end = hunk_text_end;
1150 if (modified_end == 0)
1151 modified_end = hunk_text_end;
1154 SVN_ERR(svn_io_file_seek(apr_file, APR_SET, &pos, iterpool));
1155 /* Set for the type and context by using != the other type */
1156 if (last_line_type != modified_line)
1157 original_no_final_eol = TRUE;
1158 if (last_line_type != original_line)
1159 modified_no_final_eol = TRUE;
1165 if (in_hunk && *is_property && *prop_name &&
1166 strcmp(*prop_name, SVN_PROP_MERGEINFO) == 0)
1168 svn_boolean_t found_mergeinfo;
1170 SVN_ERR(parse_mergeinfo(&found_mergeinfo, line, *hunk, patch,
1171 result_pool, iterpool));
1172 if (found_mergeinfo)
1173 continue; /* Proceed to the next line in the svn:mergeinfo hunk. */
1176 /* Perhaps we can also use original_lines/modified_lines here */
1178 in_hunk = FALSE; /* On to next property */
1185 static const char add = '+';
1186 static const char del = '-';
1190 /* We're reading the first line of the hunk, so the start
1191 * of the line just read is the hunk text's byte offset. */
1197 || ((original_lines > 0 && modified_lines > 0)
1199 /* Tolerate chopped leading spaces on empty lines. */
1200 (! eof && line->len == 0)
1201 /* Maybe tolerate chopped leading spaces on non-empty lines. */
1202 || (ignore_whitespace && c != del && c != add))))
1204 /* It's a "context" line in the hunk. */
1206 if (original_lines > 0)
1210 (*hunk)->original_length++;
1211 (*hunk)->original_fuzz++;
1213 if (modified_lines > 0)
1217 (*hunk)->modified_length++;
1218 (*hunk)->modified_fuzz++;
1220 if (changed_line_seen)
1224 last_line_type = context_line;
1227 && (original_lines > 0 || line->data[1] != del))
1229 /* It's a "deleted" line in the hunk. */
1231 changed_line_seen = TRUE;
1233 /* A hunk may have context in the middle. We only want
1234 trailing lines of context. */
1235 if (trailing_context > 0)
1236 trailing_context = 0;
1238 if (original_lines > 0)
1242 (*hunk)->original_length++;
1243 (*hunk)->original_fuzz++;
1245 last_line_type = original_line;
1248 && (modified_lines > 0 || line->data[1] != add))
1250 /* It's an "added" line in the hunk. */
1252 changed_line_seen = TRUE;
1254 /* A hunk may have context in the middle. We only want
1255 trailing lines of context. */
1256 if (trailing_context > 0)
1257 trailing_context = 0;
1259 if (modified_lines > 0)
1263 (*hunk)->modified_length++;
1264 (*hunk)->modified_fuzz++;
1266 last_line_type = modified_line;
1272 /* The hunk ends at EOF. */
1277 /* The start of the current line marks the first byte
1278 * after the hunk text. */
1281 if (original_end == 0)
1283 if (modified_end == 0)
1285 break; /* Hunk was empty or has been read. */
1290 if (starts_with(line->data, text_atat))
1292 /* Looks like we have a hunk header, try to rip it apart. */
1293 in_hunk = parse_hunk_header(line->data, *hunk, text_atat,
1297 original_lines = (*hunk)->original_length;
1298 modified_lines = (*hunk)->modified_length;
1299 *is_property = FALSE;
1302 else if (starts_with(line->data, prop_atat))
1304 /* Looks like we have a property hunk header, try to rip it
1306 in_hunk = parse_hunk_header(line->data, *hunk, prop_atat,
1310 original_lines = (*hunk)->original_length;
1311 modified_lines = (*hunk)->modified_length;
1312 *is_property = TRUE;
1315 else if (starts_with(line->data, "Added: "))
1317 SVN_ERR(parse_prop_name(prop_name, line->data, "Added: ",
1320 *prop_operation = (patch->reverse ? svn_diff_op_deleted
1321 : svn_diff_op_added);
1323 else if (starts_with(line->data, "Deleted: "))
1325 SVN_ERR(parse_prop_name(prop_name, line->data, "Deleted: ",
1328 *prop_operation = (patch->reverse ? svn_diff_op_added
1329 : svn_diff_op_deleted);
1331 else if (starts_with(line->data, "Modified: "))
1333 SVN_ERR(parse_prop_name(prop_name, line->data, "Modified: ",
1336 *prop_operation = svn_diff_op_modified;
1338 else if (starts_with(line->data, minus)
1339 || starts_with(line->data, "diff --git "))
1340 /* This could be a header of another patch. Bail out. */
1344 /* Check for the line length since a file may not have a newline at the
1345 * end and we depend upon the last line to be an empty one. */
1346 while (! eof || line->len > 0);
1347 svn_pool_destroy(iterpool);
1350 /* Rewind to the start of the line just read, so subsequent calls
1351 * to this function or svn_diff_parse_next_patch() don't end
1352 * up skipping the line -- it may contain a patch or hunk header. */
1353 SVN_ERR(svn_io_file_seek(apr_file, APR_SET, &last_line, scratch_pool));
1355 if (hunk_seen && start < end)
1357 /* Did we get the number of context lines announced in the header?
1359 If not... let's limit the number from the header to what we
1360 actually have, and apply a fuzz penalty */
1363 (*hunk)->original_length -= original_lines;
1364 (*hunk)->original_fuzz += original_lines;
1368 (*hunk)->modified_length -= modified_lines;
1369 (*hunk)->modified_fuzz += modified_lines;
1372 (*hunk)->patch = patch;
1373 (*hunk)->apr_file = apr_file;
1374 (*hunk)->leading_context = leading_context;
1375 (*hunk)->trailing_context = trailing_context;
1376 (*hunk)->diff_text_range.start = start;
1377 (*hunk)->diff_text_range.current = start;
1378 (*hunk)->diff_text_range.end = end;
1379 (*hunk)->original_text_range.start = start;
1380 (*hunk)->original_text_range.current = start;
1381 (*hunk)->original_text_range.end = original_end;
1382 (*hunk)->modified_text_range.start = start;
1383 (*hunk)->modified_text_range.current = start;
1384 (*hunk)->modified_text_range.end = modified_end;
1385 (*hunk)->original_no_final_eol = original_no_final_eol;
1386 (*hunk)->modified_no_final_eol = modified_no_final_eol;
1389 /* Something went wrong, just discard the result. */
1392 return SVN_NO_ERROR;
1395 /* Compare function for sorting hunks after parsing.
1396 * We sort hunks by their original line offset. */
1398 compare_hunks(const void *a, const void *b)
1400 const svn_diff_hunk_t *ha = *((const svn_diff_hunk_t *const *)a);
1401 const svn_diff_hunk_t *hb = *((const svn_diff_hunk_t *const *)b);
1403 if (ha->original_start < hb->original_start)
1405 if (ha->original_start > hb->original_start)
/* svn_diff_parse_next_patch() starts in state_start; every header-line
 * callback reports the state it reached through its NEW_STATE argument.
 * state_unidiff_found, state_git_header_found and state_binary_patch_found
 * are the states treated as "a valid diff header has been read". */
1410 /* Possible states of the diff header parser. */
1413 state_start, /* initial */
1414 state_git_diff_seen, /* diff --git */
1415 state_git_tree_seen, /* a tree operation, rather than content change */
1416 state_git_minus_seen, /* --- /dev/null; or --- a/ */
1417 state_git_plus_seen, /* +++ /dev/null; or +++ a/ */
1418 state_old_mode_seen, /* old mode 100644 */
1419 state_git_mode_seen, /* new mode 100644 */
1420 state_move_from_seen, /* rename from foo.c */
1421 state_copy_from_seen, /* copy from foo.c */
1422 state_minus_seen, /* --- foo.c */
1423 state_unidiff_found, /* valid start of a regular unidiff header */
1424 state_git_header_found, /* valid start of a --git diff header */
1425 state_binary_patch_found /* valid start of binary patch */
/* One row of the header parser's `transitions' table.
 * A row applies to an input line when the line starts with EXPECTED_INPUT
 * and the parser is currently in REQUIRED_STATE; FN is then called with
 * the line as INPUT and stores the follow-up state in *NEW_STATE. */
1428 /* Data type describing a valid state transition of the parser. */
1431 const char *expected_input;
1432 enum parse_state required_state;
1434 /* A callback called upon each parser state transition. */
1435 svn_error_t *(*fn)(enum parse_state *new_state, char *input,
1436 svn_patch_t *patch, apr_pool_t *result_pool,
1437 apr_pool_t *scratch_pool);
1440 /* UTF-8 encode and canonicalize the content of LINE as FILE_NAME. */
1441 static svn_error_t *
1442 grab_filename(const char **file_name, const char *line, apr_pool_t *result_pool,
1443 apr_pool_t *scratch_pool)
1445 const char *utf8_path;
1446 const char *canon_path;
1448 /* Grab the filename and encode it in UTF-8. */
1449 /* TODO: Allow specifying the patch file's encoding.
1450 * For now, we assume its encoding is native. */
1451 /* ### This can fail if the filename cannot be represented in the current
1452 * ### locale's encoding. */
1453 SVN_ERR(svn_utf_cstring_to_utf8(&utf8_path,
1457 /* Canonicalize the path name. */
1458 canon_path = svn_dirent_canonicalize(utf8_path, scratch_pool);
1460 *file_name = apr_pstrdup(result_pool, canon_path);
1462 return SVN_NO_ERROR;
1465 /* Parse the '--- ' line of a regular unidiff. */
1466 static svn_error_t *
1467 diff_minus(enum parse_state *new_state, char *line, svn_patch_t *patch,
1468 apr_pool_t *result_pool, apr_pool_t *scratch_pool)
1470 /* If we can find a tab, it separates the filename from
1471 * the rest of the line which we can discard. */
1472 char *tab = strchr(line, '\t');
1476 SVN_ERR(grab_filename(&patch->old_filename, line + STRLEN_LITERAL("--- "),
1477 result_pool, scratch_pool));
1479 *new_state = state_minus_seen;
1481 return SVN_NO_ERROR;
1484 /* Parse the '+++ ' line of a regular unidiff. */
1485 static svn_error_t *
1486 diff_plus(enum parse_state *new_state, char *line, svn_patch_t *patch,
1487 apr_pool_t *result_pool, apr_pool_t *scratch_pool)
1489 /* If we can find a tab, it separates the filename from
1490 * the rest of the line which we can discard. */
1491 char *tab = strchr(line, '\t');
1495 SVN_ERR(grab_filename(&patch->new_filename, line + STRLEN_LITERAL("+++ "),
1496 result_pool, scratch_pool));
1498 *new_state = state_unidiff_found;
1500 return SVN_NO_ERROR;
/* Callback for the "diff --git" transition.
 *
 * If LINE lacks an " a/" marker followed by more text, or a later " b/"
 * marker followed by more text, *NEW_STATE is reset to state_start and
 * PATCH is left alone.  Otherwise PATCH->operation is set to
 * svn_diff_op_modified and *NEW_STATE to state_git_diff_seen.
 *
 * PATCH's filenames are taken from this line only when the text in front
 * of a " b/" marker is identical to the text behind it.  In that case LINE
 * is modified in place: a NUL byte replaces the marker's leading space.
 * Filenames are allocated in RESULT_POOL via grab_filename(). */
1503 /* Parse the first line of a git extended unidiff. */
1504 static svn_error_t *
1505 git_start(enum parse_state *new_state, char *line, svn_patch_t *patch,
1506 apr_pool_t *result_pool, apr_pool_t *scratch_pool)
1508 const char *old_path_start;
1510 const char *new_path_start;
1511 const char *new_path_end;
1512 char *new_path_marker;
1513 const char *old_path_marker;
1515 /* ### Add handling of escaped paths
1516 * http://www.kernel.org/pub/software/scm/git/docs/git-diff.html:
1518 * TAB, LF, double quote and backslash characters in pathnames are
1519 * represented as \t, \n, \" and \\, respectively. If there is need for
1520 * such substitution then the whole pathname is put in double quotes.
1523 /* Our line should look like this: 'diff --git a/path b/path'.
1525 * If we find any deviations from that format, we return with state reset
1528 old_path_marker = strstr(line, " a/");
1530 if (! old_path_marker)
1532 *new_state = state_start;
1533 return SVN_NO_ERROR;
1536 if (! *(old_path_marker + 3))
1538 *new_state = state_start;
1539 return SVN_NO_ERROR;
1542 new_path_marker = strstr(old_path_marker, " b/");
1544 if (! new_path_marker)
1546 *new_state = state_start;
1547 return SVN_NO_ERROR;
1550 if (! *(new_path_marker + 3))
1552 *new_state = state_start;
1553 return SVN_NO_ERROR;
1556 /* By now, we know that we have a line of the form 'diff --git a/.+ b/.+'
1557 * We only need the filenames when we have deleted or added empty
1558 * files. In those cases the old_path and new_path are identical on the
1559 * 'diff --git' line. For all other cases we fetch the filenames from
1560 * other header lines. */
1561 old_path_start = line + STRLEN_LITERAL("diff --git a/");
1562 new_path_end = line + strlen(line);
1563 new_path_start = old_path_start;
1570 new_path_marker = strstr(new_path_start, " b/");
1572 /* No new path marker, bail out. */
1573 if (! new_path_marker)
1576 old_path_end = new_path_marker;
1577 new_path_start = new_path_marker + STRLEN_LITERAL(" b/");
1579 /* No path after the marker. */
1580 if (! *new_path_start)
1583 len_old = old_path_end - old_path_start;
1584 len_new = new_path_end - new_path_start;
1586 /* Are the paths before and after the " b/" marker the same? */
1587 if (len_old == len_new
1588 && ! strncmp(old_path_start, new_path_start, len_old))
1590 *old_path_end = '\0';
1591 SVN_ERR(grab_filename(&patch->old_filename, old_path_start,
1592 result_pool, scratch_pool));
1594 SVN_ERR(grab_filename(&patch->new_filename, new_path_start,
1595 result_pool, scratch_pool));
1600 /* We assume that the path is only modified until we've found a 'tree'
1602 patch->operation = svn_diff_op_modified;
1604 *new_state = state_git_diff_seen;
1605 return SVN_NO_ERROR;
1608 /* Parse the '--- ' line of a git extended unidiff. */
1609 static svn_error_t *
1610 git_minus(enum parse_state *new_state, char *line, svn_patch_t *patch,
1611 apr_pool_t *result_pool, apr_pool_t *scratch_pool)
1613 /* If we can find a tab, it separates the filename from
1614 * the rest of the line which we can discard. */
1615 char *tab = strchr(line, '\t');
1619 if (starts_with(line, "--- /dev/null"))
1620 SVN_ERR(grab_filename(&patch->old_filename, "/dev/null",
1621 result_pool, scratch_pool));
1623 SVN_ERR(grab_filename(&patch->old_filename, line + STRLEN_LITERAL("--- a/"),
1624 result_pool, scratch_pool));
1626 *new_state = state_git_minus_seen;
1627 return SVN_NO_ERROR;
1630 /* Parse the '+++ ' line of a git extended unidiff. */
1631 static svn_error_t *
1632 git_plus(enum parse_state *new_state, char *line, svn_patch_t *patch,
1633 apr_pool_t *result_pool, apr_pool_t *scratch_pool)
1635 /* If we can find a tab, it separates the filename from
1636 * the rest of the line which we can discard. */
1637 char *tab = strchr(line, '\t');
1641 if (starts_with(line, "+++ /dev/null"))
1642 SVN_ERR(grab_filename(&patch->new_filename, "/dev/null",
1643 result_pool, scratch_pool));
1645 SVN_ERR(grab_filename(&patch->new_filename, line + STRLEN_LITERAL("+++ b/"),
1646 result_pool, scratch_pool));
1648 *new_state = state_git_header_found;
1649 return SVN_NO_ERROR;
1652 /* Helper for git_old_mode() and git_new_mode(). Translate the git
1653 * file mode MODE_STR into a binary "executable?" and "symlink?" state. */
1654 static svn_error_t *
1655 parse_git_mode_bits(svn_tristate_t *executable_p,
1656 svn_tristate_t *symlink_p,
1657 const char *mode_str)
1660 SVN_ERR(svn_cstring_strtoui64(&mode, mode_str,
1662 0777777 /* max: six octal digits */,
1663 010 /* radix (octal) */));
1665 /* Note: 0644 and 0755 are the only modes that can occur for plain files.
1666 * We deliberately choose to parse only those values: we are strict in what
1667 * we accept _and_ in what we produce.
1669 * (Having said that, though, we could consider relaxing the parser to also
1671 * (mode & 0111) == 0000 -> svn_tristate_false
1672 * (mode & 0111) == 0111 -> svn_tristate_true
1673 * [anything else] -> svn_tristate_unknown
1677 switch (mode & 0777)
1680 *executable_p = svn_tristate_false;
1684 *executable_p = svn_tristate_true;
1688 /* Ignore unknown values. */
1689 *executable_p = svn_tristate_unknown;
1693 switch (mode & 0170000 /* S_IFMT */)
1695 case 0120000: /* S_IFLNK */
1696 *symlink_p = svn_tristate_true;
1699 case 0100000: /* S_IFREG */
1700 case 0040000: /* S_IFDIR */
1701 *symlink_p = svn_tristate_false;
1705 /* Ignore unknown values.
1706 (Including those generated by Subversion <= 1.9) */
1707 *symlink_p = svn_tristate_unknown;
1711 return SVN_NO_ERROR;
/* Callback for the "old mode " transition.
 * The mode string following "old mode " is translated by
 * parse_git_mode_bits() into PATCH->old_executable_bit and
 * PATCH->old_symlink_bit; *NEW_STATE becomes state_old_mode_seen.
 * An error from the mode parser is returned to the caller. */
1714 /* Parse the 'old mode ' line of a git extended unidiff. */
1715 static svn_error_t *
1716 git_old_mode(enum parse_state *new_state, char *line, svn_patch_t *patch,
1717 apr_pool_t *result_pool, apr_pool_t *scratch_pool)
1719 SVN_ERR(parse_git_mode_bits(&patch->old_executable_bit,
1720 &patch->old_symlink_bit,
1721 line + STRLEN_LITERAL("old mode ")));
1724 /* If this assert trips, the "old mode" is neither ...644 nor ...755 . */
1725 SVN_ERR_ASSERT(patch->old_executable_bit != svn_tristate_unknown);
1728 *new_state = state_old_mode_seen;
1729 return SVN_NO_ERROR;
/* Callback for the "new mode " transition.
 * The mode string following "new mode " is translated by
 * parse_git_mode_bits() into PATCH->new_executable_bit and
 * PATCH->new_symlink_bit; *NEW_STATE becomes state_git_mode_seen.
 * An error from the mode parser is returned to the caller. */
1732 /* Parse the 'new mode ' line of a git extended unidiff. */
1733 static svn_error_t *
1734 git_new_mode(enum parse_state *new_state, char *line, svn_patch_t *patch,
1735 apr_pool_t *result_pool, apr_pool_t *scratch_pool)
1737 SVN_ERR(parse_git_mode_bits(&patch->new_executable_bit,
1738 &patch->new_symlink_bit,
1739 line + STRLEN_LITERAL("new mode ")));
1742 /* If this assert trips, the "new mode" is neither ...644 nor ...755 . */
1743 SVN_ERR_ASSERT(patch->new_executable_bit != svn_tristate_unknown);
1746 /* Don't touch patch->operation. */
1748 *new_state = state_git_mode_seen;
1749 return SVN_NO_ERROR;
/* Parse the 'index ' line of a git extended unidiff.
 * If the line has a second space-separated field after "index ", and PATCH
 * has neither a new executable bit nor a new symlink bit yet and is neither
 * an addition nor a deletion, the mode is parsed into PATCH's new bits and
 * copied to its old bits.  *NEW_STATE is left untouched. */
1752 static svn_error_t *
1753 git_index(enum parse_state *new_state, char *line, svn_patch_t *patch,
1754 apr_pool_t *result_pool, apr_pool_t *scratch_pool)
1756 /* We either have something like "index 33e5b38..0000000" (which we just
1757 ignore as we are not interested in git specific shas) or something like
1758 "index 33e5b38..0000000 120000" which tells us the mode, that isn't
1759 changed by applying this patch.
1761 If the mode would have changed then we would see 'old mode' and 'new mode'
1764 line = strchr(line + STRLEN_LITERAL("index "), ' ');
1766 if (line && patch->new_executable_bit == svn_tristate_unknown
1767 && patch->new_symlink_bit == svn_tristate_unknown
1768 && patch->operation != svn_diff_op_added
1769 && patch->operation != svn_diff_op_deleted)
1771 SVN_ERR(parse_git_mode_bits(&patch->new_executable_bit,
1772 &patch->new_symlink_bit,
1775 /* There is no change.. so set the old values to the new values */
1776 patch->old_executable_bit = patch->new_executable_bit;
1777 patch->old_symlink_bit = patch->new_symlink_bit;
1780 /* This function doesn't change the state! */
1781 /* *new_state = *new_state */
1782 return SVN_NO_ERROR;
1785 /* Parse the 'rename from ' line of a git extended unidiff. */
1786 static svn_error_t *
1787 git_move_from(enum parse_state *new_state, char *line, svn_patch_t *patch,
1788 apr_pool_t *result_pool, apr_pool_t *scratch_pool)
1790 SVN_ERR(grab_filename(&patch->old_filename,
1791 line + STRLEN_LITERAL("rename from "),
1792 result_pool, scratch_pool));
1794 *new_state = state_move_from_seen;
1795 return SVN_NO_ERROR;
1798 /* Parse the 'rename to ' line of a git extended unidiff. */
1799 static svn_error_t *
1800 git_move_to(enum parse_state *new_state, char *line, svn_patch_t *patch,
1801 apr_pool_t *result_pool, apr_pool_t *scratch_pool)
1803 SVN_ERR(grab_filename(&patch->new_filename,
1804 line + STRLEN_LITERAL("rename to "),
1805 result_pool, scratch_pool));
1807 patch->operation = svn_diff_op_moved;
1809 *new_state = state_git_tree_seen;
1810 return SVN_NO_ERROR;
1813 /* Parse the 'copy from ' line of a git extended unidiff. */
1814 static svn_error_t *
1815 git_copy_from(enum parse_state *new_state, char *line, svn_patch_t *patch,
1816 apr_pool_t *result_pool, apr_pool_t *scratch_pool)
1818 SVN_ERR(grab_filename(&patch->old_filename,
1819 line + STRLEN_LITERAL("copy from "),
1820 result_pool, scratch_pool));
1822 *new_state = state_copy_from_seen;
1823 return SVN_NO_ERROR;
1826 /* Parse the 'copy to ' line of a git extended unidiff. */
1827 static svn_error_t *
1828 git_copy_to(enum parse_state *new_state, char *line, svn_patch_t *patch,
1829 apr_pool_t *result_pool, apr_pool_t *scratch_pool)
1831 SVN_ERR(grab_filename(&patch->new_filename, line + STRLEN_LITERAL("copy to "),
1832 result_pool, scratch_pool));
1834 patch->operation = svn_diff_op_copied;
1836 *new_state = state_git_tree_seen;
1837 return SVN_NO_ERROR;
1840 /* Parse the 'new file ' line of a git extended unidiff. */
1841 static svn_error_t *
1842 git_new_file(enum parse_state *new_state, char *line, svn_patch_t *patch,
1843 apr_pool_t *result_pool, apr_pool_t *scratch_pool)
1845 SVN_ERR(parse_git_mode_bits(&patch->new_executable_bit,
1846 &patch->new_symlink_bit,
1847 line + STRLEN_LITERAL("new file mode ")));
1849 patch->operation = svn_diff_op_added;
1851 /* Filename already retrieved from diff --git header. */
1853 *new_state = state_git_tree_seen;
1854 return SVN_NO_ERROR;
1857 /* Parse the 'deleted file ' line of a git extended unidiff. */
1858 static svn_error_t *
1859 git_deleted_file(enum parse_state *new_state, char *line, svn_patch_t *patch,
1860 apr_pool_t *result_pool, apr_pool_t *scratch_pool)
1862 SVN_ERR(parse_git_mode_bits(&patch->old_executable_bit,
1863 &patch->old_symlink_bit,
1864 line + STRLEN_LITERAL("deleted file mode ")));
1866 patch->operation = svn_diff_op_deleted;
1868 /* Filename already retrieved from diff --git header. */
1870 *new_state = state_git_tree_seen;
1871 return SVN_NO_ERROR;
1874 /* Parse the 'GIT binary patch' header */
1875 static svn_error_t *
1876 binary_patch_start(enum parse_state *new_state, char *line, svn_patch_t *patch,
1877 apr_pool_t *result_pool, apr_pool_t *scratch_pool)
1879 *new_state = state_binary_patch_found;
1880 return SVN_NO_ERROR;
1884 /* Add a HUNK associated with the property PROP_NAME to PATCH. */
1885 static svn_error_t *
1886 add_property_hunk(svn_patch_t *patch, const char *prop_name,
1887 svn_diff_hunk_t *hunk, svn_diff_operation_kind_t operation,
1888 apr_pool_t *result_pool)
1890 svn_prop_patch_t *prop_patch;
1892 prop_patch = svn_hash_gets(patch->prop_patches, prop_name);
1896 prop_patch = apr_palloc(result_pool, sizeof(svn_prop_patch_t));
1897 prop_patch->name = prop_name;
1898 prop_patch->operation = operation;
1899 prop_patch->hunks = apr_array_make(result_pool, 1,
1900 sizeof(svn_diff_hunk_t *));
1902 svn_hash_sets(patch->prop_patches, prop_name, prop_patch);
1905 APR_ARRAY_PUSH(prop_patch->hunks, svn_diff_hunk_t *) = hunk;
1907 return SVN_NO_ERROR;
/* Handle for a patch file being parsed.  svn_diff_open_patch_file() opens
 * APR_FILE and sets NEXT_PATCH_OFFSET to 0; svn_diff_parse_next_patch()
 * seeks to NEXT_PATCH_OFFSET before parsing and stores the file position
 * it ended at back into it. */
1910 struct svn_patch_file_t
1912 /* The APR file handle to the patch file. */
1913 apr_file_t *apr_file;
1915 /* The file offset at which the next patch is expected. */
1916 apr_off_t next_patch_offset;
1920 svn_diff_open_patch_file(svn_patch_file_t **patch_file,
1921 const char *local_abspath,
1922 apr_pool_t *result_pool)
1924 svn_patch_file_t *p;
1926 p = apr_palloc(result_pool, sizeof(*p));
1927 SVN_ERR(svn_io_file_open(&p->apr_file, local_abspath,
1928 APR_READ | APR_BUFFERED, APR_OS_DEFAULT,
1930 p->next_patch_offset = 0;
1933 return SVN_NO_ERROR;
1936 /* Parse hunks from APR_FILE and store them in PATCH->HUNKS.
1937 * Parsing stops if no valid next hunk can be found.
1938 * If IGNORE_WHITESPACE is TRUE, lines without
1939 * leading spaces will be treated as context lines.
1940 * Allocate results in RESULT_POOL.
1941 * Use SCRATCH_POOL for temporary allocations. */
1942 static svn_error_t *
1943 parse_hunks(svn_patch_t *patch, apr_file_t *apr_file,
1944 svn_boolean_t ignore_whitespace,
1945 apr_pool_t *result_pool, apr_pool_t *scratch_pool)
1947 svn_diff_hunk_t *hunk;
1948 svn_boolean_t is_property;
1949 const char *last_prop_name;
1950 const char *prop_name;
1951 svn_diff_operation_kind_t prop_operation;
1952 apr_pool_t *iterpool;
1954 last_prop_name = NULL;
1956 patch->hunks = apr_array_make(result_pool, 10, sizeof(svn_diff_hunk_t *));
1957 patch->prop_patches = apr_hash_make(result_pool);
1958 iterpool = svn_pool_create(scratch_pool);
1961 svn_pool_clear(iterpool);
1963 SVN_ERR(parse_next_hunk(&hunk, &is_property, &prop_name, &prop_operation,
1964 patch, apr_file, ignore_whitespace, result_pool,
1967 if (hunk && is_property)
1970 prop_name = last_prop_name;
1972 last_prop_name = prop_name;
1974 /* Skip svn:mergeinfo properties.
1975 * Mergeinfo data cannot be represented as a hunk and
1976 * is therefore stored in PATCH itself. */
1977 if (strcmp(prop_name, SVN_PROP_MERGEINFO) == 0)
1980 SVN_ERR(add_property_hunk(patch, prop_name, hunk, prop_operation,
1985 APR_ARRAY_PUSH(patch->hunks, svn_diff_hunk_t *) = hunk;
1986 last_prop_name = NULL;
1991 svn_pool_destroy(iterpool);
1993 return SVN_NO_ERROR;
/* Parse the body that follows a 'GIT binary patch' header in APR_FILE.
 *
 * A svn_diff_binary_patch_t is allocated (zeroed) in RESULT_POOL and
 * PATCH->prop_patches is initialised to an empty hash.  Lines are read one
 * at a time: a line starting with "literal " supplies the expanded size of
 * a blob, and the byte offsets of the blob lines that follow are recorded
 * as the dst_* range first and the src_* range second (tracked with
 * IN_BLOB and IN_SRC).  Anything other than "literal " outside a blob
 * stops parsing, since git deltas are not supported.
 *
 * PATCH->binary_patch is set only when parsing succeeded.  If REVERSE is
 * TRUE and a binary patch was found, the src_* and dst_* offsets and file
 * sizes are swapped.
 *
 * NOTE(review): several short lines of this function (loop header, some
 * branch heads) are not visible in this listing; confirm the exact
 * conditions against the full file.
 *
 * Use SCRATCH_POOL for temporary allocations. */
1996 static svn_error_t *
1997 parse_binary_patch(svn_patch_t *patch, apr_file_t *apr_file,
1998 svn_boolean_t reverse,
1999 apr_pool_t *result_pool, apr_pool_t *scratch_pool)
2001 apr_pool_t *iterpool = svn_pool_create(scratch_pool);
2002 apr_off_t pos, last_line;
2003 svn_stringbuf_t *line;
2004 svn_boolean_t eof = FALSE;
2005 svn_diff_binary_patch_t *bpatch = apr_pcalloc(result_pool, sizeof(*bpatch));
2006 svn_boolean_t in_blob = FALSE;
2007 svn_boolean_t in_src = FALSE;
2009 bpatch->apr_file = apr_file;
2011 patch->prop_patches = apr_hash_make(result_pool);
2013 SVN_ERR(svn_io_file_get_offset(&pos, apr_file, scratch_pool));
2018 SVN_ERR(svn_io_file_readline(apr_file, &line, NULL, &eof, APR_SIZE_MAX,
2019 iterpool, iterpool));
2021 /* Update line offset for next iteration. */
2022 SVN_ERR(svn_io_file_get_offset(&pos, apr_file, iterpool));
2026 char c = line->data[0];
2028 /* 66 = len byte + (52/4*5) chars */
2029 if (((c >= 'A' && c <= 'Z') || (c >= 'a' && c <= 'z'))
2031 && !strchr(line->data, ':')
2032 && !strchr(line->data, ' '))
2034 /* One more blob line */
2036 bpatch->src_end = pos;
2038 bpatch->dst_end = pos;
2040 else if (svn_stringbuf_first_non_whitespace(line) < line->len
2041 && !(in_src && bpatch->src_start < last_line))
2043 break; /* Bad patch */
2047 patch->binary_patch = bpatch; /* SUCCESS! */
2056 else if (starts_with(line->data, "literal "))
2058 apr_uint64_t expanded_size;
2059 svn_error_t *err = svn_cstring_strtoui64(&expanded_size,
2061 0, APR_UINT64_MAX, 10);
2065 svn_error_clear(err);
2071 bpatch->src_start = pos;
2072 bpatch->src_filesize = expanded_size;
2076 bpatch->dst_start = pos;
2077 bpatch->dst_filesize = expanded_size;
2082 break; /* We don't support GIT deltas (yet) */
2084 svn_pool_destroy(iterpool);
2087 /* Rewind to the start of the line just read, so subsequent calls
2088 * don't end up skipping the line. It may contain a patch or hunk header.*/
2089 SVN_ERR(svn_io_file_seek(apr_file, APR_SET, &last_line, scratch_pool));
2091 && ((bpatch->src_end > bpatch->src_start) || !bpatch->src_filesize))
2093 patch->binary_patch = bpatch; /* SUCCESS */
2096 /* Reverse patch if requested */
2097 if (reverse && patch->binary_patch)
2099 apr_off_t tmp_start = bpatch->src_start;
2100 apr_off_t tmp_end = bpatch->src_end;
2101 svn_filesize_t tmp_filesize = bpatch->src_filesize;
2103 bpatch->src_start = bpatch->dst_start;
2104 bpatch->src_end = bpatch->dst_end;
2105 bpatch->src_filesize = bpatch->dst_filesize;
2107 bpatch->dst_start = tmp_start;
2108 bpatch->dst_end = tmp_end;
2109 bpatch->dst_filesize = tmp_filesize;
2112 return SVN_NO_ERROR;
/* Transition table consulted by svn_diff_parse_next_patch() for every line
 * it reads.  Rows are scanned in array order; a row matches when the line
 * starts with its expected input and the parser is in its required state,
 * and the row's function then decides the next state.  The first two rows
 * cover regular unidiffs, the remaining rows git extended diffs. */
2115 /* State machine for the diff header parser.
2116 * Expected Input Required state Function to call */
2117 static struct transition transitions[] =
2119 {"--- ", state_start, diff_minus},
2120 {"+++ ", state_minus_seen, diff_plus},
2122 {"diff --git", state_start, git_start},
2123 {"--- a/", state_git_diff_seen, git_minus},
2124 {"--- a/", state_git_mode_seen, git_minus},
2125 {"--- a/", state_git_tree_seen, git_minus},
2126 {"--- /dev/null", state_git_mode_seen, git_minus},
2127 {"--- /dev/null", state_git_tree_seen, git_minus},
2128 {"+++ b/", state_git_minus_seen, git_plus},
2129 {"+++ /dev/null", state_git_minus_seen, git_plus},
2131 {"old mode ", state_git_diff_seen, git_old_mode},
2132 {"new mode ", state_old_mode_seen, git_new_mode},
2134 {"rename from ", state_git_diff_seen, git_move_from},
2135 {"rename from ", state_git_mode_seen, git_move_from},
2136 {"rename to ", state_move_from_seen, git_move_to},
2138 {"copy from ", state_git_diff_seen, git_copy_from},
2139 {"copy from ", state_git_mode_seen, git_copy_from},
2140 {"copy to ", state_copy_from_seen, git_copy_to},
2142 {"new file ", state_git_diff_seen, git_new_file},
2144 {"deleted file ", state_git_diff_seen, git_deleted_file},
2146 {"index ", state_git_diff_seen, git_index},
2147 {"index ", state_git_tree_seen, git_index},
2148 {"index ", state_git_mode_seen, git_index},
2150 {"GIT binary patch", state_git_diff_seen, binary_patch_start},
2151 {"GIT binary patch", state_git_tree_seen, binary_patch_start},
2152 {"GIT binary patch", state_git_mode_seen, binary_patch_start},
/* Read the next patch from PATCH_FILE, starting at
 * PATCH_FILE->next_patch_offset.
 *
 * Returns SVN_NO_ERROR straight away when the file is already at EOF.
 * Otherwise a svn_patch_t is allocated (zeroed) in RESULT_POOL with all
 * executable and symlink bits set to svn_tristate_unknown, and lines are
 * read and fed through the `transitions' table.  Header parsing is
 * complete once the state is state_unidiff_found, state_git_header_found
 * or state_binary_patch_found, or once a tree-/mode-only git header has
 * been followed by a line that is not a header line; in the latter case,
 * and after an invalid header, the file is rewound to the start of the
 * line just read.
 *
 * If REVERSE is TRUE, the old and new filenames, executable bits and
 * symlink bits are swapped and an added/deleted operation is inverted.
 *
 * A patch lacking either filename is discarded.  Otherwise the binary
 * patch body is parsed when the header ended in state_binary_patch_found,
 * followed by the text and property hunks; IGNORE_WHITESPACE is passed on
 * to parse_hunks().  Finally PATCH_FILE->next_patch_offset is set to the
 * current file position and the text hunks are sorted with
 * compare_hunks().
 *
 * NOTE(review): the statements that store the result in *PATCH_P and that
 * leave the read loop are not visible in this listing; confirm against
 * the full file.
 *
 * Use SCRATCH_POOL for temporary allocations. */
2156 svn_diff_parse_next_patch(svn_patch_t **patch_p,
2157 svn_patch_file_t *patch_file,
2158 svn_boolean_t reverse,
2159 svn_boolean_t ignore_whitespace,
2160 apr_pool_t *result_pool,
2161 apr_pool_t *scratch_pool)
2163 apr_off_t pos, last_line;
2165 svn_boolean_t line_after_tree_header_read = FALSE;
2166 apr_pool_t *iterpool;
2168 enum parse_state state = state_start;
2170 if (apr_file_eof(patch_file->apr_file) == APR_EOF)
2172 /* No more patches here. */
2174 return SVN_NO_ERROR;
2177 patch = apr_pcalloc(result_pool, sizeof(*patch));
2178 patch->old_executable_bit = svn_tristate_unknown;
2179 patch->new_executable_bit = svn_tristate_unknown;
2180 patch->old_symlink_bit = svn_tristate_unknown;
2181 patch->new_symlink_bit = svn_tristate_unknown;
2183 pos = patch_file->next_patch_offset;
2184 SVN_ERR(svn_io_file_seek(patch_file->apr_file, APR_SET, &pos, scratch_pool));
2186 iterpool = svn_pool_create(scratch_pool);
2189 svn_stringbuf_t *line;
2190 svn_boolean_t valid_header_line = FALSE;
2193 svn_pool_clear(iterpool);
2195 /* Remember the current line's offset, and read the line. */
2197 SVN_ERR(svn_io_file_readline(patch_file->apr_file, &line, NULL, &eof,
2198 APR_SIZE_MAX, iterpool, iterpool));
2202 /* Update line offset for next iteration. */
2203 SVN_ERR(svn_io_file_get_offset(&pos, patch_file->apr_file,
2207 /* Run the state machine. */
2208 for (i = 0; i < (sizeof(transitions) / sizeof(transitions[0])); i++)
2210 if (starts_with(line->data, transitions[i].expected_input)
2211 && state == transitions[i].required_state)
2213 SVN_ERR(transitions[i].fn(&state, line->data, patch,
2214 result_pool, iterpool));
2215 valid_header_line = TRUE;
2220 if (state == state_unidiff_found
2221 || state == state_git_header_found
2222 || state == state_binary_patch_found)
2224 /* We have a valid diff header, yay! */
2227 else if ((state == state_git_tree_seen || state == state_git_mode_seen)
2228 && line_after_tree_header_read
2229 && !valid_header_line)
2231 /* We have a valid diff header for a patch with only tree changes.
2232 * Rewind to the start of the line just read, so subsequent calls
2233 * to this function don't end up skipping the line -- it may
2234 * contain a patch. */
2235 SVN_ERR(svn_io_file_seek(patch_file->apr_file, APR_SET, &last_line,
2239 else if (state == state_git_tree_seen
2240 || state == state_git_mode_seen)
2242 line_after_tree_header_read = TRUE;
2244 else if (! valid_header_line && state != state_start
2245 && state != state_git_diff_seen)
2247 /* We've encountered an invalid diff header.
2249 * Rewind to the start of the line just read - it may be a new
2250 * header that begins there. */
2251 SVN_ERR(svn_io_file_seek(patch_file->apr_file, APR_SET, &last_line,
2253 state = state_start;
2259 patch->reverse = reverse;
2263 svn_tristate_t ts_tmp;
2265 temp = patch->old_filename;
2266 patch->old_filename = patch->new_filename;
2267 patch->new_filename = temp;
2269 switch (patch->operation)
2271 case svn_diff_op_added:
2272 patch->operation = svn_diff_op_deleted;
2274 case svn_diff_op_deleted:
2275 patch->operation = svn_diff_op_added;
2278 case svn_diff_op_modified:
2279 break; /* Stays modified. */
2281 case svn_diff_op_copied:
2282 case svn_diff_op_moved:
2283 break; /* Stays copied or moved, just in the other direction. */
2284 case svn_diff_op_unchanged:
2285 break; /* Stays unchanged, of course. */
2288 ts_tmp = patch->old_executable_bit;
2289 patch->old_executable_bit = patch->new_executable_bit;
2290 patch->new_executable_bit = ts_tmp;
2292 ts_tmp = patch->old_symlink_bit;
2293 patch->old_symlink_bit = patch->new_symlink_bit;
2294 patch->new_symlink_bit = ts_tmp;
2297 if (patch->old_filename == NULL || patch->new_filename == NULL)
2299 /* Something went wrong, just discard the result. */
2304 if (state == state_binary_patch_found)
2306 SVN_ERR(parse_binary_patch(patch, patch_file->apr_file, reverse,
2307 result_pool, iterpool));
2308 /* And fall through in property parsing */
2311 SVN_ERR(parse_hunks(patch, patch_file->apr_file, ignore_whitespace,
2312 result_pool, iterpool));
2315 svn_pool_destroy(iterpool);
2317 SVN_ERR(svn_io_file_get_offset(&patch_file->next_patch_offset,
2318 patch_file->apr_file, scratch_pool));
2320 if (patch && patch->hunks)
2322 /* Usually, hunks appear in the patch sorted by their original line
2323 * offset. But just in case they weren't parsed in this order for
2324 * some reason, we sort them so that our caller can assume that hunks
2325 * are sorted as if parsed from a usual patch. */
2326 svn_sort__array(patch->hunks, compare_hunks);
2330 return SVN_NO_ERROR;
2334 svn_diff_close_patch_file(svn_patch_file_t *patch_file,
2335 apr_pool_t *scratch_pool)
2337 return svn_error_trace(svn_io_file_close(patch_file->apr_file,