2 * parse-diff.c: functions for parsing diff files
4 * ====================================================================
5 * Licensed to the Apache Software Foundation (ASF) under one
6 * or more contributor license agreements. See the NOTICE file
7 * distributed with this work for additional information
8 * regarding copyright ownership. The ASF licenses this file
9 * to you under the Apache License, Version 2.0 (the
10 * "License"); you may not use this file except in compliance
11 * with the License. You may obtain a copy of the License at
13 * http://www.apache.org/licenses/LICENSE-2.0
15 * Unless required by applicable law or agreed to in writing,
16 * software distributed under the License is distributed on an
17 * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
18 * KIND, either express or implied. See the License for the
19 * specific language governing permissions and limitations
21 * ====================================================================
29 #include "svn_types.h"
30 #include "svn_error.h"
32 #include "svn_pools.h"
33 #include "svn_props.h"
34 #include "svn_string.h"
36 #include "svn_dirent_uri.h"
39 #include "private/svn_eol_private.h"
40 #include "private/svn_dep_compat.h"
/* Evaluate to non-zero when the C string STR begins with the C string
 * START.  START is expanded twice, so it must be free of side effects. */
#define starts_with(str, start)  \
  (0 == strncmp((str), (start), strlen(start)))
/* Length of the string literal STR without its terminating NUL byte.
 * The result comes from sizeof, so STR must be a literal (or a char
 * array), never a pointer. */
#define STRLEN_LITERAL(str) (sizeof(str) - sizeof(""))
49 /* This struct describes a range within a file, as well as the
50 * current cursor position within the range. All numbers are in bytes. */
51 struct svn_diff__hunk_range {
57 struct svn_diff_hunk_t {
58 /* The patch this hunk belongs to. */
61 /* APR file handle to the patch file this hunk came from. */
64 /* Ranges used to keep track of this hunk's texts positions within
66 struct svn_diff__hunk_range diff_text_range;
67 struct svn_diff__hunk_range original_text_range;
68 struct svn_diff__hunk_range modified_text_range;
70 /* Hunk ranges as they appeared in the patch file.
71 * All numbers are lines, not bytes. */
72 svn_linenum_t original_start;
73 svn_linenum_t original_length;
74 svn_linenum_t modified_start;
75 svn_linenum_t modified_length;
77 /* Number of lines of leading and trailing hunk context. */
78 svn_linenum_t leading_context;
79 svn_linenum_t trailing_context;
83 svn_diff_hunk_reset_diff_text(svn_diff_hunk_t *hunk)
85 hunk->diff_text_range.current = hunk->diff_text_range.start;
89 svn_diff_hunk_reset_original_text(svn_diff_hunk_t *hunk)
91 if (hunk->patch->reverse)
92 hunk->modified_text_range.current = hunk->modified_text_range.start;
94 hunk->original_text_range.current = hunk->original_text_range.start;
98 svn_diff_hunk_reset_modified_text(svn_diff_hunk_t *hunk)
100 if (hunk->patch->reverse)
101 hunk->original_text_range.current = hunk->original_text_range.start;
103 hunk->modified_text_range.current = hunk->modified_text_range.start;
107 svn_diff_hunk_get_original_start(const svn_diff_hunk_t *hunk)
109 return hunk->patch->reverse ? hunk->modified_start : hunk->original_start;
113 svn_diff_hunk_get_original_length(const svn_diff_hunk_t *hunk)
115 return hunk->patch->reverse ? hunk->modified_length : hunk->original_length;
119 svn_diff_hunk_get_modified_start(const svn_diff_hunk_t *hunk)
121 return hunk->patch->reverse ? hunk->original_start : hunk->modified_start;
125 svn_diff_hunk_get_modified_length(const svn_diff_hunk_t *hunk)
127 return hunk->patch->reverse ? hunk->original_length : hunk->modified_length;
131 svn_diff_hunk_get_leading_context(const svn_diff_hunk_t *hunk)
133 return hunk->leading_context;
137 svn_diff_hunk_get_trailing_context(const svn_diff_hunk_t *hunk)
139 return hunk->trailing_context;
142 /* Try to parse a non-negative decimal number, no larger than
143 * SVN_LINENUM_MAX_VALUE, from the string NUMBER. Return the parsed
144 * number in *OFFSET, and return TRUE if parsing was successful. */
146 parse_offset(svn_linenum_t *offset, const char *number)
151 err = svn_cstring_strtoui64(&val, number, 0, SVN_LINENUM_MAX_VALUE, 10);
154 svn_error_clear(err);
158 *offset = (svn_linenum_t)val;
163 /* Try to parse a hunk range specification from the string RANGE.
164 * Return parsed information in *START and *LENGTH, and return TRUE
165 * if the range parsed correctly. Note: This function may modify the
166 * input value RANGE. */
168 parse_range(svn_linenum_t *start, svn_linenum_t *length, char *range)
175 comma = strstr(range, ",");
178 if (strlen(comma + 1) > 0)
180 /* Try to parse the length. */
181 if (! parse_offset(length, comma + 1))
184 /* Snip off the end of the string,
185 * so we can comfortably parse the line
186 * number the hunk starts at. */
190 /* A comma but no length? */
198 /* Try to parse the line number the hunk starts at. */
199 return parse_offset(start, range);
202 /* Try to parse a hunk header in string HEADER, putting parsed information
203 * into HUNK. Return TRUE if the header parsed correctly. ATAT is the
204 * character string used to delimit the hunk header.
205 * Do all allocations in POOL. */
207 parse_hunk_header(const char *header, svn_diff_hunk_t *hunk,
208 const char *atat, apr_pool_t *pool)
212 svn_stringbuf_t *range;
214 p = header + strlen(atat);
222 /* OK, this may be worth allocating some memory for... */
223 range = svn_stringbuf_create_ensure(31, pool);
225 while (*p && *p != ' ')
234 svn_stringbuf_appendbytes(range, start, p - start);
236 /* Try to parse the first range. */
237 if (! parse_range(&hunk->original_start, &hunk->original_length, range->data))
240 /* Clear the stringbuf so we can reuse it for the second range. */
241 svn_stringbuf_setempty(range);
246 /* OK, this may be worth copying... */
248 while (*p && *p != ' ')
256 svn_stringbuf_appendbytes(range, start, p - start);
258 /* Check for trailing @@ */
260 if (! starts_with(p, atat))
263 /* There may be stuff like C-function names after the trailing @@,
264 * but we ignore that. */
266 /* Try to parse the second range. */
267 if (! parse_range(&hunk->modified_start, &hunk->modified_length, range->data))
270 /* Hunk header is good. */
274 /* Read a line of original or modified hunk text from the specified
275 * RANGE within FILE. FILE is expected to contain unidiff text.
276 * Leading unidiff symbols ('+', '-', and ' ') are removed from the line.
277 * Any lines commencing with the VERBOTEN character are discarded.
278 * VERBOTEN should be '+' or '-', depending on which form of hunk text
281 * All other parameters are as in svn_diff_hunk_readline_original_text()
282 * and svn_diff_hunk_readline_modified_text().
285 hunk_readline_original_or_modified(apr_file_t *file,
286 struct svn_diff__hunk_range *range,
287 svn_stringbuf_t **stringbuf,
291 apr_pool_t *result_pool,
292 apr_pool_t *scratch_pool)
295 svn_boolean_t filtered;
297 svn_stringbuf_t *str;
299 if (range->current >= range->end)
301 /* We're past the range. Indicate that no bytes can be read. */
305 *stringbuf = svn_stringbuf_create_empty(result_pool);
310 SVN_ERR(svn_io_file_seek(file, APR_CUR, &pos, scratch_pool));
311 SVN_ERR(svn_io_file_seek(file, APR_SET, &range->current, scratch_pool));
314 max_len = range->end - range->current;
315 SVN_ERR(svn_io_file_readline(file, &str, eol, eof, max_len,
316 result_pool, scratch_pool));
318 SVN_ERR(svn_io_file_seek(file, APR_CUR, &range->current, scratch_pool));
319 filtered = (str->data[0] == verboten || str->data[0] == '\\');
321 while (filtered && ! *eof);
325 /* EOF, return an empty string. */
326 *stringbuf = svn_stringbuf_create_ensure(0, result_pool);
328 else if (str->data[0] == '+' || str->data[0] == '-' || str->data[0] == ' ')
330 /* Shave off leading unidiff symbols. */
331 *stringbuf = svn_stringbuf_create(str->data + 1, result_pool);
335 /* Return the line as-is. */
336 *stringbuf = svn_stringbuf_dup(str, result_pool);
339 SVN_ERR(svn_io_file_seek(file, APR_SET, &pos, scratch_pool));
345 svn_diff_hunk_readline_original_text(svn_diff_hunk_t *hunk,
346 svn_stringbuf_t **stringbuf,
349 apr_pool_t *result_pool,
350 apr_pool_t *scratch_pool)
352 return svn_error_trace(
353 hunk_readline_original_or_modified(hunk->apr_file,
354 hunk->patch->reverse ?
355 &hunk->modified_text_range :
356 &hunk->original_text_range,
358 hunk->patch->reverse ? '-' : '+',
359 result_pool, scratch_pool));
363 svn_diff_hunk_readline_modified_text(svn_diff_hunk_t *hunk,
364 svn_stringbuf_t **stringbuf,
367 apr_pool_t *result_pool,
368 apr_pool_t *scratch_pool)
370 return svn_error_trace(
371 hunk_readline_original_or_modified(hunk->apr_file,
372 hunk->patch->reverse ?
373 &hunk->original_text_range :
374 &hunk->modified_text_range,
376 hunk->patch->reverse ? '+' : '-',
377 result_pool, scratch_pool));
381 svn_diff_hunk_readline_diff_text(svn_diff_hunk_t *hunk,
382 svn_stringbuf_t **stringbuf,
385 apr_pool_t *result_pool,
386 apr_pool_t *scratch_pool)
388 svn_diff_hunk_t dummy;
389 svn_stringbuf_t *line;
393 if (hunk->diff_text_range.current >= hunk->diff_text_range.end)
395 /* We're past the range. Indicate that no bytes can be read. */
399 *stringbuf = svn_stringbuf_create_empty(result_pool);
404 SVN_ERR(svn_io_file_seek(hunk->apr_file, APR_CUR, &pos, scratch_pool));
405 SVN_ERR(svn_io_file_seek(hunk->apr_file, APR_SET,
406 &hunk->diff_text_range.current, scratch_pool));
407 max_len = hunk->diff_text_range.end - hunk->diff_text_range.current;
408 SVN_ERR(svn_io_file_readline(hunk->apr_file, &line, eol, eof, max_len,
411 hunk->diff_text_range.current = 0;
412 SVN_ERR(svn_io_file_seek(hunk->apr_file, APR_CUR,
413 &hunk->diff_text_range.current, scratch_pool));
414 SVN_ERR(svn_io_file_seek(hunk->apr_file, APR_SET, &pos, scratch_pool));
416 if (hunk->patch->reverse)
418 if (parse_hunk_header(line->data, &dummy, "@@", scratch_pool))
420 /* Line is a hunk header, reverse it. */
421 line = svn_stringbuf_createf(result_pool,
422 "@@ -%lu,%lu +%lu,%lu @@",
423 hunk->modified_start,
424 hunk->modified_length,
425 hunk->original_start,
426 hunk->original_length);
428 else if (parse_hunk_header(line->data, &dummy, "##", scratch_pool))
430 /* Line is a hunk header, reverse it. */
431 line = svn_stringbuf_createf(result_pool,
432 "## -%lu,%lu +%lu,%lu ##",
433 hunk->modified_start,
434 hunk->modified_length,
435 hunk->original_start,
436 hunk->original_length);
440 if (line->data[0] == '+')
442 else if (line->data[0] == '-')
452 /* Parse *PROP_NAME from HEADER as the part after the INDICATOR line.
453 * Allocate *PROP_NAME in RESULT_POOL.
454 * Set *PROP_NAME to NULL if no valid property name was found. */
456 parse_prop_name(const char **prop_name, const char *header,
457 const char *indicator, apr_pool_t *result_pool)
459 SVN_ERR(svn_utf_cstring_to_utf8(prop_name,
460 header + strlen(indicator),
462 if (**prop_name == '\0')
464 else if (! svn_prop_name_is_valid(*prop_name))
466 svn_stringbuf_t *buf = svn_stringbuf_create(*prop_name, result_pool);
467 svn_stringbuf_strip_whitespace(buf);
468 *prop_name = (svn_prop_name_is_valid(buf->data) ? buf->data : NULL);
474 /* Return the next *HUNK from a PATCH in APR_FILE.
475 * If no hunk can be found, set *HUNK to NULL.
476 * Set IS_PROPERTY to TRUE if we have a property hunk. If the returned HUNK
477 * is the first belonging to a certain property, then PROP_NAME and
478 * PROP_OPERATION will be set too. If we have a text hunk, PROP_NAME will be
479 * NULL. If IGNORE_WHITESPACE is TRUE, lines without leading spaces will be
480 * treated as context lines. Allocate results in RESULT_POOL.
481 * Use SCRATCH_POOL for all other allocations. */
483 parse_next_hunk(svn_diff_hunk_t **hunk,
484 svn_boolean_t *is_property,
485 const char **prop_name,
486 svn_diff_operation_kind_t *prop_operation,
488 apr_file_t *apr_file,
489 svn_boolean_t ignore_whitespace,
490 apr_pool_t *result_pool,
491 apr_pool_t *scratch_pool)
493 static const char * const minus = "--- ";
494 static const char * const text_atat = "@@";
495 static const char * const prop_atat = "##";
496 svn_stringbuf_t *line;
497 svn_boolean_t eof, in_hunk, hunk_seen;
498 apr_off_t pos, last_line;
499 apr_off_t start, end;
500 apr_off_t original_end;
501 apr_off_t modified_end;
502 svn_linenum_t original_lines;
503 svn_linenum_t modified_lines;
504 svn_linenum_t leading_context;
505 svn_linenum_t trailing_context;
506 svn_boolean_t changed_line_seen;
513 apr_pool_t *iterpool;
515 *prop_operation = svn_diff_op_unchanged;
517 /* We only set this if we have a property hunk header. */
519 *is_property = FALSE;
521 if (apr_file_eof(apr_file) == APR_EOF)
523 /* No more hunks here. */
531 trailing_context = 0;
532 changed_line_seen = FALSE;
535 *hunk = apr_pcalloc(result_pool, sizeof(**hunk));
537 /* Get current seek position -- APR has no ftell() :( */
539 SVN_ERR(svn_io_file_seek(apr_file, APR_CUR, &pos, scratch_pool));
541 /* Start out assuming noise. */
542 last_line_type = noise_line;
544 iterpool = svn_pool_create(scratch_pool);
548 svn_pool_clear(iterpool);
550 /* Remember the current line's offset, and read the line. */
552 SVN_ERR(svn_io_file_readline(apr_file, &line, NULL, &eof, APR_SIZE_MAX,
553 iterpool, iterpool));
555 /* Update line offset for next iteration. */
557 SVN_ERR(svn_io_file_seek(apr_file, APR_CUR, &pos, iterpool));
559 /* Lines starting with a backslash indicate a missing EOL:
560 * "\ No newline at end of file" or "end of property". */
561 if (line->data[0] == '\\')
568 apr_off_t hunk_text_end;
570 /* Comment terminates the hunk text and says the hunk text
571 * has no trailing EOL. Snip off trailing EOL which is part
572 * of the patch file but not part of the hunk text. */
574 SVN_ERR(svn_io_file_seek(apr_file, APR_SET, &off, iterpool));
575 len = sizeof(eolbuf);
576 SVN_ERR(svn_io_file_read_full2(apr_file, eolbuf, len, &len,
578 if (eolbuf[0] == '\r' && eolbuf[1] == '\n')
579 hunk_text_end = last_line - 2;
580 else if (eolbuf[1] == '\n' || eolbuf[1] == '\r')
581 hunk_text_end = last_line - 1;
583 hunk_text_end = last_line;
585 if (last_line_type == original_line && original_end == 0)
586 original_end = hunk_text_end;
587 else if (last_line_type == modified_line && modified_end == 0)
588 modified_end = hunk_text_end;
589 else if (last_line_type == context_line)
591 if (original_end == 0)
592 original_end = hunk_text_end;
593 if (modified_end == 0)
594 modified_end = hunk_text_end;
597 SVN_ERR(svn_io_file_seek(apr_file, APR_SET, &pos, iterpool));
606 static const char add = '+';
607 static const char del = '-';
611 /* We're reading the first line of the hunk, so the start
612 * of the line just read is the hunk text's byte offset. */
617 if (original_lines > 0 && modified_lines > 0 &&
619 /* Tolerate chopped leading spaces on empty lines. */
620 || (! eof && line->len == 0)
621 /* Maybe tolerate chopped leading spaces on non-empty lines. */
622 || (ignore_whitespace && c != del && c != add)))
624 /* It's a "context" line in the hunk. */
628 if (changed_line_seen)
632 last_line_type = context_line;
634 else if (original_lines > 0 && c == del)
636 /* It's a "deleted" line in the hunk. */
638 changed_line_seen = TRUE;
640 /* A hunk may have context in the middle. We only want
641 trailing lines of context. */
642 if (trailing_context > 0)
643 trailing_context = 0;
646 last_line_type = original_line;
648 else if (modified_lines > 0 && c == add)
650 /* It's an "added" line in the hunk. */
652 changed_line_seen = TRUE;
654 /* A hunk may have context in the middle. We only want
655 trailing lines of context. */
656 if (trailing_context > 0)
657 trailing_context = 0;
660 last_line_type = modified_line;
666 /* The hunk ends at EOF. */
671 /* The start of the current line marks the first byte
672 * after the hunk text. */
676 if (original_end == 0)
678 if (modified_end == 0)
680 break; /* Hunk was empty or has been read. */
685 if (starts_with(line->data, text_atat))
687 /* Looks like we have a hunk header, try to rip it apart. */
688 in_hunk = parse_hunk_header(line->data, *hunk, text_atat,
692 original_lines = (*hunk)->original_length;
693 modified_lines = (*hunk)->modified_length;
694 *is_property = FALSE;
697 else if (starts_with(line->data, prop_atat))
699 /* Looks like we have a property hunk header, try to rip it
701 in_hunk = parse_hunk_header(line->data, *hunk, prop_atat,
705 original_lines = (*hunk)->original_length;
706 modified_lines = (*hunk)->modified_length;
710 else if (starts_with(line->data, "Added: "))
712 SVN_ERR(parse_prop_name(prop_name, line->data, "Added: ",
715 *prop_operation = svn_diff_op_added;
717 else if (starts_with(line->data, "Deleted: "))
719 SVN_ERR(parse_prop_name(prop_name, line->data, "Deleted: ",
722 *prop_operation = svn_diff_op_deleted;
724 else if (starts_with(line->data, "Modified: "))
726 SVN_ERR(parse_prop_name(prop_name, line->data, "Modified: ",
729 *prop_operation = svn_diff_op_modified;
731 else if (starts_with(line->data, minus)
732 || starts_with(line->data, "diff --git "))
733 /* This could be a header of another patch. Bail out. */
737 /* Check for the line length since a file may not have a newline at the
738 * end and we depend upon the last line to be an empty one. */
739 while (! eof || line->len > 0);
740 svn_pool_destroy(iterpool);
743 /* Rewind to the start of the line just read, so subsequent calls
744 * to this function or svn_diff_parse_next_patch() don't end
745 * up skipping the line -- it may contain a patch or hunk header. */
746 SVN_ERR(svn_io_file_seek(apr_file, APR_SET, &last_line, scratch_pool));
748 if (hunk_seen && start < end)
750 (*hunk)->patch = patch;
751 (*hunk)->apr_file = apr_file;
752 (*hunk)->leading_context = leading_context;
753 (*hunk)->trailing_context = trailing_context;
754 (*hunk)->diff_text_range.start = start;
755 (*hunk)->diff_text_range.current = start;
756 (*hunk)->diff_text_range.end = end;
757 (*hunk)->original_text_range.start = start;
758 (*hunk)->original_text_range.current = start;
759 (*hunk)->original_text_range.end = original_end;
760 (*hunk)->modified_text_range.start = start;
761 (*hunk)->modified_text_range.current = start;
762 (*hunk)->modified_text_range.end = modified_end;
765 /* Something went wrong, just discard the result. */
771 /* Compare function for sorting hunks after parsing.
772 * We sort hunks by their original line offset. */
774 compare_hunks(const void *a, const void *b)
776 const svn_diff_hunk_t *ha = *((const svn_diff_hunk_t *const *)a);
777 const svn_diff_hunk_t *hb = *((const svn_diff_hunk_t *const *)b);
779 if (ha->original_start < hb->original_start)
781 if (ha->original_start > hb->original_start)
786 /* Possible states of the diff header parser. */
789 state_start, /* initial */
790 state_git_diff_seen, /* diff --git */
791 state_git_tree_seen, /* a tree operation, rather than a content change */
792 state_git_minus_seen, /* --- /dev/null; or --- a/ */
793 state_git_plus_seen, /* +++ /dev/null; or +++ b/ */
794 state_move_from_seen, /* rename from foo.c */
795 state_copy_from_seen, /* copy from foo.c */
796 state_minus_seen, /* --- foo.c */
797 state_unidiff_found, /* valid start of a regular unidiff header */
798 state_git_header_found /* valid start of a --git diff header */
801 /* Data type describing a valid state transition of the parser. */
804 const char *expected_input;
805 enum parse_state required_state;
807 /* A callback called upon each parser state transition. */
808 svn_error_t *(*fn)(enum parse_state *new_state, char *input,
809 svn_patch_t *patch, apr_pool_t *result_pool,
810 apr_pool_t *scratch_pool);
813 /* UTF-8 encode and canonicalize the content of LINE as FILE_NAME. */
815 grab_filename(const char **file_name, const char *line, apr_pool_t *result_pool,
816 apr_pool_t *scratch_pool)
818 const char *utf8_path;
819 const char *canon_path;
821 /* Grab the filename and encode it in UTF-8. */
822 /* TODO: Allow specifying the patch file's encoding.
823 * For now, we assume its encoding is native. */
824 /* ### This can fail if the filename cannot be represented in the current
825 * ### locale's encoding. */
826 SVN_ERR(svn_utf_cstring_to_utf8(&utf8_path,
830 /* Canonicalize the path name. */
831 canon_path = svn_dirent_canonicalize(utf8_path, scratch_pool);
833 *file_name = apr_pstrdup(result_pool, canon_path);
838 /* Parse the '--- ' line of a regular unidiff. */
840 diff_minus(enum parse_state *new_state, char *line, svn_patch_t *patch,
841 apr_pool_t *result_pool, apr_pool_t *scratch_pool)
843 /* If we can find a tab, it separates the filename from
844 * the rest of the line which we can discard. */
845 char *tab = strchr(line, '\t');
849 SVN_ERR(grab_filename(&patch->old_filename, line + STRLEN_LITERAL("--- "),
850 result_pool, scratch_pool));
852 *new_state = state_minus_seen;
857 /* Parse the '+++ ' line of a regular unidiff. */
859 diff_plus(enum parse_state *new_state, char *line, svn_patch_t *patch,
860 apr_pool_t *result_pool, apr_pool_t *scratch_pool)
862 /* If we can find a tab, it separates the filename from
863 * the rest of the line which we can discard. */
864 char *tab = strchr(line, '\t');
868 SVN_ERR(grab_filename(&patch->new_filename, line + STRLEN_LITERAL("+++ "),
869 result_pool, scratch_pool));
871 *new_state = state_unidiff_found;
876 /* Parse the first line of a git extended unidiff. */
878 git_start(enum parse_state *new_state, char *line, svn_patch_t *patch,
879 apr_pool_t *result_pool, apr_pool_t *scratch_pool)
881 const char *old_path_start;
883 const char *new_path_start;
884 const char *new_path_end;
885 char *new_path_marker;
886 const char *old_path_marker;
888 /* ### Add handling of escaped paths
889 * http://www.kernel.org/pub/software/scm/git/docs/git-diff.html:
891 * TAB, LF, double quote and backslash characters in pathnames are
892 * represented as \t, \n, \" and \\, respectively. If there is need for
893 * such substitution then the whole pathname is put in double quotes.
896 /* Our line should look like this: 'diff --git a/path b/path'.
898 * If we find any deviations from that format, we return with state reset
901 old_path_marker = strstr(line, " a/");
903 if (! old_path_marker)
905 *new_state = state_start;
909 if (! *(old_path_marker + 3))
911 *new_state = state_start;
915 new_path_marker = strstr(old_path_marker, " b/");
917 if (! new_path_marker)
919 *new_state = state_start;
923 if (! *(new_path_marker + 3))
925 *new_state = state_start;
929 /* By now, we know that we have a line of the form 'diff --git a/.+ b/.+'.
930 * We only need the filenames when we have deleted or added empty
931 * files. In those cases the old_path and new_path are identical on the
932 * 'diff --git' line. For all other cases we fetch the filenames from
933 * other header lines. */
934 old_path_start = line + STRLEN_LITERAL("diff --git a/");
935 new_path_end = line + strlen(line);
936 new_path_start = old_path_start;
943 new_path_marker = strstr(new_path_start, " b/");
945 /* No new path marker, bail out. */
946 if (! new_path_marker)
949 old_path_end = new_path_marker;
950 new_path_start = new_path_marker + STRLEN_LITERAL(" b/");
952 /* No path after the marker. */
953 if (! *new_path_start)
956 len_old = old_path_end - old_path_start;
957 len_new = new_path_end - new_path_start;
959 /* Are the paths before and after the " b/" marker the same? */
960 if (len_old == len_new
961 && ! strncmp(old_path_start, new_path_start, len_old))
963 *old_path_end = '\0';
964 SVN_ERR(grab_filename(&patch->old_filename, old_path_start,
965 result_pool, scratch_pool));
967 SVN_ERR(grab_filename(&patch->new_filename, new_path_start,
968 result_pool, scratch_pool));
973 /* We assume that the path is only modified until we've found a 'tree'
975 patch->operation = svn_diff_op_modified;
977 *new_state = state_git_diff_seen;
981 /* Parse the '--- ' line of a git extended unidiff. */
983 git_minus(enum parse_state *new_state, char *line, svn_patch_t *patch,
984 apr_pool_t *result_pool, apr_pool_t *scratch_pool)
986 /* If we can find a tab, it separates the filename from
987 * the rest of the line which we can discard. */
988 char *tab = strchr(line, '\t');
992 if (starts_with(line, "--- /dev/null"))
993 SVN_ERR(grab_filename(&patch->old_filename, "/dev/null",
994 result_pool, scratch_pool));
996 SVN_ERR(grab_filename(&patch->old_filename, line + STRLEN_LITERAL("--- a/"),
997 result_pool, scratch_pool));
999 *new_state = state_git_minus_seen;
1000 return SVN_NO_ERROR;
1003 /* Parse the '+++ ' line of a git extended unidiff. */
1004 static svn_error_t *
1005 git_plus(enum parse_state *new_state, char *line, svn_patch_t *patch,
1006 apr_pool_t *result_pool, apr_pool_t *scratch_pool)
1008 /* If we can find a tab, it separates the filename from
1009 * the rest of the line which we can discard. */
1010 char *tab = strchr(line, '\t');
1014 if (starts_with(line, "+++ /dev/null"))
1015 SVN_ERR(grab_filename(&patch->new_filename, "/dev/null",
1016 result_pool, scratch_pool));
1018 SVN_ERR(grab_filename(&patch->new_filename, line + STRLEN_LITERAL("+++ b/"),
1019 result_pool, scratch_pool));
1021 *new_state = state_git_header_found;
1022 return SVN_NO_ERROR;
1025 /* Parse the 'rename from ' line of a git extended unidiff. */
1026 static svn_error_t *
1027 git_move_from(enum parse_state *new_state, char *line, svn_patch_t *patch,
1028 apr_pool_t *result_pool, apr_pool_t *scratch_pool)
1030 SVN_ERR(grab_filename(&patch->old_filename,
1031 line + STRLEN_LITERAL("rename from "),
1032 result_pool, scratch_pool));
1034 *new_state = state_move_from_seen;
1035 return SVN_NO_ERROR;
1038 /* Parse the 'rename to ' line of a git extended unidiff. */
1039 static svn_error_t *
1040 git_move_to(enum parse_state *new_state, char *line, svn_patch_t *patch,
1041 apr_pool_t *result_pool, apr_pool_t *scratch_pool)
1043 SVN_ERR(grab_filename(&patch->new_filename,
1044 line + STRLEN_LITERAL("rename to "),
1045 result_pool, scratch_pool));
1047 patch->operation = svn_diff_op_moved;
1049 *new_state = state_git_tree_seen;
1050 return SVN_NO_ERROR;
1053 /* Parse the 'copy from ' line of a git extended unidiff. */
1054 static svn_error_t *
1055 git_copy_from(enum parse_state *new_state, char *line, svn_patch_t *patch,
1056 apr_pool_t *result_pool, apr_pool_t *scratch_pool)
1058 SVN_ERR(grab_filename(&patch->old_filename,
1059 line + STRLEN_LITERAL("copy from "),
1060 result_pool, scratch_pool));
1062 *new_state = state_copy_from_seen;
1063 return SVN_NO_ERROR;
1066 /* Parse the 'copy to ' line of a git extended unidiff. */
1067 static svn_error_t *
1068 git_copy_to(enum parse_state *new_state, char *line, svn_patch_t *patch,
1069 apr_pool_t *result_pool, apr_pool_t *scratch_pool)
1071 SVN_ERR(grab_filename(&patch->new_filename, line + STRLEN_LITERAL("copy to "),
1072 result_pool, scratch_pool));
1074 patch->operation = svn_diff_op_copied;
1076 *new_state = state_git_tree_seen;
1077 return SVN_NO_ERROR;
1080 /* Parse the 'new file ' line of a git extended unidiff. */
1081 static svn_error_t *
1082 git_new_file(enum parse_state *new_state, char *line, svn_patch_t *patch,
1083 apr_pool_t *result_pool, apr_pool_t *scratch_pool)
1085 patch->operation = svn_diff_op_added;
1087 /* Filename already retrieved from diff --git header. */
1089 *new_state = state_git_tree_seen;
1090 return SVN_NO_ERROR;
1093 /* Parse the 'deleted file ' line of a git extended unidiff. */
1094 static svn_error_t *
1095 git_deleted_file(enum parse_state *new_state, char *line, svn_patch_t *patch,
1096 apr_pool_t *result_pool, apr_pool_t *scratch_pool)
1098 patch->operation = svn_diff_op_deleted;
1100 /* Filename already retrieved from diff --git header. */
1102 *new_state = state_git_tree_seen;
1103 return SVN_NO_ERROR;
1106 /* Add a HUNK associated with the property PROP_NAME to PATCH. */
1107 static svn_error_t *
1108 add_property_hunk(svn_patch_t *patch, const char *prop_name,
1109 svn_diff_hunk_t *hunk, svn_diff_operation_kind_t operation,
1110 apr_pool_t *result_pool)
1112 svn_prop_patch_t *prop_patch;
1114 prop_patch = svn_hash_gets(patch->prop_patches, prop_name);
1118 prop_patch = apr_palloc(result_pool, sizeof(svn_prop_patch_t));
1119 prop_patch->name = prop_name;
1120 prop_patch->operation = operation;
1121 prop_patch->hunks = apr_array_make(result_pool, 1,
1122 sizeof(svn_diff_hunk_t *));
1124 svn_hash_sets(patch->prop_patches, prop_name, prop_patch);
1127 APR_ARRAY_PUSH(prop_patch->hunks, svn_diff_hunk_t *) = hunk;
1129 return SVN_NO_ERROR;
1132 struct svn_patch_file_t
1134 /* The APR file handle to the patch file. */
1135 apr_file_t *apr_file;
1137 /* The file offset at which the next patch is expected. */
1138 apr_off_t next_patch_offset;
1142 svn_diff_open_patch_file(svn_patch_file_t **patch_file,
1143 const char *local_abspath,
1144 apr_pool_t *result_pool)
1146 svn_patch_file_t *p;
1148 p = apr_palloc(result_pool, sizeof(*p));
1149 SVN_ERR(svn_io_file_open(&p->apr_file, local_abspath,
1150 APR_READ | APR_BUFFERED, APR_OS_DEFAULT,
1152 p->next_patch_offset = 0;
1155 return SVN_NO_ERROR;
1158 /* Parse hunks from APR_FILE and store them in PATCH->HUNKS.
1159 * Parsing stops if no valid next hunk can be found.
1160 * If IGNORE_WHITESPACE is TRUE, lines without
1161 * leading spaces will be treated as context lines.
1162 * Allocate results in RESULT_POOL.
1163 * Use SCRATCH_POOL for temporary allocations. */
1164 static svn_error_t *
1165 parse_hunks(svn_patch_t *patch, apr_file_t *apr_file,
1166 svn_boolean_t ignore_whitespace,
1167 apr_pool_t *result_pool, apr_pool_t *scratch_pool)
1169 svn_diff_hunk_t *hunk;
1170 svn_boolean_t is_property;
1171 const char *last_prop_name;
1172 const char *prop_name;
1173 svn_diff_operation_kind_t prop_operation;
1174 apr_pool_t *iterpool;
1176 last_prop_name = NULL;
1178 patch->hunks = apr_array_make(result_pool, 10, sizeof(svn_diff_hunk_t *));
1179 patch->prop_patches = apr_hash_make(result_pool);
1180 iterpool = svn_pool_create(scratch_pool);
1183 svn_pool_clear(iterpool);
1185 SVN_ERR(parse_next_hunk(&hunk, &is_property, &prop_name, &prop_operation,
1186 patch, apr_file, ignore_whitespace, result_pool,
1189 if (hunk && is_property)
1192 prop_name = last_prop_name;
1194 last_prop_name = prop_name;
1195 SVN_ERR(add_property_hunk(patch, prop_name, hunk, prop_operation,
1200 APR_ARRAY_PUSH(patch->hunks, svn_diff_hunk_t *) = hunk;
1201 last_prop_name = NULL;
1206 svn_pool_destroy(iterpool);
1208 return SVN_NO_ERROR;
1211 /* State machine for the diff header parser.
1212 * Expected Input Required state Function to call */
1213 static struct transition transitions[] =
1215 {"--- ", state_start, diff_minus},
1216 {"+++ ", state_minus_seen, diff_plus},
1217 {"diff --git", state_start, git_start},
1218 {"--- a/", state_git_diff_seen, git_minus},
1219 {"--- a/", state_git_tree_seen, git_minus},
1220 {"--- /dev/null", state_git_tree_seen, git_minus},
1221 {"+++ b/", state_git_minus_seen, git_plus},
1222 {"+++ /dev/null", state_git_minus_seen, git_plus},
1223 {"rename from ", state_git_diff_seen, git_move_from},
1224 {"rename to ", state_move_from_seen, git_move_to},
1225 {"copy from ", state_git_diff_seen, git_copy_from},
1226 {"copy to ", state_copy_from_seen, git_copy_to},
1227 {"new file ", state_git_diff_seen, git_new_file},
1228 {"deleted file ", state_git_diff_seen, git_deleted_file},
1232 svn_diff_parse_next_patch(svn_patch_t **patch,
1233 svn_patch_file_t *patch_file,
1234 svn_boolean_t reverse,
1235 svn_boolean_t ignore_whitespace,
1236 apr_pool_t *result_pool,
1237 apr_pool_t *scratch_pool)
1239 apr_off_t pos, last_line;
1241 svn_boolean_t line_after_tree_header_read = FALSE;
1242 apr_pool_t *iterpool;
1243 enum parse_state state = state_start;
1245 if (apr_file_eof(patch_file->apr_file) == APR_EOF)
1247 /* No more patches here. */
1249 return SVN_NO_ERROR;
1252 *patch = apr_pcalloc(result_pool, sizeof(**patch));
1254 pos = patch_file->next_patch_offset;
1255 SVN_ERR(svn_io_file_seek(patch_file->apr_file, APR_SET, &pos, scratch_pool));
1257 iterpool = svn_pool_create(scratch_pool);
1260 svn_stringbuf_t *line;
1261 svn_boolean_t valid_header_line = FALSE;
1264 svn_pool_clear(iterpool);
1266 /* Remember the current line's offset, and read the line. */
1268 SVN_ERR(svn_io_file_readline(patch_file->apr_file, &line, NULL, &eof,
1269 APR_SIZE_MAX, iterpool, iterpool));
1273 /* Update line offset for next iteration. */
1275 SVN_ERR(svn_io_file_seek(patch_file->apr_file, APR_CUR, &pos,
1279 /* Run the state machine. */
1280 for (i = 0; i < (sizeof(transitions) / sizeof(transitions[0])); i++)
1282 if (starts_with(line->data, transitions[i].expected_input)
1283 && state == transitions[i].required_state)
1285 SVN_ERR(transitions[i].fn(&state, line->data, *patch,
1286 result_pool, iterpool));
1287 valid_header_line = TRUE;
1292 if (state == state_unidiff_found || state == state_git_header_found)
1294 /* We have a valid diff header, yay! */
1297 else if (state == state_git_tree_seen && line_after_tree_header_read)
1299 /* git patches can contain an index line after the file mode line */
1300 if (!starts_with(line->data, "index "))
1302 /* We have a valid diff header for a patch with only tree changes.
1303 * Rewind to the start of the line just read, so subsequent calls
1304 * to this function don't end up skipping the line -- it may
1305 * contain a patch. */
1306 SVN_ERR(svn_io_file_seek(patch_file->apr_file, APR_SET, &last_line,
1311 else if (state == state_git_tree_seen)
1313 line_after_tree_header_read = TRUE;
1315 else if (! valid_header_line && state != state_start
1316 && state != state_git_diff_seen
1317 && !starts_with(line->data, "index "))
1319 /* We've encountered an invalid diff header.
1321 * Rewind to the start of the line just read - it may be a new
1322 * header that begins there. */
1323 SVN_ERR(svn_io_file_seek(patch_file->apr_file, APR_SET, &last_line,
1325 state = state_start;
1331 (*patch)->reverse = reverse;
1335 temp = (*patch)->old_filename;
1336 (*patch)->old_filename = (*patch)->new_filename;
1337 (*patch)->new_filename = temp;
1340 if ((*patch)->old_filename == NULL || (*patch)->new_filename == NULL)
1342 /* Something went wrong, just discard the result. */
1346 SVN_ERR(parse_hunks(*patch, patch_file->apr_file, ignore_whitespace,
1347 result_pool, iterpool));
1349 svn_pool_destroy(iterpool);
1351 patch_file->next_patch_offset = 0;
1352 SVN_ERR(svn_io_file_seek(patch_file->apr_file, APR_CUR,
1353 &patch_file->next_patch_offset, scratch_pool));
1357 /* Usually, hunks appear in the patch sorted by their original line
1358 * offset. But just in case they weren't parsed in this order for
1359 * some reason, we sort them so that our caller can assume that hunks
1360 * are sorted as if parsed from a usual patch. */
1361 qsort((*patch)->hunks->elts, (*patch)->hunks->nelts,
1362 (*patch)->hunks->elt_size, compare_hunks);
1365 return SVN_NO_ERROR;
1369 svn_diff_close_patch_file(svn_patch_file_t *patch_file,
1370 apr_pool_t *scratch_pool)
1372 return svn_error_trace(svn_io_file_close(patch_file->apr_file,