contrib/subversion/subversion/libsvn_diff/util.c

   1 /*
   2  * util.c :  routines for doing diffs
   3  *
   4  * ====================================================================
   5  *    Licensed to the Apache Software Foundation (ASF) under one
   6  *    or more contributor license agreements.  See the NOTICE file
   7  *    distributed with this work for additional information
   8  *    regarding copyright ownership.  The ASF licenses this file
   9  *    to you under the Apache License, Version 2.0 (the
  10  *    "License"); you may not use this file except in compliance
  11  *    with the License.  You may obtain a copy of the License at
  12  *
  13  *      http://www.apache.org/licenses/LICENSE-2.0
  14  *
  15  *    Unless required by applicable law or agreed to in writing,
  16  *    software distributed under the License is distributed on an
  17  *    "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
  18  *    KIND, either express or implied.  See the License for the
  19  *    specific language governing permissions and limitations
  20  *    under the License.
  21  * ====================================================================
  22  */
  23
  24
  25 #include <apr.h>
  26 #include <apr_general.h>
  27
  28 #include "svn_hash.h"
  29 #include "svn_pools.h"
  30 #include "svn_dirent_uri.h"
  31 #include "svn_props.h"
  32 #include "svn_mergeinfo.h"
  33 #include "svn_error.h"
  34 #include "svn_diff.h"
  35 #include "svn_types.h"
  36 #include "svn_ctype.h"
  37 #include "svn_sorts.h"
  38 #include "svn_utf.h"
  39 #include "svn_version.h"
  40
  41 #include "private/svn_diff_private.h"
  42 #include "diff.h"
  43
  44 #include "svn_private_config.h"
  45
  46
  47 svn_boolean_t
  48 svn_diff_contains_conflicts(svn_diff_t *diff)
  49 {
  50   while (diff != NULL)
  51     {
  52       if (diff->type == svn_diff__type_conflict)
  53         {
  54           return TRUE;
  55         }
  56
  57       diff = diff->next;
  58     }
  59
  60   return FALSE;
  61 }
  62
  63 svn_boolean_t
  64 svn_diff_contains_diffs(svn_diff_t *diff)
  65 {
  66   while (diff != NULL)
  67     {
  68       if (diff->type != svn_diff__type_common)
  69         {
  70           return TRUE;
  71         }
  72
  73       diff = diff->next;
  74     }
  75
  76   return FALSE;
  77 }
  78
  79 svn_error_t *
  80 svn_diff_output(svn_diff_t *diff,
  81                 void *output_baton,
  82                 const svn_diff_output_fns_t *vtable)
  83 {
  84   svn_error_t *(*output_fn)(void *,
  85                             apr_off_t, apr_off_t,
  86                             apr_off_t, apr_off_t,
  87                             apr_off_t, apr_off_t);
  88
  89   while (diff != NULL)
  90     {
  91       switch (diff->type)
  92         {
  93         case svn_diff__type_common:
  94           output_fn = vtable->output_common;
  95           break;
  96
  97         case svn_diff__type_diff_common:
  98           output_fn = vtable->output_diff_common;
  99           break;
 100
 101         case svn_diff__type_diff_modified:
 102           output_fn = vtable->output_diff_modified;
 103           break;
 104
 105         case svn_diff__type_diff_latest:
 106           output_fn = vtable->output_diff_latest;
 107           break;
 108
 109         case svn_diff__type_conflict:
 110           output_fn = NULL;
 111           if (vtable->output_conflict != NULL)
 112             {
 113               SVN_ERR(vtable->output_conflict(output_baton,
 114                                diff->original_start, diff->original_length,
 115                                diff->modified_start, diff->modified_length,
 116                                diff->latest_start, diff->latest_length,
 117                                diff->resolved_diff));
 118             }
 119           break;
 120
 121         default:
 122           output_fn = NULL;
 123           break;
 124         }
 125
 126       if (output_fn != NULL)
 127         {
 128           SVN_ERR(output_fn(output_baton,
 129                             diff->original_start, diff->original_length,
 130                             diff->modified_start, diff->modified_length,
 131                             diff->latest_start, diff->latest_length));
 132         }
 133
 134       diff = diff->next;
 135     }
 136
 137   return SVN_NO_ERROR;
 138 }
 139
 140
/* Normalize the *LENGTHP bytes at BUF according to OPTS (whitespace and/or
   end-of-line handling), continuing from the normalization state *STATEP.

   On return, *TGT points at the normalized data and *LENGTHP holds its
   length.  Whenever the normalized data is a single contiguous run of BUF,
   *TGT is pointed into BUF and nothing is copied; otherwise the data is
   assembled in the buffer *TGT pointed to on entry.  *STATEP is updated
   with the state reached after the last byte.

   If OPTS requests neither whitespace nor EOL normalization, *TGT is set
   to BUF and *LENGTHP and *STATEP are left untouched.

   Every input byte yields at most one output byte, so the output is never
   longer than the input.  NOTE(review): the required capacity of the
   buffer at *TGT, and whether it may overlap BUF (memmove is used for the
   copies), are caller contracts not visible here -- confirm. */
void
svn_diff__normalize_buffer(char **tgt,
                           apr_off_t *lengthp,
                           svn_diff__normalize_state_t *statep,
                           const char *buf,
                           const svn_diff_file_options_t *opts)
{
  /* Variables for looping through BUF */
  const char *curp, *endp;

  /* Variable to record normalizing state */
  svn_diff__normalize_state_t state = *statep;

  /* Variables to track what needs copying into the target buffer */
  const char *start = buf;
  apr_size_t include_len = 0;
  svn_boolean_t last_skipped = FALSE; /* makes sure we set 'start' */

  /* Variable to record the state of the target buffer */
  char *tgt_newend = *tgt;

  /* If this is a noop, then just get out of here. */
  if (! opts->ignore_space && ! opts->ignore_eol_style)
    {
      *tgt = (char *)buf;
      return;
    }


  /* It only took me forever to get this routine right,
     so here my thoughts go:

    Below, we loop through the data, doing 2 things:

     - Normalizing
     - Copying other data

     The routine tries its hardest *not* to copy data, but instead
     to return a pointer into already normalized existing data.

     To this end, a block of 'other data' shouldn't be copied when found,
     but only as soon as it can't be returned in-place.

     On a character level, there are 3 possible operations:

     - Skip the character (don't include in the normalized data)
     - Include the character (do include in the normalized data)
     - Include as another character
       This is essentially the same as skipping the current character
       and inserting a given character in the output data.

    The macros below (SKIP, INCLUDE and INCLUDE_AS) are defined to
    handle the character based operations.  The macros themselves
    collect character level data into blocks.

    At all times, START, INCLUDE_LEN and CURP designate
    an included and a skipped block like this:

      [ start, start + include_len ) [ start + include_len, curp )
             INCLUDED                        EXCLUDED

    When the routine flips from skipping to including, the last
    included block has to be flushed to the output buffer.
  */

  /* Going from including to skipping; only schedules the current
     included section for flushing.
     Also, simply chop off the character if it's the first in the buffer,
     so we can possibly just return the remainder of the buffer */
#define SKIP             \
  do {                   \
    if (start == curp)   \
       ++start;          \
    last_skipped = TRUE; \
  } while (0)

  /* Add the current character to the included section.  If characters
     were skipped since the previous included section, that section is
     flushed first and a new one is started at CURP. */
#define INCLUDE                \
  do {                         \
    if (last_skipped)          \
      COPY_INCLUDED_SECTION;   \
    ++include_len;             \
    last_skipped = FALSE;      \
  } while (0)

  /* Flush the pending included section (if any) to the target buffer and
     restart the section bookkeeping at CURP. */
#define COPY_INCLUDED_SECTION                     \
  do {                                            \
    if (include_len > 0)                          \
      {                                           \
         memmove(tgt_newend, start, include_len); \
         tgt_newend += include_len;               \
         include_len = 0;                         \
      }                                           \
    start = curp;                                 \
  } while (0)

  /* Include the current character as character X.
     If the current character already *is* X, add it to the
     currently included region, increasing chances for consecutive
     fully normalized blocks. */
#define INCLUDE_AS(x)          \
  do {                         \
    if (*curp == (x))          \
      INCLUDE;                 \
    else                       \
      {                        \
        INSERT((x));           \
        SKIP;                  \
      }                        \
  } while (0)

  /* Insert character X in the output buffer */
#define INSERT(x)              \
  do {                         \
    COPY_INCLUDED_SECTION;     \
    *tgt_newend++ = (x);       \
  } while (0)

  for (curp = buf, endp = buf + *lengthp; curp != endp; ++curp)
    {
      switch (*curp)
        {
        case '\r':
          /* With EOL normalization a CR is emitted as LF; a directly
             following LF is then dropped by the '\n' case below. */
          if (opts->ignore_eol_style)
            INCLUDE_AS('\n');
          else
            INCLUDE;
          state = svn_diff__normalize_state_cr;
          break;

        case '\n':
          /* The LF of a CRLF pair was already represented by the CR. */
          if (state == svn_diff__normalize_state_cr
              && opts->ignore_eol_style)
            SKIP;
          else
            INCLUDE;
          state = svn_diff__normalize_state_normal;
          break;

        default:
          if (svn_ctype_isspace(*curp)
              && opts->ignore_space != svn_diff_file_ignore_space_none)
            {
              /* Whitespace but not '\r' or '\n' */
              if (state != svn_diff__normalize_state_whitespace
                  && opts->ignore_space
                     == svn_diff_file_ignore_space_change)
                /*### If we can postpone insertion of the space
                  until the next non-whitespace character,
                  we have a potential of reducing the number of copies:
                  If this space is followed by more spaces,
                  this will cause a block-copy.
                  If the next non-space block is considered normalized
                  *and* preceded by a space, we can take advantage of that. */
                /* Note, the above optimization applies to 90% of the source
                   lines in our own code, since it (generally) doesn't use
                   more than one space per blank section, except for the
                   beginning of a line. */
                INCLUDE_AS(' ');
              else
                SKIP;
              state = svn_diff__normalize_state_whitespace;
            }
          else
            {
              /* Non-whitespace character, or whitespace character in
                 svn_diff_file_ignore_space_none mode. */
              INCLUDE;
              state = svn_diff__normalize_state_normal;
            }
        }
    }

  /* If we're not in whitespace, flush the last chunk of data.
   * Note that this will work correctly when this is the last chunk of the
   * file:
   * * If there is an eol, it will either have been output when we entered
   *   the state_cr, or it will be output now.
   * * If there is no eol and we're not in whitespace, then we just output
   *   everything below.
   * * If there's no eol and we are in whitespace, we want to ignore
   *   whitespace unconditionally. */

  if (*tgt == tgt_newend)
    {
      /* we haven't copied any data in to *tgt and our chunk consists
         only of one block of (already normalized) data.
         Just return the block. */
      *tgt = (char *)start;
      *lengthp = include_len;
    }
  else
    {
      /* Some data already lives in the target buffer: append the pending
         included section and report the total length written there. */
      COPY_INCLUDED_SECTION;
      *lengthp = tgt_newend - *tgt;
    }

  *statep = state;

#undef SKIP
#undef INCLUDE
#undef INCLUDE_AS
#undef INSERT
#undef COPY_INCLUDED_SECTION
}
 345
 346 svn_error_t *
 347 svn_diff__unified_append_no_newline_msg(svn_stringbuf_t *stringbuf,
 348                                         const char *header_encoding,
 349                                         apr_pool_t *scratch_pool)
 350 {
 351   const char *out_str;
 352
 353   SVN_ERR(svn_utf_cstring_from_utf8_ex2(
 354             &out_str,
 355             APR_EOL_STR
 356             SVN_DIFF__NO_NEWLINE_AT_END_OF_FILE APR_EOL_STR,
 357             header_encoding, scratch_pool));
 358   svn_stringbuf_appendcstr(stringbuf, out_str);
 359   return SVN_NO_ERROR;
 360 }
 361
 362 svn_error_t *
 363 svn_diff__unified_write_hunk_header(svn_stream_t *output_stream,
 364                                     const char *header_encoding,
 365                                     const char *hunk_delimiter,
 366                                     apr_off_t old_start,
 367                                     apr_off_t old_length,
 368                                     apr_off_t new_start,
 369                                     apr_off_t new_length,
 370                                     const char *hunk_extra_context,
 371                                     apr_pool_t *scratch_pool)
 372 {
 373   SVN_ERR(svn_stream_printf_from_utf8(output_stream, header_encoding,
 374                                       scratch_pool,
 375                                       "%s -%" APR_OFF_T_FMT,
 376                                       hunk_delimiter, old_start));
 377   /* If the hunk length is 1, suppress the number of lines in the hunk
 378    * (it is 1 implicitly) */
 379   if (old_length != 1)
 380     {
 381       SVN_ERR(svn_stream_printf_from_utf8(output_stream, header_encoding,
 382                                           scratch_pool,
 383                                           ",%" APR_OFF_T_FMT, old_length));
 384     }
 385
 386   SVN_ERR(svn_stream_printf_from_utf8(output_stream, header_encoding,
 387                                       scratch_pool,
 388                                       " +%" APR_OFF_T_FMT, new_start));
 389   if (new_length != 1)
 390     {
 391       SVN_ERR(svn_stream_printf_from_utf8(output_stream, header_encoding,
 392                                           scratch_pool,
 393                                           ",%" APR_OFF_T_FMT, new_length));
 394     }
 395
 396   if (hunk_extra_context == NULL)
 397       hunk_extra_context = "";
 398   SVN_ERR(svn_stream_printf_from_utf8(output_stream, header_encoding,
 399                                       scratch_pool,
 400                                       " %s%s%s" APR_EOL_STR,
 401                                       hunk_delimiter,
 402                                       hunk_extra_context[0] ? " " : "",
 403                                       hunk_extra_context));
 404   return SVN_NO_ERROR;
 405 }
 406
 407 svn_error_t *
 408 svn_diff__unidiff_write_header(svn_stream_t *output_stream,
 409                                const char *header_encoding,
 410                                const char *old_header,
 411                                const char *new_header,
 412                                apr_pool_t *scratch_pool)
 413 {
 414   SVN_ERR(svn_stream_printf_from_utf8(output_stream, header_encoding,
 415                                       scratch_pool,
 416                                       "--- %s" APR_EOL_STR
 417                                       "+++ %s" APR_EOL_STR,
 418                                       old_header,
 419                                       new_header));
 420   return SVN_NO_ERROR;
 421 }
 422
 423 /* A helper function for display_prop_diffs.  Output the differences between
 424    the mergeinfo stored in ORIG_MERGEINFO_VAL and NEW_MERGEINFO_VAL in a
 425    human-readable form to OUTSTREAM, using ENCODING.  Use POOL for temporary
 426    allocations. */
 427 static svn_error_t *
 428 display_mergeinfo_diff(const char *old_mergeinfo_val,
 429                        const char *new_mergeinfo_val,
 430                        const char *encoding,
 431                        svn_stream_t *outstream,
 432                        apr_pool_t *pool)
 433 {
 434   apr_hash_t *old_mergeinfo_hash, *new_mergeinfo_hash, *added, *deleted;
 435   apr_pool_t *iterpool = svn_pool_create(pool);
 436   apr_hash_index_t *hi;
 437
 438   if (old_mergeinfo_val)
 439     SVN_ERR(svn_mergeinfo_parse(&old_mergeinfo_hash, old_mergeinfo_val, pool));
 440   else
 441     old_mergeinfo_hash = NULL;
 442
 443   if (new_mergeinfo_val)
 444     SVN_ERR(svn_mergeinfo_parse(&new_mergeinfo_hash, new_mergeinfo_val, pool));
 445   else
 446     new_mergeinfo_hash = NULL;
 447
 448   SVN_ERR(svn_mergeinfo_diff2(&deleted, &added, old_mergeinfo_hash,
 449                               new_mergeinfo_hash,
 450                               TRUE, pool, pool));
 451
 452   for (hi = apr_hash_first(pool, deleted);
 453        hi; hi = apr_hash_next(hi))
 454     {
 455       const char *from_path = svn__apr_hash_index_key(hi);
 456       svn_rangelist_t *merge_revarray = svn__apr_hash_index_val(hi);
 457       svn_string_t *merge_revstr;
 458
 459       svn_pool_clear(iterpool);
 460       SVN_ERR(svn_rangelist_to_string(&merge_revstr, merge_revarray,
 461                                       iterpool));
 462
 463       SVN_ERR(svn_stream_printf_from_utf8(outstream, encoding, iterpool,
 464                                           _("   Reverse-merged %s:r%s%s"),
 465                                           from_path, merge_revstr->data,
 466                                           APR_EOL_STR));
 467     }
 468
 469   for (hi = apr_hash_first(pool, added);
 470        hi; hi = apr_hash_next(hi))
 471     {
 472       const char *from_path = svn__apr_hash_index_key(hi);
 473       svn_rangelist_t *merge_revarray = svn__apr_hash_index_val(hi);
 474       svn_string_t *merge_revstr;
 475
 476       svn_pool_clear(iterpool);
 477       SVN_ERR(svn_rangelist_to_string(&merge_revstr, merge_revarray,
 478                                       iterpool));
 479
 480       SVN_ERR(svn_stream_printf_from_utf8(outstream, encoding, iterpool,
 481                                           _("   Merged %s:r%s%s"),
 482                                           from_path, merge_revstr->data,
 483                                           APR_EOL_STR));
 484     }
 485
 486   svn_pool_destroy(iterpool);
 487   return SVN_NO_ERROR;
 488 }
 489
 490 /* qsort callback handling svn_prop_t by name */
 491 static int
 492 propchange_sort(const void *k1, const void *k2)
 493 {
 494   const svn_prop_t *propchange1 = k1;
 495   const svn_prop_t *propchange2 = k2;
 496
 497   return strcmp(propchange1->name, propchange2->name);
 498 }
 499
 500 svn_error_t *
 501 svn_diff__display_prop_diffs(svn_stream_t *outstream,
 502                              const char *encoding,
 503                              const apr_array_header_t *propchanges,
 504                              apr_hash_t *original_props,
 505                              svn_boolean_t pretty_print_mergeinfo,
 506                              apr_pool_t *scratch_pool)
 507 {
 508   apr_pool_t *pool = scratch_pool;
 509   apr_pool_t *iterpool = svn_pool_create(pool);
 510   apr_array_header_t *changes = apr_array_copy(scratch_pool, propchanges);
 511   int i;
 512
 513   qsort(changes->elts, changes->nelts, changes->elt_size, propchange_sort);
 514
 515   for (i = 0; i < changes->nelts; i++)
 516     {
 517       const char *action;
 518       const svn_string_t *original_value;
 519       const svn_prop_t *propchange
 520         = &APR_ARRAY_IDX(changes, i, svn_prop_t);
 521
 522       if (original_props)
 523         original_value = svn_hash_gets(original_props, propchange->name);
 524       else
 525         original_value = NULL;
 526
 527       /* If the property doesn't exist on either side, or if it exists
 528          with the same value, skip it.  This can happen if the client is
 529          hitting an old mod_dav_svn server that doesn't understand the
 530          "send-all" REPORT style. */
 531       if ((! (original_value || propchange->value))
 532           || (original_value && propchange->value
 533               && svn_string_compare(original_value, propchange->value)))
 534         continue;
 535
 536       svn_pool_clear(iterpool);
 537
 538       if (! original_value)
 539         action = "Added";
 540       else if (! propchange->value)
 541         action = "Deleted";
 542       else
 543         action = "Modified";
 544       SVN_ERR(svn_stream_printf_from_utf8(outstream, encoding, iterpool,
 545                                           "%s: %s%s", action,
 546                                           propchange->name, APR_EOL_STR));
 547
 548       if (pretty_print_mergeinfo
 549           && strcmp(propchange->name, SVN_PROP_MERGEINFO) == 0)
 550         {
 551           const char *orig = original_value ? original_value->data : NULL;
 552           const char *val = propchange->value ? propchange->value->data : NULL;
 553           svn_error_t *err = display_mergeinfo_diff(orig, val, encoding,
 554                                                     outstream, iterpool);
 555
 556           /* Issue #3896: If we can't pretty-print mergeinfo differences
 557              because invalid mergeinfo is present, then don't let the diff
 558              fail, just print the diff as any other property. */
 559           if (err && err->apr_err == SVN_ERR_MERGEINFO_PARSE_ERROR)
 560             {
 561               svn_error_clear(err);
 562             }
 563           else
 564             {
 565               SVN_ERR(err);
 566               continue;
 567             }
 568         }
 569
 570       {
 571         svn_diff_t *diff;
 572         svn_diff_file_options_t options = { 0 };
 573         const svn_string_t *orig
 574           = original_value ? original_value
 575                            : svn_string_create_empty(iterpool);
 576         const svn_string_t *val
 577           = propchange->value ? propchange->value
 578                               : svn_string_create_empty(iterpool);
 579
 580         SVN_ERR(svn_diff_mem_string_diff(&diff, orig, val, &options,
 581                                          iterpool));
 582
 583         /* UNIX patch will try to apply a diff even if the diff header
 584          * is missing. It tries to be helpful by asking the user for a
 585          * target filename when it can't determine the target filename
 586          * from the diff header. But there usually are no files which
 587          * UNIX patch could apply the property diff to, so we use "##"
 588          * instead of "@@" as the default hunk delimiter for property diffs.
 589          * We also supress the diff header. */
 590         SVN_ERR(svn_diff_mem_string_output_unified2(
 591                   outstream, diff, FALSE /* no header */, "##", NULL, NULL,
 592                   encoding, orig, val, iterpool));
 593       }
 594     }
 595   svn_pool_destroy(iterpool);
 596
 597   return SVN_NO_ERROR;
 598 }
 599
 600
/* Return the version information of this library.

   The entire function body comes from the SVN_VERSION_BODY macro, which
   is defined outside this file (presumably in svn_version.h -- confirm),
   so what exactly is returned is determined there. */
const svn_version_t *
svn_diff_version(void)
{
  SVN_VERSION_BODY;
}