contrib/subversion/subversion/libsvn_diff/util.c

   1 /*
   2  * util.c :  routines for doing diffs
   3  *
   4  * ====================================================================
   5  *    Licensed to the Apache Software Foundation (ASF) under one
   6  *    or more contributor license agreements.  See the NOTICE file
   7  *    distributed with this work for additional information
   8  *    regarding copyright ownership.  The ASF licenses this file
   9  *    to you under the Apache License, Version 2.0 (the
  10  *    "License"); you may not use this file except in compliance
  11  *    with the License.  You may obtain a copy of the License at
  12  *
  13  *      http://www.apache.org/licenses/LICENSE-2.0
  14  *
  15  *    Unless required by applicable law or agreed to in writing,
  16  *    software distributed under the License is distributed on an
  17  *    "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
  18  *    KIND, either express or implied.  See the License for the
  19  *    specific language governing permissions and limitations
  20  *    under the License.
  21  * ====================================================================
  22  */
  23
  24
  25 #include <apr.h>
  26 #include <apr_general.h>
  27
  28 #include "svn_hash.h"
  29 #include "svn_pools.h"
  30 #include "svn_dirent_uri.h"
  31 #include "svn_props.h"
  32 #include "svn_mergeinfo.h"
  33 #include "svn_error.h"
  34 #include "svn_diff.h"
  35 #include "svn_types.h"
  36 #include "svn_ctype.h"
  37 #include "svn_utf.h"
  38 #include "svn_version.h"
  39
  40 #include "private/svn_diff_private.h"
  41 #include "private/svn_sorts_private.h"
  42 #include "diff.h"
  43
  44 #include "svn_private_config.h"
  45
  46
  47 svn_boolean_t
  48 svn_diff_contains_conflicts(svn_diff_t *diff)
  49 {
  50   while (diff != NULL)
  51     {
  52       if (diff->type == svn_diff__type_conflict)
  53         {
  54           return TRUE;
  55         }
  56
  57       diff = diff->next;
  58     }
  59
  60   return FALSE;
  61 }
  62
  63 svn_boolean_t
  64 svn_diff_contains_diffs(svn_diff_t *diff)
  65 {
  66   while (diff != NULL)
  67     {
  68       if (diff->type != svn_diff__type_common)
  69         {
  70           return TRUE;
  71         }
  72
  73       diff = diff->next;
  74     }
  75
  76   return FALSE;
  77 }
  78
  79 svn_error_t *
  80 svn_diff_output2(svn_diff_t *diff,
  81                  void *output_baton,
  82                  const svn_diff_output_fns_t *vtable,
  83                  svn_cancel_func_t cancel_func,
  84                  void *cancel_baton)
  85 {
  86   svn_error_t *(*output_fn)(void *,
  87                             apr_off_t, apr_off_t,
  88                             apr_off_t, apr_off_t,
  89                             apr_off_t, apr_off_t);
  90
  91   while (diff != NULL)
  92     {
  93       if (cancel_func)
  94         SVN_ERR(cancel_func(cancel_baton));
  95
  96       switch (diff->type)
  97         {
  98         case svn_diff__type_common:
  99           output_fn = vtable->output_common;
 100           break;
 101
 102         case svn_diff__type_diff_common:
 103           output_fn = vtable->output_diff_common;
 104           break;
 105
 106         case svn_diff__type_diff_modified:
 107           output_fn = vtable->output_diff_modified;
 108           break;
 109
 110         case svn_diff__type_diff_latest:
 111           output_fn = vtable->output_diff_latest;
 112           break;
 113
 114         case svn_diff__type_conflict:
 115           output_fn = NULL;
 116           if (vtable->output_conflict != NULL)
 117             {
 118               SVN_ERR(vtable->output_conflict(output_baton,
 119                                diff->original_start, diff->original_length,
 120                                diff->modified_start, diff->modified_length,
 121                                diff->latest_start, diff->latest_length,
 122                                diff->resolved_diff));
 123             }
 124           break;
 125
 126         default:
 127           output_fn = NULL;
 128           break;
 129         }
 130
 131       if (output_fn != NULL)
 132         {
 133           SVN_ERR(output_fn(output_baton,
 134                             diff->original_start, diff->original_length,
 135                             diff->modified_start, diff->modified_length,
 136                             diff->latest_start, diff->latest_length));
 137         }
 138
 139       diff = diff->next;
 140     }
 141
 142   return SVN_NO_ERROR;
 143 }
 144
 145
 146 void
 147 svn_diff__normalize_buffer(char **tgt,
 148                            apr_off_t *lengthp,
 149                            svn_diff__normalize_state_t *statep,
 150                            const char *buf,
 151                            const svn_diff_file_options_t *opts)
 152 {
 153   /* Variables for looping through BUF */
 154   const char *curp, *endp;
 155
 156   /* Variable to record normalizing state */
 157   svn_diff__normalize_state_t state = *statep;
 158
 159   /* Variables to track what needs copying into the target buffer */
 160   const char *start = buf;
 161   apr_size_t include_len = 0;
 162   svn_boolean_t last_skipped = FALSE; /* makes sure we set 'start' */
 163
 164   /* Variable to record the state of the target buffer */
 165   char *tgt_newend = *tgt;
 166
 167   /* If this is a noop, then just get out of here. */
 168   if (! opts->ignore_space && ! opts->ignore_eol_style)
 169     {
 170       *tgt = (char *)buf;
 171       return;
 172     }
 173
 174
 175   /* It only took me forever to get this routine right,
 176      so here my thoughts go:
 177
 178     Below, we loop through the data, doing 2 things:
 179
 180      - Normalizing
 181      - Copying other data
 182
 183      The routine tries its hardest *not* to copy data, but instead
 184      returning a pointer into already normalized existing data.
 185
 186      To this end, a block 'other data' shouldn't be copied when found,
 187      but only as soon as it can't be returned in-place.
 188
 189      On a character level, there are 3 possible operations:
 190
 191      - Skip the character (don't include in the normalized data)
 192      - Include the character (do include in the normalizad data)
 193      - Include as another character
 194        This is essentially the same as skipping the current character
 195        and inserting a given character in the output data.
 196
 197     The macros below (SKIP, INCLUDE and INCLUDE_AS) are defined to
 198     handle the character based operations.  The macros themselves
 199     collect character level data into blocks.
 200
 201     At all times designate the START, INCLUDED_LEN and CURP pointers
 202     an included and and skipped block like this:
 203
 204       [ start, start + included_len ) [ start + included_len, curp )
 205              INCLUDED                        EXCLUDED
 206
 207     When the routine flips from skipping to including, the last
 208     included block has to be flushed to the output buffer.
 209   */
 210
 211   /* Going from including to skipping; only schedules the current
 212      included section for flushing.
 213      Also, simply chop off the character if it's the first in the buffer,
 214      so we can possibly just return the remainder of the buffer */
 215 #define SKIP             \
 216   do {                   \
 217     if (start == curp)   \
 218        ++start;          \
 219     last_skipped = TRUE; \
 220   } while (0)
 221
 222 #define INCLUDE                \
 223   do {                         \
 224     if (last_skipped)          \
 225       COPY_INCLUDED_SECTION;   \
 226     ++include_len;             \
 227     last_skipped = FALSE;      \
 228   } while (0)
 229
 230 #define COPY_INCLUDED_SECTION                     \
 231   do {                                            \
 232     if (include_len > 0)                          \
 233       {                                           \
 234          memmove(tgt_newend, start, include_len); \
 235          tgt_newend += include_len;               \
 236          include_len = 0;                         \
 237       }                                           \
 238     start = curp;                                 \
 239   } while (0)
 240
 241   /* Include the current character as character X.
 242      If the current character already *is* X, add it to the
 243      currently included region, increasing chances for consecutive
 244      fully normalized blocks. */
 245 #define INCLUDE_AS(x)          \
 246   do {                         \
 247     if (*curp == (x))          \
 248       INCLUDE;                 \
 249     else                       \
 250       {                        \
 251         INSERT((x));           \
 252         SKIP;                  \
 253       }                        \
 254   } while (0)
 255
 256   /* Insert character X in the output buffer */
 257 #define INSERT(x)              \
 258   do {                         \
 259     COPY_INCLUDED_SECTION;     \
 260     *tgt_newend++ = (x);       \
 261   } while (0)
 262
 263   for (curp = buf, endp = buf + *lengthp; curp != endp; ++curp)
 264     {
 265       switch (*curp)
 266         {
 267         case '\r':
 268           if (opts->ignore_eol_style)
 269             INCLUDE_AS('\n');
 270           else
 271             INCLUDE;
 272           state = svn_diff__normalize_state_cr;
 273           break;
 274
 275         case '\n':
 276           if (state == svn_diff__normalize_state_cr
 277               && opts->ignore_eol_style)
 278             SKIP;
 279           else
 280             INCLUDE;
 281           state = svn_diff__normalize_state_normal;
 282           break;
 283
 284         default:
 285           if (svn_ctype_isspace(*curp)
 286               && opts->ignore_space != svn_diff_file_ignore_space_none)
 287             {
 288               /* Whitespace but not '\r' or '\n' */
 289               if (state != svn_diff__normalize_state_whitespace
 290                   && opts->ignore_space
 291                      == svn_diff_file_ignore_space_change)
 292                 /*### If we can postpone insertion of the space
 293                   until the next non-whitespace character,
 294                   we have a potential of reducing the number of copies:
 295                   If this space is followed by more spaces,
 296                   this will cause a block-copy.
 297                   If the next non-space block is considered normalized
 298                   *and* preceded by a space, we can take advantage of that. */
 299                 /* Note, the above optimization applies to 90% of the source
 300                    lines in our own code, since it (generally) doesn't use
 301                    more than one space per blank section, except for the
 302                    beginning of a line. */
 303                 INCLUDE_AS(' ');
 304               else
 305                 SKIP;
 306               state = svn_diff__normalize_state_whitespace;
 307             }
 308           else
 309             {
 310               /* Non-whitespace character, or whitespace character in
 311                  svn_diff_file_ignore_space_none mode. */
 312               INCLUDE;
 313               state = svn_diff__normalize_state_normal;
 314             }
 315         }
 316     }
 317
 318   /* If we're not in whitespace, flush the last chunk of data.
 319    * Note that this will work correctly when this is the last chunk of the
 320    * file:
 321    * * If there is an eol, it will either have been output when we entered
 322    *   the state_cr, or it will be output now.
 323    * * If there is no eol and we're not in whitespace, then we just output
 324    *   everything below.
 325    * * If there's no eol and we are in whitespace, we want to ignore
 326    *   whitespace unconditionally. */
 327
 328   if (*tgt == tgt_newend)
 329     {
 330       /* we haven't copied any data in to *tgt and our chunk consists
 331          only of one block of (already normalized) data.
 332          Just return the block. */
 333       *tgt = (char *)start;
 334       *lengthp = include_len;
 335     }
 336   else
 337     {
 338       COPY_INCLUDED_SECTION;
 339       *lengthp = tgt_newend - *tgt;
 340     }
 341
 342   *statep = state;
 343
 344 #undef SKIP
 345 #undef INCLUDE
 346 #undef INCLUDE_AS
 347 #undef INSERT
 348 #undef COPY_INCLUDED_SECTION
 349 }
 350
 351 svn_error_t *
 352 svn_diff__unified_append_no_newline_msg(svn_stringbuf_t *stringbuf,
 353                                         const char *header_encoding,
 354                                         apr_pool_t *scratch_pool)
 355 {
 356   const char *out_str;
 357
 358   SVN_ERR(svn_utf_cstring_from_utf8_ex2(
 359             &out_str,
 360             APR_EOL_STR
 361             SVN_DIFF__NO_NEWLINE_AT_END_OF_FILE APR_EOL_STR,
 362             header_encoding, scratch_pool));
 363   svn_stringbuf_appendcstr(stringbuf, out_str);
 364   return SVN_NO_ERROR;
 365 }
 366
 367 svn_error_t *
 368 svn_diff__unified_write_hunk_header(svn_stream_t *output_stream,
 369                                     const char *header_encoding,
 370                                     const char *hunk_delimiter,
 371                                     apr_off_t old_start,
 372                                     apr_off_t old_length,
 373                                     apr_off_t new_start,
 374                                     apr_off_t new_length,
 375                                     const char *hunk_extra_context,
 376                                     apr_pool_t *scratch_pool)
 377 {
 378   SVN_ERR(svn_stream_printf_from_utf8(output_stream, header_encoding,
 379                                       scratch_pool,
 380                                       "%s -%" APR_OFF_T_FMT,
 381                                       hunk_delimiter, old_start));
 382   /* If the hunk length is 1, suppress the number of lines in the hunk
 383    * (it is 1 implicitly) */
 384   if (old_length != 1)
 385     {
 386       SVN_ERR(svn_stream_printf_from_utf8(output_stream, header_encoding,
 387                                           scratch_pool,
 388                                           ",%" APR_OFF_T_FMT, old_length));
 389     }
 390
 391   SVN_ERR(svn_stream_printf_from_utf8(output_stream, header_encoding,
 392                                       scratch_pool,
 393                                       " +%" APR_OFF_T_FMT, new_start));
 394   if (new_length != 1)
 395     {
 396       SVN_ERR(svn_stream_printf_from_utf8(output_stream, header_encoding,
 397                                           scratch_pool,
 398                                           ",%" APR_OFF_T_FMT, new_length));
 399     }
 400
 401   if (hunk_extra_context == NULL)
 402       hunk_extra_context = "";
 403   SVN_ERR(svn_stream_printf_from_utf8(output_stream, header_encoding,
 404                                       scratch_pool,
 405                                       " %s%s%s" APR_EOL_STR,
 406                                       hunk_delimiter,
 407                                       hunk_extra_context[0] ? " " : "",
 408                                       hunk_extra_context));
 409   return SVN_NO_ERROR;
 410 }
 411
 412 svn_error_t *
 413 svn_diff__unidiff_write_header(svn_stream_t *output_stream,
 414                                const char *header_encoding,
 415                                const char *old_header,
 416                                const char *new_header,
 417                                apr_pool_t *scratch_pool)
 418 {
 419   SVN_ERR(svn_stream_printf_from_utf8(output_stream, header_encoding,
 420                                       scratch_pool,
 421                                       "--- %s" APR_EOL_STR
 422                                       "+++ %s" APR_EOL_STR,
 423                                       old_header,
 424                                       new_header));
 425   return SVN_NO_ERROR;
 426 }
 427
 428 /* A helper function for display_prop_diffs.  Output the differences between
 429    the mergeinfo stored in ORIG_MERGEINFO_VAL and NEW_MERGEINFO_VAL in a
 430    human-readable form to OUTSTREAM, using ENCODING.  Use POOL for temporary
 431    allocations. */
 432 static svn_error_t *
 433 display_mergeinfo_diff(const char *old_mergeinfo_val,
 434                        const char *new_mergeinfo_val,
 435                        const char *encoding,
 436                        svn_stream_t *outstream,
 437                        apr_pool_t *pool)
 438 {
 439   apr_hash_t *old_mergeinfo_hash, *new_mergeinfo_hash, *added, *deleted;
 440   apr_pool_t *iterpool = svn_pool_create(pool);
 441   apr_hash_index_t *hi;
 442
 443   if (old_mergeinfo_val)
 444     SVN_ERR(svn_mergeinfo_parse(&old_mergeinfo_hash, old_mergeinfo_val, pool));
 445   else
 446     old_mergeinfo_hash = NULL;
 447
 448   if (new_mergeinfo_val)
 449     SVN_ERR(svn_mergeinfo_parse(&new_mergeinfo_hash, new_mergeinfo_val, pool));
 450   else
 451     new_mergeinfo_hash = NULL;
 452
 453   SVN_ERR(svn_mergeinfo_diff2(&deleted, &added, old_mergeinfo_hash,
 454                               new_mergeinfo_hash,
 455                               TRUE, pool, pool));
 456
 457   /* Print a hint for 'svn patch' or smilar tools, indicating the
 458    * number of reverse-merges and forward-merges. */
 459   SVN_ERR(svn_stream_printf_from_utf8(outstream, encoding, pool,
 460                                       "## -0,%u +0,%u ##%s",
 461                                       apr_hash_count(deleted),
 462                                       apr_hash_count(added),
 463                                       APR_EOL_STR));
 464
 465   for (hi = apr_hash_first(pool, deleted);
 466        hi; hi = apr_hash_next(hi))
 467     {
 468       const char *from_path = apr_hash_this_key(hi);
 469       svn_rangelist_t *merge_revarray = apr_hash_this_val(hi);
 470       svn_string_t *merge_revstr;
 471
 472       svn_pool_clear(iterpool);
 473       SVN_ERR(svn_rangelist_to_string(&merge_revstr, merge_revarray,
 474                                       iterpool));
 475
 476       SVN_ERR(svn_stream_printf_from_utf8(outstream, encoding, iterpool,
 477                                           _("   Reverse-merged %s:r%s%s"),
 478                                           from_path, merge_revstr->data,
 479                                           APR_EOL_STR));
 480     }
 481
 482   for (hi = apr_hash_first(pool, added);
 483        hi; hi = apr_hash_next(hi))
 484     {
 485       const char *from_path = apr_hash_this_key(hi);
 486       svn_rangelist_t *merge_revarray = apr_hash_this_val(hi);
 487       svn_string_t *merge_revstr;
 488
 489       svn_pool_clear(iterpool);
 490       SVN_ERR(svn_rangelist_to_string(&merge_revstr, merge_revarray,
 491                                       iterpool));
 492
 493       SVN_ERR(svn_stream_printf_from_utf8(outstream, encoding, iterpool,
 494                                           _("   Merged %s:r%s%s"),
 495                                           from_path, merge_revstr->data,
 496                                           APR_EOL_STR));
 497     }
 498
 499   svn_pool_destroy(iterpool);
 500   return SVN_NO_ERROR;
 501 }
 502
 503 /* svn_sort__array callback handling svn_prop_t by name */
 504 static int
 505 propchange_sort(const void *k1, const void *k2)
 506 {
 507   const svn_prop_t *propchange1 = k1;
 508   const svn_prop_t *propchange2 = k2;
 509
 510   return strcmp(propchange1->name, propchange2->name);
 511 }
 512
 513 svn_error_t *
 514 svn_diff__display_prop_diffs(svn_stream_t *outstream,
 515                              const char *encoding,
 516                              const apr_array_header_t *propchanges,
 517                              apr_hash_t *original_props,
 518                              svn_boolean_t pretty_print_mergeinfo,
 519                              int context_size,
 520                              svn_cancel_func_t cancel_func,
 521                              void *cancel_baton,
 522                              apr_pool_t *scratch_pool)
 523 {
 524   apr_pool_t *pool = scratch_pool;
 525   apr_pool_t *iterpool = svn_pool_create(pool);
 526   apr_array_header_t *changes = apr_array_copy(scratch_pool, propchanges);
 527   int i;
 528
 529   svn_sort__array(changes, propchange_sort);
 530
 531   for (i = 0; i < changes->nelts; i++)
 532     {
 533       const char *action;
 534       const svn_string_t *original_value;
 535       const svn_prop_t *propchange
 536         = &APR_ARRAY_IDX(changes, i, svn_prop_t);
 537
 538       if (original_props)
 539         original_value = svn_hash_gets(original_props, propchange->name);
 540       else
 541         original_value = NULL;
 542
 543       /* If the property doesn't exist on either side, or if it exists
 544          with the same value, skip it.  This can happen if the client is
 545          hitting an old mod_dav_svn server that doesn't understand the
 546          "send-all" REPORT style. */
 547       if ((! (original_value || propchange->value))
 548           || (original_value && propchange->value
 549               && svn_string_compare(original_value, propchange->value)))
 550         continue;
 551
 552       svn_pool_clear(iterpool);
 553
 554       if (! original_value)
 555         action = "Added";
 556       else if (! propchange->value)
 557         action = "Deleted";
 558       else
 559         action = "Modified";
 560       SVN_ERR(svn_stream_printf_from_utf8(outstream, encoding, iterpool,
 561                                           "%s: %s%s", action,
 562                                           propchange->name, APR_EOL_STR));
 563
 564       if (pretty_print_mergeinfo
 565           && strcmp(propchange->name, SVN_PROP_MERGEINFO) == 0)
 566         {
 567           const char *orig = original_value ? original_value->data : NULL;
 568           const char *val = propchange->value ? propchange->value->data : NULL;
 569           svn_error_t *err = display_mergeinfo_diff(orig, val, encoding,
 570                                                     outstream, iterpool);
 571
 572           /* Issue #3896: If we can't pretty-print mergeinfo differences
 573              because invalid mergeinfo is present, then don't let the diff
 574              fail, just print the diff as any other property. */
 575           if (err && err->apr_err == SVN_ERR_MERGEINFO_PARSE_ERROR)
 576             {
 577               svn_error_clear(err);
 578             }
 579           else
 580             {
 581               SVN_ERR(err);
 582               continue;
 583             }
 584         }
 585
 586       {
 587         svn_diff_t *diff;
 588         svn_diff_file_options_t options = { 0 };
 589         const svn_string_t *orig
 590           = original_value ? original_value
 591                            : svn_string_create_empty(iterpool);
 592         const svn_string_t *val
 593           = propchange->value ? propchange->value
 594                               : svn_string_create_empty(iterpool);
 595
 596         SVN_ERR(svn_diff_mem_string_diff(&diff, orig, val, &options,
 597                                          iterpool));
 598
 599         /* UNIX patch will try to apply a diff even if the diff header
 600          * is missing. It tries to be helpful by asking the user for a
 601          * target filename when it can't determine the target filename
 602          * from the diff header. But there usually are no files which
 603          * UNIX patch could apply the property diff to, so we use "##"
 604          * instead of "@@" as the default hunk delimiter for property diffs.
 605          * We also suppress the diff header. */
 606         SVN_ERR(svn_diff_mem_string_output_unified3(
 607                   outstream, diff, FALSE /* no header */, "##", NULL, NULL,
 608                   encoding, orig, val, context_size,
 609                   cancel_func, cancel_baton, iterpool));
 610       }
 611     }
 612   svn_pool_destroy(iterpool);
 613
 614   return SVN_NO_ERROR;
 615 }
 616
 617
 618 /* Return the library version number. */
 619 const svn_version_t *
 620 svn_diff_version(void)
 621 {
 622   SVN_VERSION_BODY;
 623 }