contrib/subversion/subversion/libsvn_delta/xdelta.c

   1 /*
   2  * xdelta.c:  xdelta generator.
   3  *
   4  * ====================================================================
   5  *    Licensed to the Apache Software Foundation (ASF) under one
   6  *    or more contributor license agreements.  See the NOTICE file
   7  *    distributed with this work for additional information
   8  *    regarding copyright ownership.  The ASF licenses this file
   9  *    to you under the Apache License, Version 2.0 (the
  10  *    "License"); you may not use this file except in compliance
  11  *    with the License.  You may obtain a copy of the License at
  12  *
  13  *      http://www.apache.org/licenses/LICENSE-2.0
  14  *
  15  *    Unless required by applicable law or agreed to in writing,
  16  *    software distributed under the License is distributed on an
  17  *    "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
  18  *    KIND, either express or implied.  See the License for the
  19  *    specific language governing permissions and limitations
  20  *    under the License.
  21  * ====================================================================
  22  */
  23
  24
  25 #include <assert.h>
  26
  27 #include <apr_general.h>        /* for APR_INLINE */
  28 #include <apr_hash.h>
  29
  30 #include "svn_hash.h"
  31 #include "svn_delta.h"
  32 #include "delta.h"
  33 \f
  34 /* This is pseudo-adler32. It is adler32 without the prime modulus.
  35    The idea is borrowed from monotone, and is a translation of the C++
  36    code.  Graydon Hoare, the author of the original code, gave his
  37    explicit permission to use it under these terms at 8:02pm on
  38    Friday, February 11th, 2005.  */
  39
  40 /* Size of the blocks we compute checksums for. This was chosen out of
  41    thin air.  Monotone used 64, xdelta1 used 64, rsync uses 128.
  42    However, later optimizations assume it to be 256 or less.
  43  */
  44 #define MATCH_BLOCKSIZE 64
  45
  46 /* "no" / "invalid" / "unused" value for positions within the delta windows
  47  */
  48 #define NO_POSITION ((apr_uint32_t)-1)
  49
  50 /* Feed C_IN into the adler32 checksum and remove C_OUT at the same time.
  51  * This function may (and will) only be called for characters that are
  52  * MATCH_BLOCKSIZE positions apart.
  53  *
  54  * Please note that the lower 16 bits cannot overflow in neither direction.
  55  * Therefore, we don't need to split the value into separate values for
  56  * sum(char) and sum(sum(char)).
  57  */
  58 static APR_INLINE apr_uint32_t
  59 adler32_replace(apr_uint32_t adler32, const char c_out, const char c_in)
  60 {
  61   adler32 -= (MATCH_BLOCKSIZE * 0x10000u * ((unsigned char) c_out));
  62
  63   adler32 -= (unsigned char)c_out;
  64   adler32 += (unsigned char)c_in;
  65
  66   return adler32 + adler32 * 0x10000;
  67 }
  68
  69 /* Calculate an pseudo-adler32 checksum for MATCH_BLOCKSIZE bytes starting
  70    at DATA.  Return the checksum value.  */
  71
  72 static APR_INLINE apr_uint32_t
  73 init_adler32(const char *data)
  74 {
  75   const unsigned char *input = (const unsigned char *)data;
  76   const unsigned char *last = input + MATCH_BLOCKSIZE;
  77
  78   apr_uint32_t s1 = 0;
  79   apr_uint32_t s2 = 0;
  80
  81   for (; input < last; input += 8)
  82     {
  83       s1 += input[0]; s2 += s1;
  84       s1 += input[1]; s2 += s1;
  85       s1 += input[2]; s2 += s1;
  86       s1 += input[3]; s2 += s1;
  87       s1 += input[4]; s2 += s1;
  88       s1 += input[5]; s2 += s1;
  89       s1 += input[6]; s2 += s1;
  90       s1 += input[7]; s2 += s1;
  91     }
  92
  93   return s2 * 0x10000 + s1;
  94 }
  95
  96 /* Information for a block of the delta source.  The length of the
  97    block is the smaller of MATCH_BLOCKSIZE and the difference between
  98    the size of the source data and the position of this block. */
  99 struct block
 100 {
 101   apr_uint32_t adlersum;
 102
 103 /* Even in 64 bit systems, store only 32 bit offsets in our hash table
 104    (our delta window size much much smaller then 4GB).
 105    That reduces the hash table size by 50% from 32to 16KB
 106    and makes it easier to fit into the CPU's L1 cache. */
 107   apr_uint32_t pos;                     /* NO_POSITION -> block is not used */
 108 };
 109
 110 /* A hash table, using open addressing, of the blocks of the source. */
 111 struct blocks
 112 {
 113   /* The largest valid index of slots.
 114      This value has an upper bound proportionate to the text delta
 115      window size, so unless we dramatically increase the window size,
 116      it's safe to make this a 32-bit value.  In any case, it has to be
 117      hte same width as the block position index, (struct
 118      block).pos. */
 119   apr_uint32_t max;
 120   /* Source buffer that the positions in SLOTS refer to. */
 121   const char* data;
 122   /* The vector of blocks.  A pos value of NO_POSITION represents an unused
 123      slot. */
 124   struct block *slots;
 125 };
 126
 127
 128 /* Return a hash value calculated from the adler32 SUM, suitable for use with
 129    our hash table. */
 130 static apr_uint32_t hash_func(apr_uint32_t sum)
 131 {
 132   /* Since the adl32 checksum have a bad distribution for the 11th to 16th
 133      bits when used for our small block size, we add some bits from the
 134      other half of the checksum. */
 135   return sum ^ (sum >> 12);
 136 }
 137
 138 /* Insert a block with the checksum ADLERSUM at position POS in the source
 139    data into the table BLOCKS.  Ignore true duplicates, i.e. blocks with
 140    actually the same content. */
 141 static void
 142 add_block(struct blocks *blocks, apr_uint32_t adlersum, apr_uint32_t pos)
 143 {
 144   apr_uint32_t h = hash_func(adlersum) & blocks->max;
 145
 146   /* This will terminate, since we know that we will not fill the table. */
 147   for (; blocks->slots[h].pos != NO_POSITION; h = (h + 1) & blocks->max)
 148     if (blocks->slots[h].adlersum == adlersum)
 149       if (memcmp(blocks->data + blocks->slots[h].pos, blocks->data + pos,
 150                  MATCH_BLOCKSIZE) == 0)
 151         return;
 152
 153   blocks->slots[h].adlersum = adlersum;
 154   blocks->slots[h].pos = pos;
 155 }
 156
 157 /* Find a block in BLOCKS with the checksum ADLERSUM and matching the content
 158    at DATA, returning its position in the source data.  If there is no such
 159    block, return NO_POSITION. */
 160 static apr_uint32_t
 161 find_block(const struct blocks *blocks,
 162            apr_uint32_t adlersum,
 163            const char* data)
 164 {
 165   apr_uint32_t h = hash_func(adlersum) & blocks->max;
 166
 167   for (; blocks->slots[h].pos != NO_POSITION; h = (h + 1) & blocks->max)
 168     if (blocks->slots[h].adlersum == adlersum)
 169       if (memcmp(blocks->data + blocks->slots[h].pos, data,
 170                  MATCH_BLOCKSIZE) == 0)
 171         return blocks->slots[h].pos;
 172
 173   return NO_POSITION;
 174 }
 175
 176 /* Initialize the matches table from DATA of size DATALEN.  This goes
 177    through every block of MATCH_BLOCKSIZE bytes in the source and
 178    checksums it, inserting the result into the BLOCKS table.  */
 179 static void
 180 init_blocks_table(const char *data,
 181                   apr_size_t datalen,
 182                   struct blocks *blocks,
 183                   apr_pool_t *pool)
 184 {
 185   apr_size_t nblocks;
 186   apr_size_t wnslots = 1;
 187   apr_uint32_t nslots;
 188   apr_uint32_t i;
 189
 190   /* Be pessimistic about the block count. */
 191   nblocks = datalen / MATCH_BLOCKSIZE + 1;
 192   /* Find nearest larger power of two. */
 193   while (wnslots <= nblocks)
 194     wnslots *= 2;
 195   /* Double the number of slots to avoid a too high load. */
 196   wnslots *= 2;
 197   /* Narrow the number of slots to 32 bits, which is the size of the
 198      block position index in the hash table.
 199      Sanity check: On 64-bit platforms, apr_size_t is likely to be
 200      larger than apr_uint32_t. Make sure that the number of slots
 201      actually fits into blocks->max.  It's safe to use a hard assert
 202      here, because the largest possible value for nslots is
 203      proportional to the text delta window size and is therefore much
 204      smaller than the range of an apr_uint32_t.  If we ever happen to
 205      increase the window size too much, this assertion will get
 206      triggered by the test suite. */
 207   nslots = (apr_uint32_t) wnslots;
 208   SVN_ERR_ASSERT_NO_RETURN(wnslots == nslots);
 209   blocks->max = nslots - 1;
 210   blocks->data = data;
 211   blocks->slots = apr_palloc(pool, nslots * sizeof(*(blocks->slots)));
 212   for (i = 0; i < nslots; ++i)
 213     {
 214       /* Avoid using an indeterminate value in the lookup. */
 215       blocks->slots[i].adlersum = 0;
 216       blocks->slots[i].pos = NO_POSITION;
 217     }
 218
 219   /* If there is an odd block at the end of the buffer, we will
 220      not use that shorter block for deltification (only indirectly
 221      as an extension of some previous block). */
 222   for (i = 0; i + MATCH_BLOCKSIZE <= datalen; i += MATCH_BLOCKSIZE)
 223     add_block(blocks, init_adler32(data + i), i);
 224 }
 225
 226 /* Return the lowest position at which A and B differ. If no difference
 227  * can be found in the first MAX_LEN characters, MAX_LEN will be returned.
 228  */
 229 static apr_size_t
 230 match_length(const char *a, const char *b, apr_size_t max_len)
 231 {
 232   apr_size_t pos = 0;
 233
 234 #if SVN_UNALIGNED_ACCESS_IS_OK
 235
 236   /* Chunky processing is so much faster ...
 237    *
 238    * We can't make this work on architectures that require aligned access
 239    * because A and B will probably have different alignment. So, skipping
 240    * the first few chars until alignment is reached is not an option.
 241    */
 242   for (; pos + sizeof(apr_size_t) <= max_len; pos += sizeof(apr_size_t))
 243     if (*(const apr_size_t*)(a + pos) != *(const apr_size_t*)(b + pos))
 244       break;
 245
 246 #endif
 247
 248   for (; pos < max_len; ++pos)
 249     if (a[pos] != b[pos])
 250       break;
 251
 252   return pos;
 253 }
 254
 255 /* Return the number of bytes before A and B that don't differ.  If no
 256  * difference can be found in the first MAX_LEN characters,  MAX_LEN will
 257  * be returned.  Please note that A-MAX_LEN and B-MAX_LEN must both be
 258  * valid addresses.
 259  */
 260 static apr_size_t
 261 reverse_match_length(const char *a, const char *b, apr_size_t max_len)
 262 {
 263   apr_size_t pos = 0;
 264
 265 #if SVN_UNALIGNED_ACCESS_IS_OK
 266
 267   /* Chunky processing is so much faster ...
 268    *
 269    * We can't make this work on architectures that require aligned access
 270    * because A and B will probably have different alignment. So, skipping
 271    * the first few chars until alignment is reached is not an option.
 272    */
 273   for (pos = sizeof(apr_size_t); pos <= max_len; pos += sizeof(apr_size_t))
 274     if (*(const apr_size_t*)(a - pos) != *(const apr_size_t*)(b - pos))
 275       break;
 276
 277   pos -= sizeof(apr_size_t);
 278
 279 #endif
 280
 281   /* If we find a mismatch at -pos, pos-1 characters matched.
 282    */
 283   while (++pos <= max_len)
 284     if (a[0-pos] != b[0-pos])
 285       return pos - 1;
 286
 287   /* No mismatch found -> at least MAX_LEN matching chars.
 288    */
 289   return max_len;
 290 }
 291
 292
 293 /* Try to find a match for the target data B in BLOCKS, and then
 294    extend the match as long as data in A and B at the match position
 295    continues to match.  We set the position in A we ended up in (in
 296    case we extended it backwards) in APOSP and update the corresponding
 297    position within B given in BPOSP. PENDING_INSERT_START sets the
 298    lower limit to BPOSP.
 299    Return number of matching bytes starting at ASOP.  Return 0 if
 300    no match has been found.
 301  */
 302 static apr_size_t
 303 find_match(const struct blocks *blocks,
 304            const apr_uint32_t rolling,
 305            const char *a,
 306            apr_size_t asize,
 307            const char *b,
 308            apr_size_t bsize,
 309            apr_size_t *bposp,
 310            apr_size_t *aposp,
 311            apr_size_t pending_insert_start)
 312 {
 313   apr_size_t apos, bpos = *bposp;
 314   apr_size_t delta, max_delta;
 315
 316   apos = find_block(blocks, rolling, b + bpos);
 317
 318   /* See if we have a match.  */
 319   if (apos == NO_POSITION)
 320     return 0;
 321
 322   /* Extend the match forward as far as possible */
 323   max_delta = asize - apos - MATCH_BLOCKSIZE < bsize - bpos - MATCH_BLOCKSIZE
 324             ? asize - apos - MATCH_BLOCKSIZE
 325             : bsize - bpos - MATCH_BLOCKSIZE;
 326   delta = match_length(a + apos + MATCH_BLOCKSIZE,
 327                        b + bpos + MATCH_BLOCKSIZE,
 328                        max_delta);
 329
 330   /* See if we can extend backwards (max MATCH_BLOCKSIZE-1 steps because A's
 331      content has been sampled only every MATCH_BLOCKSIZE positions).  */
 332   while (apos > 0 && bpos > pending_insert_start && a[apos-1] == b[bpos-1])
 333     {
 334       --apos;
 335       --bpos;
 336       ++delta;
 337     }
 338
 339   *aposp = apos;
 340   *bposp = bpos;
 341
 342   return MATCH_BLOCKSIZE + delta;
 343 }
 344
 345 /* Utility for compute_delta() that compares the range B[START,BSIZE) with
 346  * the range of similar size before A[ASIZE]. Create corresponding copy and
 347  * insert operations.
 348  *
 349  * BUILD_BATON and POOL will be passed through from compute_delta().
 350  */
 351 static void
 352 store_delta_trailer(svn_txdelta__ops_baton_t *build_baton,
 353                     const char *a,
 354                     apr_size_t asize,
 355                     const char *b,
 356                     apr_size_t bsize,
 357                     apr_size_t start,
 358                     apr_pool_t *pool)
 359 {
 360   apr_size_t end_match;
 361   apr_size_t max_len = asize > (bsize - start) ? bsize - start : asize;
 362   if (max_len == 0)
 363     return;
 364
 365   end_match = reverse_match_length(a + asize, b + bsize, max_len);
 366   if (end_match <= 4)
 367     end_match = 0;
 368
 369   if (bsize - start > end_match)
 370     svn_txdelta__insert_op(build_baton, svn_txdelta_new,
 371                            start, bsize - start - end_match, b + start, pool);
 372   if (end_match)
 373     svn_txdelta__insert_op(build_baton, svn_txdelta_source,
 374                            asize - end_match, end_match, NULL, pool);
 375 }
 376
 377
 378 /* Compute a delta from A to B using xdelta.
 379
 380    The basic xdelta algorithm is as follows:
 381
 382    1. Go through the source data, checksumming every MATCH_BLOCKSIZE
 383       block of bytes using adler32, and inserting the checksum into a
 384       match table with the position of the match.
 385    2. Go through the target byte by byte, seeing if that byte starts a
 386       match that we have in the match table.
 387       2a. If so, try to extend the match as far as possible both
 388           forwards and backwards, and then insert a source copy
 389           operation into the delta ops builder for the match.
 390       2b. If not, insert the byte as new data using an insert delta op.
 391
 392    Our implementation doesn't immediately insert "insert" operations,
 393    it waits until we have another copy, or we are done.  The reasoning
 394    is twofold:
 395
 396    1. Otherwise, we would just be building a ton of 1 byte insert
 397       operations
 398    2. So that we can extend a source match backwards into a pending
 399      insert operation, and possibly remove the need for the insert
 400      entirely.  This can happen due to stream alignment.
 401 */
 402 static void
 403 compute_delta(svn_txdelta__ops_baton_t *build_baton,
 404               const char *a,
 405               apr_size_t asize,
 406               const char *b,
 407               apr_size_t bsize,
 408               apr_pool_t *pool)
 409 {
 410   struct blocks blocks;
 411   apr_uint32_t rolling;
 412   apr_size_t lo = 0, pending_insert_start = 0;
 413
 414   /* Optimization: directly compare window starts. If more than 4
 415    * bytes match, we can immediately create a matching windows.
 416    * Shorter sequences result in a net data increase. */
 417   lo = match_length(a, b, asize > bsize ? bsize : asize);
 418   if ((lo > 4) || (lo == bsize))
 419     {
 420       svn_txdelta__insert_op(build_baton, svn_txdelta_source,
 421                              0, lo, NULL, pool);
 422       pending_insert_start = lo;
 423     }
 424   else
 425     lo = 0;
 426
 427   /* If the size of the target is smaller than the match blocksize, just
 428      insert the entire target.  */
 429   if ((bsize - lo < MATCH_BLOCKSIZE) || (asize < MATCH_BLOCKSIZE))
 430     {
 431       store_delta_trailer(build_baton, a, asize, b, bsize, lo, pool);
 432       return;
 433     }
 434
 435   /* Initialize the matches table.  */
 436   init_blocks_table(a, asize, &blocks, pool);
 437
 438   /* Initialize our rolling checksum.  */
 439   rolling = init_adler32(b + lo);
 440   while (lo < bsize)
 441     {
 442       apr_size_t matchlen = 0;
 443       apr_size_t apos;
 444
 445       if (lo + MATCH_BLOCKSIZE <= bsize)
 446         matchlen = find_match(&blocks, rolling, a, asize, b, bsize,
 447                               &lo, &apos, pending_insert_start);
 448
 449       /* If we didn't find a real match, insert the byte at the target
 450          position into the pending insert.  */
 451       if (matchlen == 0)
 452         {
 453           /* move block one position forward. Short blocks at the end of
 454              the buffer cannot be used as the beginning of a new match */
 455           if (lo + MATCH_BLOCKSIZE < bsize)
 456             rolling = adler32_replace(rolling, b[lo], b[lo+MATCH_BLOCKSIZE]);
 457
 458           lo++;
 459         }
 460       else
 461         {
 462           /* store the sequence of B that is between the matches */
 463           if (lo - pending_insert_start > 0)
 464             svn_txdelta__insert_op(build_baton, svn_txdelta_new,
 465                                    0, lo - pending_insert_start,
 466                                    b + pending_insert_start, pool);
 467           else
 468             {
 469               /* the match borders on the previous op. Maybe, we found a
 470                * match that is better than / overlapping the previous one. */
 471               apr_size_t len = reverse_match_length(a + apos, b + lo, apos < lo ? apos : lo);
 472               if (len > 0)
 473                 {
 474                   len = svn_txdelta__remove_copy(build_baton, len);
 475                   apos -= len;
 476                   matchlen += len;
 477                   lo -= len;
 478                 }
 479             }
 480
 481           /* Reset the pending insert start to immediately after the
 482              match. */
 483           lo += matchlen;
 484           pending_insert_start = lo;
 485           svn_txdelta__insert_op(build_baton, svn_txdelta_source,
 486                                  apos, matchlen, NULL, pool);
 487
 488           /* Calculate the Adler32 sum for the first block behind the match.
 489            * Ignore short buffers at the end of B.
 490            */
 491           if (lo + MATCH_BLOCKSIZE <= bsize)
 492             rolling = init_adler32(b + lo);
 493         }
 494     }
 495
 496   /* If we still have an insert pending at the end, throw it in.  */
 497   store_delta_trailer(build_baton, a, asize, b, bsize, pending_insert_start, pool);
 498 }
 499
 500 void
 501 svn_txdelta__xdelta(svn_txdelta__ops_baton_t *build_baton,
 502                     const char *data,
 503                     apr_size_t source_len,
 504                     apr_size_t target_len,
 505                     apr_pool_t *pool)
 506 {
 507   /*  We should never be asked to compute something when the source_len is 0;
 508       we just use a single insert op there (and rely on zlib for
 509       compression). */
 510   assert(source_len != 0);
 511   compute_delta(build_baton, data, source_len,
 512                 data + source_len, target_len,
 513                 pool);
 514 }