/*
 * xdelta.c:  xdelta generator.
 *
 * ====================================================================
 *    Licensed to the Apache Software Foundation (ASF) under one
 *    or more contributor license agreements.  See the NOTICE file
 *    distributed with this work for additional information
 *    regarding copyright ownership.  The ASF licenses this file
 *    to you under the Apache License, Version 2.0 (the
 *    "License"); you may not use this file except in compliance
 *    with the License.  You may obtain a copy of the License at
 *
 *      http://www.apache.org/licenses/LICENSE-2.0
 *
 *    Unless required by applicable law or agreed to in writing,
 *    software distributed under the License is distributed on an
 *    "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
 *    KIND, either express or implied.  See the License for the
 *    specific language governing permissions and limitations
 *    under the License.
 * ====================================================================
 */
27 #include <apr_general.h> /* for APR_INLINE */
31 #include "svn_delta.h"
32 #include "private/svn_string_private.h"
35 /* This is pseudo-adler32. It is adler32 without the prime modulus.
36 The idea is borrowed from monotone, and is a translation of the C++
37 code. Graydon Hoare, the author of the original code, gave his
38 explicit permission to use it under these terms at 8:02pm on
39 Friday, February 11th, 2005. */
/* Size of the blocks we compute checksums for.  This was chosen out of
   thin air.  Monotone used 64, xdelta1 used 64, rsync uses 128.
   However, later optimizations assume it to be 256 or less. */
45 #define MATCH_BLOCKSIZE 64
/* Size of the checksum presence FLAGS array in BLOCKS_T.  With standard
   MATCH_BLOCKSIZE and SVN_DELTA_WINDOW_SIZE, 32k entries is about 20x
   the number of checksums that actually occur, i.e. we expect a >95%
   probability that non-matching checksums get already detected by checking
   against the FLAGS array. */
54 #define FLAGS_COUNT (32 * 1024)
/* "no" / "invalid" / "unused" value for positions within the delta windows. */
58 #define NO_POSITION ((apr_uint32_t)-1)
60 /* Feed C_IN into the adler32 checksum and remove C_OUT at the same time.
61 * This function may (and will) only be called for characters that are
62 * MATCH_BLOCKSIZE positions apart.
64 * Please note that the lower 16 bits cannot overflow in neither direction.
65 * Therefore, we don't need to split the value into separate values for
66 * sum(char) and sum(sum(char)).
68 static APR_INLINE apr_uint32_t
69 adler32_replace(apr_uint32_t adler32, const char c_out, const char c_in)
71 adler32 -= (MATCH_BLOCKSIZE * 0x10000u * ((unsigned char) c_out));
73 adler32 -= (unsigned char)c_out;
74 adler32 += (unsigned char)c_in;
76 return adler32 + adler32 * 0x10000;
79 /* Calculate an pseudo-adler32 checksum for MATCH_BLOCKSIZE bytes starting
80 at DATA. Return the checksum value. */
82 static APR_INLINE apr_uint32_t
83 init_adler32(const char *data)
85 const unsigned char *input = (const unsigned char *)data;
86 const unsigned char *last = input + MATCH_BLOCKSIZE;
91 for (; input < last; input += 8)
93 s1 += input[0]; s2 += s1;
94 s1 += input[1]; s2 += s1;
95 s1 += input[2]; s2 += s1;
96 s1 += input[3]; s2 += s1;
97 s1 += input[4]; s2 += s1;
98 s1 += input[5]; s2 += s1;
99 s1 += input[6]; s2 += s1;
100 s1 += input[7]; s2 += s1;
103 return s2 * 0x10000 + s1;
106 /* Information for a block of the delta source. The length of the
107 block is the smaller of MATCH_BLOCKSIZE and the difference between
108 the size of the source data and the position of this block. */
111 apr_uint32_t adlersum;
113 /* Even in 64 bit systems, store only 32 bit offsets in our hash table
114 (our delta window size much much smaller then 4GB).
115 That reduces the hash table size by 50% from 32to 16KB
116 and makes it easier to fit into the CPU's L1 cache. */
117 apr_uint32_t pos; /* NO_POSITION -> block is not used */
120 /* A hash table, using open addressing, of the blocks of the source. */
123 /* The largest valid index of slots.
124 This value has an upper bound proportionate to the text delta
125 window size, so unless we dramatically increase the window size,
126 it's safe to make this a 32-bit value. In any case, it has to be
127 hte same width as the block position index, (struct
131 /* Source buffer that the positions in SLOTS refer to. */
134 /* Bit array indicating whether there may be a matching slot for a given
135 adler32 checksum. Since FLAGS has much more entries than SLOTS, this
136 will indicate most cases of non-matching checksums with a "0" bit, i.e.
137 as "known not to have a match".
138 The mapping of adler32 checksum bits is [0..2][16..27] (LSB -> MSB),
139 i.e. address the byte by the multiplicative part of adler32 and address
140 the bits in that byte by the additive part of adler32. */
141 char flags[FLAGS_COUNT / 8];
143 /* The vector of blocks. A pos value of NO_POSITION represents an unused
149 /* Return a hash value calculated from the adler32 SUM, suitable for use with
151 static apr_uint32_t hash_func(apr_uint32_t sum)
153 /* Since the adl32 checksum have a bad distribution for the 11th to 16th
154 bits when used for our small block size, we add some bits from the
155 other half of the checksum. */
156 return sum ^ (sum >> 12);
159 /* Return the offset in BLOCKS.FLAGS for the adler32 SUM. */
160 static apr_uint32_t hash_flags(apr_uint32_t sum)
162 /* The upper half of SUM has a wider value range than the lower 16 bit.
163 Also, we want to a different folding than HASH_FUNC to minimize
164 correlation between different hash levels. */
165 return (sum >> 16) & ((FLAGS_COUNT / 8) - 1);
168 /* Insert a block with the checksum ADLERSUM at position POS in the source
169 data into the table BLOCKS. Ignore true duplicates, i.e. blocks with
170 actually the same content. */
172 add_block(struct blocks *blocks, apr_uint32_t adlersum, apr_uint32_t pos)
174 apr_uint32_t h = hash_func(adlersum) & blocks->max;
176 /* This will terminate, since we know that we will not fill the table. */
177 for (; blocks->slots[h].pos != NO_POSITION; h = (h + 1) & blocks->max)
178 if (blocks->slots[h].adlersum == adlersum)
179 if (memcmp(blocks->data + blocks->slots[h].pos, blocks->data + pos,
180 MATCH_BLOCKSIZE) == 0)
183 blocks->slots[h].adlersum = adlersum;
184 blocks->slots[h].pos = pos;
185 blocks->flags[hash_flags(adlersum)] |= 1 << (adlersum & 7);
188 /* Find a block in BLOCKS with the checksum ADLERSUM and matching the content
189 at DATA, returning its position in the source data. If there is no such
190 block, return NO_POSITION. */
192 find_block(const struct blocks *blocks,
193 apr_uint32_t adlersum,
196 apr_uint32_t h = hash_func(adlersum) & blocks->max;
198 for (; blocks->slots[h].pos != NO_POSITION; h = (h + 1) & blocks->max)
199 if (blocks->slots[h].adlersum == adlersum)
200 if (memcmp(blocks->data + blocks->slots[h].pos, data,
201 MATCH_BLOCKSIZE) == 0)
202 return blocks->slots[h].pos;
207 /* Initialize the matches table from DATA of size DATALEN. This goes
208 through every block of MATCH_BLOCKSIZE bytes in the source and
209 checksums it, inserting the result into the BLOCKS table. */
211 init_blocks_table(const char *data,
213 struct blocks *blocks,
217 apr_size_t wnslots = 1;
221 /* Be pessimistic about the block count. */
222 nblocks = datalen / MATCH_BLOCKSIZE + 1;
223 /* Find nearest larger power of two. */
224 while (wnslots <= nblocks)
226 /* Double the number of slots to avoid a too high load. */
228 /* Narrow the number of slots to 32 bits, which is the size of the
229 block position index in the hash table.
230 Sanity check: On 64-bit platforms, apr_size_t is likely to be
231 larger than apr_uint32_t. Make sure that the number of slots
232 actually fits into blocks->max. It's safe to use a hard assert
233 here, because the largest possible value for nslots is
234 proportional to the text delta window size and is therefore much
235 smaller than the range of an apr_uint32_t. If we ever happen to
236 increase the window size too much, this assertion will get
237 triggered by the test suite. */
238 nslots = (apr_uint32_t) wnslots;
239 SVN_ERR_ASSERT_NO_RETURN(wnslots == nslots);
240 blocks->max = nslots - 1;
242 blocks->slots = apr_palloc(pool, nslots * sizeof(*(blocks->slots)));
243 for (i = 0; i < nslots; ++i)
245 /* Avoid using an indeterminate value in the lookup. */
246 blocks->slots[i].adlersum = 0;
247 blocks->slots[i].pos = NO_POSITION;
250 /* No checksum entries in SLOTS, yet => reset all checksum flags. */
251 memset(blocks->flags, 0, sizeof(blocks->flags));
253 /* If there is an odd block at the end of the buffer, we will
254 not use that shorter block for deltification (only indirectly
255 as an extension of some previous block). */
256 for (i = 0; i + MATCH_BLOCKSIZE <= datalen; i += MATCH_BLOCKSIZE)
257 add_block(blocks, init_adler32(data + i), i);
260 /* Try to find a match for the target data B in BLOCKS, and then
261 extend the match as long as data in A and B at the match position
262 continues to match. We set the position in A we ended up in (in
263 case we extended it backwards) in APOSP and update the corresponding
264 position within B given in BPOSP. PENDING_INSERT_START sets the
265 lower limit to BPOSP.
266 Return number of matching bytes starting at ASOP. Return 0 if
267 no match has been found.
270 find_match(const struct blocks *blocks,
271 const apr_uint32_t rolling,
278 apr_size_t pending_insert_start)
280 apr_size_t apos, bpos = *bposp;
281 apr_size_t delta, max_delta;
283 apos = find_block(blocks, rolling, b + bpos);
285 /* See if we have a match. */
286 if (apos == NO_POSITION)
289 /* Extend the match forward as far as possible */
290 max_delta = asize - apos - MATCH_BLOCKSIZE < bsize - bpos - MATCH_BLOCKSIZE
291 ? asize - apos - MATCH_BLOCKSIZE
292 : bsize - bpos - MATCH_BLOCKSIZE;
293 delta = svn_cstring__match_length(a + apos + MATCH_BLOCKSIZE,
294 b + bpos + MATCH_BLOCKSIZE,
297 /* See if we can extend backwards (max MATCH_BLOCKSIZE-1 steps because A's
298 content has been sampled only every MATCH_BLOCKSIZE positions). */
299 while (apos > 0 && bpos > pending_insert_start && a[apos-1] == b[bpos-1])
309 return MATCH_BLOCKSIZE + delta;
312 /* Utility for compute_delta() that compares the range B[START,BSIZE) with
313 * the range of similar size before A[ASIZE]. Create corresponding copy and
316 * BUILD_BATON and POOL will be passed through from compute_delta().
319 store_delta_trailer(svn_txdelta__ops_baton_t *build_baton,
327 apr_size_t end_match;
328 apr_size_t max_len = asize > (bsize - start) ? bsize - start : asize;
332 end_match = svn_cstring__reverse_match_length(a + asize, b + bsize,
337 if (bsize - start > end_match)
338 svn_txdelta__insert_op(build_baton, svn_txdelta_new,
339 start, bsize - start - end_match, b + start, pool);
341 svn_txdelta__insert_op(build_baton, svn_txdelta_source,
342 asize - end_match, end_match, NULL, pool);
346 /* Compute a delta from A to B using xdelta.
348 The basic xdelta algorithm is as follows:
350 1. Go through the source data, checksumming every MATCH_BLOCKSIZE
351 block of bytes using adler32, and inserting the checksum into a
352 match table with the position of the match.
353 2. Go through the target byte by byte, seeing if that byte starts a
354 match that we have in the match table.
355 2a. If so, try to extend the match as far as possible both
356 forwards and backwards, and then insert a source copy
357 operation into the delta ops builder for the match.
358 2b. If not, insert the byte as new data using an insert delta op.
360 Our implementation doesn't immediately insert "insert" operations,
361 it waits until we have another copy, or we are done. The reasoning
364 1. Otherwise, we would just be building a ton of 1 byte insert
366 2. So that we can extend a source match backwards into a pending
367 insert operation, and possibly remove the need for the insert
368 entirely. This can happen due to stream alignment.
371 compute_delta(svn_txdelta__ops_baton_t *build_baton,
378 struct blocks blocks;
379 apr_uint32_t rolling;
380 apr_size_t lo = 0, pending_insert_start = 0, upper;
382 /* Optimization: directly compare window starts. If more than 4
383 * bytes match, we can immediately create a matching windows.
384 * Shorter sequences result in a net data increase. */
385 lo = svn_cstring__match_length(a, b, asize > bsize ? bsize : asize);
386 if ((lo > 4) || (lo == bsize))
388 svn_txdelta__insert_op(build_baton, svn_txdelta_source,
390 pending_insert_start = lo;
395 /* If the size of the target is smaller than the match blocksize, just
396 insert the entire target. */
397 if ((bsize - lo < MATCH_BLOCKSIZE) || (asize < MATCH_BLOCKSIZE))
399 store_delta_trailer(build_baton, a, asize, b, bsize, lo, pool);
403 upper = bsize - MATCH_BLOCKSIZE; /* this is now known to be >= LO */
405 /* Initialize the matches table. */
406 init_blocks_table(a, asize, &blocks, pool);
408 /* Initialize our rolling checksum. */
409 rolling = init_adler32(b + lo);
415 /* Quickly skip positions whose respective ROLLING checksums
416 definitely do not match any SLOT in BLOCKS. */
417 while (!(blocks.flags[hash_flags(rolling)] & (1 << (rolling & 7)))
420 rolling = adler32_replace(rolling, b[lo], b[lo+MATCH_BLOCKSIZE]);
424 /* LO is still <= UPPER, i.e. the following lookup is legal:
425 Closely check whether we've got a match for the current location.
426 Due to the above pre-filter, chances are that we find one. */
427 matchlen = find_match(&blocks, rolling, a, asize, b, bsize,
428 &lo, &apos, pending_insert_start);
430 /* If we didn't find a real match, insert the byte at the target
431 position into the pending insert. */
434 /* move block one position forward. Short blocks at the end of
435 the buffer cannot be used as the beginning of a new match */
436 if (lo + MATCH_BLOCKSIZE < bsize)
437 rolling = adler32_replace(rolling, b[lo], b[lo+MATCH_BLOCKSIZE]);
443 /* store the sequence of B that is between the matches */
444 if (lo - pending_insert_start > 0)
445 svn_txdelta__insert_op(build_baton, svn_txdelta_new,
446 0, lo - pending_insert_start,
447 b + pending_insert_start, pool);
450 /* the match borders on the previous op. Maybe, we found a
451 * match that is better than / overlapping the previous one. */
452 apr_size_t len = svn_cstring__reverse_match_length
453 (a + apos, b + lo, apos < lo ? apos : lo);
456 len = svn_txdelta__remove_copy(build_baton, len);
463 /* Reset the pending insert start to immediately after the
466 pending_insert_start = lo;
467 svn_txdelta__insert_op(build_baton, svn_txdelta_source,
468 apos, matchlen, NULL, pool);
470 /* Calculate the Adler32 sum for the first block behind the match.
471 * Ignore short buffers at the end of B.
473 if (lo + MATCH_BLOCKSIZE <= bsize)
474 rolling = init_adler32(b + lo);
478 /* If we still have an insert pending at the end, throw it in. */
479 store_delta_trailer(build_baton, a, asize, b, bsize, pending_insert_start, pool);
483 svn_txdelta__xdelta(svn_txdelta__ops_baton_t *build_baton,
485 apr_size_t source_len,
486 apr_size_t target_len,
489 /* We should never be asked to compute something when the source_len is 0;
490 we just use a single insert op there (and rely on zlib for
492 assert(source_len != 0);
493 compute_delta(build_baton, data, source_len,
494 data + source_len, target_len,