2 * compose_delta.c: Delta window composition.
4 * ====================================================================
5 * Licensed to the Apache Software Foundation (ASF) under one
6 * or more contributor license agreements. See the NOTICE file
7 * distributed with this work for additional information
8 * regarding copyright ownership. The ASF licenses this file
9 * to you under the Apache License, Version 2.0 (the
10 * "License"); you may not use this file except in compliance
11 * with the License. You may obtain a copy of the License at
13 * http://www.apache.org/licenses/LICENSE-2.0
15 * Unless required by applicable law or agreed to in writing,
16 * software distributed under the License is distributed on an
17 * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
18 * KIND, either express or implied. See the License for the
19 * specific language governing permissions and limitations
21 * ====================================================================
27 #include <apr_general.h> /* For APR_INLINE */
29 #include "svn_delta.h"
30 #include "svn_pools.h"
33 /* Define a MIN macro if this platform doesn't already have one. */
35 #define MIN(a, b) ((a) < (b) ? (a) : (b))
39 /* ==================================================================== */
40 /* Support for efficient small-block allocation from pools. */
42 /* The following structs will be allocated and freed often: */
44 /* A node in the range index tree. */
45 typedef struct range_index_node_t range_index_node_t;
46 struct range_index_node_t
48 /* 'offset' and 'limit' define the range in the source window. */
52 /* 'target_offset' is where that range is represented in the target. */
53 apr_size_t target_offset;
55 /* 'left' and 'right' link the node into a splay tree. */
56 range_index_node_t *left, *right;
58 /* 'prev' and 'next' link it into an ordered, doubly-linked list. */
59 range_index_node_t *prev, *next;
62 /* A node in a list of ranges for source and target op copies. */
69 typedef struct range_list_node_t range_list_node_t;
70 struct range_list_node_t
72 /* Where does the range come from?
73 'offset' and 'limit' always refer to the "virtual" source data
74 for the second delta window. For a target range, the actual
75 offset to use for generating the target op is 'target_offset';
76 that field isn't used by source ranges. */
79 /* 'offset' and 'limit' define the range. */
83 /* 'target_offset' is the start of the range in the target. */
84 apr_size_t target_offset;
86 /* 'prev' and 'next' link the node into an ordered, doubly-linked list. */
87 range_list_node_t *prev, *next;
91 /* This is what will be allocated: */
92 typedef union alloc_block_t alloc_block_t;
95 range_index_node_t index_node;
96 range_list_node_t list_node;
98 /* Links free blocks into a freelist. */
99 alloc_block_t *next_free;
103 /* Allocate a block. */
104 static APR_INLINE void *
105 alloc_block(apr_pool_t *pool, alloc_block_t **free_list)
107 alloc_block_t *block;
108 if (*free_list == NULL)
109 block = apr_palloc(pool, sizeof(*block));
113 *free_list = block->next_free;
118 /* Return the block back to the free list. */
119 static APR_INLINE void
120 free_block(void *ptr, alloc_block_t **free_list)
122 /* Wrapper functions take care of type safety. */
123 alloc_block_t *const block = ptr;
124 block->next_free = *free_list;
130 /* ==================================================================== */
131 /* Mapping offsets in the target streem to txdelta ops. */
133 typedef struct offset_index_t
139 /* Create an index mapping target stream offsets to delta ops in
140 WINDOW. Allocate from POOL. */
142 static offset_index_t *
143 create_offset_index(const svn_txdelta_window_t *window, apr_pool_t *pool)
145 offset_index_t *ndx = apr_palloc(pool, sizeof(*ndx));
146 apr_size_t offset = 0;
149 ndx->length = window->num_ops;
150 ndx->offs = apr_palloc(pool, (ndx->length + 1) * sizeof(*ndx->offs));
152 for (i = 0; i < ndx->length; ++i)
154 ndx->offs[i] = offset;
155 offset += window->ops[i].length;
157 ndx->offs[ndx->length] = offset;
162 /* Find the index of the delta op thet defines that data at OFFSET in
163 NDX. HINT is an arbitrary positin within NDX and doesn't even need
164 to be valid. To effectively speed up the search, use the last result
165 as hint because most lookups come as a sequence of decreasing values
166 for OFFSET and they concentrate on the lower end of the array. */
169 search_offset_index(const offset_index_t *ndx,
173 apr_size_t lo, hi, op;
175 assert(offset < ndx->offs[ndx->length]);
180 /* If we got a valid hint, use it to reduce the range to cover.
181 Note that this will only be useful if either the hint is a
182 hit (i.e. equals the desired result) or narrows the range
183 length by a factor larger than 2. */
187 if (offset < ndx->offs[hint])
189 else if (offset < ndx->offs[hint+1])
195 /* ordinary binary search */
197 for (op = (lo + hi)/2; lo != hi; op = (lo + hi)/2)
199 if (offset < ndx->offs[op])
206 assert(ndx->offs[lo] <= offset && offset < ndx->offs[lo + 1]);
212 /* ==================================================================== */
213 /* Mapping ranges in the source stream to ranges in the composed delta. */
215 /* The range index tree. */
216 typedef struct range_index_t
218 range_index_node_t *tree;
219 alloc_block_t *free_list;
223 /* Create a range index tree. Allocate from POOL. */
224 static range_index_t *
225 create_range_index(apr_pool_t *pool)
227 range_index_t *ndx = apr_palloc(pool, sizeof(*ndx));
230 ndx->free_list = NULL;
234 /* Allocate a node for the range index tree. */
235 static range_index_node_t *
236 alloc_range_index_node(range_index_t *ndx,
239 apr_size_t target_offset)
241 range_index_node_t *const node = alloc_block(ndx->pool, &ndx->free_list);
242 node->offset = offset;
244 node->target_offset = target_offset;
245 node->left = node->right = NULL;
246 node->prev = node->next = NULL;
250 /* Free a node from the range index tree. */
252 free_range_index_node(range_index_t *ndx, range_index_node_t *node)
255 node->next->prev = node->prev;
257 node->prev->next = node->next;
258 free_block(node, &ndx->free_list);
262 /* Splay the index tree, using OFFSET as the key. */
265 splay_range_index(apr_size_t offset, range_index_t *ndx)
267 range_index_node_t *tree = ndx->tree;
268 range_index_node_t scratch_node;
269 range_index_node_t *left, *right;
274 scratch_node.left = scratch_node.right = NULL;
275 left = right = &scratch_node;
279 if (offset < tree->offset)
281 if (tree->left != NULL
282 && offset < tree->left->offset)
285 range_index_node_t *const node = tree->left;
286 tree->left = node->right;
290 if (tree->left == NULL)
293 /* Remember the right subtree */
298 else if (offset > tree->offset)
300 if (tree->right != NULL
301 && offset > tree->right->offset)
304 range_index_node_t *const node = tree->right;
305 tree->right = node->left;
309 if (tree->right == NULL)
312 /* Remember the left subtree */
321 /* Link in the left and right subtrees */
322 left->right = tree->left;
323 right->left = tree->right;
324 tree->left = scratch_node.right;
325 tree->right = scratch_node.left;
327 /* The basic top-down splay is finished, but we may still need to
328 turn the tree around. What we want is to put the node with the
329 largest offset where node->offset <= offset at the top of the
330 tree, so that we can insert the new data (or search for existing
331 ranges) to the right of the root. This makes cleaning up the
332 tree after an insert much simpler, and -- incidentally -- makes
333 the whole range index magic work. */
334 if (offset < tree->offset && tree->left != NULL)
336 if (tree->left->right == NULL)
338 /* A single right rotation is enough. */
339 range_index_node_t *const node = tree->left;
340 tree->left = node->right; /* Which is always NULL. */
346 /* Slide down to the rightmost node in the left subtree. */
347 range_index_node_t **nodep = &tree->left;
348 while ((*nodep)->right != NULL)
349 nodep = &(*nodep)->right;
351 /* Now move this node to root in one giant promotion. */
356 right->left = tree->right; /* Which is always NULL, too. */
362 /* Sanity check ... */
363 assert((offset >= tree->offset)
364 || ((tree->left == NULL)
365 && (tree->prev == NULL)));
370 /* Remove all ranges from NDX that fall into the root's range. To
371 keep the range index as small as possible, we must also remove
372 nodes that don't fall into the new range, but have become redundant
373 because the new range overlaps the beginning of the next range.
376 new-range: |-----------------|
377 range-1: |-----------------|
378 range-2: |--------------------|
380 Before new-range was inserted, range-1 and range-2 were both
381 necessary. Now the union of new-range and range-2 completely covers
382 range-1, which has become redundant now.
384 FIXME: But, of course, there's a catch. range-1 must still remain
385 in the tree if we want to optimize the number of target copy ops in
386 the case were a copy falls within range-1, but starts before
387 range-2 and ends after new-range. */
390 delete_subtree(range_index_t *ndx, range_index_node_t *node)
394 delete_subtree(ndx, node->left);
395 delete_subtree(ndx, node->right);
396 free_range_index_node(ndx, node);
401 clean_tree(range_index_t *ndx, apr_size_t limit)
403 apr_size_t top_offset = limit + 1;
404 range_index_node_t **nodep = &ndx->tree->right;
405 while (*nodep != NULL)
407 range_index_node_t *const node = *nodep;
408 apr_size_t const offset =
409 (node->right != NULL && node->right->offset < top_offset
410 ? node->right->offset
413 if (node->limit <= limit
414 || (node->offset < limit && offset < limit))
416 *nodep = node->right;
418 delete_subtree(ndx, node);
422 top_offset = node->offset;
429 /* Add a range [OFFSET, LIMIT) into NDX. If NDX already contains a
430 range that encloses [OFFSET, LIMIT), do nothing. Otherwise, remove
431 all ranges from NDX that are superseded by the new range.
432 NOTE: The range index must be splayed to OFFSET! */
435 insert_range(apr_size_t offset, apr_size_t limit, apr_size_t target_offset,
438 range_index_node_t *node = NULL;
440 if (ndx->tree == NULL)
442 node = alloc_range_index_node(ndx, offset, limit, target_offset);
447 if (offset == ndx->tree->offset
448 && limit > ndx->tree->limit)
450 ndx->tree->limit = limit;
451 ndx->tree->target_offset = target_offset;
452 clean_tree(ndx, limit);
454 else if (offset > ndx->tree->offset
455 && limit > ndx->tree->limit)
457 /* We have to make the same sort of checks as clean_tree()
458 does for superseded ranges. Have to merge these someday. */
460 const svn_boolean_t insert_range_p =
462 || ndx->tree->limit < ndx->tree->next->offset
463 || limit > ndx->tree->next->limit);
467 /* Again, we have to check if the new node and the one
468 to the left of the root override root's range. */
469 if (ndx->tree->prev && ndx->tree->prev->limit > offset)
471 /* Replace the data in the splayed node. */
472 ndx->tree->offset = offset;
473 ndx->tree->limit = limit;
474 ndx->tree->target_offset = target_offset;
478 /* Insert the range to the right of the splayed node. */
479 node = alloc_range_index_node(ndx, offset, limit,
481 if ((node->next = ndx->tree->next) != NULL)
482 node->next->prev = node;
483 ndx->tree->next = node;
484 node->prev = ndx->tree;
486 node->right = ndx->tree->right;
487 ndx->tree->right = NULL;
488 node->left = ndx->tree;
491 clean_tree(ndx, limit);
494 /* Ignore the range */;
496 else if (offset < ndx->tree->offset)
498 assert(ndx->tree->left == NULL);
500 /* Insert the range left of the splayed node */
501 node = alloc_range_index_node(ndx, offset, limit, target_offset);
502 node->left = node->prev = NULL;
503 node->right = node->next = ndx->tree;
504 ndx->tree = node->next->prev = node;
505 clean_tree(ndx, limit);
508 /* Ignore the range */;
514 /* ==================================================================== */
515 /* Juggling with lists of ranges. */
517 /* Allocate a node and add it to the range list. LIST is the head of
518 the range list, TAIL is the last node in the list. NDX holds the
519 freelist; OFFSET, LIMIT and KIND are node data. */
520 static range_list_node_t *
521 alloc_range_list(range_list_node_t **list,
522 range_list_node_t **tail,
524 enum range_kind kind,
527 apr_size_t target_offset)
529 range_list_node_t *const node = alloc_block(ndx->pool, &ndx->free_list);
531 node->offset = offset;
533 node->target_offset = target_offset;
536 node->prev = node->next = NULL;
537 *list = *tail = node;
543 (*tail)->next = node;
549 /* Free a range list. LIST is the head of the list, NDX holds the freelist. */
551 free_range_list(range_list_node_t *list, range_index_t *ndx)
555 range_list_node_t *const node = list;
557 free_block(node, &ndx->free_list);
562 /* Based on the data in NDX, build a list of ranges that cover
563 [OFFSET, LIMIT) in the "virtual" source data.
564 NOTE: The range index must be splayed to OFFSET! */
566 static range_list_node_t *
567 build_range_list(apr_size_t offset, apr_size_t limit, range_index_t *ndx)
569 range_list_node_t *range_list = NULL;
570 range_list_node_t *last_range = NULL;
571 range_index_node_t *node = ndx->tree;
573 while (offset < limit)
576 return alloc_range_list(&range_list, &last_range, ndx,
580 if (offset < node->offset)
582 if (limit <= node->offset)
583 return alloc_range_list(&range_list, &last_range, ndx,
588 alloc_range_list(&range_list, &last_range, ndx,
590 offset, node->offset, 0);
591 offset = node->offset;
596 /* TODO: (Potential optimization) Investigate if it would
597 make sense to forbid short range_from_target lengths
598 (this comment originally said "shorter than, say,
599 VD_KEY_SIZE (see vdelta.c)", but Subversion no longer
602 if (offset >= node->limit)
606 const apr_size_t target_offset =
607 offset - node->offset + node->target_offset;
609 if (limit <= node->limit)
610 return alloc_range_list(&range_list, &last_range, ndx,
612 offset, limit, target_offset);
615 alloc_range_list(&range_list, &last_range, ndx,
617 offset, node->limit, target_offset);
618 offset = node->limit;
625 /* A range's offset isn't smaller than its limit? Impossible! */
626 SVN_ERR_MALFUNCTION_NO_RETURN();
630 /* Copy the instructions from WINDOW that define the range [OFFSET,
631 LIMIT) in WINDOW's target stream to TARGET_OFFSET in the window
632 represented by BUILD_BATON. HINT is a position in the instructions
633 array that helps finding the position for OFFSET. A safe default
634 is 0. Use NDX to find the instructions in WINDOW. Allocate space
635 in BUILD_BATON from POOL. */
638 copy_source_ops(apr_size_t offset, apr_size_t limit,
639 apr_size_t target_offset,
641 svn_txdelta__ops_baton_t *build_baton,
642 const svn_txdelta_window_t *window,
643 const offset_index_t *ndx,
646 apr_size_t op_ndx = search_offset_index(ndx, offset, hint);
649 const svn_txdelta_op_t *const op = &window->ops[op_ndx];
650 const apr_size_t *const off = &ndx->offs[op_ndx];
651 apr_size_t fix_offset;
652 apr_size_t fix_limit;
657 fix_offset = (offset > off[0] ? offset - off[0] : 0);
658 fix_limit = (off[1] > limit ? off[1] - limit : 0);
660 /* It would be extremely weird if the fixed-up op had zero length. */
661 assert(fix_offset + fix_limit < op->length);
663 if (op->action_code != svn_txdelta_target)
665 /* Delta ops that don't depend on the virtual target can be
666 copied to the composite unchanged. */
667 const char *const new_data = (op->action_code == svn_txdelta_new
668 ? (window->new_data->data
669 + op->offset + fix_offset)
672 svn_txdelta__insert_op(build_baton, op->action_code,
673 op->offset + fix_offset,
674 op->length - fix_offset - fix_limit,
679 /* The source of a target copy must start before the current
680 offset in the (virtual) target stream. */
681 assert(op->offset < off[0]);
683 if (op->offset + op->length - fix_limit <= off[0])
685 /* The recursion _must_ end, otherwise the delta has
686 circular references, and that is not possible. */
687 copy_source_ops(op->offset + fix_offset,
688 op->offset + op->length - fix_limit,
691 build_baton, window, ndx, pool);
695 /* This is an overlapping target copy.
696 The idea here is to transpose the pattern, then generate
697 another overlapping copy. */
698 const apr_size_t ptn_length = off[0] - op->offset;
699 const apr_size_t ptn_overlap = fix_offset % ptn_length;
700 apr_size_t fix_off = fix_offset;
701 apr_size_t tgt_off = target_offset;
702 assert(ptn_length > ptn_overlap);
704 /* ### FIXME: ptn_overlap is unsigned, so the if() condition
705 below is always true! Either it should be '> 0', or the
706 code block should be unconditional. See also r842362. */
707 if (ptn_overlap >= 0)
709 /* Issue second subrange in the pattern. */
710 const apr_size_t length =
711 MIN(op->length - fix_off - fix_limit,
712 ptn_length - ptn_overlap);
713 copy_source_ops(op->offset + ptn_overlap,
714 op->offset + ptn_overlap + length,
717 build_baton, window, ndx, pool);
722 assert(fix_off + fix_limit <= op->length);
724 && fix_off + fix_limit < op->length)
726 /* Issue the first subrange in the pattern. */
727 const apr_size_t length =
728 MIN(op->length - fix_off - fix_limit, ptn_overlap);
729 copy_source_ops(op->offset,
733 build_baton, window, ndx, pool);
738 assert(fix_off + fix_limit <= op->length);
739 if (fix_off + fix_limit < op->length)
741 /* Now multiply the pattern */
742 svn_txdelta__insert_op(build_baton, svn_txdelta_target,
743 tgt_off - ptn_length,
744 op->length - fix_off - fix_limit,
750 /* Adjust the target offset for the next op in the list. */
751 target_offset += op->length - fix_offset - fix_limit;
757 /* ==================================================================== */
758 /* Bringing it all together. */
761 svn_txdelta_window_t *
762 svn_txdelta_compose_windows(const svn_txdelta_window_t *window_A,
763 const svn_txdelta_window_t *window_B,
766 svn_txdelta__ops_baton_t build_baton = { 0 };
767 svn_txdelta_window_t *composite;
768 apr_pool_t *subpool = svn_pool_create(pool);
769 offset_index_t *offset_index = create_offset_index(window_A, subpool);
770 range_index_t *range_index = create_range_index(subpool);
771 apr_size_t target_offset = 0;
774 /* Read the description of the delta composition algorithm in
775 notes/fs-improvements.txt before going any further.
776 You have been warned. */
777 build_baton.new_data = svn_stringbuf_create_empty(pool);
778 for (i = 0; i < window_B->num_ops; ++i)
780 const svn_txdelta_op_t *const op = &window_B->ops[i];
781 if (op->action_code != svn_txdelta_source)
783 /* Delta ops that don't depend on the source can be copied
784 to the composite unchanged. */
785 const char *const new_data =
786 (op->action_code == svn_txdelta_new
787 ? window_B->new_data->data + op->offset
789 svn_txdelta__insert_op(&build_baton, op->action_code,
790 op->offset, op->length,
795 /* NOTE: Remember that `offset' and `limit' refer to
796 positions in window_B's _source_ stream, which is the
797 same as window_A's _target_ stream! */
798 const apr_size_t offset = op->offset;
799 const apr_size_t limit = op->offset + op->length;
800 range_list_node_t *range_list, *range;
801 apr_size_t tgt_off = target_offset;
803 splay_range_index(offset, range_index);
804 range_list = build_range_list(offset, limit, range_index);
806 for (range = range_list; range; range = range->next)
808 if (range->kind == range_from_target)
809 svn_txdelta__insert_op(&build_baton, svn_txdelta_target,
810 range->target_offset,
811 range->limit - range->offset,
814 copy_source_ops(range->offset, range->limit, tgt_off, 0,
815 &build_baton, window_A, offset_index,
818 tgt_off += range->limit - range->offset;
820 assert(tgt_off == target_offset + op->length);
822 free_range_list(range_list, range_index);
823 insert_range(offset, limit, target_offset, range_index);
826 /* Remember the new offset in the would-be target stream. */
827 target_offset += op->length;
830 svn_pool_destroy(subpool);
832 composite = svn_txdelta__make_window(&build_baton, pool);
833 composite->sview_offset = window_A->sview_offset;
834 composite->sview_len = window_A->sview_len;
835 composite->tview_len = window_B->tview_len;