1 /* pack.c --- FSFS shard packing functionality
3 * ====================================================================
4 * Licensed to the Apache Software Foundation (ASF) under one
5 * or more contributor license agreements. See the NOTICE file
6 * distributed with this work for additional information
7 * regarding copyright ownership. The ASF licenses this file
8 * to you under the Apache License, Version 2.0 (the
9 * "License"); you may not use this file except in compliance
10 * with the License. You may obtain a copy of the License at
12 * http://www.apache.org/licenses/LICENSE-2.0
14 * Unless required by applicable law or agreed to in writing,
15 * software distributed under the License is distributed on an
16 * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
17 * KIND, either express or implied. See the License for the
18 * specific language governing permissions and limitations
20 * ====================================================================
25 #include "svn_pools.h"
26 #include "svn_dirent_uri.h"
27 #include "svn_sorts.h"
28 #include "private/svn_temp_serializer.h"
29 #include "private/svn_sorts_private.h"
30 #include "private/svn_subr_private.h"
31 #include "private/svn_string_private.h"
32 #include "private/svn_io_private.h"
39 #include "low_level.h"
41 #include "transaction.h"
43 #include "../libsvn_fs/fs-loader.h"
45 #include "svn_private_config.h"
46 #include "temp_serializer.h"
48 /* Logical addressing packing logic:
50 * We pack files on a pack file basis (e.g. 1000 revs) without changing
51 * existing pack files nor the revision files outside the range to pack.
53 * First, we will scan the revision file indexes to determine the number
54 * of items to "place" (i.e. determine their optimal position within the
55 * future pack file). For each item, we will need a constant amount of
56 * memory to track it. A MAX_MEM parameter sets a limit to the number of
57 * items we may place in one go. That means, we may not be able to add
58 * all revisions at once. Instead, we will run the placement for a subset
59 * of revisions at a time. The very unlikely worst case will simply append
60 * all revision data with just a little reshuffling inside each revision.
62 * In a second step, we read all revisions in the selected range, build
63 * the item tracking information and copy the items themselves from the
64 * revision files to temporary files. The latter serve as buckets for a
65 * very coarse bucket presort: Separate change lists, file properties,
66 * directory properties and noderevs + representations from one another.
68 * The third step will determine an optimized placement for the items in
69 * each of the 4 buckets separately. The first three will simply order
70 * their items by revision, starting with the newest once. Placing rep
71 * and noderev items is a more elaborate process documented in the code.
73 * In short, we store items in the following order:
74 * - changed paths lists
76 * - directory properties
77 * - directory representations corresponding noderevs, lexical path order
78 * with special treatment of "trunk" and "branches"
79 * - same for file representations
81 * Step 4 copies the items from the temporary buckets into the final
82 * pack file and writes the temporary index files.
84 * Finally, after the last range of revisions, create the final indexes.
87 /* Maximum amount of memory we allocate for placement information during
90 #define DEFAULT_MAX_MEM (64 * 1024 * 1024)
92 /* Data structure describing a node change at PATH, REVISION.
93 * We will sort these instances by PATH and NODE_ID such that we can combine
94 * similar nodes in the same reps container and store containers in path
97 typedef struct path_order_t
/* sort path of this node; tweaked for ordering (see
 * tweak_path_for_ordering) and interned in the pack context's PATHS tree */
100 svn_prefix_string__t *path;
102 /* node ID for this PATH in REVISION */
103 svn_fs_fs__id_part_t node_id;
105 /* when this change happened */
106 svn_revnum_t revision;
108 /* noderev predecessor count */
109 int predecessor_count;
111 /* this is a directory node */
112 svn_boolean_t is_dir;
114 /* length of the expanded representation content */
115 apr_int64_t expanded_size;
117 /* item ID of the noderev linked to the change. May be (0, 0). */
118 svn_fs_fs__id_part_t noderev_id;
120 /* item ID of the representation containing the new data. May be (0, 0). */
121 svn_fs_fs__id_part_t rep_id;
124 /* Represents a reference from item FROM to item TO. FROM may be a noderev
125 * or rep_id while TO is (currently) always a representation. We will sort
126 * them by TO which allows us to collect all dependent items.
128 typedef struct reference_t
/* delta base being referenced (see copy_rep_to_temp: the rep header's
 * base revision / base item index) */
130 svn_fs_fs__id_part_t to;
/* the referring item, i.e. the one deltified against TO */
131 svn_fs_fs__id_part_t from;
134 /* This structure keeps track of all the temporary data and status that
135 * needs to be kept around during the creation of one pack file. After
136 * each revision range (in case we can't process all revs at once due to
137 * memory restrictions), parts of the data will get re-initialized.
139 typedef struct pack_context_t
141 /* file system that we operate on */
144 /* cancel function to invoke at regular intervals. May be NULL */
145 svn_cancel_func_t cancel_func;
147 /* baton to pass to CANCEL_FUNC */
150 /* first revision in the shard (and future pack file) */
151 svn_revnum_t shard_rev;
153 /* first revision in the range to process (>= SHARD_REV) */
154 svn_revnum_t start_rev;
156 /* first revision after the range to process (<= SHARD_END_REV) */
157 svn_revnum_t end_rev;
159 /* first revision after the current shard */
160 svn_revnum_t shard_end_rev;
162 /* log-to-phys proto index for the whole pack file */
163 apr_file_t *proto_l2p_index;
165 /* phys-to-log proto index for the whole pack file */
166 apr_file_t *proto_p2l_index;
168 /* full shard directory path (containing the unpacked revisions) */
169 const char *shard_dir;
171 /* full packed shard directory path (containing the pack file + indexes) */
172 const char *pack_file_dir;
174 /* full pack file path (including PACK_FILE_DIR) */
175 const char *pack_file_path;
177 /* current write position (i.e. file length) in the pack file */
178 apr_off_t pack_offset;
180 /* the pack file to ultimately write all data to */
181 apr_file_t *pack_file;
183 /* array of svn_fs_fs__p2l_entry_t *, all referring to change lists.
184 * Will be filled in phase 2 and be cleared after each revision range. */
185 apr_array_header_t *changes;
187 /* temp file receiving all change list items (referenced by CHANGES).
188 * Will be filled in phase 2 and be cleared after each revision range. */
189 apr_file_t *changes_file;
191 /* array of svn_fs_fs__p2l_entry_t *, all referring to file properties.
192 * Will be filled in phase 2 and be cleared after each revision range. */
193 apr_array_header_t *file_props;
195 /* temp file receiving all file prop items (referenced by FILE_PROPS).
196 * Will be filled in phase 2 and be cleared after each revision range.*/
197 apr_file_t *file_props_file;
199 /* array of svn_fs_fs__p2l_entry_t *, all referring to directory properties.
200 * Will be filled in phase 2 and be cleared after each revision range. */
201 apr_array_header_t *dir_props;
203 /* temp file receiving all directory prop items (referenced by DIR_PROPS).
204 * Will be filled in phase 2 and be cleared after each revision range.*/
205 apr_file_t *dir_props_file;
207 /* container for all PATH members in PATH_ORDER. */
208 svn_prefix_tree__t *paths;
210 /* array of path_order_t *. Will be filled in phase 2 and be cleared
211 * after each revision range. Sorted by PATH, NODE_ID. */
212 apr_array_header_t *path_order;
214 /* array of reference_t* linking representations to their delta bases.
215 * Will be filled in phase 2 and be cleared after each revision range.
216 * It will be sorted by the FROM members (for rep->base rep lookup). */
217 apr_array_header_t *references;
219 /* array of svn_fs_fs__p2l_entry_t*. Will be filled in phase 2 and be
220 * cleared after each revision range. During phase 3, we will set items
221 * to NULL that we already processed. */
222 apr_array_header_t *reps;
224 /* array of int, marking for each revision at which offset its items
225 * begin in REPS. Will be filled in phase 2 and be cleared after
226 * each revision range. */
227 apr_array_header_t *rev_offsets;
229 /* temp file receiving all items referenced by REPS.
230 * Will be filled in phase 2 and be cleared after each revision range.*/
231 apr_file_t *reps_file;
233 /* pool used for temporary data structures that will be cleaned up when
234 * the next range of revisions is being processed */
235 apr_pool_t *info_pool;
238 /* Create and initialize a new pack context for packing shard SHARD_REV in
239 * SHARD_DIR into PACK_FILE_DIR within filesystem FS. Allocate it in POOL
240 * and return the structure in *CONTEXT.
242 * Limit the number of items being copied per iteration to MAX_ITEMS.
243 * Set CANCEL_FUNC and CANCEL_BATON as well.
246 initialize_pack_context(pack_context_t *context,
248 const char *pack_file_dir,
249 const char *shard_dir,
250 svn_revnum_t shard_rev,
252 svn_cancel_func_t cancel_func,
256 fs_fs_data_t *ffd = fs->fsap_data;
257 const char *temp_dir;
/* A shard has at most MAX_FILES_PER_DIR revisions; never allocate the
 * per-revision offset array larger than the item limit either. */
258 int max_revs = MIN(ffd->max_files_per_dir, max_items);
/* Logical addressing (and hence this packing scheme) requires format 7+,
 * and packing always starts at a shard boundary. */
260 SVN_ERR_ASSERT(ffd->format >= SVN_FS_FS__MIN_LOG_ADDRESSING_FORMAT);
261 SVN_ERR_ASSERT(shard_rev % ffd->max_files_per_dir == 0);
263 /* where we will place our various temp files */
264 SVN_ERR(svn_io_temp_dir(&temp_dir, pool));
266 /* store parameters */
268 context->cancel_func = cancel_func;
269 context->cancel_baton = cancel_baton;
271 context->shard_rev = shard_rev;
272 context->start_rev = shard_rev;
273 context->end_rev = shard_rev;
274 context->shard_end_rev = shard_rev + ffd->max_files_per_dir;
276 /* Create the new directory and pack file. */
277 context->shard_dir = shard_dir;
278 context->pack_file_dir = pack_file_dir;
279 context->pack_file_path
280 = svn_dirent_join(pack_file_dir, PATH_PACKED, pool);
/* APR_EXCL: fail instead of clobbering an already existing pack file. */
281 SVN_ERR(svn_io_file_open(&context->pack_file, context->pack_file_path,
282 APR_WRITE | APR_BUFFERED | APR_BINARY | APR_EXCL
283 | APR_CREATE, APR_OS_DEFAULT, pool));
285 /* Proto index files */
286 SVN_ERR(svn_fs_fs__l2p_proto_index_open(
287 &context->proto_l2p_index,
288 svn_dirent_join(pack_file_dir,
289 PATH_INDEX PATH_EXT_L2P_INDEX,
292 SVN_ERR(svn_fs_fs__p2l_proto_index_open(
293 &context->proto_p2l_index,
294 svn_dirent_join(pack_file_dir,
295 PATH_INDEX PATH_EXT_P2L_INDEX,
299 /* item buckets: one item info array and one temp file per bucket */
300 context->changes = apr_array_make(pool, max_items,
301 sizeof(svn_fs_fs__p2l_entry_t *));
302 SVN_ERR(svn_io_open_unique_file3(&context->changes_file, NULL, temp_dir,
303 svn_io_file_del_on_close, pool, pool));
304 context->file_props = apr_array_make(pool, max_items,
305 sizeof(svn_fs_fs__p2l_entry_t *));
306 SVN_ERR(svn_io_open_unique_file3(&context->file_props_file, NULL, temp_dir,
307 svn_io_file_del_on_close, pool, pool));
308 context->dir_props = apr_array_make(pool, max_items,
309 sizeof(svn_fs_fs__p2l_entry_t *));
310 SVN_ERR(svn_io_open_unique_file3(&context->dir_props_file, NULL, temp_dir,
311 svn_io_file_del_on_close, pool, pool));
313 /* noderev and representation item bucket */
314 context->rev_offsets = apr_array_make(pool, max_revs, sizeof(int));
315 context->path_order = apr_array_make(pool, max_items,
316 sizeof(path_order_t *));
317 context->references = apr_array_make(pool, max_items,
318 sizeof(reference_t *));
319 context->reps = apr_array_make(pool, max_items,
320 sizeof(svn_fs_fs__p2l_entry_t *));
321 SVN_ERR(svn_io_open_unique_file3(&context->reps_file, NULL, temp_dir,
322 svn_io_file_del_on_close, pool, pool));
324 /* the pool used for temp structures */
325 context->info_pool = svn_pool_create(pool);
326 context->paths = svn_prefix_tree__create(context->info_pool);
331 /* Clean up / free all revision range specific data and files in CONTEXT.
332 * Use POOL for temporary allocations.
335 reset_pack_context(pack_context_t *context,
338 const char *temp_dir;
/* Drop all per-range item info; closing the temp files also deletes them
 * (they were opened with svn_io_file_del_on_close). */
340 apr_array_clear(context->changes);
341 SVN_ERR(svn_io_file_close(context->changes_file, pool));
342 apr_array_clear(context->file_props);
343 SVN_ERR(svn_io_file_close(context->file_props_file, pool));
344 apr_array_clear(context->dir_props);
345 SVN_ERR(svn_io_file_close(context->dir_props_file, pool));
347 apr_array_clear(context->rev_offsets);
348 apr_array_clear(context->path_order);
349 apr_array_clear(context->references);
350 apr_array_clear(context->reps);
351 SVN_ERR(svn_io_file_close(context->reps_file, pool));
/* Release everything the cleared arrays pointed into. */
353 svn_pool_clear(context->info_pool);
355 /* The new temporary files must live at least as long as any other info
356 * object in CONTEXT. */
357 SVN_ERR(svn_io_temp_dir(&temp_dir, pool));
358 SVN_ERR(svn_io_open_unique_file3(&context->changes_file, NULL, temp_dir,
359 svn_io_file_del_on_close,
360 context->info_pool, pool));
361 SVN_ERR(svn_io_open_unique_file3(&context->file_props_file, NULL, temp_dir,
362 svn_io_file_del_on_close,
363 context->info_pool, pool));
364 SVN_ERR(svn_io_open_unique_file3(&context->dir_props_file, NULL, temp_dir,
365 svn_io_file_del_on_close,
366 context->info_pool, pool));
367 SVN_ERR(svn_io_open_unique_file3(&context->reps_file, NULL, temp_dir,
368 svn_io_file_del_on_close,
369 context->info_pool, pool));
/* PATHS lived in INFO_POOL, too, so it must be re-created as well. */
370 context->paths = svn_prefix_tree__create(context->info_pool);
375 /* Call this after the last revision range. It will finalize all index files
376 * for CONTEXT and close any open files. Use POOL for temporary allocations.
379 close_pack_context(pack_context_t *context,
382 const char *proto_l2p_index_path;
383 const char *proto_p2l_index_path;
385 /* need the file names for the actual index creation call further down */
386 SVN_ERR(svn_io_file_name_get(&proto_l2p_index_path,
387 context->proto_l2p_index, pool));
388 SVN_ERR(svn_io_file_name_get(&proto_p2l_index_path,
389 context->proto_p2l_index, pool));
391 /* finalize proto index files */
392 SVN_ERR(svn_io_file_close(context->proto_l2p_index, pool));
393 SVN_ERR(svn_io_file_close(context->proto_p2l_index, pool));
395 /* Append the actual index data to the pack file. */
396 SVN_ERR(svn_fs_fs__add_index_data(context->fs, context->pack_file,
397 proto_l2p_index_path,
398 proto_p2l_index_path,
402 /* remove proto index files */
403 SVN_ERR(svn_io_remove_file2(proto_l2p_index_path, FALSE, pool));
404 SVN_ERR(svn_io_remove_file2(proto_p2l_index_path, FALSE, pool));
406 /* Ensure that packed file is written to disk.*/
407 SVN_ERR(svn_io_file_flush_to_disk(context->pack_file, pool));
408 SVN_ERR(svn_io_file_close(context->pack_file, pool));
413 /* Efficiently copy SIZE bytes from SOURCE to DEST. Invoke the CANCEL_FUNC
414 * from CONTEXT at regular intervals. Use POOL for allocations.
417 copy_file_data(pack_context_t *context,
423 /* most non-representation items will be small. Minimize the buffer
424 * and infrastructure overhead in that case. */
425 enum { STACK_BUFFER_SIZE = 1024 };
427 if (size < STACK_BUFFER_SIZE)
429 /* copy small data using a fixed-size buffer on stack */
430 char buffer[STACK_BUFFER_SIZE];
431 SVN_ERR(svn_io_file_read_full2(source, buffer, (apr_size_t)size,
433 SVN_ERR(svn_io_file_write_full(dest, buffer, (apr_size_t)size,
438 /* use streaming copies for larger data blocks. That may require
439 * the allocation of larger buffers and we should make sure that
440 * this extra memory is released asap. */
441 fs_fs_data_t *ffd = context->fs->fsap_data;
442 apr_pool_t *copypool = svn_pool_create(pool);
443 char *buffer = apr_palloc(copypool, ffd->block_size);
/* Copy one block per iteration, checking for cancellation each time. */
447 apr_size_t to_copy = (apr_size_t)(MIN(size, ffd->block_size));
448 if (context->cancel_func)
449 SVN_ERR(context->cancel_func(context->cancel_baton));
451 SVN_ERR(svn_io_file_read_full2(source, buffer, to_copy,
453 SVN_ERR(svn_io_file_write_full(dest, buffer, to_copy,
/* Release the block-sized buffer as soon as the copy loop is done. */
459 svn_pool_destroy(copypool);
465 /* Writes SIZE bytes, all 0, to DEST. Uses POOL for allocations.
468 write_null_bytes(apr_file_t *dest,
472 /* Have a collection of high-quality, easy to access NUL bytes handy. */
473 enum { BUFFER_SIZE = 1024 };
/* static const: zero-initialized once, shared by all calls. */
474 static const char buffer[BUFFER_SIZE] = { 0 };
476 /* copy SIZE of them into the file's buffer */
/* Write in BUFFER_SIZE chunks until SIZE is exhausted. */
479 apr_size_t to_write = MIN(size, BUFFER_SIZE);
480 SVN_ERR(svn_io_file_write_full(dest, buffer, to_write, NULL, pool));
487 /* Copy the "simple" item (changed paths list or property representation)
488 * from the current position in REV_FILE to TEMP_FILE using CONTEXT. Add
489 * a copy of ENTRY to ENTRIES but with an updated offset value that points
490 * to the copy destination in TEMP_FILE. Use POOL for allocations.
493 copy_item_to_temp(pack_context_t *context,
494 apr_array_header_t *entries,
495 apr_file_t *temp_file,
496 apr_file_t *rev_file,
497 svn_fs_fs__p2l_entry_t *entry,
/* Duplicate ENTRY into INFO_POOL so it survives until the range is done. */
500 svn_fs_fs__p2l_entry_t *new_entry
501 = apr_pmemdup(context->info_pool, entry, sizeof(*entry));
/* The copy gets appended at TEMP_FILE's current write position. */
503 SVN_ERR(svn_fs_fs__get_file_offset(&new_entry->offset, temp_file, pool));
504 APR_ARRAY_PUSH(entries, svn_fs_fs__p2l_entry_t *) = new_entry;
/* NOTE(review): argument order appears to be (context, dest, source, size),
 * i.e. REV_FILE -> TEMP_FILE - matches the call in copy_rep_to_temp. */
506 SVN_ERR(copy_file_data(context, temp_file, rev_file, entry->size, pool));
511 /* Return the offset within CONTEXT->REPS that corresponds to item
512 * ITEM_INDEX in REVISION.
515 get_item_array_index(pack_context_t *context,
516 svn_revnum_t revision,
517 apr_int64_t item_index)
/* Only revisions within the currently processed range are tracked. */
519 assert(revision >= context->start_rev);
/* REV_OFFSETS[rev - start_rev] is the base offset of that revision's
 * items within the REPS array. */
520 return (int)item_index + APR_ARRAY_IDX(context->rev_offsets,
521 revision - context->start_rev,
525 /* Write INFO to the correct position in CONTEXT->REPS. The latter
526 * may need auto-expanding. Overwriting an array element is not allowed.
529 add_item_rep_mapping(pack_context_t *context,
530 svn_fs_fs__p2l_entry_t *entry)
/* Map (revision, item number) to the slot in CONTEXT->REPS. */
535 idx = get_item_array_index(context,
536 entry->item.revision,
539 /* make sure the index exists in the array */
540 while (context->reps->nelts <= idx)
541 APR_ARRAY_PUSH(context->reps, void *) = NULL;
543 /* set the element. If there is already an entry, there are probably
544 * two items claiming to be the same -> bail out */
545 assert(!APR_ARRAY_IDX(context->reps, idx, void *));
546 APR_ARRAY_IDX(context->reps, idx, void *) = entry;
549 /* Return the P2L entry from CONTEXT->REPS for the given ID. If there is
550 * none (or not anymore), return NULL. If RESET has been specified, set
551 * the array entry to NULL after returning the entry.
553 static svn_fs_fs__p2l_entry_t *
554 get_item(pack_context_t *context,
555 const svn_fs_fs__id_part_t *id,
558 svn_fs_fs__p2l_entry_t *result = NULL;
/* Only items with a valid number inside the current revision range can
 * be in REPS at all. */
559 if (id->number && id->revision >= context->start_rev)
561 int idx = get_item_array_index(context, id->revision, id->number);
562 if (context->reps->nelts > idx)
564 result = APR_ARRAY_IDX(context->reps, idx, void *);
/* RESET: mark the item as "processed" by NULLing its slot. */
566 APR_ARRAY_IDX(context->reps, idx, void *) = NULL;
573 /* Copy representation item identified by ENTRY from the current position
574 * in REV_FILE into CONTEXT->REPS_FILE. Add all tracking info needed by
575 * our placement algorithm to CONTEXT. Use POOL for temporary allocations.
578 copy_rep_to_temp(pack_context_t *context,
579 apr_file_t *rev_file,
580 svn_fs_fs__p2l_entry_t *entry,
583 svn_fs_fs__rep_header_t *rep_header;
584 svn_stream_t *stream;
/* Remember where the rep starts; parsing the header below moves the
 * file pointer and we must copy from the very beginning of the rep. */
585 apr_off_t source_offset = entry->offset;
587 /* create a copy of ENTRY, make it point to the copy destination and
588 * store it in CONTEXT */
589 entry = apr_pmemdup(context->info_pool, entry, sizeof(*entry));
590 SVN_ERR(svn_fs_fs__get_file_offset(&entry->offset, context->reps_file, pool));
591 add_item_rep_mapping(context, entry);
593 /* read & parse the representation header */
594 stream = svn_stream_from_aprfile2(rev_file, TRUE, pool);
595 SVN_ERR(svn_fs_fs__read_rep_header(&rep_header, stream, pool, pool));
596 svn_stream_close(stream);
598 /* if the representation is a delta against some other rep, link the two */
/* Only bases inside the current range matter; older bases are already
 * placed and cannot be moved anyway. */
599 if ( rep_header->type == svn_fs_fs__rep_delta
600 && rep_header->base_revision >= context->start_rev)
602 reference_t *reference = apr_pcalloc(context->info_pool,
604 reference->from = entry->item;
605 reference->to.revision = rep_header->base_revision;
606 reference->to.number = rep_header->base_item_index;
607 APR_ARRAY_PUSH(context->references, reference_t *) = reference;
610 /* copy the whole rep (including header!) to our temp file */
611 SVN_ERR(svn_io_file_seek(rev_file, APR_SET, &source_offset, pool));
612 SVN_ERR(copy_file_data(context, context->reps_file, rev_file, entry->size,
618 /* Directories first, dirs / files sorted by name in reverse lexical order.
619 * This maximizes the chance of two items being located close to one another
620 * in *all* pack files independent of their change order. It also groups
621 * multi-project repos nicely according to their sub-projects. The reverse
622 * order aspect gives "trunk" preference over "tags" and "branches", so
623 * trunk-related items are more likely to be contiguous.
626 compare_dir_entries_format7(const svn_sort__item_t *a,
627 const svn_sort__item_t *b)
629 const svn_fs_dirent_t *lhs = (const svn_fs_dirent_t *) a->value;
630 const svn_fs_dirent_t *rhs = (const svn_fs_dirent_t *) b->value;
/* All directories sort before all files. */
632 if (lhs->kind != rhs->kind)
633 return lhs->kind == svn_node_dir ? -1 : 1;
/* NOTE(review): plain strcmp here, despite the "reverse lexical" claim
 * in the header comment - verify which of the two is intended. */
635 return strcmp(lhs->name, rhs->name);
638 /* Directory entries sorted by revision (decreasing - to max cache hits)
639 * and offset (increasing - to max benefit from APR file buffering).
642 compare_dir_entries_format6(const svn_sort__item_t *a,
643 const svn_sort__item_t *b)
645 const svn_fs_dirent_t *lhs = (const svn_fs_dirent_t *) a->value;
646 const svn_fs_dirent_t *rhs = (const svn_fs_dirent_t *) b->value;
/* In physical addressing (format 6), the item number is the offset
 * within the revision file. */
648 const svn_fs_fs__id_part_t *lhs_rev_item
649 = svn_fs_fs__id_rev_item(lhs->id);
650 const svn_fs_fs__id_part_t *rhs_rev_item
651 = svn_fs_fs__id_rev_item(rhs->id);
653 /* decreasing ("reverse") order on revs */
654 if (lhs_rev_item->revision != rhs_rev_item->revision)
655 return lhs_rev_item->revision > rhs_rev_item->revision ? -1 : 1;
657 /* increasing order on offsets */
658 if (lhs_rev_item->number != rhs_rev_item->number)
659 return lhs_rev_item->number > rhs_rev_item->number ? 1 : -1;
/* Return the entries of DIRECTORY, ordered for optimal pack file locality:
 * format 7 sorts by kind + name, format 6 by revision + offset. The
 * result array (svn_fs_dirent_t *) is allocated in RESULT_POOL; the
 * intermediate sort uses SCRATCH_POOL. */
665 svn_fs_fs__order_dir_entries(svn_fs_t *fs,
666 apr_hash_t *directory,
667 apr_pool_t *result_pool,
668 apr_pool_t *scratch_pool)
/* Sort the hash with the comparison function matching the FS format. */
670 apr_array_header_t *ordered
671 = svn_sort__hash(directory,
672 svn_fs_fs__use_log_addressing(fs)
673 ? compare_dir_entries_format7
674 : compare_dir_entries_format6,
677 apr_array_header_t *result
678 = apr_array_make(result_pool, ordered->nelts, sizeof(svn_fs_dirent_t *));
/* Strip the svn_sort__item_t wrappers; keep only the dirent pointers. */
681 for (i = 0; i < ordered->nelts; ++i)
682 APR_ARRAY_PUSH(result, svn_fs_dirent_t *)
683 = APR_ARRAY_IDX(ordered, i, svn_sort__item_t).value;
688 /* Return a duplicate of the ORIGINAL path with special sub-strings
689 * (e.g. "trunk") modified in such a way that they have a lower lexicographic
690 * value than any other "normal" file name.
693 tweak_path_for_ordering(const char *original,
696 /* We may add further special cases as needed. */
697 enum {SPECIAL_COUNT = 2};
698 static const char *special[SPECIAL_COUNT] = {"trunk", "branch"};
/* Work on a private copy; the caller's string remains untouched. */
700 char *path = apr_pstrdup(pool, original);
703 /* Replace the first char of any "special" sub-string we find by
704 * a control char, i.e. '\1' .. '\31'. In the rare event that this
705 * would clash with existing paths, no data will be lost but merely
706 * the node ordering will be sub-optimal.
708 for (i = 0; i < SPECIAL_COUNT; ++i)
709 for (pos = strstr(path, special[i]);
711 pos = strstr(pos + 1, special[i]))
713 *pos = (char)(i + '\1');
719 /* Copy node revision item identified by ENTRY from the current position
720 * in REV_FILE into CONTEXT->REPS_FILE. Add all tracking info needed by
721 * our placement algorithm to CONTEXT. Use POOL for temporary allocations.
724 copy_node_to_temp(pack_context_t *context,
725 apr_file_t *rev_file,
726 svn_fs_fs__p2l_entry_t *entry,
729 path_order_t *path_order = apr_pcalloc(context->info_pool,
730 sizeof(*path_order));
731 node_revision_t *noderev;
732 const char *sort_path;
733 svn_stream_t *stream;
/* Remember the noderev's start; parsing below advances the file pointer. */
734 apr_off_t source_offset = entry->offset;
736 /* read & parse noderev */
737 stream = svn_stream_from_aprfile2(rev_file, TRUE, pool);
738 SVN_ERR(svn_fs_fs__read_noderev(&noderev, stream, pool, pool));
739 svn_stream_close(stream);
741 /* create a copy of ENTRY, make it point to the copy destination and
742 * store it in CONTEXT */
743 entry = apr_pmemdup(context->info_pool, entry, sizeof(*entry));
744 SVN_ERR(svn_fs_fs__get_file_offset(&entry->offset, context->reps_file,
746 add_item_rep_mapping(context, entry);
748 /* copy the noderev to our temp file */
749 SVN_ERR(svn_io_file_seek(rev_file, APR_SET, &source_offset, pool));
750 SVN_ERR(copy_file_data(context, context->reps_file, rev_file, entry->size,
753 /* if the node has a data representation, make that the node's "base".
754 * This will (often) cause the noderev to be placed right in front of
755 * its data representation. */
757 if (noderev->data_rep && noderev->data_rep->revision >= context->start_rev)
759 path_order->rep_id.revision = noderev->data_rep->revision;
760 path_order->rep_id.number = noderev->data_rep->item_index;
/* Prefer the expanded size; fall back to the on-disk size if the
 * expanded size is unknown (0). */
761 path_order->expanded_size = noderev->data_rep->expanded_size
762 ? noderev->data_rep->expanded_size
763 : noderev->data_rep->size;
766 /* Sort path is the key used for ordering noderevs and associated reps.
767 * It will not be stored in the final pack file. */
768 sort_path = tweak_path_for_ordering(noderev->created_path, pool);
769 path_order->path = svn_prefix_string__create(context->paths, sort_path);
770 path_order->node_id = *svn_fs_fs__id_node_id(noderev->id);
771 path_order->revision = svn_fs_fs__id_rev(noderev->id);
772 path_order->predecessor_count = noderev->predecessor_count;
773 path_order->is_dir = noderev->kind == svn_node_dir;
774 path_order->noderev_id = *svn_fs_fs__id_rev_item(noderev->id);
775 APR_ARRAY_PUSH(context->path_order, path_order_t *) = path_order;
780 /* implements compare_fn_t. Bring all directories in front of the files
781 and sort descendingly by PATH, NODE_ID and REVISION.
784 compare_path_order(const path_order_t * const * lhs_p,
785 const path_order_t * const * rhs_p)
787 const path_order_t * lhs = *lhs_p;
788 const path_order_t * rhs = *rhs_p;
790 /* cluster all directories */
791 int diff = rhs->is_dir - lhs->is_dir;
795 /* lexicographic order on path and node (i.e. latest first) */
796 diff = svn_prefix_string__compare(lhs->path, rhs->path);
800 /* reverse order on node (i.e. latest first) */
801 diff = svn_fs_fs__id_part_compare(&rhs->node_id, &lhs->node_id);
805 /* reverse order on revision (i.e. latest first) */
806 if (lhs->revision != rhs->revision)
807 return lhs->revision < rhs->revision ? 1 : -1;
812 /* implements compare_fn_t. Sort ascendingly by FROM, TO.
815 compare_references(const reference_t * const * lhs_p,
816 const reference_t * const * rhs_p)
818 const reference_t * lhs = *lhs_p;
819 const reference_t * rhs = *rhs_p;
/* Primary key FROM (enables rep -> base rep lookup), tie-break on TO. */
821 int diff = svn_fs_fs__id_part_compare(&lhs->from, &rhs->from);
822 return diff ? diff : svn_fs_fs__id_part_compare(&lhs->to, &rhs->to);
825 /* implements compare_fn_t. Assume ascending order by FROM.
/* Used with svn_sort__array_lookup to find the reference whose FROM
 * equals the given item ID. */
828 compare_ref_to_item(const reference_t * const * lhs_p,
829 const svn_fs_fs__id_part_t * rhs_p)
831 return svn_fs_fs__id_part_compare(&(*lhs_p)->from, rhs_p);
834 /* implements compare_fn_t. Finds the DIR / FILE boundary.
/* Directories compare "less", so a lower-bound search on a
 * compare_path_order-sorted array yields the first FILE index. */
837 compare_is_dir(const path_order_t * const * lhs_p,
840 return (*lhs_p)->is_dir ? -1 : 0;
843 /* Look for the least significant bit set in VALUE and return the smallest
844 * number with the same property, i.e. the largest power of 2 that is a
845 * factor in VALUE. */
/* (value & (value - 1)) clears the lowest set bit, so the subtraction
 * isolates it. 0 is treated as "infinitely round" -> INT_MAX. */
849 return value ? value - (value & (value - 1)) : INT_MAX;
852 /* Order a range of data collected in CONTEXT such that we can place them
853 * in the desired order. The input is taken from *PATH_ORDER, offsets FIRST
854 * to LAST and then written in the final order to the same range in *TEMP.
857 sort_reps_range(pack_context_t *context,
858 const path_order_t **path_order,
859 const path_order_t **temp,
863 const svn_prefix_string__t *path;
865 svn_fs_fs__id_part_t rep_id;
866 fs_fs_data_t *ffd = context->fs->fsap_data;
868 /* The logic below would fail for empty ranges. */
872 /* Re-order noderevs like this:
874 * (1) Most likely to be referenced by future pack files, in path order.
875 * (2) highest revision rep per path + dependency chain
876 * (3) Remaining reps in path, rev order
878 * We simply pick & choose from the existing path, rev order.
881 path = path_order[first]->path;
884 /* (1) For each path, pick the "roundest" representation and put it in
885 * front of all other nodes in the pack file. The "roundest" rep is
886 * the one most likely to be referenced from future pack files, i.e. we
887 * concentrate those potential "foreign link targets" in one section of
890 * And we only apply this to reps outside the linear deltification
891 * sections because references *into* linear deltification ranges are
894 for (i = first; i < last; ++i)
896 /* Investigated all nodes for the current path? */
897 if (svn_prefix_string__compare(path, path_order[i]->path))
/* Move on to the next path; continue tracking the best candidate. */
900 path = path_order[i]->path;
902 /* Pick roundest non-linear deltified node. */
903 if (roundness(path_order[best]->predecessor_count)
904 >= ffd->max_linear_deltification)
/* NULLed slots mark entries already moved to TEMP; the later
 * phases skip them. */
906 temp[dest++] = path_order[best];
907 path_order[best] = NULL;
/* Keep the candidate with the higher roundness for this path. */
913 if ( roundness(path_order[best]->predecessor_count)
914 < roundness(path_order[i]->predecessor_count))
918 /* Treat the last path the same as all others. */
919 if (roundness(path_order[best]->predecessor_count)
920 >= ffd->max_linear_deltification)
922 temp[dest++] = path_order[best];
923 path_order[best] = NULL;
926 /* (2) For each (remaining) path, pick the nodes along the delta chain
927 * for the highest revision. Due to our ordering, this is the first
928 * node we encounter for any path.
930 * Most references that don't hit a delta base picked in (1), will
931 * access HEAD of the respective path. Keeping all its dependency chain
932 * in one place turns reconstruction into a linear scan of minimal length.
934 for (i = first; i < last; ++i)
937 /* This is the first path we still have to handle. */
938 path = path_order[i]->path;
939 rep_id = path_order[i]->rep_id;
943 for (i = first; i < last; ++i)
/* Entered a new path: restart the delta chain tracking. */
947 if (svn_prefix_string__compare(path, path_order[i]->path))
949 path = path_order[i]->path;
950 rep_id = path_order[i]->rep_id;
953 /* Pick nodes along the deltification chain. Skip side-branches. */
954 if (svn_fs_fs__id_part_eq(&path_order[i]->rep_id, &rep_id))
956 reference_t **reference;
958 temp[dest++] = path_order[i];
959 path_order[i] = NULL;
/* Follow the rep's reference to its delta base; REFERENCES is
 * sorted by FROM, so a binary search finds it. */
961 reference = svn_sort__array_lookup(context->references,
963 (int (*)(const void *, const void *))compare_ref_to_item);
965 rep_id = (*reference)->to;
969 /* (3) All remaining nodes in path, rev order. Linear deltification
970 * makes HEAD delta chains from (2) cover all or most of their deltas
971 * in a given pack file. So, this is just a few remnants that we put
972 * at the end of the pack file.
974 for (i = first; i < last; ++i)
976 temp[dest++] = path_order[i];
978 /* We now know the final ordering. */
979 assert(dest == last);
982 /* Order the data collected in CONTEXT such that we can place them in the
986 sort_reps(pack_context_t *context)
988 apr_pool_t *temp_pool;
989 const path_order_t **temp, **path_order;
990 int i, count, dir_count;
992 /* We will later assume that there is at least one node / path.
994 if (context->path_order->nelts == 0)
/* No nodes implies no rep-to-rep references either. */
996 assert(context->references->nelts == 0);
1000 /* Sort containers by path and IDs, respectively.
1002 svn_sort__array(context->path_order,
1003 (int (*)(const void *, const void *))compare_path_order);
1004 svn_sort__array(context->references,
1005 (int (*)(const void *, const void *))compare_references);
1007 /* Directories are already in front; sort directories section and files
1008 * section separately but use the same heuristics (see sub-function).
1010 temp_pool = svn_pool_create(context->info_pool);
1011 count = context->path_order->nelts;
1012 temp = apr_pcalloc(temp_pool, count * sizeof(*temp));
/* Access the array elements directly as a C array of pointers. */
1013 path_order = (void *)context->path_order->elts;
1015 /* Find the boundary between DIR and FILE section. */
1016 dir_count = svn_sort__bsearch_lower_bound(context->path_order, NULL,
1017 (int (*)(const void *, const void *))compare_is_dir);
1019 /* Sort those sub-sections separately. */
1020 sort_reps_range(context, path_order, temp, 0, dir_count);
1021 sort_reps_range(context, path_order, temp, dir_count, count);
1023 /* We now know the final ordering. */
1024 for (i = 0; i < count; ++i)
1025 path_order[i] = temp[i];
1027 svn_pool_destroy(temp_pool);
1030 /* implements compare_fn_t. Place LHS before RHS, if the latter is older.
1033 compare_p2l_info(const svn_fs_fs__p2l_entry_t * const * lhs,
1034 const svn_fs_fs__p2l_entry_t * const * rhs)
1036 assert(*lhs != *rhs);
1038 if ((*lhs)->item.revision == (*rhs)->item.revision)
1039 return (*lhs)->item.number > (*rhs)->item.number ? -1 : 1;
1041 return (*lhs)->item.revision > (*rhs)->item.revision ? -1 : 1;
1044 /* Sort svn_fs_fs__p2l_entry_t * array ENTRIES by age. Place the latest
1048 sort_items(apr_array_header_t *entries)
1050 svn_sort__array(entries,
1051 (int (*)(const void *, const void *))compare_p2l_info);
1054 /* Return the remaining unused bytes in the current block in CONTEXT's
1058 get_block_left(pack_context_t *context)
1060 fs_fs_data_t *ffd = context->fs->fsap_data;
1061 return ffd->block_size - (context->pack_offset % ffd->block_size);
1064 /* To prevent items from overlapping a block boundary, we will usually
1065 * put them into the next block and top up the old one with NUL bytes.
1066 * Pad CONTEXT's pack file to the end of the current block, if TO_ADD does
1067 * not fit into the current block and the padding is short enough.
1068 * Use POOL for allocations.
1070 static svn_error_t *
1071 auto_pad_block(pack_context_t *context,
1075 fs_fs_data_t *ffd = context->fs->fsap_data;
1077 /* This is the maximum number of bytes "wasted" that way per block.
1078 * Larger items will cross the block boundaries. */
1079 const apr_off_t max_padding = MAX(ffd->block_size / 50, 512);
1081 /* Is wasted space small enough to align the current item to the next
1083 apr_off_t padding = get_block_left(context);
1085 if (padding < to_add && padding < max_padding)
1087 /* Yes. To up with NUL bytes and don't forget to create
1088 * an P2L index entry marking this section as unused. */
1089 svn_fs_fs__p2l_entry_t null_entry;
1091 null_entry.offset = context->pack_offset;
1092 null_entry.size = padding;
1093 null_entry.type = SVN_FS_FS__ITEM_TYPE_UNUSED;
1094 null_entry.item.revision = SVN_INVALID_REVNUM;
1095 null_entry.item.number = SVN_FS_FS__ITEM_INDEX_UNUSED;
1096 null_entry.fnv1_checksum = 0;
1098 SVN_ERR(write_null_bytes(context->pack_file, padding, pool));
1099 SVN_ERR(svn_fs_fs__p2l_proto_index_add_entry(
1100 context->proto_p2l_index, &null_entry, pool));
1101 context->pack_offset += padding;
1104 return SVN_NO_ERROR;
1107 /* Read the contents of ITEM, if not empty, from TEMP_FILE and write it
1108 * to CONTEXT->PACK_FILE. Use POOL for allocations.
1110 static svn_error_t *
1111 store_item(pack_context_t *context,
1112 apr_file_t *temp_file,
1113 svn_fs_fs__p2l_entry_t *item,
1116 apr_off_t safety_margin;
1118 /* skip empty entries */
1119 if (item->type == SVN_FS_FS__ITEM_TYPE_UNUSED)
1120 return SVN_NO_ERROR;
1122 /* If the next item does not fit into the current block, auto-pad it.
1123 Take special care of textual noderevs since their parsers may
1124 prefetch up to 80 bytes and we don't want them to cross block
1126 safety_margin = item->type == SVN_FS_FS__ITEM_TYPE_NODEREV
1127 ? SVN__LINE_CHUNK_SIZE
1129 SVN_ERR(auto_pad_block(context, item->size + safety_margin, pool));
1131 /* select the item in the source file and copy it into the target
1133 SVN_ERR(svn_io_file_seek(temp_file, APR_SET, &item->offset, pool));
1134 SVN_ERR(copy_file_data(context, context->pack_file, temp_file,
1137 /* write index entry and update current position */
1138 item->offset = context->pack_offset;
1139 context->pack_offset += item->size;
1141 SVN_ERR(svn_fs_fs__p2l_proto_index_add_entry(context->proto_p2l_index,
1144 APR_ARRAY_PUSH(context->reps, svn_fs_fs__p2l_entry_t *) = item;
1146 return SVN_NO_ERROR;
1149 /* Read the contents of the non-empty items in ITEMS from TEMP_FILE and
1150 * write them to CONTEXT->PACK_FILE. Use POOL for allocations.
1152 static svn_error_t *
1153 store_items(pack_context_t *context,
1154 apr_file_t *temp_file,
1155 apr_array_header_t *items,
1159 apr_pool_t *iterpool = svn_pool_create(pool);
1161 /* copy all items in strict order */
1162 for (i = 0; i < items->nelts; ++i)
1164 svn_pool_clear(iterpool);
1165 SVN_ERR(store_item(context, temp_file,
1166 APR_ARRAY_IDX(items, i, svn_fs_fs__p2l_entry_t *),
1170 svn_pool_destroy(iterpool);
1172 return SVN_NO_ERROR;
1175 /* Copy (append) the items identified by svn_fs_fs__p2l_entry_t * elements
1176 * in ENTRIES strictly in order from TEMP_FILE into CONTEXT->PACK_FILE.
1177 * Use POOL for temporary allocations.
1179 static svn_error_t *
1180 copy_reps_from_temp(pack_context_t *context,
1181 apr_file_t *temp_file,
1184 apr_pool_t *iterpool = svn_pool_create(pool);
1185 apr_array_header_t *path_order = context->path_order;
1188 /* copy items in path order. */
1189 for (i = 0; i < path_order->nelts; ++i)
1191 path_order_t *current_path;
1192 svn_fs_fs__p2l_entry_t *node_part;
1193 svn_fs_fs__p2l_entry_t *rep_part;
1195 svn_pool_clear(iterpool);
1197 current_path = APR_ARRAY_IDX(path_order, i, path_order_t *);
1198 node_part = get_item(context, ¤t_path->noderev_id, TRUE);
1199 rep_part = get_item(context, ¤t_path->rep_id, TRUE);
1202 SVN_ERR(store_item(context, temp_file, node_part, iterpool));
1204 SVN_ERR(store_item(context, temp_file, rep_part, iterpool));
1207 svn_pool_destroy(iterpool);
1209 return SVN_NO_ERROR;
1212 /* implements compare_fn_t. Place LHS before RHS, if the latter belongs to
1216 compare_p2l_info_rev(const svn_fs_fs__p2l_entry_t * const * lhs_p,
1217 const svn_fs_fs__p2l_entry_t * const * rhs_p)
1219 const svn_fs_fs__p2l_entry_t * lhs = *lhs_p;
1220 const svn_fs_fs__p2l_entry_t * rhs = *rhs_p;
1222 if (lhs->item.revision == rhs->item.revision)
1225 return lhs->item.revision < rhs->item.revision ? -1 : 1;
1228 /* Write the log-to-phys proto index file for CONTEXT and use POOL for
1229 * temporary allocations. All items in all buckets must have been placed
 * by now.  (NOTE(review): lines elided in this copy; code kept
 * byte-identical, comments only clarified.) */
1232 static svn_error_t *
1233 write_l2p_index(pack_context_t *context,
1236 apr_pool_t *iterpool = svn_pool_create(pool);
1237 svn_revnum_t prev_rev = SVN_INVALID_REVNUM;
1240 /* eliminate empty entries from CONTEXT->REPS */
1241 for (i = 0, dest = 0; i < context->reps->nelts; ++i)
1243 svn_fs_fs__p2l_entry_t *entry
1244 = APR_ARRAY_IDX(context->reps, i, svn_fs_fs__p2l_entry_t *);
/* Compact in place: surviving entries are moved to index DEST. */
1246 APR_ARRAY_IDX(context->reps, dest++, svn_fs_fs__p2l_entry_t *)
1249 context->reps->nelts = dest;
1251 /* we need to write the l2p index revision by revision */
1252 svn_sort__array(context->reps,
1253 (int (*)(const void *, const void *))compare_p2l_info_rev);
1255 /* write index entries */
1256 for (i = 0; i < context->reps->nelts; ++i)
1258 svn_fs_fs__p2l_entry_t *p2l_entry
1259 = APR_ARRAY_IDX(context->reps, i, svn_fs_fs__p2l_entry_t *);
1260 if (p2l_entry == NULL)
1263 /* next revision? */
1264 if (prev_rev != p2l_entry->item.revision)
1266 prev_rev = p2l_entry->item.revision;
1267 SVN_ERR(svn_fs_fs__l2p_proto_index_add_revision(
1268 context->proto_l2p_index, iterpool));
1272 SVN_ERR(svn_fs_fs__l2p_proto_index_add_entry(context->proto_l2p_index,
1274 p2l_entry->item.number,
1277 /* keep memory usage in check */
1279 svn_pool_clear(iterpool);
1282 svn_pool_destroy(iterpool);
1284 return SVN_NO_ERROR;
1287 /* Pack the current revision range of CONTEXT, i.e. this covers phases 2
1288 * to 4. Use POOL for allocations.
 * (NOTE(review): lines elided in this copy; code kept byte-identical,
 * comments only clarified.) */
1290 static svn_error_t *
1291 pack_range(pack_context_t *context,
1294 fs_fs_data_t *ffd = context->fs->fsap_data;
1295 apr_pool_t *revpool = svn_pool_create(pool);
1296 apr_pool_t *iterpool = svn_pool_create(pool);
1297 apr_pool_t *iterpool2 = svn_pool_create(pool);
1299 /* Phase 2: Copy items into various buckets and build tracking info */
1300 svn_revnum_t revision;
1301 for (revision = context->start_rev; revision < context->end_rev; ++revision)
1303 apr_off_t offset = 0;
1304 svn_fs_fs__revision_file_t *rev_file;
1306 svn_pool_clear(revpool);
1308 /* Get the rev file dimensions (mainly index locations). */
1309 SVN_ERR(svn_fs_fs__open_pack_or_rev_file(&rev_file, context->fs,
1310 revision, revpool, iterpool));
1311 SVN_ERR(svn_fs_fs__auto_read_footer(rev_file));
1313 /* store the indirect array index */
1314 APR_ARRAY_PUSH(context->rev_offsets, int) = context->reps->nelts;
1316 /* read the phys-to-log index file until we covered the whole rev file.
1317 * That index contains enough info to build both target indexes from it. */
1318 while (offset < rev_file->l2p_offset)
1320 /* read one cluster */
1322 apr_array_header_t *entries;
1324 svn_pool_clear(iterpool);
1326 SVN_ERR(svn_fs_fs__p2l_index_lookup(&entries, context->fs,
1327 rev_file, revision, offset,
1328 ffd->p2l_page_size, iterpool,
1331 for (i = 0; i < entries->nelts; ++i)
1333 svn_fs_fs__p2l_entry_t *entry
1334 = &APR_ARRAY_IDX(entries, i, svn_fs_fs__p2l_entry_t);
/* Skip the first entry if it was duplicated due to crossing a
 * cluster boundary. */
1336 /* skip first entry if that was duplicated due crossing a
1338 if (offset > entry->offset)
1341 svn_pool_clear(iterpool2);
1343 /* process entry while inside the rev file */
1344 offset = entry->offset;
1345 if (offset < rev_file->l2p_offset)
1347 SVN_ERR(svn_io_file_seek(rev_file->file, APR_SET, &offset,
/* Dispatch the item into its per-type temp bucket. */
1350 if (entry->type == SVN_FS_FS__ITEM_TYPE_CHANGES)
1351 SVN_ERR(copy_item_to_temp(context,
1353 context->changes_file,
1354 rev_file->file, entry,
1356 else if (entry->type == SVN_FS_FS__ITEM_TYPE_FILE_PROPS)
1357 SVN_ERR(copy_item_to_temp(context,
1358 context->file_props,
1359 context->file_props_file,
1360 rev_file->file, entry,
1362 else if (entry->type == SVN_FS_FS__ITEM_TYPE_DIR_PROPS)
1363 SVN_ERR(copy_item_to_temp(context,
1365 context->dir_props_file,
1366 rev_file->file, entry,
1368 else if ( entry->type == SVN_FS_FS__ITEM_TYPE_FILE_REP
1369 || entry->type == SVN_FS_FS__ITEM_TYPE_DIR_REP)
1370 SVN_ERR(copy_rep_to_temp(context, rev_file->file, entry,
1372 else if (entry->type == SVN_FS_FS__ITEM_TYPE_NODEREV)
1373 SVN_ERR(copy_node_to_temp(context, rev_file->file, entry,
1376 SVN_ERR_ASSERT(entry->type == SVN_FS_FS__ITEM_TYPE_UNUSED);
1378 offset += entry->size;
1382 if (context->cancel_func)
1383 SVN_ERR(context->cancel_func(context->cancel_baton));
1387 svn_pool_destroy(iterpool2);
1388 svn_pool_destroy(iterpool);
1390 /* phase 3: placement.
1391 * Use "newest first" placement for simple items. */
1392 sort_items(context->changes);
1393 sort_items(context->file_props);
1394 sort_items(context->dir_props);
1396 /* follow dependencies recursively for noderevs and data representations */
1399 /* phase 4: copy bucket data to pack file. Write P2L index. */
1400 SVN_ERR(store_items(context, context->changes_file, context->changes,
1402 svn_pool_clear(revpool);
1403 SVN_ERR(store_items(context, context->file_props_file, context->file_props,
1405 svn_pool_clear(revpool);
1406 SVN_ERR(store_items(context, context->dir_props_file, context->dir_props,
1408 svn_pool_clear(revpool);
1409 SVN_ERR(copy_reps_from_temp(context, context->reps_file, revpool));
1410 svn_pool_clear(revpool);
1412 /* write L2P index as well (now that we know all target offsets) */
1413 SVN_ERR(write_l2p_index(context, revpool));
1415 svn_pool_destroy(revpool);
1417 return SVN_NO_ERROR;
1420 /* Append CONTEXT->START_REV to the context's pack file with no re-ordering.
1421 * This function will only be used for very large revisions (>>100k changes).
1422 * Use POOL for temporary allocations.
 * (NOTE(review): lines elided in this copy; code kept byte-identical,
 * comments only clarified.) */
1424 static svn_error_t *
1425 append_revision(pack_context_t *context,
1428 fs_fs_data_t *ffd = context->fs->fsap_data;
1429 apr_off_t offset = 0;
1430 apr_pool_t *iterpool = svn_pool_create(pool);
1431 svn_fs_fs__revision_file_t *rev_file;
1432 svn_filesize_t revdata_size;
/* Copy all non-index contents of the rev file to the end of the
 * pack file. */
1434 /* Copy all non-index contents the rev file to the end of the pack file. */
1435 SVN_ERR(svn_fs_fs__open_pack_or_rev_file(&rev_file, context->fs,
1436 context->start_rev, pool,
1439 SVN_ERR(svn_fs_fs__auto_read_footer(rev_file));
1440 revdata_size = rev_file->l2p_offset;
1442 SVN_ERR(svn_io_file_aligned_seek(rev_file->file, ffd->block_size, NULL, 0,
1444 SVN_ERR(copy_file_data(context, context->pack_file, rev_file->file,
1445 revdata_size, iterpool));
1447 /* mark the start of a new revision */
1448 SVN_ERR(svn_fs_fs__l2p_proto_index_add_revision(context->proto_l2p_index,
1451 /* read the phys-to-log index file until we covered the whole rev file.
1452 * That index contains enough info to build both target indexes from it. */
1453 while (offset < revdata_size)
1455 /* read one cluster */
1457 apr_array_header_t *entries;
1459 svn_pool_clear(iterpool);
1460 SVN_ERR(svn_fs_fs__p2l_index_lookup(&entries, context->fs, rev_file,
1461 context->start_rev, offset,
1462 ffd->p2l_page_size, iterpool,
1465 for (i = 0; i < entries->nelts; ++i)
1467 svn_fs_fs__p2l_entry_t *entry
1468 = &APR_ARRAY_IDX(entries, i, svn_fs_fs__p2l_entry_t);
/* Skip the first entry if it was duplicated due to crossing a
 * cluster boundary. */
1470 /* skip first entry if that was duplicated due crossing a
1472 if (offset > entry->offset)
1475 /* process entry while inside the rev file */
1476 offset = entry->offset;
1477 if (offset < revdata_size)
/* Entries keep their relative position; only shift by the pack
 * file offset of this revision's data. */
1479 entry->offset += context->pack_offset;
1480 offset += entry->size;
1481 SVN_ERR(svn_fs_fs__l2p_proto_index_add_entry(
1482 context->proto_l2p_index, entry->offset,
1483 entry->item.number, iterpool));
1484 SVN_ERR(svn_fs_fs__p2l_proto_index_add_entry(
1485 context->proto_p2l_index, entry, iterpool));
1490 svn_pool_destroy(iterpool);
1491 context->pack_offset += revdata_size;
1493 SVN_ERR(svn_fs_fs__close_revision_file(rev_file));
1495 return SVN_NO_ERROR;
1498 /* Logical addressing mode packing logic.
1500 * Pack the revision shard starting at SHARD_REV in filesystem FS from
1501 * SHARD_DIR into the PACK_FILE_DIR, using POOL for allocations. Limit
1502 * the extra memory consumption to MAX_MEM bytes. CANCEL_FUNC and
1503 * CANCEL_BATON are what you think they are.
 * (NOTE(review): lines elided in this copy; code kept byte-identical,
 * comments only clarified.) */
1505 static svn_error_t *
1506 pack_log_addressed(svn_fs_t *fs,
1507 const char *pack_file_dir,
1508 const char *shard_dir,
1509 svn_revnum_t shard_rev,
1511 svn_cancel_func_t cancel_func,
1517 /* estimated amount of memory used to represent one item in memory
1518 * during rev file packing */
1519 PER_ITEM_MEM = APR_ALIGN_DEFAULT(sizeof(path_order_t))
1520 + APR_ALIGN_DEFAULT(2 *sizeof(void*))
1521 + APR_ALIGN_DEFAULT(sizeof(reference_t))
1522 + APR_ALIGN_DEFAULT(sizeof(svn_fs_fs__p2l_entry_t))
1527 apr_array_header_t *max_ids;
1528 pack_context_t context = { 0 };
1530 apr_size_t item_count = 0;
1531 apr_pool_t *iterpool = svn_pool_create(pool);
1533 /* Prevent integer overflow. We use apr arrays to process the items so
1534 * the maximum number of items is INT_MAX. */
1536 apr_size_t temp = max_mem / PER_ITEM_MEM;
1537 SVN_ERR_ASSERT(temp <= INT_MAX);
1538 max_items = (int)temp;
1541 /* set up a pack context */
1542 SVN_ERR(initialize_pack_context(&context, fs, pack_file_dir, shard_dir,
1543 shard_rev, max_items, cancel_func,
1544 cancel_baton, pool));
1546 /* phase 1: determine the size of the revisions to pack */
1547 SVN_ERR(svn_fs_fs__l2p_get_max_ids(&max_ids, fs, shard_rev,
1548 context.shard_end_rev - shard_rev,
1551 /* pack revisions in ranges that don't exceed MAX_MEM */
1552 for (i = 0; i < max_ids->nelts; ++i)
/* Revision still fits into the current range: extend the range. */
1553 if (APR_ARRAY_IDX(max_ids, i, apr_uint64_t) + item_count <= max_items)
1555 item_count += APR_ARRAY_IDX(max_ids, i, apr_uint64_t);
1560 svn_pool_clear(iterpool);
1562 /* some unpacked revisions before this one? */
1563 if (context.start_rev < context.end_rev)
1565 /* pack them intelligently (might be just 1 rev but still ...) */
1566 SVN_ERR(pack_range(&context, iterpool));
1567 SVN_ERR(reset_pack_context(&context, iterpool));
1571 /* next revision range is to start with the current revision */
1572 context.start_rev = i + context.shard_rev;
1573 context.end_rev = context.start_rev + 1;
1575 /* if this is a very large revision, we must place it as is */
1576 if (APR_ARRAY_IDX(max_ids, i, apr_uint64_t) > max_items)
1578 SVN_ERR(append_revision(&context, iterpool));
1579 context.start_rev++;
1582 item_count += (apr_size_t)APR_ARRAY_IDX(max_ids, i, apr_uint64_t);
1585 /* non-empty revision range at the end? */
1586 if (context.start_rev < context.end_rev)
1587 SVN_ERR(pack_range(&context, iterpool));
1589 /* last phase: finalize indexes and clean up */
1590 SVN_ERR(reset_pack_context(&context, iterpool));
1591 SVN_ERR(close_pack_context(&context, iterpool));
1592 svn_pool_destroy(iterpool);
1594 return SVN_NO_ERROR;
1597 /* Given REV in FS, set *REV_OFFSET to REV's offset in the packed file.
1598 Use POOL for temporary allocations. */
/* NOTE(review): lines elided in this copy; code kept byte-identical,
 * comments only clarified. */
1600 svn_fs_fs__get_packed_offset(apr_off_t *rev_offset,
1605 fs_fs_data_t *ffd = fs->fsap_data;
1606 svn_stream_t *manifest_stream;
1607 svn_boolean_t is_cached;
1609 apr_int64_t shard_pos;
1610 apr_array_header_t *manifest;
1611 apr_pool_t *iterpool;
1613 shard = rev / ffd->max_files_per_dir;
1615 /* position of the shard within the manifest */
1616 shard_pos = rev % ffd->max_files_per_dir;
1618 /* fetch exactly that element into *rev_offset, if the manifest is found
 * in the cache; elided code presumably returns early when IS_CACHED. */
1620 SVN_ERR(svn_cache__get_partial((void **) rev_offset, &is_cached,
1621 ffd->packed_offset_cache, &shard,
1622 svn_fs_fs__get_sharded_offset, &shard_pos,
1626 return SVN_NO_ERROR;
1628 /* Open the manifest file. */
1629 SVN_ERR(svn_stream_open_readonly(&manifest_stream,
1630 svn_fs_fs__path_rev_packed(fs, rev,
1635 /* While we're here, let's just read the entire manifest file into an array,
1636 so we can cache the entire thing. */
1637 iterpool = svn_pool_create(pool);
1638 manifest = apr_array_make(pool, ffd->max_files_per_dir, sizeof(apr_off_t));
1644 svn_pool_clear(iterpool);
1645 SVN_ERR(svn_fs_fs__read_number_from_stream(&val, &eof, manifest_stream,
1650 APR_ARRAY_PUSH(manifest, apr_off_t) = (apr_off_t)val;
1652 svn_pool_destroy(iterpool);
1654 *rev_offset = APR_ARRAY_IDX(manifest, rev % ffd->max_files_per_dir,
1657 /* Close up shop and cache the array. */
1658 SVN_ERR(svn_stream_close(manifest_stream));
1659 return svn_cache__set(ffd->packed_offset_cache, &shard, manifest, pool);
1662 /* Packing logic for physical addressing mode:
1663 * Simply concatenate all revision contents.
1665 * Pack the revision shard starting at SHARD_REV containing exactly
1666 * MAX_FILES_PER_DIR revisions from SHARD_PATH into the PACK_FILE_DIR,
1667 * using POOL for allocations. CANCEL_FUNC and CANCEL_BATON are what you
 * think they are.  (NOTE(review): lines elided in this copy; code kept
 * byte-identical, comments only clarified.) */
1670 static svn_error_t *
1671 pack_phys_addressed(const char *pack_file_dir,
1672 const char *shard_path,
1673 svn_revnum_t start_rev,
1674 int max_files_per_dir,
1675 svn_cancel_func_t cancel_func,
1679 const char *pack_file_path, *manifest_file_path;
1680 apr_file_t *pack_file;
1681 apr_file_t *manifest_file;
1682 svn_stream_t *manifest_stream;
1683 svn_revnum_t end_rev, rev;
1684 apr_off_t next_offset;
1685 apr_pool_t *iterpool;
1687 /* Some useful paths. */
1688 pack_file_path = svn_dirent_join(pack_file_dir, PATH_PACKED, pool);
1689 manifest_file_path = svn_dirent_join(pack_file_dir, PATH_MANIFEST, pool);
1691 /* Create the new directory and pack file.
1692 * Use unbuffered apr_file_t since we're going to write using 16kb
 * chunks. */
1694 SVN_ERR(svn_io_file_open(&pack_file, pack_file_path,
1695 APR_WRITE | APR_CREATE | APR_EXCL,
1696 APR_OS_DEFAULT, pool));
1698 /* Create the manifest file. */
1699 SVN_ERR(svn_io_file_open(&manifest_file, manifest_file_path,
1700 APR_WRITE | APR_BUFFERED | APR_CREATE | APR_EXCL,
1701 APR_OS_DEFAULT, pool));
1702 manifest_stream = svn_stream_from_aprfile2(manifest_file, TRUE, pool);
1704 end_rev = start_rev + max_files_per_dir - 1;
/* NOTE(review): NEXT_OFFSET's "= 0" initializer is on an elided line --
 * verify against upstream before assuming it starts at 0. */
1706 iterpool = svn_pool_create(pool);
1708 /* Iterate over the revisions in this shard, squashing them together. */
1709 for (rev = start_rev; rev <= end_rev; rev++)
1711 svn_stream_t *rev_stream;
1715 svn_pool_clear(iterpool);
1717 /* Get the size of the file. */
1718 path = svn_dirent_join(shard_path, apr_psprintf(iterpool, "%ld", rev),
1720 SVN_ERR(svn_io_stat(&finfo, path, APR_FINFO_SIZE, iterpool));
1722 /* build manifest */
1723 SVN_ERR(svn_stream_printf(manifest_stream, iterpool,
1724 "%" APR_OFF_T_FMT "\n", next_offset));
1725 next_offset += finfo.size;
1727 /* Copy all the bits from the rev file to the end of the pack file. */
1728 SVN_ERR(svn_stream_open_readonly(&rev_stream, path, iterpool, iterpool));
1729 SVN_ERR(svn_stream_copy3(rev_stream,
1730 svn_stream_from_aprfile2(pack_file, TRUE, pool),
1731 cancel_func, cancel_baton, iterpool));
1734 /* Close stream over APR file. */
1735 SVN_ERR(svn_stream_close(manifest_stream));
1737 /* Ensure that pack file is written to disk. */
1738 SVN_ERR(svn_io_file_flush_to_disk(manifest_file, pool));
1739 SVN_ERR(svn_io_file_close(manifest_file, pool));
1741 /* disallow write access to the manifest file */
1742 SVN_ERR(svn_io_set_file_read_only(manifest_file_path, FALSE, iterpool));
1744 /* Ensure that pack file is written to disk. */
1745 SVN_ERR(svn_io_file_flush_to_disk(pack_file, pool));
1746 SVN_ERR(svn_io_file_close(pack_file, pool));
1748 svn_pool_destroy(iterpool);
1750 return SVN_NO_ERROR;
1753 /* In filesystem FS, pack the revision SHARD containing exactly
1754 * MAX_FILES_PER_DIR revisions from SHARD_PATH into the PACK_FILE_DIR,
1755 * using POOL for allocations. Try to limit the amount of temporary
1756 * memory needed to MAX_MEM bytes. CANCEL_FUNC and CANCEL_BATON are what
1757 * you think they are.
1759 * If for some reason we detect a partial packing already performed, we
1760 * remove the pack file and start again.
1762 * The actual packing will be done in a format-specific sub-function.
 */
1764 static svn_error_t *
1765 pack_rev_shard(svn_fs_t *fs,
1766 const char *pack_file_dir,
1767 const char *shard_path,
1769 int max_files_per_dir,
1771 svn_cancel_func_t cancel_func,
1775 const char *pack_file_path;
1776 svn_revnum_t shard_rev = (svn_revnum_t) (shard * max_files_per_dir);
1778 /* Some useful paths. */
1779 pack_file_path = svn_dirent_join(pack_file_dir, PATH_PACKED, pool);
1781 /* Remove any existing pack file for this shard, since it is incomplete. */
1782 SVN_ERR(svn_io_remove_dir2(pack_file_dir, TRUE, cancel_func, cancel_baton,
1785 /* Create the new directory and pack file. */
1786 SVN_ERR(svn_io_dir_make(pack_file_dir, APR_OS_DEFAULT, pool));
1788 /* Index information files */
/* Dispatch on the addressing mode of the filesystem format. */
1789 if (svn_fs_fs__use_log_addressing(fs))
1790 SVN_ERR(pack_log_addressed(fs, pack_file_dir, shard_path, shard_rev,
1791 max_mem, cancel_func, cancel_baton, pool));
1793 SVN_ERR(pack_phys_addressed(pack_file_dir, shard_path, shard_rev,
1794 max_files_per_dir, cancel_func,
1795 cancel_baton, pool));
1797 SVN_ERR(svn_io_copy_perms(shard_path, pack_file_dir, pool));
1798 SVN_ERR(svn_io_set_file_read_only(pack_file_path, FALSE, pool));
1800 return SVN_NO_ERROR;
1803 /* Baton struct used by pack_body(), pack_shard() and synced_pack_shard().
1804 These calls are nested and for every level additional fields will be
 set / become valid. */
1808 /* Valid when entering pack_body(). */
1810 svn_fs_pack_notify_t notify_func;
1812 svn_cancel_func_t cancel_func;
1816 /* Additional entries valid when entering pack_shard(). */
1817 const char *revs_dir;
1818 const char *revsprops_dir;
1821 /* Additional entries valid when entering synced_pack_shard(). */
1822 const char *rev_shard_path;
1826 /* Part of the pack process that requires global (write) synchronization.
1827 * We pack the revision properties of the shard described by BATON and
1828 * In the file system at FS_PATH, pack the SHARD in REVS_DIR and replace
1829 * the non-packed revprop & rev shard folder(s) with the packed ones.
1830 * The packed rev folder has been created prior to calling this function.
 * (NOTE(review): lines elided in this copy; code kept byte-identical,
 * comments only clarified.) */
1832 static svn_error_t *
1833 synced_pack_shard(void *baton,
1836 struct pack_baton *pb = baton;
1837 fs_fs_data_t *ffd = pb->fs->fsap_data;
1838 const char *revprops_shard_path, *revprops_pack_file_dir;
1840 /* if enabled, pack the revprops in an equivalent way */
1841 if (pb->revsprops_dir)
1843 revprops_pack_file_dir = svn_dirent_join(pb->revsprops_dir,
1845 "%" APR_INT64_T_FMT PATH_EXT_PACKED_SHARD,
1848 revprops_shard_path = svn_dirent_join(pb->revsprops_dir,
1849 apr_psprintf(pool, "%" APR_INT64_T_FMT, pb->shard),
1852 SVN_ERR(svn_fs_fs__pack_revprops_shard(revprops_pack_file_dir,
1853 revprops_shard_path,
1855 ffd->max_files_per_dir,
1856 (int)(0.9*ffd->revprop_pack_size),
1857 ffd->compress_packed_revprops
1858 ? SVN__COMPRESSION_ZLIB_DEFAULT
1859 : SVN__COMPRESSION_NONE,
1865 /* Update the min-unpacked-rev file to reflect our newly packed shard. */
1866 SVN_ERR(svn_fs_fs__write_min_unpacked_rev(pb->fs,
1867 (svn_revnum_t)((pb->shard + 1) * ffd->max_files_per_dir),
1869 ffd->min_unpacked_rev
1870 = (svn_revnum_t)((pb->shard + 1) * ffd->max_files_per_dir);
1872 /* Finally, remove the existing shard directories.
1873 * For revprops, clean up older obsolete shards as well as they might
1874 * have been left over from an interrupted FS upgrade. */
1875 SVN_ERR(svn_io_remove_dir2(pb->rev_shard_path, TRUE,
1876 pb->cancel_func, pb->cancel_baton, pool));
1877 if (pb->revsprops_dir)
1879 svn_node_kind_t kind = svn_node_dir;
1880 apr_int64_t to_cleanup = pb->shard;
/* Elided code presumably loops (do/while below) deleting each obsolete
 * revprop shard in turn. */
1883 SVN_ERR(svn_fs_fs__delete_revprops_shard(revprops_shard_path,
1885 ffd->max_files_per_dir,
1890 /* If the previous shard exists, clean it up as well.
1891 Don't try to clean up shard 0 as it we can't tell quickly
1892 whether it actually needs cleaning up. */
1893 revprops_shard_path = svn_dirent_join(pb->revsprops_dir,
1894 apr_psprintf(pool, "%" APR_INT64_T_FMT, --to_cleanup),
1896 SVN_ERR(svn_io_check_path(revprops_shard_path, &kind, pool));
1898 while (kind == svn_node_dir && to_cleanup > 0);
1901 return SVN_NO_ERROR;
1904 /* Pack the shard described by BATON.
1906 * If for some reason we detect a partial packing already performed,
1907 * we remove the pack file and start again.
 */
1909 static svn_error_t *
1910 pack_shard(struct pack_baton *baton,
1913 fs_fs_data_t *ffd = baton->fs->fsap_data;
1914 const char *rev_pack_file_dir;
1916 /* Notify caller we're starting to pack this shard. */
1917 if (baton->notify_func)
1918 SVN_ERR(baton->notify_func(baton->notify_baton, baton->shard,
1919 svn_fs_pack_notify_start, pool));
1921 /* Some useful paths. */
1922 rev_pack_file_dir = svn_dirent_join(baton->revs_dir,
1924 "%" APR_INT64_T_FMT PATH_EXT_PACKED_SHARD,
1927 baton->rev_shard_path = svn_dirent_join(baton->revs_dir,
1929 "%" APR_INT64_T_FMT,
1933 /* pack the revision content */
1934 SVN_ERR(pack_rev_shard(baton->fs, rev_pack_file_dir, baton->rev_shard_path,
1935 baton->shard, ffd->max_files_per_dir,
1936 baton->max_mem, baton->cancel_func,
1937 baton->cancel_baton, pool));
1939 /* For newer repo formats, we only acquired the pack lock so far.
1940 Before modifying the repo state by switching over to the packed
1941 data, we need to acquire the global (write) lock. */
1942 if (ffd->format >= SVN_FS_FS__MIN_PACK_LOCK_FORMAT)
1943 SVN_ERR(svn_fs_fs__with_write_lock(baton->fs, synced_pack_shard, baton,
1946 SVN_ERR(synced_pack_shard(baton, pool));
/* Notify caller we've finished packing this shard. */
1948 /* Notify caller we're starting to pack this shard. */
1949 if (baton->notify_func)
1950 SVN_ERR(baton->notify_func(baton->notify_baton, baton->shard,
1951 svn_fs_pack_notify_end, pool));
1953 return SVN_NO_ERROR;
1956 /* The work-horse for svn_fs_fs__pack, called with the FS write lock.
1957 This implements the svn_fs_fs__with_write_lock() 'body' callback
1958 type. BATON is a 'struct pack_baton *'.
1960 WARNING: if you add a call to this function, please note:
1961 The code currently assumes that any piece of code running with
1962 the write-lock set can rely on the ffd->min_unpacked_rev and
1963 ffd->min_unpacked_revprop caches to be up-to-date (and, by
1964 extension, on not having to use a retry when calling
1965 svn_fs_fs__path_rev_absolute() and friends). If you add a call
1966 to this function, consider whether you have to call
1967 svn_fs_fs__update_min_unpacked_rev().
1968 See this thread: http://thread.gmane.org/1291206765.3782.3309.camel@edith
 */
1970 static svn_error_t *
1971 pack_body(void *baton,
1974 struct pack_baton *pb = baton;
1975 fs_fs_data_t *ffd = pb->fs->fsap_data;
1976 apr_int64_t completed_shards;
1977 svn_revnum_t youngest;
1978 apr_pool_t *iterpool;
1980 /* If the repository isn't a new enough format, we don't support packing.
1981 Return a friendly error to that effect. */
1982 if (ffd->format < SVN_FS_FS__MIN_PACKED_FORMAT)
1983 return svn_error_createf(SVN_ERR_UNSUPPORTED_FEATURE, NULL,
1984 _("FSFS format (%d) too old to pack; please upgrade the filesystem."),
1987 /* If we aren't using sharding, we can't do any packing, so quit. */
1988 if (!ffd->max_files_per_dir)
1989 return SVN_NO_ERROR;
1991 SVN_ERR(svn_fs_fs__read_min_unpacked_rev(&ffd->min_unpacked_rev, pb->fs,
1994 SVN_ERR(svn_fs_fs__youngest_rev(&youngest, pb->fs, pool));
/* Only shards entirely below HEAD can be packed. */
1995 completed_shards = (youngest + 1) / ffd->max_files_per_dir;
1997 /* See if we've already completed all possible shards thus far. */
1998 if (ffd->min_unpacked_rev == (completed_shards * ffd->max_files_per_dir))
1999 return SVN_NO_ERROR;
2001 pb->revs_dir = svn_dirent_join(pb->fs->path, PATH_REVS_DIR, pool);
2002 if (ffd->format >= SVN_FS_FS__MIN_PACKED_REVPROP_FORMAT)
2003 pb->revsprops_dir = svn_dirent_join(pb->fs->path, PATH_REVPROPS_DIR,
2006 iterpool = svn_pool_create(pool);
2007 for (pb->shard = ffd->min_unpacked_rev / ffd->max_files_per_dir;
2008 pb->shard < completed_shards;
2011 svn_pool_clear(iterpool);
2013 if (pb->cancel_func)
2014 SVN_ERR(pb->cancel_func(pb->cancel_baton));
2016 SVN_ERR(pack_shard(pb, iterpool));
2019 svn_pool_destroy(iterpool);
2020 return SVN_NO_ERROR;
/* Public entry point: pack FS, choosing the pack-specific lock for newer
 * formats and the global write lock otherwise.  (NOTE(review): lines
 * elided in this copy; code kept byte-identical, comments only
 * clarified.) */
2024 svn_fs_fs__pack(svn_fs_t *fs,
2026 svn_fs_pack_notify_t notify_func,
2028 svn_cancel_func_t cancel_func,
2032 struct pack_baton pb = { 0 };
2033 fs_fs_data_t *ffd = fs->fsap_data;
2037 pb.notify_func = notify_func;
2038 pb.notify_baton = notify_baton;
2039 pb.cancel_func = cancel_func;
2040 pb.cancel_baton = cancel_baton;
2041 pb.max_mem = max_mem ? max_mem : DEFAULT_MAX_MEM;
2043 if (ffd->format >= SVN_FS_FS__MIN_PACK_LOCK_FORMAT)
2045 /* Newer repositories provide a pack operation specific lock.
2046 Acquire it to prevent concurrent packs.
2048 Since the file lock's lifetime is bound to a pool, we create a
2049 separate subpool here to release the lock immediately after the
 operation finished. */
2052 err = svn_fs_fs__with_pack_lock(fs, pack_body, &pb, pool);
2056 /* Use the global write lock for older repos. */
2057 err = svn_fs_fs__with_write_lock(fs, pack_body, &pb, pool);
2060 return svn_error_trace(err);