1 /* pack.c --- FSFS shard packing functionality
3 * ====================================================================
4 * Licensed to the Apache Software Foundation (ASF) under one
5 * or more contributor license agreements. See the NOTICE file
6 * distributed with this work for additional information
7 * regarding copyright ownership. The ASF licenses this file
8 * to you under the Apache License, Version 2.0 (the
9 * "License"); you may not use this file except in compliance
10 * with the License. You may obtain a copy of the License at
12 * http://www.apache.org/licenses/LICENSE-2.0
14 * Unless required by applicable law or agreed to in writing,
15 * software distributed under the License is distributed on an
16 * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
17 * KIND, either express or implied. See the License for the
18 * specific language governing permissions and limitations
20 * ====================================================================
25 #include "svn_pools.h"
26 #include "svn_dirent_uri.h"
27 #include "svn_sorts.h"
28 #include "private/svn_temp_serializer.h"
29 #include "private/svn_sorts_private.h"
30 #include "private/svn_subr_private.h"
31 #include "private/svn_string_private.h"
32 #include "private/svn_io_private.h"
39 #include "low_level.h"
41 #include "transaction.h"
43 #include "../libsvn_fs/fs-loader.h"
45 #include "svn_private_config.h"
46 #include "temp_serializer.h"
48 /* Logical addressing packing logic:
50 * We pack files on a pack file basis (e.g. 1000 revs) without changing
51 * existing pack files nor the revision files outside the range to pack.
53 * First, we will scan the revision file indexes to determine the number
54 * of items to "place" (i.e. determine their optimal position within the
55 * future pack file). For each item, we will need a constant amount of
56 * memory to track it. A MAX_MEM parameter sets a limit to the number of
57 * items we may place in one go. That means, we may not be able to add
58 * all revisions at once. Instead, we will run the placement for a subset
59 * of revisions at a time. The very unlikely worst case will simply append
60 * all revision data with just a little reshuffling inside each revision.
62 * In a second step, we read all revisions in the selected range, build
63 * the item tracking information and copy the items themselves from the
64 * revision files to temporary files. The latter serve as buckets for a
65 * very coarse bucket presort: Separate change lists, file properties,
66 * directory properties and noderevs + representations from one another.
68 * The third step will determine an optimized placement for the items in
69 * each of the 4 buckets separately. The first three will simply order
70 * their items by revision, starting with the newest once. Placing rep
71 * and noderev items is a more elaborate process documented in the code.
73 * In short, we store items in the following order:
74 * - changed paths lists
76 * - directory properties
77 * - directory representations corresponding noderevs, lexical path order
78 * with special treatment of "trunk" and "branches"
79 * - same for file representations
81 * Step 4 copies the items from the temporary buckets into the final
82 * pack file and writes the temporary index files.
84 * Finally, after the last range of revisions, create the final indexes.
87 /* Maximum amount of memory we allocate for placement information during
90 #define DEFAULT_MAX_MEM (64 * 1024 * 1024)
92 /* Data structure describing a node change at PATH, REVISION.
93 * We will sort these instances by PATH and NODE_ID such that we can combine
94 * similar nodes in the same reps container and store containers in path
97 typedef struct path_order_t
100 svn_prefix_string__t *path;
102 /* node ID for this PATH in REVISION */
103 svn_fs_fs__id_part_t node_id;
105 /* when this change happened */
106 svn_revnum_t revision;
108 /* noderev predecessor count */
109 int predecessor_count;
111 /* this is a directory node */
112 svn_boolean_t is_dir;
114 /* length of the expanded representation content */
115 apr_int64_t expanded_size;
117 /* item ID of the noderev linked to the change. May be (0, 0). */
118 svn_fs_fs__id_part_t noderev_id;
120 /* item ID of the representation containing the new data. May be (0, 0). */
121 svn_fs_fs__id_part_t rep_id;
124 /* Represents a reference from item FROM to item TO. FROM may be a noderev
125 * or rep_id while TO is (currently) always a representation. We will sort
126 * them by TO which allows us to collect all dependent items.
128 typedef struct reference_t
130 svn_fs_fs__id_part_t to;
131 svn_fs_fs__id_part_t from;
134 /* This structure keeps track of all the temporary data and status that
135 * needs to be kept around during the creation of one pack file. After
136 * each revision range (in case we can't process all revs at once due to
137 * memory restrictions), parts of the data will get re-initialized.
139 typedef struct pack_context_t
141 /* file system that we operate on */
144 /* cancel function to invoke at regular intervals. May be NULL */
145 svn_cancel_func_t cancel_func;
147 /* baton to pass to CANCEL_FUNC */
150 /* first revision in the shard (and future pack file) */
151 svn_revnum_t shard_rev;
153 /* first revision in the range to process (>= SHARD_REV) */
154 svn_revnum_t start_rev;
156 /* first revision after the range to process (<= SHARD_END_REV) */
157 svn_revnum_t end_rev;
159 /* first revision after the current shard */
160 svn_revnum_t shard_end_rev;
162 /* log-to-phys proto index for the whole pack file */
163 apr_file_t *proto_l2p_index;
165 /* phys-to-log proto index for the whole pack file */
166 apr_file_t *proto_p2l_index;
168 /* full shard directory path (containing the unpacked revisions) */
169 const char *shard_dir;
171 /* full packed shard directory path (containing the pack file + indexes) */
172 const char *pack_file_dir;
174 /* full pack file path (including PACK_FILE_DIR) */
175 const char *pack_file_path;
177 /* current write position (i.e. file length) in the pack file */
178 apr_off_t pack_offset;
180 /* the pack file to ultimately write all data to */
181 apr_file_t *pack_file;
183 /* array of svn_fs_fs__p2l_entry_t *, all referring to change lists.
184 * Will be filled in phase 2 and be cleared after each revision range. */
185 apr_array_header_t *changes;
187 /* temp file receiving all change list items (referenced by CHANGES).
188 * Will be filled in phase 2 and be cleared after each revision range. */
189 apr_file_t *changes_file;
191 /* array of svn_fs_fs__p2l_entry_t *, all referring to file properties.
192 * Will be filled in phase 2 and be cleared after each revision range. */
193 apr_array_header_t *file_props;
195 /* temp file receiving all file prop items (referenced by FILE_PROPS).
196 * Will be filled in phase 2 and be cleared after each revision range.*/
197 apr_file_t *file_props_file;
199 /* array of svn_fs_fs__p2l_entry_t *, all referring to directory properties.
200 * Will be filled in phase 2 and be cleared after each revision range. */
201 apr_array_header_t *dir_props;
203 /* temp file receiving all directory prop items (referenced by DIR_PROPS).
204 * Will be filled in phase 2 and be cleared after each revision range.*/
205 apr_file_t *dir_props_file;
207 /* container for all PATH members in PATH_ORDER. */
208 svn_prefix_tree__t *paths;
210 /* array of path_order_t *. Will be filled in phase 2 and be cleared
211 * after each revision range. Sorted by PATH, NODE_ID. */
212 apr_array_header_t *path_order;
214 /* array of reference_t* linking representations to their delta bases.
215 * Will be filled in phase 2 and be cleared after each revision range.
216 * It will be sorted by the FROM members (for rep->base rep lookup). */
217 apr_array_header_t *references;
219 /* array of svn_fs_fs__p2l_entry_t*. Will be filled in phase 2 and be
220 * cleared after each revision range. During phase 3, we will set items
221 * to NULL that we already processed. */
222 apr_array_header_t *reps;
224 /* array of int, marking for each revision, the which offset their items
225 * begin in REPS. Will be filled in phase 2 and be cleared after
226 * each revision range. */
227 apr_array_header_t *rev_offsets;
229 /* temp file receiving all items referenced by REPS.
230 * Will be filled in phase 2 and be cleared after each revision range.*/
231 apr_file_t *reps_file;
233 /* pool used for temporary data structures that will be cleaned up when
234 * the next range of revisions is being processed */
235 apr_pool_t *info_pool;
238 /* Create and initialize a new pack context for packing shard SHARD_REV in
239 * SHARD_DIR into PACK_FILE_DIR within filesystem FS. Allocate it in POOL
240 * and return the structure in *CONTEXT.
242 * Limit the number of items being copied per iteration to MAX_ITEMS.
243 * Set CANCEL_FUNC and CANCEL_BATON as well.
246 initialize_pack_context(pack_context_t *context,
248 const char *pack_file_dir,
249 const char *shard_dir,
250 svn_revnum_t shard_rev,
252 svn_cancel_func_t cancel_func,
256 fs_fs_data_t *ffd = fs->fsap_data;
257 const char *temp_dir;
258 int max_revs = MIN(ffd->max_files_per_dir, max_items);
260 SVN_ERR_ASSERT(ffd->format >= SVN_FS_FS__MIN_LOG_ADDRESSING_FORMAT);
261 SVN_ERR_ASSERT(shard_rev % ffd->max_files_per_dir == 0);
263 /* where we will place our various temp files */
264 SVN_ERR(svn_io_temp_dir(&temp_dir, pool));
266 /* store parameters */
268 context->cancel_func = cancel_func;
269 context->cancel_baton = cancel_baton;
271 context->shard_rev = shard_rev;
272 context->start_rev = shard_rev;
273 context->end_rev = shard_rev;
274 context->shard_end_rev = shard_rev + ffd->max_files_per_dir;
276 /* Create the new directory and pack file. */
277 context->shard_dir = shard_dir;
278 context->pack_file_dir = pack_file_dir;
279 context->pack_file_path
280 = svn_dirent_join(pack_file_dir, PATH_PACKED, pool);
281 SVN_ERR(svn_io_file_open(&context->pack_file, context->pack_file_path,
282 APR_WRITE | APR_BUFFERED | APR_BINARY | APR_EXCL
283 | APR_CREATE, APR_OS_DEFAULT, pool));
285 /* Proto index files */
286 SVN_ERR(svn_fs_fs__l2p_proto_index_open(
287 &context->proto_l2p_index,
288 svn_dirent_join(pack_file_dir,
289 PATH_INDEX PATH_EXT_L2P_INDEX,
292 SVN_ERR(svn_fs_fs__p2l_proto_index_open(
293 &context->proto_p2l_index,
294 svn_dirent_join(pack_file_dir,
295 PATH_INDEX PATH_EXT_P2L_INDEX,
299 /* item buckets: one item info array and one temp file per bucket */
300 context->changes = apr_array_make(pool, max_items,
301 sizeof(svn_fs_fs__p2l_entry_t *));
302 SVN_ERR(svn_io_open_unique_file3(&context->changes_file, NULL, temp_dir,
303 svn_io_file_del_on_close, pool, pool));
304 context->file_props = apr_array_make(pool, max_items,
305 sizeof(svn_fs_fs__p2l_entry_t *));
306 SVN_ERR(svn_io_open_unique_file3(&context->file_props_file, NULL, temp_dir,
307 svn_io_file_del_on_close, pool, pool));
308 context->dir_props = apr_array_make(pool, max_items,
309 sizeof(svn_fs_fs__p2l_entry_t *));
310 SVN_ERR(svn_io_open_unique_file3(&context->dir_props_file, NULL, temp_dir,
311 svn_io_file_del_on_close, pool, pool));
313 /* noderev and representation item bucket */
314 context->rev_offsets = apr_array_make(pool, max_revs, sizeof(int));
315 context->path_order = apr_array_make(pool, max_items,
316 sizeof(path_order_t *));
317 context->references = apr_array_make(pool, max_items,
318 sizeof(reference_t *));
319 context->reps = apr_array_make(pool, max_items,
320 sizeof(svn_fs_fs__p2l_entry_t *));
321 SVN_ERR(svn_io_open_unique_file3(&context->reps_file, NULL, temp_dir,
322 svn_io_file_del_on_close, pool, pool));
324 /* the pool used for temp structures */
325 context->info_pool = svn_pool_create(pool);
326 context->paths = svn_prefix_tree__create(context->info_pool);
331 /* Clean up / free all revision range specific data and files in CONTEXT.
332 * Use POOL for temporary allocations.
335 reset_pack_context(pack_context_t *context,
338 apr_array_clear(context->changes);
339 SVN_ERR(svn_io_file_trunc(context->changes_file, 0, pool));
340 apr_array_clear(context->file_props);
341 SVN_ERR(svn_io_file_trunc(context->file_props_file, 0, pool));
342 apr_array_clear(context->dir_props);
343 SVN_ERR(svn_io_file_trunc(context->dir_props_file, 0, pool));
345 apr_array_clear(context->rev_offsets);
346 apr_array_clear(context->path_order);
347 apr_array_clear(context->references);
348 apr_array_clear(context->reps);
349 SVN_ERR(svn_io_file_trunc(context->reps_file, 0, pool));
351 svn_pool_clear(context->info_pool);
356 /* Call this after the last revision range. It will finalize all index files
357 * for CONTEXT and close any open files. Use POOL for temporary allocations.
360 close_pack_context(pack_context_t *context,
363 const char *proto_l2p_index_path;
364 const char *proto_p2l_index_path;
366 /* need the file names for the actual index creation call further down */
367 SVN_ERR(svn_io_file_name_get(&proto_l2p_index_path,
368 context->proto_l2p_index, pool));
369 SVN_ERR(svn_io_file_name_get(&proto_p2l_index_path,
370 context->proto_p2l_index, pool));
372 /* finalize proto index files */
373 SVN_ERR(svn_io_file_close(context->proto_l2p_index, pool));
374 SVN_ERR(svn_io_file_close(context->proto_p2l_index, pool));
376 /* Append the actual index data to the pack file. */
377 SVN_ERR(svn_fs_fs__add_index_data(context->fs, context->pack_file,
378 proto_l2p_index_path,
379 proto_p2l_index_path,
383 /* remove proto index files */
384 SVN_ERR(svn_io_remove_file2(proto_l2p_index_path, FALSE, pool));
385 SVN_ERR(svn_io_remove_file2(proto_p2l_index_path, FALSE, pool));
387 /* Ensure that packed file is written to disk.*/
388 SVN_ERR(svn_io_file_flush_to_disk(context->pack_file, pool));
389 SVN_ERR(svn_io_file_close(context->pack_file, pool));
394 /* Efficiently copy SIZE bytes from SOURCE to DEST. Invoke the CANCEL_FUNC
395 * from CONTEXT at regular intervals. Use POOL for allocations.
398 copy_file_data(pack_context_t *context,
404 /* most non-representation items will be small. Minimize the buffer
405 * and infrastructure overhead in that case. */
406 enum { STACK_BUFFER_SIZE = 1024 };
408 if (size < STACK_BUFFER_SIZE)
410 /* copy small data using a fixed-size buffer on stack */
411 char buffer[STACK_BUFFER_SIZE];
412 SVN_ERR(svn_io_file_read_full2(source, buffer, (apr_size_t)size,
414 SVN_ERR(svn_io_file_write_full(dest, buffer, (apr_size_t)size,
419 /* use streaming copies for larger data blocks. That may require
420 * the allocation of larger buffers and we should make sure that
421 * this extra memory is released asap. */
422 fs_fs_data_t *ffd = context->fs->fsap_data;
423 apr_pool_t *copypool = svn_pool_create(pool);
424 char *buffer = apr_palloc(copypool, ffd->block_size);
428 apr_size_t to_copy = (apr_size_t)(MIN(size, ffd->block_size));
429 if (context->cancel_func)
430 SVN_ERR(context->cancel_func(context->cancel_baton));
432 SVN_ERR(svn_io_file_read_full2(source, buffer, to_copy,
434 SVN_ERR(svn_io_file_write_full(dest, buffer, to_copy,
440 svn_pool_destroy(copypool);
446 /* Writes SIZE bytes, all 0, to DEST. Uses POOL for allocations.
449 write_null_bytes(apr_file_t *dest,
453 /* Have a collection of high-quality, easy to access NUL bytes handy. */
454 enum { BUFFER_SIZE = 1024 };
455 static const char buffer[BUFFER_SIZE] = { 0 };
457 /* copy SIZE of them into the file's buffer */
460 apr_size_t to_write = MIN(size, BUFFER_SIZE);
461 SVN_ERR(svn_io_file_write_full(dest, buffer, to_write, NULL, pool));
468 /* Copy the "simple" item (changed paths list or property representation)
469 * from the current position in REV_FILE to TEMP_FILE using CONTEXT. Add
470 * a copy of ENTRY to ENTRIES but with an updated offset value that points
471 * to the copy destination in TEMP_FILE. Use POOL for allocations.
474 copy_item_to_temp(pack_context_t *context,
475 apr_array_header_t *entries,
476 apr_file_t *temp_file,
477 apr_file_t *rev_file,
478 svn_fs_fs__p2l_entry_t *entry,
481 svn_fs_fs__p2l_entry_t *new_entry
482 = apr_pmemdup(context->info_pool, entry, sizeof(*entry));
484 SVN_ERR(svn_fs_fs__get_file_offset(&new_entry->offset, temp_file, pool));
485 APR_ARRAY_PUSH(entries, svn_fs_fs__p2l_entry_t *) = new_entry;
487 SVN_ERR(copy_file_data(context, temp_file, rev_file, entry->size, pool));
492 /* Return the offset within CONTEXT->REPS that corresponds to item
493 * ITEM_INDEX in REVISION.
496 get_item_array_index(pack_context_t *context,
497 svn_revnum_t revision,
498 apr_int64_t item_index)
500 assert(revision >= context->start_rev);
501 return (int)item_index + APR_ARRAY_IDX(context->rev_offsets,
502 revision - context->start_rev,
506 /* Write INFO to the correct position in CONTEXT->REP_INFOS. The latter
507 * may need auto-expanding. Overwriting an array element is not allowed.
510 add_item_rep_mapping(pack_context_t *context,
511 svn_fs_fs__p2l_entry_t *entry)
516 idx = get_item_array_index(context,
517 entry->item.revision,
520 /* make sure the index exists in the array */
521 while (context->reps->nelts <= idx)
522 APR_ARRAY_PUSH(context->reps, void *) = NULL;
524 /* set the element. If there is already an entry, there are probably
525 * two items claiming to be the same -> bail out */
526 assert(!APR_ARRAY_IDX(context->reps, idx, void *));
527 APR_ARRAY_IDX(context->reps, idx, void *) = entry;
530 /* Return the P2L entry from CONTEXT->REPS for the given ID. If there is
531 * none (or not anymore), return NULL. If RESET has been specified, set
532 * the array entry to NULL after returning the entry.
534 static svn_fs_fs__p2l_entry_t *
535 get_item(pack_context_t *context,
536 const svn_fs_fs__id_part_t *id,
539 svn_fs_fs__p2l_entry_t *result = NULL;
540 if (id->number && id->revision >= context->start_rev)
542 int idx = get_item_array_index(context, id->revision, id->number);
543 if (context->reps->nelts > idx)
545 result = APR_ARRAY_IDX(context->reps, idx, void *);
547 APR_ARRAY_IDX(context->reps, idx, void *) = NULL;
554 /* Copy representation item identified by ENTRY from the current position
555 * in REV_FILE into CONTEXT->REPS_FILE. Add all tracking into needed by
556 * our placement algorithm to CONTEXT. Use POOL for temporary allocations.
559 copy_rep_to_temp(pack_context_t *context,
560 apr_file_t *rev_file,
561 svn_fs_fs__p2l_entry_t *entry,
564 svn_fs_fs__rep_header_t *rep_header;
565 svn_stream_t *stream;
566 apr_off_t source_offset = entry->offset;
568 /* create a copy of ENTRY, make it point to the copy destination and
569 * store it in CONTEXT */
570 entry = apr_pmemdup(context->info_pool, entry, sizeof(*entry));
571 SVN_ERR(svn_fs_fs__get_file_offset(&entry->offset, context->reps_file, pool));
572 add_item_rep_mapping(context, entry);
574 /* read & parse the representation header */
575 stream = svn_stream_from_aprfile2(rev_file, TRUE, pool);
576 SVN_ERR(svn_fs_fs__read_rep_header(&rep_header, stream, pool, pool));
577 svn_stream_close(stream);
579 /* if the representation is a delta against some other rep, link the two */
580 if ( rep_header->type == svn_fs_fs__rep_delta
581 && rep_header->base_revision >= context->start_rev)
583 reference_t *reference = apr_pcalloc(context->info_pool,
585 reference->from = entry->item;
586 reference->to.revision = rep_header->base_revision;
587 reference->to.number = rep_header->base_item_index;
588 APR_ARRAY_PUSH(context->references, reference_t *) = reference;
591 /* copy the whole rep (including header!) to our temp file */
592 SVN_ERR(svn_io_file_seek(rev_file, APR_SET, &source_offset, pool));
593 SVN_ERR(copy_file_data(context, context->reps_file, rev_file, entry->size,
599 /* Directories first, dirs / files sorted by name in reverse lexical order.
600 * This maximizes the chance of two items being located close to one another
601 * in *all* pack files independent of their change order. It also groups
602 * multi-project repos nicely according to their sub-projects. The reverse
603 * order aspect gives "trunk" preference over "tags" and "branches", so
604 * trunk-related items are more likely to be contiguous.
607 compare_dir_entries_format7(const svn_sort__item_t *a,
608 const svn_sort__item_t *b)
610 const svn_fs_dirent_t *lhs = (const svn_fs_dirent_t *) a->value;
611 const svn_fs_dirent_t *rhs = (const svn_fs_dirent_t *) b->value;
613 if (lhs->kind != rhs->kind)
614 return lhs->kind == svn_node_dir ? -1 : 1;
616 return strcmp(lhs->name, rhs->name);
619 /* Directories entries sorted by revision (decreasing - to max cache hits)
620 * and offset (increasing - to max benefit from APR file buffering).
623 compare_dir_entries_format6(const svn_sort__item_t *a,
624 const svn_sort__item_t *b)
626 const svn_fs_dirent_t *lhs = (const svn_fs_dirent_t *) a->value;
627 const svn_fs_dirent_t *rhs = (const svn_fs_dirent_t *) b->value;
629 const svn_fs_fs__id_part_t *lhs_rev_item
630 = svn_fs_fs__id_rev_item(lhs->id);
631 const svn_fs_fs__id_part_t *rhs_rev_item
632 = svn_fs_fs__id_rev_item(rhs->id);
634 /* decreasing ("reverse") order on revs */
635 if (lhs_rev_item->revision != rhs_rev_item->revision)
636 return lhs_rev_item->revision > rhs_rev_item->revision ? -1 : 1;
638 /* increasing order on offsets */
639 if (lhs_rev_item->number != rhs_rev_item->number)
640 return lhs_rev_item->number > rhs_rev_item->number ? 1 : -1;
646 svn_fs_fs__order_dir_entries(svn_fs_t *fs,
647 apr_hash_t *directory,
648 apr_pool_t *result_pool,
649 apr_pool_t *scratch_pool)
651 apr_array_header_t *ordered
652 = svn_sort__hash(directory,
653 svn_fs_fs__use_log_addressing(fs)
654 ? compare_dir_entries_format7
655 : compare_dir_entries_format6,
658 apr_array_header_t *result
659 = apr_array_make(result_pool, ordered->nelts, sizeof(svn_fs_dirent_t *));
662 for (i = 0; i < ordered->nelts; ++i)
663 APR_ARRAY_PUSH(result, svn_fs_dirent_t *)
664 = APR_ARRAY_IDX(ordered, i, svn_sort__item_t).value;
669 /* Return a duplicate of the the ORIGINAL path and with special sub-strins
670 * (e.g. "trunk") modified in such a way that have a lower lexicographic
671 * value than any other "normal" file name.
674 tweak_path_for_ordering(const char *original,
677 /* We may add further special cases as needed. */
678 enum {SPECIAL_COUNT = 2};
679 static const char *special[SPECIAL_COUNT] = {"trunk", "branch"};
681 char *path = apr_pstrdup(pool, original);
684 /* Replace the first char of any "special" sub-string we find by
685 * a control char, i.e. '\1' .. '\31'. In the rare event that this
686 * would clash with existing paths, no data will be lost but merely
687 * the node ordering will be sub-optimal.
689 for (i = 0; i < SPECIAL_COUNT; ++i)
690 for (pos = strstr(path, special[i]);
692 pos = strstr(pos + 1, special[i]))
694 *pos = (char)(i + '\1');
700 /* Copy node revision item identified by ENTRY from the current position
701 * in REV_FILE into CONTEXT->REPS_FILE. Add all tracking into needed by
702 * our placement algorithm to CONTEXT. Use POOL for temporary allocations.
705 copy_node_to_temp(pack_context_t *context,
706 apr_file_t *rev_file,
707 svn_fs_fs__p2l_entry_t *entry,
710 path_order_t *path_order = apr_pcalloc(context->info_pool,
711 sizeof(*path_order));
712 node_revision_t *noderev;
713 const char *sort_path;
714 svn_stream_t *stream;
715 apr_off_t source_offset = entry->offset;
717 /* read & parse noderev */
718 stream = svn_stream_from_aprfile2(rev_file, TRUE, pool);
719 SVN_ERR(svn_fs_fs__read_noderev(&noderev, stream, pool, pool));
720 svn_stream_close(stream);
722 /* create a copy of ENTRY, make it point to the copy destination and
723 * store it in CONTEXT */
724 entry = apr_pmemdup(context->info_pool, entry, sizeof(*entry));
725 SVN_ERR(svn_fs_fs__get_file_offset(&entry->offset, context->reps_file,
727 add_item_rep_mapping(context, entry);
729 /* copy the noderev to our temp file */
730 SVN_ERR(svn_io_file_seek(rev_file, APR_SET, &source_offset, pool));
731 SVN_ERR(copy_file_data(context, context->reps_file, rev_file, entry->size,
734 /* if the node has a data representation, make that the node's "base".
735 * This will (often) cause the noderev to be placed right in front of
736 * its data representation. */
738 if (noderev->data_rep && noderev->data_rep->revision >= context->start_rev)
740 path_order->rep_id.revision = noderev->data_rep->revision;
741 path_order->rep_id.number = noderev->data_rep->item_index;
742 path_order->expanded_size = noderev->data_rep->expanded_size
743 ? noderev->data_rep->expanded_size
744 : noderev->data_rep->size;
747 /* Sort path is the key used for ordering noderevs and associated reps.
748 * It will not be stored in the final pack file. */
749 sort_path = tweak_path_for_ordering(noderev->created_path, pool);
750 path_order->path = svn_prefix_string__create(context->paths, sort_path);
751 path_order->node_id = *svn_fs_fs__id_node_id(noderev->id);
752 path_order->revision = svn_fs_fs__id_rev(noderev->id);
753 path_order->predecessor_count = noderev->predecessor_count;
754 path_order->is_dir = noderev->kind == svn_node_dir;
755 path_order->noderev_id = *svn_fs_fs__id_rev_item(noderev->id);
756 APR_ARRAY_PUSH(context->path_order, path_order_t *) = path_order;
761 /* implements compare_fn_t. Bring all directories in front of the files
762 and sort descendingly by PATH, NODE_ID and REVISION.
765 compare_path_order(const path_order_t * const * lhs_p,
766 const path_order_t * const * rhs_p)
768 const path_order_t * lhs = *lhs_p;
769 const path_order_t * rhs = *rhs_p;
771 /* cluster all directories */
772 int diff = rhs->is_dir - lhs->is_dir;
776 /* lexicographic order on path and node (i.e. latest first) */
777 diff = svn_prefix_string__compare(lhs->path, rhs->path);
781 /* reverse order on node (i.e. latest first) */
782 diff = svn_fs_fs__id_part_compare(&rhs->node_id, &lhs->node_id);
786 /* reverse order on revision (i.e. latest first) */
787 if (lhs->revision != rhs->revision)
788 return lhs->revision < rhs->revision ? 1 : -1;
793 /* implements compare_fn_t. Sort ascendingly by FROM, TO.
796 compare_references(const reference_t * const * lhs_p,
797 const reference_t * const * rhs_p)
799 const reference_t * lhs = *lhs_p;
800 const reference_t * rhs = *rhs_p;
802 int diff = svn_fs_fs__id_part_compare(&lhs->from, &rhs->from);
803 return diff ? diff : svn_fs_fs__id_part_compare(&lhs->to, &rhs->to);
806 /* implements compare_fn_t. Assume ascending order by FROM.
809 compare_ref_to_item(const reference_t * const * lhs_p,
810 const svn_fs_fs__id_part_t * rhs_p)
812 return svn_fs_fs__id_part_compare(&(*lhs_p)->from, rhs_p);
815 /* implements compare_fn_t. Finds the DIR / FILE boundary.
818 compare_is_dir(const path_order_t * const * lhs_p,
821 return (*lhs_p)->is_dir ? -1 : 0;
824 /* Look for the least significant bit set in VALUE and return the smallest
825 * number with the same property, i.e. the largest power of 2 that is a
826 * factor in VALUE. */
830 return value ? value - (value & (value - 1)) : INT_MAX;
833 /* Order a range of data collected in CONTEXT such that we can place them
834 * in the desired order. The input is taken from *PATH_ORDER, offsets FIRST
835 * to LAST and then written in the final order to the same range in *TEMP.
838 sort_reps_range(pack_context_t *context,
839 const path_order_t **path_order,
840 const path_order_t **temp,
844 const svn_prefix_string__t *path;
846 svn_fs_fs__id_part_t rep_id;
847 fs_fs_data_t *ffd = context->fs->fsap_data;
849 /* The logic below would fail for empty ranges. */
853 /* Re-order noderevs like this:
855 * (1) Most likely to be referenced by future pack files, in path order.
856 * (2) highest revision rep per path + dependency chain
857 * (3) Remaining reps in path, rev order
859 * We simply pick & chose from the existing path, rev order.
862 path = path_order[first]->path;
865 /* (1) For each path, pick the "roundest" representation and put it in
866 * front of all other nodes in the pack file. The "roundest" rep is
867 * the one most likely to be referenced from future pack files, i.e. we
868 * concentrate those potential "foreign link targets" in one section of
871 * And we only apply this to reps outside the linear deltification
872 * sections because references *into* linear deltification ranges are
875 for (i = first; i < last; ++i)
877 /* Investigated all nodes for the current path? */
878 if (svn_prefix_string__compare(path, path_order[i]->path))
881 path = path_order[i]->path;
883 /* Pick roundest non-linear deltified node. */
884 if (roundness(path_order[best]->predecessor_count)
885 >= ffd->max_linear_deltification)
887 temp[dest++] = path_order[best];
888 path_order[best] = NULL;
894 if ( roundness(path_order[best]->predecessor_count)
895 < roundness(path_order[i]->predecessor_count))
899 /* Treat the last path the same as all others. */
900 if (roundness(path_order[best]->predecessor_count)
901 >= ffd->max_linear_deltification)
903 temp[dest++] = path_order[best];
904 path_order[best] = NULL;
907 /* (2) For each (remaining) path, pick the nodes along the delta chain
908 * for the highest revision. Due to our ordering, this is the first
909 * node we encounter for any path.
911 * Most references that don't hit a delta base picked in (1), will
912 * access HEAD of the respective path. Keeping all its dependency chain
913 * in one place turns reconstruction into a linear scan of minimal length.
915 for (i = first; i < last; ++i)
918 /* This is the first path we still have to handle. */
919 path = path_order[i]->path;
920 rep_id = path_order[i]->rep_id;
924 for (i = first; i < last; ++i)
928 if (svn_prefix_string__compare(path, path_order[i]->path))
930 path = path_order[i]->path;
931 rep_id = path_order[i]->rep_id;
934 /* Pick nodes along the deltification chain. Skip side-branches. */
935 if (svn_fs_fs__id_part_eq(&path_order[i]->rep_id, &rep_id))
937 reference_t **reference;
939 temp[dest++] = path_order[i];
940 path_order[i] = NULL;
942 reference = svn_sort__array_lookup(context->references,
944 (int (*)(const void *, const void *))compare_ref_to_item);
946 rep_id = (*reference)->to;
950 /* (3) All remaining nodes in path, rev order. Linear deltification
951 * makes HEAD delta chains from (2) cover all or most of their deltas
952 * in a given pack file. So, this is just a few remnants that we put
953 * at the end of the pack file.
955 for (i = first; i < last; ++i)
957 temp[dest++] = path_order[i];
959 /* We now know the final ordering. */
960 assert(dest == last);
963 /* Order the data collected in CONTEXT such that we can place them in the
967 sort_reps(pack_context_t *context)
969 apr_pool_t *temp_pool;
970 const path_order_t **temp, **path_order;
971 int i, count, dir_count;
973 /* We will later assume that there is at least one node / path.
975 if (context->path_order->nelts == 0)
977 assert(context->references->nelts == 0);
981 /* Sort containers by path and IDs, respectively.
983 svn_sort__array(context->path_order,
984 (int (*)(const void *, const void *))compare_path_order);
985 svn_sort__array(context->references,
986 (int (*)(const void *, const void *))compare_references);
988 /* Directories are already in front; sort directories section and files
989 * section separately but use the same heuristics (see sub-function).
991 temp_pool = svn_pool_create(context->info_pool);
992 count = context->path_order->nelts;
993 temp = apr_pcalloc(temp_pool, count * sizeof(*temp));
994 path_order = (void *)context->path_order->elts;
996 /* Find the boundary between DIR and FILE section. */
997 dir_count = svn_sort__bsearch_lower_bound(context->path_order, NULL,
998 (int (*)(const void *, const void *))compare_is_dir);
1000 /* Sort those sub-sections separately. */
1001 sort_reps_range(context, path_order, temp, 0, dir_count);
1002 sort_reps_range(context, path_order, temp, dir_count, count);
1004 /* We now know the final ordering. */
1005 for (i = 0; i < count; ++i)
1006 path_order[i] = temp[i];
1008 svn_pool_destroy(temp_pool);
1011 /* implements compare_fn_t. Place LHS before RHS, if the latter is older.
1014 compare_p2l_info(const svn_fs_fs__p2l_entry_t * const * lhs,
1015 const svn_fs_fs__p2l_entry_t * const * rhs)
1017 assert(*lhs != *rhs);
1019 if ((*lhs)->item.revision == (*rhs)->item.revision)
1020 return (*lhs)->item.number > (*rhs)->item.number ? -1 : 1;
1022 return (*lhs)->item.revision > (*rhs)->item.revision ? -1 : 1;
1025 /* Sort svn_fs_fs__p2l_entry_t * array ENTRIES by age. Place the latest
1029 sort_items(apr_array_header_t *entries)
1031 svn_sort__array(entries,
1032 (int (*)(const void *, const void *))compare_p2l_info);
1035 /* Return the remaining unused bytes in the current block in CONTEXT's
1039 get_block_left(pack_context_t *context)
1041 fs_fs_data_t *ffd = context->fs->fsap_data;
1042 return ffd->block_size - (context->pack_offset % ffd->block_size);
1045 /* To prevent items from overlapping a block boundary, we will usually
1046 * put them into the next block and top up the old one with NUL bytes.
1047 * Pad CONTEXT's pack file to the end of the current block, if TO_ADD does
1048 * not fit into the current block and the padding is short enough.
1049 * Use POOL for allocations.
1051 static svn_error_t *
1052 auto_pad_block(pack_context_t *context,
1056 fs_fs_data_t *ffd = context->fs->fsap_data;
1058 /* This is the maximum number of bytes "wasted" that way per block.
1059 * Larger items will cross the block boundaries. */
1060 const apr_off_t max_padding = MAX(ffd->block_size / 50, 512);
1062 /* Is wasted space small enough to align the current item to the next
1064 apr_off_t padding = get_block_left(context);
1066 if (padding < to_add && padding < max_padding)
1068 /* Yes. To up with NUL bytes and don't forget to create
1069 * an P2L index entry marking this section as unused. */
1070 svn_fs_fs__p2l_entry_t null_entry;
1072 null_entry.offset = context->pack_offset;
1073 null_entry.size = padding;
1074 null_entry.type = SVN_FS_FS__ITEM_TYPE_UNUSED;
1075 null_entry.item.revision = SVN_INVALID_REVNUM;
1076 null_entry.item.number = SVN_FS_FS__ITEM_INDEX_UNUSED;
1077 null_entry.fnv1_checksum = 0;
1079 SVN_ERR(write_null_bytes(context->pack_file, padding, pool));
1080 SVN_ERR(svn_fs_fs__p2l_proto_index_add_entry(
1081 context->proto_p2l_index, &null_entry, pool));
1082 context->pack_offset += padding;
1085 return SVN_NO_ERROR;
1088 /* Read the contents of ITEM, if not empty, from TEMP_FILE and write it
1089 * to CONTEXT->PACK_FILE. Use POOL for allocations.
1091 static svn_error_t *
1092 store_item(pack_context_t *context,
1093 apr_file_t *temp_file,
1094 svn_fs_fs__p2l_entry_t *item,
1097 apr_off_t safety_margin;
1099 /* skip empty entries */
1100 if (item->type == SVN_FS_FS__ITEM_TYPE_UNUSED)
1101 return SVN_NO_ERROR;
1103 /* If the next item does not fit into the current block, auto-pad it.
1104 Take special care of textual noderevs since their parsers may
1105 prefetch up to 80 bytes and we don't want them to cross block
1107 safety_margin = item->type == SVN_FS_FS__ITEM_TYPE_NODEREV
1108 ? SVN__LINE_CHUNK_SIZE
1110 SVN_ERR(auto_pad_block(context, item->size + safety_margin, pool));
1112 /* select the item in the source file and copy it into the target
1114 SVN_ERR(svn_io_file_seek(temp_file, APR_SET, &item->offset, pool));
1115 SVN_ERR(copy_file_data(context, context->pack_file, temp_file,
1118 /* write index entry and update current position */
1119 item->offset = context->pack_offset;
1120 context->pack_offset += item->size;
1122 SVN_ERR(svn_fs_fs__p2l_proto_index_add_entry(context->proto_p2l_index,
1125 APR_ARRAY_PUSH(context->reps, svn_fs_fs__p2l_entry_t *) = item;
1127 return SVN_NO_ERROR;
1130 /* Read the contents of the non-empty items in ITEMS from TEMP_FILE and
1131 * write them to CONTEXT->PACK_FILE. Use POOL for allocations.
1133 static svn_error_t *
1134 store_items(pack_context_t *context,
1135 apr_file_t *temp_file,
1136 apr_array_header_t *items,
1140 apr_pool_t *iterpool = svn_pool_create(pool);
1142 /* copy all items in strict order */
1143 for (i = 0; i < items->nelts; ++i)
1145 svn_pool_clear(iterpool);
1146 SVN_ERR(store_item(context, temp_file,
1147 APR_ARRAY_IDX(items, i, svn_fs_fs__p2l_entry_t *),
1151 svn_pool_destroy(iterpool);
1153 return SVN_NO_ERROR;
1156 /* Copy (append) the items identified by svn_fs_fs__p2l_entry_t * elements
1157 * in ENTRIES strictly in order from TEMP_FILE into CONTEXT->PACK_FILE.
1158 * Use POOL for temporary allocations.
1160 static svn_error_t *
1161 copy_reps_from_temp(pack_context_t *context,
1162 apr_file_t *temp_file,
1165 apr_pool_t *iterpool = svn_pool_create(pool);
1166 apr_array_header_t *path_order = context->path_order;
1169 /* copy items in path order. */
1170 for (i = 0; i < path_order->nelts; ++i)
1172 path_order_t *current_path;
1173 svn_fs_fs__p2l_entry_t *node_part;
1174 svn_fs_fs__p2l_entry_t *rep_part;
1176 svn_pool_clear(iterpool);
1178 current_path = APR_ARRAY_IDX(path_order, i, path_order_t *);
1179 node_part = get_item(context, ¤t_path->noderev_id, TRUE);
1180 rep_part = get_item(context, ¤t_path->rep_id, TRUE);
1183 SVN_ERR(store_item(context, temp_file, node_part, iterpool));
1185 SVN_ERR(store_item(context, temp_file, rep_part, iterpool));
1188 svn_pool_destroy(iterpool);
1190 return SVN_NO_ERROR;
1193 /* implements compare_fn_t. Place LHS before RHS, if the latter belongs to
1197 compare_p2l_info_rev(const svn_fs_fs__p2l_entry_t * const * lhs_p,
1198 const svn_fs_fs__p2l_entry_t * const * rhs_p)
1200 const svn_fs_fs__p2l_entry_t * lhs = *lhs_p;
1201 const svn_fs_fs__p2l_entry_t * rhs = *rhs_p;
1203 if (lhs->item.revision == rhs->item.revision)
1206 return lhs->item.revision < rhs->item.revision ? -1 : 1;
1209 /* Write the log-to-phys proto index file for CONTEXT and use POOL for
1210 * temporary allocations. All items in all buckets must have been placed
1213 static svn_error_t *
1214 write_l2p_index(pack_context_t *context,
1217 apr_pool_t *iterpool = svn_pool_create(pool);
1218 svn_revnum_t prev_rev = SVN_INVALID_REVNUM;
1221 /* eliminate empty entries from CONTEXT->REPS */
1222 for (i = 0, dest = 0; i < context->reps->nelts; ++i)
1224 svn_fs_fs__p2l_entry_t *entry
1225 = APR_ARRAY_IDX(context->reps, i, svn_fs_fs__p2l_entry_t *);
1227 APR_ARRAY_IDX(context->reps, dest++, svn_fs_fs__p2l_entry_t *)
1230 context->reps->nelts = dest;
1232 /* we need to write the l2p index revision by revision */
1233 svn_sort__array(context->reps,
1234 (int (*)(const void *, const void *))compare_p2l_info_rev);
1236 /* write index entries */
1237 for (i = 0; i < context->reps->nelts; ++i)
1239 svn_fs_fs__p2l_entry_t *p2l_entry
1240 = APR_ARRAY_IDX(context->reps, i, svn_fs_fs__p2l_entry_t *);
1241 if (p2l_entry == NULL)
1244 /* next revision? */
1245 if (prev_rev != p2l_entry->item.revision)
1247 prev_rev = p2l_entry->item.revision;
1248 SVN_ERR(svn_fs_fs__l2p_proto_index_add_revision(
1249 context->proto_l2p_index, iterpool));
1253 SVN_ERR(svn_fs_fs__l2p_proto_index_add_entry(context->proto_l2p_index,
1255 p2l_entry->item.number,
1258 /* keep memory usage in check */
1260 svn_pool_clear(iterpool);
1263 svn_pool_destroy(iterpool);
1265 return SVN_NO_ERROR;
1268 /* Pack the current revision range of CONTEXT, i.e. this covers phases 2
1269 * to 4. Use POOL for allocations.
1271 static svn_error_t *
1272 pack_range(pack_context_t *context,
1275 fs_fs_data_t *ffd = context->fs->fsap_data;
1276 apr_pool_t *revpool = svn_pool_create(pool);
1277 apr_pool_t *iterpool = svn_pool_create(pool);
1278 apr_pool_t *iterpool2 = svn_pool_create(pool);
1280 /* Phase 2: Copy items into various buckets and build tracking info */
1281 svn_revnum_t revision;
1282 for (revision = context->start_rev; revision < context->end_rev; ++revision)
1284 apr_off_t offset = 0;
1285 svn_fs_fs__revision_file_t *rev_file;
1287 svn_pool_clear(revpool);
1289 /* Get the rev file dimensions (mainly index locations). */
1290 SVN_ERR(svn_fs_fs__open_pack_or_rev_file(&rev_file, context->fs,
1291 revision, revpool, iterpool));
1292 SVN_ERR(svn_fs_fs__auto_read_footer(rev_file));
1294 /* store the indirect array index */
1295 APR_ARRAY_PUSH(context->rev_offsets, int) = context->reps->nelts;
1297 /* read the phys-to-log index file until we covered the whole rev file.
1298 * That index contains enough info to build both target indexes from it. */
1299 while (offset < rev_file->l2p_offset)
1301 /* read one cluster */
1303 apr_array_header_t *entries;
1305 svn_pool_clear(iterpool);
1307 SVN_ERR(svn_fs_fs__p2l_index_lookup(&entries, context->fs,
1308 rev_file, revision, offset,
1309 ffd->p2l_page_size, iterpool,
1312 for (i = 0; i < entries->nelts; ++i)
1314 svn_fs_fs__p2l_entry_t *entry
1315 = &APR_ARRAY_IDX(entries, i, svn_fs_fs__p2l_entry_t);
1317 /* skip first entry if that was duplicated due crossing a
1319 if (offset > entry->offset)
1322 svn_pool_clear(iterpool2);
1324 /* process entry while inside the rev file */
1325 offset = entry->offset;
1326 if (offset < rev_file->l2p_offset)
1328 SVN_ERR(svn_io_file_seek(rev_file->file, APR_SET, &offset,
1331 if (entry->type == SVN_FS_FS__ITEM_TYPE_CHANGES)
1332 SVN_ERR(copy_item_to_temp(context,
1334 context->changes_file,
1335 rev_file->file, entry,
1337 else if (entry->type == SVN_FS_FS__ITEM_TYPE_FILE_PROPS)
1338 SVN_ERR(copy_item_to_temp(context,
1339 context->file_props,
1340 context->file_props_file,
1341 rev_file->file, entry,
1343 else if (entry->type == SVN_FS_FS__ITEM_TYPE_DIR_PROPS)
1344 SVN_ERR(copy_item_to_temp(context,
1346 context->dir_props_file,
1347 rev_file->file, entry,
1349 else if ( entry->type == SVN_FS_FS__ITEM_TYPE_FILE_REP
1350 || entry->type == SVN_FS_FS__ITEM_TYPE_DIR_REP)
1351 SVN_ERR(copy_rep_to_temp(context, rev_file->file, entry,
1353 else if (entry->type == SVN_FS_FS__ITEM_TYPE_NODEREV)
1354 SVN_ERR(copy_node_to_temp(context, rev_file->file, entry,
1357 SVN_ERR_ASSERT(entry->type == SVN_FS_FS__ITEM_TYPE_UNUSED);
1359 offset += entry->size;
1363 if (context->cancel_func)
1364 SVN_ERR(context->cancel_func(context->cancel_baton));
1368 svn_pool_destroy(iterpool2);
1369 svn_pool_destroy(iterpool);
1371 /* phase 3: placement.
1372 * Use "newest first" placement for simple items. */
1373 sort_items(context->changes);
1374 sort_items(context->file_props);
1375 sort_items(context->dir_props);
1377 /* follow dependencies recursively for noderevs and data representations */
1380 /* phase 4: copy bucket data to pack file. Write P2L index. */
1381 SVN_ERR(store_items(context, context->changes_file, context->changes,
1383 svn_pool_clear(revpool);
1384 SVN_ERR(store_items(context, context->file_props_file, context->file_props,
1386 svn_pool_clear(revpool);
1387 SVN_ERR(store_items(context, context->dir_props_file, context->dir_props,
1389 svn_pool_clear(revpool);
1390 SVN_ERR(copy_reps_from_temp(context, context->reps_file, revpool));
1391 svn_pool_clear(revpool);
1393 /* write L2P index as well (now that we know all target offsets) */
1394 SVN_ERR(write_l2p_index(context, revpool));
1396 svn_pool_destroy(revpool);
1398 return SVN_NO_ERROR;
1401 /* Append CONTEXT->START_REV to the context's pack file with no re-ordering.
1402 * This function will only be used for very large revisions (>>100k changes).
1403 * Use POOL for temporary allocations.
1405 static svn_error_t *
1406 append_revision(pack_context_t *context,
1409 fs_fs_data_t *ffd = context->fs->fsap_data;
1410 apr_off_t offset = 0;
1411 apr_pool_t *iterpool = svn_pool_create(pool);
1412 svn_fs_fs__revision_file_t *rev_file;
1415 /* Get the size of the file. */
1416 const char *path = svn_dirent_join(context->shard_dir,
1417 apr_psprintf(iterpool, "%ld",
1418 context->start_rev),
1420 SVN_ERR(svn_io_stat(&finfo, path, APR_FINFO_SIZE, pool));
1422 /* Copy all the bits from the rev file to the end of the pack file. */
1423 SVN_ERR(svn_fs_fs__open_pack_or_rev_file(&rev_file, context->fs,
1424 context->start_rev, pool,
1426 SVN_ERR(copy_file_data(context, context->pack_file, rev_file->file,
1427 finfo.size, iterpool));
1429 /* mark the start of a new revision */
1430 SVN_ERR(svn_fs_fs__l2p_proto_index_add_revision(context->proto_l2p_index,
1433 /* read the phys-to-log index file until we covered the whole rev file.
1434 * That index contains enough info to build both target indexes from it. */
1435 while (offset < finfo.size)
1437 /* read one cluster */
1439 apr_array_header_t *entries;
1441 svn_pool_clear(iterpool);
1442 SVN_ERR(svn_fs_fs__p2l_index_lookup(&entries, context->fs, rev_file,
1443 context->start_rev, offset,
1444 ffd->p2l_page_size, iterpool,
1447 for (i = 0; i < entries->nelts; ++i)
1449 svn_fs_fs__p2l_entry_t *entry
1450 = &APR_ARRAY_IDX(entries, i, svn_fs_fs__p2l_entry_t);
1452 /* skip first entry if that was duplicated due crossing a
1454 if (offset > entry->offset)
1457 /* process entry while inside the rev file */
1458 offset = entry->offset;
1459 if (offset < finfo.size)
1461 entry->offset += context->pack_offset;
1462 offset += entry->size;
1463 SVN_ERR(svn_fs_fs__l2p_proto_index_add_entry(
1464 context->proto_l2p_index, entry->offset,
1465 entry->item.number, iterpool));
1466 SVN_ERR(svn_fs_fs__p2l_proto_index_add_entry(
1467 context->proto_p2l_index, entry, iterpool));
1472 svn_pool_destroy(iterpool);
1473 context->pack_offset += finfo.size;
1475 SVN_ERR(svn_fs_fs__close_revision_file(rev_file));
1477 return SVN_NO_ERROR;
1480 /* Logical addressing mode packing logic.
1482 * Pack the revision shard starting at SHARD_REV in filesystem FS from
1483 * SHARD_DIR into the PACK_FILE_DIR, using POOL for allocations. Limit
1484 * the extra memory consumption to MAX_MEM bytes. CANCEL_FUNC and
1485 * CANCEL_BATON are what you think they are.
1487 static svn_error_t *
1488 pack_log_addressed(svn_fs_t *fs,
1489 const char *pack_file_dir,
1490 const char *shard_dir,
1491 svn_revnum_t shard_rev,
1493 svn_cancel_func_t cancel_func,
1499 /* estimated amount of memory used to represent one item in memory
1500 * during rev file packing */
1501 PER_ITEM_MEM = APR_ALIGN_DEFAULT(sizeof(path_order_t))
1502 + APR_ALIGN_DEFAULT(2 *sizeof(void*))
1503 + APR_ALIGN_DEFAULT(sizeof(reference_t))
1504 + APR_ALIGN_DEFAULT(sizeof(svn_fs_fs__p2l_entry_t))
1509 apr_array_header_t *max_ids;
1510 pack_context_t context = { 0 };
1512 apr_size_t item_count = 0;
1513 apr_pool_t *iterpool = svn_pool_create(pool);
1515 /* Prevent integer overflow. We use apr arrays to process the items so
1516 * the maximum number of items is INT_MAX. */
1518 apr_size_t temp = max_mem / PER_ITEM_MEM;
1519 SVN_ERR_ASSERT(temp <= INT_MAX);
1520 max_items = (int)temp;
1523 /* set up a pack context */
1524 SVN_ERR(initialize_pack_context(&context, fs, pack_file_dir, shard_dir,
1525 shard_rev, max_items, cancel_func,
1526 cancel_baton, pool));
1528 /* phase 1: determine the size of the revisions to pack */
1529 SVN_ERR(svn_fs_fs__l2p_get_max_ids(&max_ids, fs, shard_rev,
1530 context.shard_end_rev - shard_rev,
1533 /* pack revisions in ranges that don't exceed MAX_MEM */
1534 for (i = 0; i < max_ids->nelts; ++i)
1535 if (APR_ARRAY_IDX(max_ids, i, apr_uint64_t) + item_count <= max_items)
1541 svn_pool_clear(iterpool);
1543 /* some unpacked revisions before this one? */
1544 if (context.start_rev < context.end_rev)
1546 /* pack them intelligently (might be just 1 rev but still ...) */
1547 SVN_ERR(pack_range(&context, iterpool));
1548 SVN_ERR(reset_pack_context(&context, iterpool));
1552 /* next revision range is to start with the current revision */
1553 context.start_rev = i + context.shard_rev;
1554 context.end_rev = context.start_rev + 1;
1556 /* if this is a very large revision, we must place it as is */
1557 if (APR_ARRAY_IDX(max_ids, i, apr_uint64_t) > max_items)
1559 SVN_ERR(append_revision(&context, iterpool));
1560 context.start_rev++;
1563 item_count += (apr_size_t)APR_ARRAY_IDX(max_ids, i, apr_uint64_t);
1566 /* non-empty revision range at the end? */
1567 if (context.start_rev < context.end_rev)
1568 SVN_ERR(pack_range(&context, iterpool));
1570 /* last phase: finalize indexes and clean up */
1571 SVN_ERR(reset_pack_context(&context, iterpool));
1572 SVN_ERR(close_pack_context(&context, iterpool));
1573 svn_pool_destroy(iterpool);
1575 return SVN_NO_ERROR;
1578 /* Given REV in FS, set *REV_OFFSET to REV's offset in the packed file.
1579 Use POOL for temporary allocations. */
1581 svn_fs_fs__get_packed_offset(apr_off_t *rev_offset,
1586 fs_fs_data_t *ffd = fs->fsap_data;
1587 svn_stream_t *manifest_stream;
1588 svn_boolean_t is_cached;
1590 apr_int64_t shard_pos;
1591 apr_array_header_t *manifest;
1592 apr_pool_t *iterpool;
1594 shard = rev / ffd->max_files_per_dir;
1596 /* position of the shard within the manifest */
1597 shard_pos = rev % ffd->max_files_per_dir;
1599 /* fetch exactly that element into *rev_offset, if the manifest is found
1601 SVN_ERR(svn_cache__get_partial((void **) rev_offset, &is_cached,
1602 ffd->packed_offset_cache, &shard,
1603 svn_fs_fs__get_sharded_offset, &shard_pos,
1607 return SVN_NO_ERROR;
1609 /* Open the manifest file. */
1610 SVN_ERR(svn_stream_open_readonly(&manifest_stream,
1611 svn_fs_fs__path_rev_packed(fs, rev,
1616 /* While we're here, let's just read the entire manifest file into an array,
1617 so we can cache the entire thing. */
1618 iterpool = svn_pool_create(pool);
1619 manifest = apr_array_make(pool, ffd->max_files_per_dir, sizeof(apr_off_t));
1625 svn_pool_clear(iterpool);
1626 SVN_ERR(svn_fs_fs__read_number_from_stream(&val, &eof, manifest_stream,
1631 APR_ARRAY_PUSH(manifest, apr_off_t) = (apr_off_t)val;
1633 svn_pool_destroy(iterpool);
1635 *rev_offset = APR_ARRAY_IDX(manifest, rev % ffd->max_files_per_dir,
1638 /* Close up shop and cache the array. */
1639 SVN_ERR(svn_stream_close(manifest_stream));
1640 return svn_cache__set(ffd->packed_offset_cache, &shard, manifest, pool);
1643 /* Packing logic for physical addresssing mode:
1644 * Simply concatenate all revision contents.
1646 * Pack the revision shard starting at SHARD_REV containing exactly
1647 * MAX_FILES_PER_DIR revisions from SHARD_PATH into the PACK_FILE_DIR,
1648 * using POOL for allocations. CANCEL_FUNC and CANCEL_BATON are what you
1651 static svn_error_t *
1652 pack_phys_addressed(const char *pack_file_dir,
1653 const char *shard_path,
1654 svn_revnum_t start_rev,
1655 int max_files_per_dir,
1656 svn_cancel_func_t cancel_func,
1660 const char *pack_file_path, *manifest_file_path;
1661 apr_file_t *pack_file;
1662 apr_file_t *manifest_file;
1663 svn_stream_t *manifest_stream;
1664 svn_revnum_t end_rev, rev;
1665 apr_off_t next_offset;
1666 apr_pool_t *iterpool;
1668 /* Some useful paths. */
1669 pack_file_path = svn_dirent_join(pack_file_dir, PATH_PACKED, pool);
1670 manifest_file_path = svn_dirent_join(pack_file_dir, PATH_MANIFEST, pool);
1672 /* Create the new directory and pack file.
1673 * Use unbuffered apr_file_t since we're going to write using 16kb
1675 SVN_ERR(svn_io_file_open(&pack_file, pack_file_path,
1676 APR_WRITE | APR_CREATE | APR_EXCL,
1677 APR_OS_DEFAULT, pool));
1679 /* Create the manifest file. */
1680 SVN_ERR(svn_io_file_open(&manifest_file, manifest_file_path,
1681 APR_WRITE | APR_BUFFERED | APR_CREATE | APR_EXCL,
1682 APR_OS_DEFAULT, pool));
1683 manifest_stream = svn_stream_from_aprfile2(manifest_file, TRUE, pool);
1685 end_rev = start_rev + max_files_per_dir - 1;
1687 iterpool = svn_pool_create(pool);
1689 /* Iterate over the revisions in this shard, squashing them together. */
1690 for (rev = start_rev; rev <= end_rev; rev++)
1692 svn_stream_t *rev_stream;
1696 svn_pool_clear(iterpool);
1698 /* Get the size of the file. */
1699 path = svn_dirent_join(shard_path, apr_psprintf(iterpool, "%ld", rev),
1701 SVN_ERR(svn_io_stat(&finfo, path, APR_FINFO_SIZE, iterpool));
1703 /* build manifest */
1704 SVN_ERR(svn_stream_printf(manifest_stream, iterpool,
1705 "%" APR_OFF_T_FMT "\n", next_offset));
1706 next_offset += finfo.size;
1708 /* Copy all the bits from the rev file to the end of the pack file. */
1709 SVN_ERR(svn_stream_open_readonly(&rev_stream, path, iterpool, iterpool));
1710 SVN_ERR(svn_stream_copy3(rev_stream,
1711 svn_stream_from_aprfile2(pack_file, TRUE, pool),
1712 cancel_func, cancel_baton, iterpool));
1715 /* Close stream over APR file. */
1716 SVN_ERR(svn_stream_close(manifest_stream));
1718 /* Ensure that pack file is written to disk. */
1719 SVN_ERR(svn_io_file_flush_to_disk(manifest_file, pool));
1720 SVN_ERR(svn_io_file_close(manifest_file, pool));
1722 /* disallow write access to the manifest file */
1723 SVN_ERR(svn_io_set_file_read_only(manifest_file_path, FALSE, iterpool));
1725 /* Ensure that pack file is written to disk. */
1726 SVN_ERR(svn_io_file_flush_to_disk(pack_file, pool));
1727 SVN_ERR(svn_io_file_close(pack_file, pool));
1729 svn_pool_destroy(iterpool);
1731 return SVN_NO_ERROR;
1734 /* In filesystem FS, pack the revision SHARD containing exactly
1735 * MAX_FILES_PER_DIR revisions from SHARD_PATH into the PACK_FILE_DIR,
1736 * using POOL for allocations. Try to limit the amount of temporary
1737 * memory needed to MAX_MEM bytes. CANCEL_FUNC and CANCEL_BATON are what
1738 * you think they are.
1740 * If for some reason we detect a partial packing already performed, we
1741 * remove the pack file and start again.
1743 * The actual packing will be done in a format-specific sub-function.
1745 static svn_error_t *
1746 pack_rev_shard(svn_fs_t *fs,
1747 const char *pack_file_dir,
1748 const char *shard_path,
1750 int max_files_per_dir,
1752 svn_cancel_func_t cancel_func,
1756 const char *pack_file_path;
1757 svn_revnum_t shard_rev = (svn_revnum_t) (shard * max_files_per_dir);
1759 /* Some useful paths. */
1760 pack_file_path = svn_dirent_join(pack_file_dir, PATH_PACKED, pool);
1762 /* Remove any existing pack file for this shard, since it is incomplete. */
1763 SVN_ERR(svn_io_remove_dir2(pack_file_dir, TRUE, cancel_func, cancel_baton,
1766 /* Create the new directory and pack file. */
1767 SVN_ERR(svn_io_dir_make(pack_file_dir, APR_OS_DEFAULT, pool));
1769 /* Index information files */
1770 if (svn_fs_fs__use_log_addressing(fs))
1771 SVN_ERR(pack_log_addressed(fs, pack_file_dir, shard_path, shard_rev,
1772 max_mem, cancel_func, cancel_baton, pool));
1774 SVN_ERR(pack_phys_addressed(pack_file_dir, shard_path, shard_rev,
1775 max_files_per_dir, cancel_func,
1776 cancel_baton, pool));
1778 SVN_ERR(svn_io_copy_perms(shard_path, pack_file_dir, pool));
1779 SVN_ERR(svn_io_set_file_read_only(pack_file_path, FALSE, pool));
1781 return SVN_NO_ERROR;
1784 /* Baton struct used by pack_body(), pack_shard() and synced_pack_shard().
1785 These calls are nested and for every level additional fields will be
1789 /* Valid when entering pack_body(). */
1791 svn_fs_pack_notify_t notify_func;
1793 svn_cancel_func_t cancel_func;
1796 /* Additional entries valid when entering pack_shard(). */
1797 const char *revs_dir;
1798 const char *revsprops_dir;
1801 /* Additional entries valid when entering synced_pack_shard(). */
1802 const char *rev_shard_path;
1806 /* Part of the pack process that requires global (write) synchronization.
1807 * We pack the revision properties of the shard described by BATON and
1808 * In the file system at FS_PATH, pack the SHARD in REVS_DIR and replace
1809 * the non-packed revprop & rev shard folder(s) with the packed ones.
1810 * The packed rev folder has been created prior to calling this function.
1812 static svn_error_t *
1813 synced_pack_shard(void *baton,
1816 struct pack_baton *pb = baton;
1817 fs_fs_data_t *ffd = pb->fs->fsap_data;
1818 const char *revprops_shard_path, *revprops_pack_file_dir;
1820 /* if enabled, pack the revprops in an equivalent way */
1821 if (pb->revsprops_dir)
1823 revprops_pack_file_dir = svn_dirent_join(pb->revsprops_dir,
1825 "%" APR_INT64_T_FMT PATH_EXT_PACKED_SHARD,
1828 revprops_shard_path = svn_dirent_join(pb->revsprops_dir,
1829 apr_psprintf(pool, "%" APR_INT64_T_FMT, pb->shard),
1832 SVN_ERR(svn_fs_fs__pack_revprops_shard(revprops_pack_file_dir,
1833 revprops_shard_path,
1835 ffd->max_files_per_dir,
1836 (int)(0.9*ffd->revprop_pack_size),
1837 ffd->compress_packed_revprops
1838 ? SVN__COMPRESSION_ZLIB_DEFAULT
1839 : SVN__COMPRESSION_NONE,
1845 /* Update the min-unpacked-rev file to reflect our newly packed shard. */
1846 SVN_ERR(svn_fs_fs__write_min_unpacked_rev(pb->fs,
1847 (svn_revnum_t)((pb->shard + 1) * ffd->max_files_per_dir),
1849 ffd->min_unpacked_rev
1850 = (svn_revnum_t)((pb->shard + 1) * ffd->max_files_per_dir);
1852 /* Finally, remove the existing shard directories.
1853 * For revprops, clean up older obsolete shards as well as they might
1854 * have been left over from an interrupted FS upgrade. */
1855 SVN_ERR(svn_io_remove_dir2(pb->rev_shard_path, TRUE,
1856 pb->cancel_func, pb->cancel_baton, pool));
1857 if (pb->revsprops_dir)
1859 svn_node_kind_t kind = svn_node_dir;
1860 apr_int64_t to_cleanup = pb->shard;
1863 SVN_ERR(svn_fs_fs__delete_revprops_shard(revprops_shard_path,
1865 ffd->max_files_per_dir,
1870 /* If the previous shard exists, clean it up as well.
1871 Don't try to clean up shard 0 as it we can't tell quickly
1872 whether it actually needs cleaning up. */
1873 revprops_shard_path = svn_dirent_join(pb->revsprops_dir,
1874 apr_psprintf(pool, "%" APR_INT64_T_FMT, --to_cleanup),
1876 SVN_ERR(svn_io_check_path(revprops_shard_path, &kind, pool));
1878 while (kind == svn_node_dir && to_cleanup > 0);
1881 return SVN_NO_ERROR;
1884 /* Pack the shard described by BATON.
1886 * If for some reason we detect a partial packing already performed,
1887 * we remove the pack file and start again.
1889 static svn_error_t *
1890 pack_shard(struct pack_baton *baton,
1893 fs_fs_data_t *ffd = baton->fs->fsap_data;
1894 const char *rev_pack_file_dir;
1896 /* Notify caller we're starting to pack this shard. */
1897 if (baton->notify_func)
1898 SVN_ERR(baton->notify_func(baton->notify_baton, baton->shard,
1899 svn_fs_pack_notify_start, pool));
1901 /* Some useful paths. */
1902 rev_pack_file_dir = svn_dirent_join(baton->revs_dir,
1904 "%" APR_INT64_T_FMT PATH_EXT_PACKED_SHARD,
1907 baton->rev_shard_path = svn_dirent_join(baton->revs_dir,
1909 "%" APR_INT64_T_FMT,
1913 /* pack the revision content */
1914 SVN_ERR(pack_rev_shard(baton->fs, rev_pack_file_dir, baton->rev_shard_path,
1915 baton->shard, ffd->max_files_per_dir,
1916 DEFAULT_MAX_MEM, baton->cancel_func,
1917 baton->cancel_baton, pool));
1919 /* For newer repo formats, we only acquired the pack lock so far.
1920 Before modifying the repo state by switching over to the packed
1921 data, we need to acquire the global (write) lock. */
1922 if (ffd->format >= SVN_FS_FS__MIN_PACK_LOCK_FORMAT)
1923 SVN_ERR(svn_fs_fs__with_write_lock(baton->fs, synced_pack_shard, baton,
1926 SVN_ERR(synced_pack_shard(baton, pool));
1928 /* Notify caller we're starting to pack this shard. */
1929 if (baton->notify_func)
1930 SVN_ERR(baton->notify_func(baton->notify_baton, baton->shard,
1931 svn_fs_pack_notify_end, pool));
1933 return SVN_NO_ERROR;
1936 /* The work-horse for svn_fs_fs__pack, called with the FS write lock.
1937 This implements the svn_fs_fs__with_write_lock() 'body' callback
1938 type. BATON is a 'struct pack_baton *'.
1940 WARNING: if you add a call to this function, please note:
1941 The code currently assumes that any piece of code running with
1942 the write-lock set can rely on the ffd->min_unpacked_rev and
1943 ffd->min_unpacked_revprop caches to be up-to-date (and, by
1944 extension, on not having to use a retry when calling
1945 svn_fs_fs__path_rev_absolute() and friends). If you add a call
1946 to this function, consider whether you have to call
1947 svn_fs_fs__update_min_unpacked_rev().
1948 See this thread: http://thread.gmane.org/1291206765.3782.3309.camel@edith
1950 static svn_error_t *
1951 pack_body(void *baton,
1954 struct pack_baton *pb = baton;
1955 fs_fs_data_t *ffd = pb->fs->fsap_data;
1956 apr_int64_t completed_shards;
1957 svn_revnum_t youngest;
1958 apr_pool_t *iterpool;
1960 /* If the repository isn't a new enough format, we don't support packing.
1961 Return a friendly error to that effect. */
1962 if (ffd->format < SVN_FS_FS__MIN_PACKED_FORMAT)
1963 return svn_error_createf(SVN_ERR_UNSUPPORTED_FEATURE, NULL,
1964 _("FSFS format (%d) too old to pack; please upgrade the filesystem."),
1967 /* If we aren't using sharding, we can't do any packing, so quit. */
1968 if (!ffd->max_files_per_dir)
1969 return SVN_NO_ERROR;
1971 SVN_ERR(svn_fs_fs__read_min_unpacked_rev(&ffd->min_unpacked_rev, pb->fs,
1974 SVN_ERR(svn_fs_fs__youngest_rev(&youngest, pb->fs, pool));
1975 completed_shards = (youngest + 1) / ffd->max_files_per_dir;
1977 /* See if we've already completed all possible shards thus far. */
1978 if (ffd->min_unpacked_rev == (completed_shards * ffd->max_files_per_dir))
1979 return SVN_NO_ERROR;
1981 pb->revs_dir = svn_dirent_join(pb->fs->path, PATH_REVS_DIR, pool);
1982 if (ffd->format >= SVN_FS_FS__MIN_PACKED_REVPROP_FORMAT)
1983 pb->revsprops_dir = svn_dirent_join(pb->fs->path, PATH_REVPROPS_DIR,
1986 iterpool = svn_pool_create(pool);
1987 for (pb->shard = ffd->min_unpacked_rev / ffd->max_files_per_dir;
1988 pb->shard < completed_shards;
1991 svn_pool_clear(iterpool);
1993 if (pb->cancel_func)
1994 SVN_ERR(pb->cancel_func(pb->cancel_baton));
1996 SVN_ERR(pack_shard(pb, iterpool));
1999 svn_pool_destroy(iterpool);
2000 return SVN_NO_ERROR;
2004 svn_fs_fs__pack(svn_fs_t *fs,
2005 svn_fs_pack_notify_t notify_func,
2007 svn_cancel_func_t cancel_func,
2011 struct pack_baton pb = { 0 };
2012 fs_fs_data_t *ffd = fs->fsap_data;
2016 pb.notify_func = notify_func;
2017 pb.notify_baton = notify_baton;
2018 pb.cancel_func = cancel_func;
2019 pb.cancel_baton = cancel_baton;
2021 if (ffd->format >= SVN_FS_FS__MIN_PACK_LOCK_FORMAT)
2023 /* Newer repositories provide a pack operation specific lock.
2024 Acquire it to prevent concurrent packs.
2026 Since the file lock's lifetime is bound to a pool, we create a
2027 separate subpool here to release the lock immediately after the
2030 err = svn_fs_fs__with_pack_lock(fs, pack_body, &pb, pool);
2034 /* Use the global write lock for older repos. */
2035 err = svn_fs_fs__with_write_lock(fs, pack_body, &pb, pool);
2038 return svn_error_trace(err);