1 /* pack.c --- FSX shard packing functionality
3 * ====================================================================
4 * Licensed to the Apache Software Foundation (ASF) under one
5 * or more contributor license agreements. See the NOTICE file
6 * distributed with this work for additional information
7 * regarding copyright ownership. The ASF licenses this file
8 * to you under the Apache License, Version 2.0 (the
9 * "License"); you may not use this file except in compliance
10 * with the License. You may obtain a copy of the License at
12 * http://www.apache.org/licenses/LICENSE-2.0
14 * Unless required by applicable law or agreed to in writing,
15 * software distributed under the License is distributed on an
16 * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
17 * KIND, either express or implied. See the License for the
18 * specific language governing permissions and limitations
20 * ====================================================================
24 #include "svn_pools.h"
25 #include "svn_dirent_uri.h"
26 #include "svn_sorts.h"
27 #include "private/svn_sorts_private.h"
28 #include "private/svn_subr_private.h"
29 #include "private/svn_string_private.h"
30 #include "private/svn_temp_serializer.h"
36 #include "transaction.h"
38 #include "low_level.h"
39 #include "cached_data.h"
44 #include "../libsvn_fs/fs-loader.h"
46 #include "svn_private_config.h"
47 #include "temp_serializer.h"
51 * We pack files on a pack file basis (e.g. 1000 revs) without changing
52 * existing pack files nor the revision files outside the range to pack.
54 * First, we will scan the revision file indexes to determine the number
55 * of items to "place" (i.e. determine their optimal position within the
56 * future pack file). For each item, we will need a constant amount of
57 * memory to track it. A MAX_MEM parameter sets a limit to the number of
58 * items we may place in one go. That means, we may not be able to add
59 * all revisions at once. Instead, we will run the placement for a subset
60 * of revisions at a time. The very unlikely worst case will simply append
61 * all revision data with just a little reshuffling inside each revision.
63 * In a second step, we read all revisions in the selected range, build
64 * the item tracking information and copy the items themselves from the
65 * revision files to temporary files. The latter serve as buckets for a
66 * very coarse bucket presort: Separate change lists, file properties,
67 * directory properties and noderevs + representations from one another.
69 * The third step will determine an optimized placement for the items in
70 * each of the 4 buckets separately. The first three will simply order
71 * their items by revision, starting with the newest once. Placing rep
72 * and noderev items is a more elaborate process documented in the code.
74 * In short, we store items in the following order:
75 * - changed paths lists
77 * - directory properties
78 * - directory representations corresponding noderevs, lexical path order
79 * with special treatment of "trunk" and "branches"
80 * - same for file representations
82 * Step 4 copies the items from the temporary buckets into the final
83 * pack file and writes the temporary index files.
85 * Finally, after the last range of revisions, create the final indexes.
88 /* Maximum amount of memory we allocate for placement information during
91 #define DEFAULT_MAX_MEM (64 * 1024 * 1024)
93 /* Data structure describing a node change at PATH, REVISION.
94 * We will sort these instances by PATH and NODE_ID such that we can combine
95 * similar nodes in the same reps container and store containers in path
98 typedef struct path_order_t
101 svn_prefix_string__t *path;
103 /* node ID for this PATH in REVISION */
104 svn_fs_x__id_t node_id;
106 /* when this change happened */
107 svn_revnum_t revision;
109 /* this is a directory node */
110 svn_boolean_t is_dir;
112 /* length of the expanded representation content */
113 apr_int64_t expanded_size;
115 /* item ID of the noderev linked to the change. May be (0, 0). */
116 svn_fs_x__id_t noderev_id;
118 /* item ID of the representation containing the new data. May be (0, 0). */
119 svn_fs_x__id_t rep_id;
122 /* Represents a reference from item FROM to item TO. FROM may be a noderev
123 * or rep_id while TO is (currently) always a representation. We will sort
124 * them by TO which allows us to collect all dependent items.
126 typedef struct reference_t
132 /* This structure keeps track of all the temporary data and status that
133 * needs to be kept around during the creation of one pack file. After
134 * each revision range (in case we can't process all revs at once due to
135 * memory restrictions), parts of the data will get re-initialized.
137 typedef struct pack_context_t
139 /* file system that we operate on */
142 /* cancel function to invoke at regular intervals. May be NULL */
143 svn_cancel_func_t cancel_func;
145 /* baton to pass to CANCEL_FUNC */
148 /* first revision in the shard (and future pack file) */
149 svn_revnum_t shard_rev;
151 /* first revision in the range to process (>= SHARD_REV) */
152 svn_revnum_t start_rev;
154 /* first revision after the range to process (<= SHARD_END_REV) */
155 svn_revnum_t end_rev;
157 /* first revision after the current shard */
158 svn_revnum_t shard_end_rev;
160 /* log-to-phys proto index for the whole pack file */
161 apr_file_t *proto_l2p_index;
163 /* phys-to-log proto index for the whole pack file */
164 apr_file_t *proto_p2l_index;
166 /* full shard directory path (containing the unpacked revisions) */
167 const char *shard_dir;
169 /* full packed shard directory path (containing the pack file + indexes) */
170 const char *pack_file_dir;
172 /* full pack file path (including PACK_FILE_DIR) */
173 const char *pack_file_path;
175 /* current write position (i.e. file length) in the pack file */
176 apr_off_t pack_offset;
178 /* the pack file to ultimately write all data to */
179 apr_file_t *pack_file;
181 /* array of svn_fs_x__p2l_entry_t *, all referring to change lists.
182 * Will be filled in phase 2 and be cleared after each revision range. */
183 apr_array_header_t *changes;
185 /* temp file receiving all change list items (referenced by CHANGES).
186 * Will be filled in phase 2 and be cleared after each revision range. */
187 apr_file_t *changes_file;
189 /* array of svn_fs_x__p2l_entry_t *, all referring to file properties.
190 * Will be filled in phase 2 and be cleared after each revision range. */
191 apr_array_header_t *file_props;
193 /* temp file receiving all file prop items (referenced by FILE_PROPS).
194 * Will be filled in phase 2 and be cleared after each revision range.*/
195 apr_file_t *file_props_file;
197 /* array of svn_fs_x__p2l_entry_t *, all referring to directory properties.
198 * Will be filled in phase 2 and be cleared after each revision range. */
199 apr_array_header_t *dir_props;
201 /* temp file receiving all directory prop items (referenced by DIR_PROPS).
202 * Will be filled in phase 2 and be cleared after each revision range.*/
203 apr_file_t *dir_props_file;
205 /* container for all PATH members in PATH_ORDER. */
206 svn_prefix_tree__t *paths;
208 /* array of path_order_t *. Will be filled in phase 2 and be cleared
209 * after each revision range. Sorted by PATH, NODE_ID. */
210 apr_array_header_t *path_order;
212 /* array of reference_t* linking representations to their delta bases.
213 * Will be filled in phase 2 and be cleared after each revision range.
214 * It will be sorted by the FROM members (for rep->base rep lookup). */
215 apr_array_header_t *references;
217 /* array of svn_fs_x__p2l_entry_t*. Will be filled in phase 2 and be
218 * cleared after each revision range. During phase 3, we will set items
219 * to NULL that we already processed. */
220 apr_array_header_t *reps;
222 /* array of int, marking for each revision, the which offset their items
223 * begin in REPS. Will be filled in phase 2 and be cleared after
224 * each revision range. */
225 apr_array_header_t *rev_offsets;
227 /* temp file receiving all items referenced by REPS.
228 * Will be filled in phase 2 and be cleared after each revision range.*/
229 apr_file_t *reps_file;
231 /* pool used for temporary data structures that will be cleaned up when
232 * the next range of revisions is being processed */
233 apr_pool_t *info_pool;
236 /* Create and initialize a new pack context for packing shard SHARD_REV in
237 * SHARD_DIR into PACK_FILE_DIR within filesystem FS. Allocate it in POOL
238 * and return the structure in *CONTEXT.
240 * Limit the number of items being copied per iteration to MAX_ITEMS.
241 * Set CANCEL_FUNC and CANCEL_BATON as well.
244 initialize_pack_context(pack_context_t *context,
246 const char *pack_file_dir,
247 const char *shard_dir,
248 svn_revnum_t shard_rev,
250 svn_cancel_func_t cancel_func,
254 svn_fs_x__data_t *ffd = fs->fsap_data;
255 const char *temp_dir;
256 int max_revs = MIN(ffd->max_files_per_dir, max_items);
258 SVN_ERR_ASSERT(shard_rev % ffd->max_files_per_dir == 0);
260 /* where we will place our various temp files */
261 SVN_ERR(svn_io_temp_dir(&temp_dir, pool));
263 /* store parameters */
265 context->cancel_func = cancel_func;
266 context->cancel_baton = cancel_baton;
268 context->shard_rev = shard_rev;
269 context->start_rev = shard_rev;
270 context->end_rev = shard_rev;
271 context->shard_end_rev = shard_rev + ffd->max_files_per_dir;
273 /* Create the new directory and pack file. */
274 context->shard_dir = shard_dir;
275 context->pack_file_dir = pack_file_dir;
276 context->pack_file_path
277 = svn_dirent_join(pack_file_dir, PATH_PACKED, pool);
278 SVN_ERR(svn_io_file_open(&context->pack_file, context->pack_file_path,
279 APR_WRITE | APR_BUFFERED | APR_BINARY | APR_EXCL
280 | APR_CREATE, APR_OS_DEFAULT, pool));
282 /* Proto index files */
283 SVN_ERR(svn_fs_x__l2p_proto_index_open(
284 &context->proto_l2p_index,
285 svn_dirent_join(pack_file_dir,
286 PATH_INDEX PATH_EXT_L2P_INDEX,
289 SVN_ERR(svn_fs_x__p2l_proto_index_open(
290 &context->proto_p2l_index,
291 svn_dirent_join(pack_file_dir,
292 PATH_INDEX PATH_EXT_P2L_INDEX,
296 /* item buckets: one item info array and one temp file per bucket */
297 context->changes = apr_array_make(pool, max_items,
298 sizeof(svn_fs_x__p2l_entry_t *));
299 SVN_ERR(svn_io_open_unique_file3(&context->changes_file, NULL, temp_dir,
300 svn_io_file_del_on_close, pool, pool));
301 context->file_props = apr_array_make(pool, max_items,
302 sizeof(svn_fs_x__p2l_entry_t *));
303 SVN_ERR(svn_io_open_unique_file3(&context->file_props_file, NULL, temp_dir,
304 svn_io_file_del_on_close, pool, pool));
305 context->dir_props = apr_array_make(pool, max_items,
306 sizeof(svn_fs_x__p2l_entry_t *));
307 SVN_ERR(svn_io_open_unique_file3(&context->dir_props_file, NULL, temp_dir,
308 svn_io_file_del_on_close, pool, pool));
310 /* noderev and representation item bucket */
311 context->rev_offsets = apr_array_make(pool, max_revs, sizeof(int));
312 context->path_order = apr_array_make(pool, max_items,
313 sizeof(path_order_t *));
314 context->references = apr_array_make(pool, max_items,
315 sizeof(reference_t *));
316 context->reps = apr_array_make(pool, max_items,
317 sizeof(svn_fs_x__p2l_entry_t *));
318 SVN_ERR(svn_io_open_unique_file3(&context->reps_file, NULL, temp_dir,
319 svn_io_file_del_on_close, pool, pool));
321 /* the pool used for temp structures */
322 context->info_pool = svn_pool_create(pool);
323 context->paths = svn_prefix_tree__create(context->info_pool);
328 /* Clean up / free all revision range specific data and files in CONTEXT.
329 * Use SCRATCH_POOL for temporary allocations.
332 reset_pack_context(pack_context_t *context,
333 apr_pool_t *scratch_pool)
335 apr_array_clear(context->changes);
336 SVN_ERR(svn_io_file_trunc(context->changes_file, 0, scratch_pool));
337 apr_array_clear(context->file_props);
338 SVN_ERR(svn_io_file_trunc(context->file_props_file, 0, scratch_pool));
339 apr_array_clear(context->dir_props);
340 SVN_ERR(svn_io_file_trunc(context->dir_props_file, 0, scratch_pool));
342 apr_array_clear(context->rev_offsets);
343 apr_array_clear(context->path_order);
344 apr_array_clear(context->references);
345 apr_array_clear(context->reps);
346 SVN_ERR(svn_io_file_trunc(context->reps_file, 0, scratch_pool));
348 svn_pool_clear(context->info_pool);
353 /* Call this after the last revision range. It will finalize all index files
354 * for CONTEXT and close any open files.
355 * Use SCRATCH_POOL for temporary allocations.
358 close_pack_context(pack_context_t *context,
359 apr_pool_t *scratch_pool)
361 const char *proto_l2p_index_path;
362 const char *proto_p2l_index_path;
364 /* need the file names for the actual index creation call further down */
365 SVN_ERR(svn_io_file_name_get(&proto_l2p_index_path,
366 context->proto_l2p_index, scratch_pool));
367 SVN_ERR(svn_io_file_name_get(&proto_p2l_index_path,
368 context->proto_p2l_index, scratch_pool));
370 /* finalize proto index files */
371 SVN_ERR(svn_io_file_close(context->proto_l2p_index, scratch_pool));
372 SVN_ERR(svn_io_file_close(context->proto_p2l_index, scratch_pool));
374 /* Append the actual index data to the pack file. */
375 SVN_ERR(svn_fs_x__add_index_data(context->fs, context->pack_file,
376 proto_l2p_index_path,
377 proto_p2l_index_path,
381 /* remove proto index files */
382 SVN_ERR(svn_io_remove_file2(proto_l2p_index_path, FALSE, scratch_pool));
383 SVN_ERR(svn_io_remove_file2(proto_p2l_index_path, FALSE, scratch_pool));
385 SVN_ERR(svn_io_file_close(context->pack_file, scratch_pool));
390 /* Efficiently copy SIZE bytes from SOURCE to DEST. Invoke the CANCEL_FUNC
391 * from CONTEXT at regular intervals.
392 * Use SCRATCH_POOL for temporary allocations.
395 copy_file_data(pack_context_t *context,
399 apr_pool_t *scratch_pool)
401 /* most non-representation items will be small. Minimize the buffer
402 * and infrastructure overhead in that case. */
403 enum { STACK_BUFFER_SIZE = 1024 };
405 if (size < STACK_BUFFER_SIZE)
407 /* copy small data using a fixed-size buffer on stack */
408 char buffer[STACK_BUFFER_SIZE];
409 SVN_ERR(svn_io_file_read_full2(source, buffer, (apr_size_t)size,
410 NULL, NULL, scratch_pool));
411 SVN_ERR(svn_io_file_write_full(dest, buffer, (apr_size_t)size,
412 NULL, scratch_pool));
416 /* use streaming copies for larger data blocks. That may require
417 * the allocation of larger buffers and we should make sure that
418 * this extra memory is released asap. */
419 svn_fs_x__data_t *ffd = context->fs->fsap_data;
420 apr_pool_t *copypool = svn_pool_create(scratch_pool);
421 char *buffer = apr_palloc(copypool, ffd->block_size);
425 apr_size_t to_copy = (apr_size_t)(MIN(size, ffd->block_size));
426 if (context->cancel_func)
427 SVN_ERR(context->cancel_func(context->cancel_baton));
429 SVN_ERR(svn_io_file_read_full2(source, buffer, to_copy,
430 NULL, NULL, scratch_pool));
431 SVN_ERR(svn_io_file_write_full(dest, buffer, to_copy,
432 NULL, scratch_pool));
437 svn_pool_destroy(copypool);
443 /* Writes SIZE bytes, all 0, to DEST.
444 * Use SCRATCH_POOL for temporary allocations.
447 write_null_bytes(apr_file_t *dest,
449 apr_pool_t *scratch_pool)
451 /* Have a collection of high-quality, easy to access NUL bytes handy. */
452 enum { BUFFER_SIZE = 1024 };
453 static const char buffer[BUFFER_SIZE] = { 0 };
455 /* copy SIZE of them into the file's buffer */
458 apr_size_t to_write = MIN(size, BUFFER_SIZE);
459 SVN_ERR(svn_io_file_write_full(dest, buffer, to_write, NULL,
467 /* Copy the "simple" item (changed paths list or property representation)
468 * from the current position in REV_FILE to TEMP_FILE using CONTEXT. Add
469 * a copy of ENTRY to ENTRIES but with an updated offset value that points
470 * to the copy destination in TEMP_FILE.
471 * Use SCRATCH_POOL for temporary allocations.
474 copy_item_to_temp(pack_context_t *context,
475 apr_array_header_t *entries,
476 apr_file_t *temp_file,
477 svn_fs_x__revision_file_t *rev_file,
478 svn_fs_x__p2l_entry_t *entry,
479 apr_pool_t *scratch_pool)
481 svn_fs_x__p2l_entry_t *new_entry
482 = svn_fs_x__p2l_entry_dup(entry, context->info_pool);
484 SVN_ERR(svn_fs_x__get_file_offset(&new_entry->offset, temp_file,
486 APR_ARRAY_PUSH(entries, svn_fs_x__p2l_entry_t *) = new_entry;
488 SVN_ERR(copy_file_data(context, temp_file, rev_file->file, entry->size,
494 /* Return the offset within CONTEXT->REPS that corresponds to item
495 * ITEM_INDEX in REVISION.
498 get_item_array_index(pack_context_t *context,
499 svn_revnum_t revision,
500 apr_int64_t item_index)
502 assert(revision >= context->start_rev);
503 return (int)item_index + APR_ARRAY_IDX(context->rev_offsets,
504 revision - context->start_rev,
508 /* Write INFO to the correct position in CONTEXT->REPS. The latter may
509 * need auto-expanding. Overwriting an array element is not allowed.
512 add_item_rep_mapping(pack_context_t *context,
513 svn_fs_x__p2l_entry_t *entry)
516 assert(entry->item_count == 1);
519 idx = get_item_array_index(context,
520 entry->items[0].change_set,
521 entry->items[0].number);
523 /* make sure the index exists in the array */
524 while (context->reps->nelts <= idx)
525 APR_ARRAY_PUSH(context->reps, void *) = NULL;
527 /* set the element. If there is already an entry, there are probably
528 * two items claiming to be the same -> bail out */
529 assert(!APR_ARRAY_IDX(context->reps, idx, void *));
530 APR_ARRAY_IDX(context->reps, idx, void *) = entry;
533 /* Return the P2L entry from CONTEXT->REPS for the given ID. If there is
534 * none (or not anymore), return NULL. If RESET has been specified, set
535 * the array entry to NULL after returning the entry.
537 static svn_fs_x__p2l_entry_t *
538 get_item(pack_context_t *context,
539 const svn_fs_x__id_t *id,
542 svn_fs_x__p2l_entry_t *result = NULL;
543 svn_revnum_t revision = svn_fs_x__get_revnum(id->change_set);
544 if (id->number && revision >= context->start_rev)
546 int idx = get_item_array_index(context, revision, id->number);
547 if (context->reps->nelts > idx)
549 result = APR_ARRAY_IDX(context->reps, idx, void *);
551 APR_ARRAY_IDX(context->reps, idx, void *) = NULL;
558 /* Copy representation item identified by ENTRY from the current position
559 * in REV_FILE into CONTEXT->REPS_FILE. Add all tracking into needed by
560 * our placement algorithm to CONTEXT.
561 * Use SCRATCH_POOL for temporary allocations.
564 copy_rep_to_temp(pack_context_t *context,
565 svn_fs_x__revision_file_t *rev_file,
566 svn_fs_x__p2l_entry_t *entry,
567 apr_pool_t *scratch_pool)
569 svn_fs_x__rep_header_t *rep_header;
570 apr_off_t source_offset = entry->offset;
572 /* create a copy of ENTRY, make it point to the copy destination and
573 * store it in CONTEXT */
574 entry = svn_fs_x__p2l_entry_dup(entry, context->info_pool);
575 SVN_ERR(svn_fs_x__get_file_offset(&entry->offset, context->reps_file,
577 add_item_rep_mapping(context, entry);
579 /* read & parse the representation header */
580 SVN_ERR(svn_fs_x__read_rep_header(&rep_header, rev_file->stream,
581 scratch_pool, scratch_pool));
583 /* if the representation is a delta against some other rep, link the two */
584 if ( rep_header->type == svn_fs_x__rep_delta
585 && rep_header->base_revision >= context->start_rev)
587 reference_t *reference = apr_pcalloc(context->info_pool,
589 reference->from = entry->items[0];
590 reference->to.change_set
591 = svn_fs_x__change_set_by_rev(rep_header->base_revision);
592 reference->to.number = rep_header->base_item_index;
593 APR_ARRAY_PUSH(context->references, reference_t *) = reference;
596 /* copy the whole rep (including header!) to our temp file */
597 SVN_ERR(svn_io_file_seek(rev_file->file, APR_SET, &source_offset,
599 SVN_ERR(copy_file_data(context, context->reps_file, rev_file->file,
600 entry->size, scratch_pool));
605 /* Directories first, dirs / files sorted by name in reverse lexical order.
606 * This maximizes the chance of two items being located close to one another
607 * in *all* pack files independent of their change order. It also groups
608 * multi-project repos nicely according to their sub-projects. The reverse
609 * order aspect gives "trunk" preference over "tags" and "branches", so
610 * trunk-related items are more likely to be contiguous.
613 compare_dir_entries(const svn_sort__item_t *a,
614 const svn_sort__item_t *b)
616 const svn_fs_dirent_t *lhs = (const svn_fs_dirent_t *) a->value;
617 const svn_fs_dirent_t *rhs = (const svn_fs_dirent_t *) b->value;
619 if (lhs->kind != rhs->kind)
620 return lhs->kind == svn_node_dir ? -1 : 1;
622 return strcmp(lhs->name, rhs->name);
626 svn_fs_x__order_dir_entries(svn_fs_t *fs,
627 apr_hash_t *directory,
628 apr_pool_t *result_pool,
629 apr_pool_t *scratch_pool)
631 apr_array_header_t *ordered
632 = svn_sort__hash(directory, compare_dir_entries, scratch_pool);
634 apr_array_header_t *result
635 = apr_array_make(result_pool, ordered->nelts, sizeof(svn_fs_dirent_t *));
638 for (i = 0; i < ordered->nelts; ++i)
639 APR_ARRAY_PUSH(result, svn_fs_dirent_t *)
640 = APR_ARRAY_IDX(ordered, i, svn_sort__item_t).value;
645 /* Return a duplicate of the the ORIGINAL path and with special sub-strins
646 * (e.g. "trunk") modified in such a way that have a lower lexicographic
647 * value than any other "normal" file name.
650 tweak_path_for_ordering(const char *original,
653 /* We may add further special cases as needed. */
654 enum {SPECIAL_COUNT = 2};
655 static const char *special[SPECIAL_COUNT] = {"trunk", "branch"};
657 char *path = apr_pstrdup(pool, original);
660 /* Replace the first char of any "special" sub-string we find by
661 * a control char, i.e. '\1' .. '\31'. In the rare event that this
662 * would clash with existing paths, no data will be lost but merely
663 * the node ordering will be sub-optimal.
665 for (i = 0; i < SPECIAL_COUNT; ++i)
666 for (pos = strstr(path, special[i]);
668 pos = strstr(pos + 1, special[i]))
670 *pos = (char)(i + '\1');
676 /* Copy node revision item identified by ENTRY from the current position
677 * in REV_FILE into CONTEXT->REPS_FILE. Add all tracking into needed by
678 * our placement algorithm to CONTEXT.
679 * Use SCRATCH_POOL for temporary allocations.
682 copy_node_to_temp(pack_context_t *context,
683 svn_fs_x__revision_file_t *rev_file,
684 svn_fs_x__p2l_entry_t *entry,
685 apr_pool_t *scratch_pool)
687 path_order_t *path_order = apr_pcalloc(context->info_pool,
688 sizeof(*path_order));
689 svn_fs_x__noderev_t *noderev;
690 const char *sort_path;
691 apr_off_t source_offset = entry->offset;
693 /* read & parse noderev */
694 SVN_ERR(svn_fs_x__read_noderev(&noderev, rev_file->stream, scratch_pool,
697 /* create a copy of ENTRY, make it point to the copy destination and
698 * store it in CONTEXT */
699 entry = svn_fs_x__p2l_entry_dup(entry, context->info_pool);
700 SVN_ERR(svn_fs_x__get_file_offset(&entry->offset, context->reps_file,
702 add_item_rep_mapping(context, entry);
704 /* copy the noderev to our temp file */
705 SVN_ERR(svn_io_file_seek(rev_file->file, APR_SET, &source_offset,
707 SVN_ERR(copy_file_data(context, context->reps_file, rev_file->file,
708 entry->size, scratch_pool));
710 /* if the node has a data representation, make that the node's "base".
711 * This will (often) cause the noderev to be placed right in front of
712 * its data representation. */
714 if (noderev->data_rep
715 && svn_fs_x__get_revnum(noderev->data_rep->id.change_set)
716 >= context->start_rev)
718 reference_t *reference = apr_pcalloc(context->info_pool,
720 reference->from = entry->items[0];
721 reference->to.change_set = noderev->data_rep->id.change_set;
722 reference->to.number = noderev->data_rep->id.number;
723 APR_ARRAY_PUSH(context->references, reference_t *) = reference;
725 path_order->rep_id = reference->to;
726 path_order->expanded_size = noderev->data_rep->expanded_size;
729 /* Sort path is the key used for ordering noderevs and associated reps.
730 * It will not be stored in the final pack file. */
731 sort_path = tweak_path_for_ordering(noderev->created_path, scratch_pool);
732 path_order->path = svn_prefix_string__create(context->paths, sort_path);
733 path_order->node_id = noderev->node_id;
734 path_order->revision = svn_fs_x__get_revnum(noderev->noderev_id.change_set);
735 path_order->is_dir = noderev->kind == svn_node_dir;
736 path_order->noderev_id = noderev->noderev_id;
737 APR_ARRAY_PUSH(context->path_order, path_order_t *) = path_order;
742 /* implements compare_fn_t. Place LHS before RHS, if the latter is older.
745 compare_p2l_info(const svn_fs_x__p2l_entry_t * const * lhs,
746 const svn_fs_x__p2l_entry_t * const * rhs)
748 assert(*lhs != *rhs);
749 if ((*lhs)->item_count == 0)
750 return (*lhs)->item_count == 0 ? 0 : -1;
751 if ((*lhs)->item_count == 0)
754 if ((*lhs)->items[0].change_set == (*rhs)->items[0].change_set)
755 return (*lhs)->items[0].number > (*rhs)->items[0].number ? -1 : 1;
757 return (*lhs)->items[0].change_set > (*rhs)->items[0].change_set ? -1 : 1;
760 /* Sort svn_fs_x__p2l_entry_t * array ENTRIES by age. Place the latest
764 sort_items(apr_array_header_t *entries)
766 svn_sort__array(entries,
767 (int (*)(const void *, const void *))compare_p2l_info);
770 /* implements compare_fn_t. Sort descending by PATH, NODE_ID and REVISION.
773 compare_path_order(const path_order_t * const * lhs_p,
774 const path_order_t * const * rhs_p)
776 const path_order_t * lhs = *lhs_p;
777 const path_order_t * rhs = *rhs_p;
779 /* cluster all directories */
780 int diff = rhs->is_dir - lhs->is_dir;
784 /* lexicographic order on path and node (i.e. latest first) */
785 diff = svn_prefix_string__compare(lhs->path, rhs->path);
789 /* reverse order on node (i.e. latest first) */
790 diff = svn_fs_x__id_compare(&rhs->node_id, &lhs->node_id);
794 /* reverse order on revision (i.e. latest first) */
795 if (lhs->revision != rhs->revision)
796 return lhs->revision < rhs->revision ? 1 : -1;
801 /* implements compare_fn_t. Sort ascending by TO, FROM.
804 compare_references(const reference_t * const * lhs_p,
805 const reference_t * const * rhs_p)
807 const reference_t * lhs = *lhs_p;
808 const reference_t * rhs = *rhs_p;
810 int diff = svn_fs_x__id_compare(&lhs->to, &rhs->to);
811 return diff ? diff : svn_fs_x__id_compare(&lhs->from, &rhs->from);
814 /* Order the data collected in CONTEXT such that we can place them in the
818 sort_reps(pack_context_t *context)
820 svn_sort__array(context->path_order,
821 (int (*)(const void *, const void *))compare_path_order);
822 svn_sort__array(context->references,
823 (int (*)(const void *, const void *))compare_references);
826 /* Return the remaining unused bytes in the current block in CONTEXT's
830 get_block_left(pack_context_t *context)
832 svn_fs_x__data_t *ffd = context->fs->fsap_data;
833 return ffd->block_size - (context->pack_offset % ffd->block_size);
836 /* To prevent items from overlapping a block boundary, we will usually
837 * put them into the next block and top up the old one with NUL bytes.
838 * Pad CONTEXT's pack file to the end of the current block, if that padding
839 * is short enough. Use SCRATCH_POOL for temporary allocations.
842 auto_pad_block(pack_context_t *context,
843 apr_pool_t *scratch_pool)
845 svn_fs_x__data_t *ffd = context->fs->fsap_data;
847 /* This is the maximum number of bytes "wasted" that way per block.
848 * Larger items will cross the block boundaries. */
849 const apr_off_t max_padding = MAX(ffd->block_size / 50, 512);
851 /* Is wasted space small enough to align the current item to the next
853 apr_off_t padding = get_block_left(context);
855 if (padding < max_padding)
857 /* Yes. To up with NUL bytes and don't forget to create
858 * an P2L index entry marking this section as unused. */
859 svn_fs_x__p2l_entry_t null_entry;
861 null_entry.offset = context->pack_offset;
862 null_entry.size = padding;
863 null_entry.type = SVN_FS_X__ITEM_TYPE_UNUSED;
864 null_entry.fnv1_checksum = 0;
865 null_entry.item_count = 0;
866 null_entry.items = NULL;
868 SVN_ERR(write_null_bytes(context->pack_file, padding, scratch_pool));
869 SVN_ERR(svn_fs_x__p2l_proto_index_add_entry
870 (context->proto_p2l_index, &null_entry, scratch_pool));
871 context->pack_offset += padding;
877 /* Return the index of the first entry in CONTEXT->REFERENCES that
878 * references ITEM->ITEMS[0] if such entries exist. All matching items
879 * will be consecutive.
882 find_first_reference(pack_context_t *context,
883 svn_fs_x__p2l_entry_t *item)
886 int upper = context->references->nelts - 1;
888 while (lower <= upper)
890 int current = lower + (upper - lower) / 2;
891 reference_t *reference
892 = APR_ARRAY_IDX(context->references, current, reference_t *);
894 if (svn_fs_x__id_compare(&reference->to, item->items) < 0)
903 /* Check whether entry number IDX in CONTEXT->REFERENCES references ITEM.
906 is_reference_match(pack_context_t *context,
908 svn_fs_x__p2l_entry_t *item)
910 reference_t *reference;
911 if (context->references->nelts <= idx)
914 reference = APR_ARRAY_IDX(context->references, idx, reference_t *);
915 return svn_fs_x__id_eq(&reference->to, item->items);
918 /* Starting at IDX in CONTEXT->PATH_ORDER, select all representations and
919 * noderevs that should be placed into the same container, respectively.
920 * Append the path_order_t * elements encountered in SELECTED, the
921 * svn_fs_x__p2l_entry_t * of the representations that should be placed
922 * into the same reps container will be appended to REP_PARTS and the
923 * svn_fs_x__p2l_entry_t * of the noderevs referencing those reps will
924 * be appended to NODE_PARTS.
926 * Remove all returned items from the CONTEXT->REPS container and prevent
927 * them from being placed a second time later on. That also means that the
928 * caller has to place all items returned.
931 select_reps(pack_context_t *context,
933 apr_array_header_t *selected,
934 apr_array_header_t *node_parts,
935 apr_array_header_t *rep_parts)
937 apr_array_header_t *path_order = context->path_order;
938 path_order_t *start_path = APR_ARRAY_IDX(path_order, idx, path_order_t *);
940 svn_fs_x__p2l_entry_t *node_part;
941 svn_fs_x__p2l_entry_t *rep_part;
942 svn_fs_x__p2l_entry_t *depending;
945 /* collect all path_order records as well as rep and noderev items
946 * that occupy the same path with the same node. */
947 for (; idx < path_order->nelts; ++idx)
949 path_order_t *current_path
950 = APR_ARRAY_IDX(path_order, idx, path_order_t *);
952 if (!svn_fs_x__id_eq(&start_path->node_id, ¤t_path->node_id))
955 APR_ARRAY_IDX(path_order, idx, path_order_t *) = NULL;
956 node_part = get_item(context, ¤t_path->noderev_id, TRUE);
957 rep_part = get_item(context, ¤t_path->rep_id, TRUE);
959 if (node_part && rep_part)
960 APR_ARRAY_PUSH(selected, path_order_t *) = current_path;
963 APR_ARRAY_PUSH(node_parts, svn_fs_x__p2l_entry_t *) = node_part;
965 APR_ARRAY_PUSH(rep_parts, svn_fs_x__p2l_entry_t *) = rep_part;
968 /* collect depending reps and noderevs that reference any of the collected
970 for (i = 0; i < rep_parts->nelts; ++i)
972 rep_part = APR_ARRAY_IDX(rep_parts, i, svn_fs_x__p2l_entry_t*);
973 for (k = find_first_reference(context, rep_part);
974 is_reference_match(context, k, rep_part);
977 reference_t *reference
978 = APR_ARRAY_IDX(context->references, k, reference_t *);
980 depending = get_item(context, &reference->from, TRUE);
984 if (depending->type == SVN_FS_X__ITEM_TYPE_NODEREV)
985 APR_ARRAY_PUSH(node_parts, svn_fs_x__p2l_entry_t *) = depending;
987 APR_ARRAY_PUSH(rep_parts, svn_fs_x__p2l_entry_t *) = depending;
994 /* Return TRUE, if all path_order_t * in SELECTED reference contents that is
995 * not longer than LIMIT.
998 reps_fit_into_containers(apr_array_header_t *selected,
1002 for (i = 0; i < selected->nelts; ++i)
1003 if (APR_ARRAY_IDX(selected, i, path_order_t *)->expanded_size > limit)
1009 /* Write the *CONTAINER containing the noderevs described by the
1010 * svn_fs_x__p2l_entry_t * in ITEMS to the pack file on CONTEXT.
1011 * Append a P2L entry for the container to CONTAINER->REPS.
1012 * Afterwards, clear ITEMS and re-allocate *CONTAINER in CONTAINER_POOL
1013 * so the caller may fill them again.
1014 * Use SCRATCH_POOL for temporary allocations.
1016 static svn_error_t *
1017 write_nodes_container(pack_context_t *context,
1018 svn_fs_x__noderevs_t **container,
1019 apr_array_header_t *items,
1020 apr_pool_t *container_pool,
1021 apr_pool_t *scratch_pool)
1024 apr_off_t offset = 0;
1025 svn_fs_x__p2l_entry_t *container_entry;
1026 svn_stream_t *pack_stream;
1028 if (items->nelts == 0)
1029 return SVN_NO_ERROR;
1031 /* serialize container */
1032 container_entry = apr_palloc(context->info_pool, sizeof(*container_entry));
1033 pack_stream = svn_checksum__wrap_write_stream_fnv1a_32x4
1034 (&container_entry->fnv1_checksum,
1035 svn_stream_from_aprfile2(context->pack_file,
1036 TRUE, scratch_pool),
1038 SVN_ERR(svn_fs_x__write_noderevs_container(pack_stream, *container,
1040 SVN_ERR(svn_stream_close(pack_stream));
1041 SVN_ERR(svn_io_file_seek(context->pack_file, APR_CUR, &offset,
1044 /* replace first noderev item in ENTRIES with the container
1045 and set all others to NULL */
1046 container_entry->offset = context->pack_offset;
1047 container_entry->size = offset - container_entry->offset;
1048 container_entry->type = SVN_FS_X__ITEM_TYPE_NODEREVS_CONT;
1049 container_entry->item_count = items->nelts;
1050 container_entry->items = apr_palloc(context->info_pool,
1051 sizeof(svn_fs_x__id_t) * container_entry->item_count);
1053 for (i = 0; i < items->nelts; ++i)
1054 container_entry->items[i]
1055 = APR_ARRAY_IDX(items, i, svn_fs_x__p2l_entry_t *)->items[0];
1057 context->pack_offset = offset;
1058 APR_ARRAY_PUSH(context->reps, svn_fs_x__p2l_entry_t *)
1061 /* Write P2L index for copied items, i.e. the 1 container */
1062 SVN_ERR(svn_fs_x__p2l_proto_index_add_entry
1063 (context->proto_p2l_index, container_entry, scratch_pool));
1065 svn_pool_clear(container_pool);
1066 *container = svn_fs_x__noderevs_create(16, container_pool);
1067 apr_array_clear(items);
1069 return SVN_NO_ERROR;
1072 /* Read the noderevs given by the svn_fs_x__p2l_entry_t * in NODE_PARTS
1073 * from TEMP_FILE and add them to *CONTAINER and NODES_IN_CONTAINER.
1074 * Whenever the container grows bigger than the current block in CONTEXT,
1075 * write the data to disk and continue in the next block.
1077 * Use CONTAINER_POOL to re-allocate the *CONTAINER as necessary and
1078 * SCRATCH_POOL to temporary allocations.
1080 static svn_error_t *
1081 store_nodes(pack_context_t *context,
1082 apr_file_t *temp_file,
1083 apr_array_header_t *node_parts,
1084 svn_fs_x__noderevs_t **container,
1085 apr_array_header_t *nodes_in_container,
1086 apr_pool_t *container_pool,
1087 apr_pool_t *scratch_pool)
1091 apr_pool_t *iterpool = svn_pool_create(scratch_pool);
1092 svn_stream_t *stream
1093 = svn_stream_from_aprfile2(temp_file, TRUE, scratch_pool);
1095 /* number of bytes in the current block not being spent on fixed-size
1096 items (i.e. those not put into the container). */
1097 apr_size_t capacity_left = get_block_left(context);
1099 /* Estimated noderev container size */
1100 apr_size_t last_container_size = 0, container_size = 0;
1102 /* Estimate extra capacity we will gain from container compression. */
1103 apr_size_t pack_savings = 0;
1104 for (i = 0; i < node_parts->nelts; ++i)
1106 svn_fs_x__noderev_t *noderev;
1107 svn_fs_x__p2l_entry_t *entry
1108 = APR_ARRAY_IDX(node_parts, i, svn_fs_x__p2l_entry_t *);
1110 /* if we reached the limit, check whether we saved some space
1111 through the container. */
1112 if (capacity_left + pack_savings < container_size + entry->size)
1113 container_size = svn_fs_x__noderevs_estimate_size(*container);
1115 /* If necessary and the container is large enough, try harder
1116 by actually serializing the container and determine current
1117 savings due to compression. */
1118 if ( capacity_left + pack_savings < container_size + entry->size
1119 && container_size > last_container_size + 2000)
1121 svn_stringbuf_t *serialized
1122 = svn_stringbuf_create_ensure(container_size, iterpool);
1123 svn_stream_t *temp_stream
1124 = svn_stream_from_stringbuf(serialized, iterpool);
1126 SVN_ERR(svn_fs_x__write_noderevs_container(temp_stream, *container,
1128 SVN_ERR(svn_stream_close(temp_stream));
1130 last_container_size = container_size;
1131 pack_savings = container_size - serialized->len;
1134 /* still doesn't fit? -> block is full. Flush */
1135 if ( capacity_left + pack_savings < container_size + entry->size
1136 && nodes_in_container->nelts < 2)
1138 SVN_ERR(auto_pad_block(context, iterpool));
1139 capacity_left = get_block_left(context);
1142 /* still doesn't fit? -> block is full. Flush */
1143 if (capacity_left + pack_savings < container_size + entry->size)
1145 SVN_ERR(write_nodes_container(context, container,
1146 nodes_in_container, container_pool,
1149 capacity_left = get_block_left(context);
1154 /* item will fit into the block. */
1155 SVN_ERR(svn_io_file_seek(temp_file, APR_SET, &entry->offset, iterpool));
1156 SVN_ERR(svn_fs_x__read_noderev(&noderev, stream, iterpool, iterpool));
1157 svn_fs_x__noderevs_add(*container, noderev);
1159 container_size += entry->size;
1160 APR_ARRAY_PUSH(nodes_in_container, svn_fs_x__p2l_entry_t *) = entry;
1162 svn_pool_clear(iterpool);
1165 svn_pool_destroy(iterpool);
1167 return SVN_NO_ERROR;
1171 /* Finalize CONTAINER and write it to CONTEXT's pack file.
1172 * Append an P2L entry containing the given SUB_ITEMS to NEW_ENTRIES.
1173 * Use SCRATCH_POOL for temporary allocations.
1175 static svn_error_t *
1176 write_reps_container(pack_context_t *context,
1177 svn_fs_x__reps_builder_t *container,
1178 apr_array_header_t *sub_items,
1179 apr_array_header_t *new_entries,
1180 apr_pool_t *scratch_pool)
1182 apr_off_t offset = 0;
1183 svn_fs_x__p2l_entry_t container_entry;
1185 svn_stream_t *pack_stream
1186 = svn_checksum__wrap_write_stream_fnv1a_32x4
1187 (&container_entry.fnv1_checksum,
1188 svn_stream_from_aprfile2(context->pack_file,
1189 TRUE, scratch_pool),
1192 SVN_ERR(svn_fs_x__write_reps_container(pack_stream, container,
1194 SVN_ERR(svn_stream_close(pack_stream));
1195 SVN_ERR(svn_io_file_seek(context->pack_file, APR_CUR, &offset,
1198 container_entry.offset = context->pack_offset;
1199 container_entry.size = offset - container_entry.offset;
1200 container_entry.type = SVN_FS_X__ITEM_TYPE_REPS_CONT;
1201 container_entry.item_count = sub_items->nelts;
1202 container_entry.items = (svn_fs_x__id_t *)sub_items->elts;
1204 context->pack_offset = offset;
1205 APR_ARRAY_PUSH(new_entries, svn_fs_x__p2l_entry_t *)
1206 = svn_fs_x__p2l_entry_dup(&container_entry, context->info_pool);
1208 SVN_ERR(svn_fs_x__p2l_proto_index_add_entry
1209 (context->proto_p2l_index, &container_entry, scratch_pool));
1211 return SVN_NO_ERROR;
1214 /* Read the (property) representations identified by svn_fs_x__p2l_entry_t
1215 * elements in ENTRIES from TEMP_FILE, aggregate them and write them into
1216 * CONTEXT->PACK_FILE. Use SCRATCH_POOL for temporary allocations.
1218 static svn_error_t *
1219 write_reps_containers(pack_context_t *context,
1220 apr_array_header_t *entries,
1221 apr_file_t *temp_file,
1222 apr_array_header_t *new_entries,
1223 apr_pool_t *scratch_pool)
1225 apr_pool_t *iterpool = svn_pool_create(scratch_pool);
1226 apr_pool_t *container_pool = svn_pool_create(scratch_pool);
1229 apr_ssize_t block_left = get_block_left(context);
1231 svn_fs_x__reps_builder_t *container
1232 = svn_fs_x__reps_builder_create(context->fs, container_pool);
1233 apr_array_header_t *sub_items
1234 = apr_array_make(scratch_pool, 64, sizeof(svn_fs_x__id_t));
1235 svn_fs_x__revision_file_t *file;
1237 SVN_ERR(svn_fs_x__wrap_temp_rev_file(&file, context->fs, temp_file,
1240 /* copy all items in strict order */
1241 for (i = entries->nelts-1; i >= 0; --i)
1243 svn_fs_x__representation_t representation = { 0 };
1244 svn_stringbuf_t *contents;
1245 svn_stream_t *stream;
1246 apr_size_t list_index;
1247 svn_fs_x__p2l_entry_t *entry
1248 = APR_ARRAY_IDX(entries, i, svn_fs_x__p2l_entry_t *);
1250 if ((block_left < entry->size) && sub_items->nelts)
1252 block_left = get_block_left(context)
1253 - svn_fs_x__reps_estimate_size(container);
1256 if ((block_left < entry->size) && sub_items->nelts)
1258 SVN_ERR(write_reps_container(context, container, sub_items,
1259 new_entries, iterpool));
1261 apr_array_clear(sub_items);
1262 svn_pool_clear(container_pool);
1263 container = svn_fs_x__reps_builder_create(context->fs,
1265 block_left = get_block_left(context);
1268 /* still enough space in current block? */
1269 if (block_left < entry->size)
1271 SVN_ERR(auto_pad_block(context, iterpool));
1272 block_left = get_block_left(context);
1275 assert(entry->item_count == 1);
1276 representation.id = entry->items[0];
1278 /* select the change list in the source file, parse it and add it to
1280 SVN_ERR(svn_io_file_seek(temp_file, APR_SET, &entry->offset,
1282 SVN_ERR(svn_fs_x__get_representation_length(&representation.size,
1283 &representation.expanded_size,
1286 SVN_ERR(svn_fs_x__get_contents(&stream, context->fs, &representation,
1288 contents = svn_stringbuf_create_ensure(representation.expanded_size,
1290 contents->len = representation.expanded_size;
1292 /* The representation is immutable. Read it normally. */
1293 SVN_ERR(svn_stream_read_full(stream, contents->data, &contents->len));
1294 SVN_ERR(svn_stream_close(stream));
1296 SVN_ERR(svn_fs_x__reps_add(&list_index, container,
1297 svn_stringbuf__morph_into_string(contents)));
1298 SVN_ERR_ASSERT(list_index == sub_items->nelts);
1299 block_left -= entry->size;
1301 APR_ARRAY_PUSH(sub_items, svn_fs_x__id_t) = entry->items[0];
1303 svn_pool_clear(iterpool);
1306 if (sub_items->nelts)
1307 SVN_ERR(write_reps_container(context, container, sub_items,
1308 new_entries, iterpool));
1310 svn_pool_destroy(iterpool);
1311 svn_pool_destroy(container_pool);
1313 return SVN_NO_ERROR;
1316 /* Return TRUE if the estimated size of the NODES_IN_CONTAINER plus the
1317 * representations given as svn_fs_x__p2l_entry_t * in ENTRIES may exceed
1318 * the space left in the current block.
1320 static svn_boolean_t
1321 should_flush_nodes_container(pack_context_t *context,
1322 svn_fs_x__noderevs_t *nodes_container,
1323 apr_array_header_t *entries)
1325 apr_ssize_t block_left = get_block_left(context);
1326 apr_ssize_t rep_sum = 0;
1327 apr_ssize_t container_size
1328 = svn_fs_x__noderevs_estimate_size(nodes_container);
1331 for (i = 0; i < entries->nelts; ++i)
1333 svn_fs_x__p2l_entry_t *entry
1334 = APR_ARRAY_IDX(entries, i, svn_fs_x__p2l_entry_t *);
1335 rep_sum += entry->size;
1338 return block_left < rep_sum + container_size;
1341 /* Read the contents of the first COUNT non-NULL, non-empty items in ITEMS
1342 * from TEMP_FILE and write them to CONTEXT->PACK_FILE.
1343 * Use SCRATCH_POOL for temporary allocations.
1345 static svn_error_t *
1346 store_items(pack_context_t *context,
1347 apr_file_t *temp_file,
1348 apr_array_header_t *items,
1350 apr_pool_t *scratch_pool)
1353 apr_pool_t *iterpool = svn_pool_create(scratch_pool);
1355 /* copy all items in strict order */
1356 for (i = 0; i < count; ++i)
1358 svn_fs_x__p2l_entry_t *entry
1359 = APR_ARRAY_IDX(items, i, svn_fs_x__p2l_entry_t *);
1361 || entry->type == SVN_FS_X__ITEM_TYPE_UNUSED
1362 || entry->item_count == 0)
1365 /* select the item in the source file and copy it into the target
1367 SVN_ERR(svn_io_file_seek(temp_file, APR_SET, &entry->offset,
1369 SVN_ERR(copy_file_data(context, context->pack_file, temp_file,
1370 entry->size, iterpool));
1372 /* write index entry and update current position */
1373 entry->offset = context->pack_offset;
1374 context->pack_offset += entry->size;
1376 SVN_ERR(svn_fs_x__p2l_proto_index_add_entry
1377 (context->proto_p2l_index, entry, iterpool));
1379 APR_ARRAY_PUSH(context->reps, svn_fs_x__p2l_entry_t *) = entry;
1380 svn_pool_clear(iterpool);
1383 svn_pool_destroy(iterpool);
1385 return SVN_NO_ERROR;
1388 /* Copy (append) the items identified by svn_fs_x__p2l_entry_t * elements
1389 * in ENTRIES strictly in order from TEMP_FILE into CONTEXT->PACK_FILE.
1390 * Use SCRATCH_POOL for temporary allocations.
1392 static svn_error_t *
1393 copy_reps_from_temp(pack_context_t *context,
1394 apr_file_t *temp_file,
1395 apr_pool_t *scratch_pool)
1397 svn_fs_x__data_t *ffd = context->fs->fsap_data;
1399 apr_pool_t *iterpool = svn_pool_create(scratch_pool);
1400 apr_pool_t *container_pool = svn_pool_create(scratch_pool);
1401 apr_array_header_t *path_order = context->path_order;
1402 apr_array_header_t *reps = context->reps;
1403 apr_array_header_t *selected = apr_array_make(scratch_pool, 16,
1404 path_order->elt_size);
1405 apr_array_header_t *node_parts = apr_array_make(scratch_pool, 16,
1407 apr_array_header_t *rep_parts = apr_array_make(scratch_pool, 16,
1409 apr_array_header_t *nodes_in_container = apr_array_make(scratch_pool, 16,
1412 int initial_reps_count = reps->nelts;
1414 /* 1 container for all noderevs in the current block. We will try to
1415 * not write it to disk until the current block fills up, i.e. aim for
1416 * a single noderevs container per block. */
1417 svn_fs_x__noderevs_t *nodes_container
1418 = svn_fs_x__noderevs_create(16, container_pool);
1420 /* copy items in path order. Create block-sized containers. */
1421 for (i = 0; i < path_order->nelts; ++i)
1423 if (APR_ARRAY_IDX(path_order, i, path_order_t *) == NULL)
1426 /* Collect reps to combine and all noderevs referencing them */
1427 SVN_ERR(select_reps(context, i, selected, node_parts, rep_parts));
1429 /* store the noderevs container in front of the reps */
1430 SVN_ERR(store_nodes(context, temp_file, node_parts, &nodes_container,
1431 nodes_in_container, container_pool, iterpool));
1433 /* actually flush the noderevs to disk if the reps container is likely
1434 * to fill the block, i.e. no further noderevs will be added to the
1435 * nodes container. */
1436 if (should_flush_nodes_container(context, nodes_container, node_parts))
1437 SVN_ERR(write_nodes_container(context, &nodes_container,
1438 nodes_in_container, container_pool,
1441 /* if all reps are short enough put them into one container.
1442 * Otherwise, just store all containers here. */
1443 if (reps_fit_into_containers(selected, 2 * ffd->block_size))
1444 SVN_ERR(write_reps_containers(context, rep_parts, temp_file,
1445 context->reps, iterpool));
1447 SVN_ERR(store_items(context, temp_file, rep_parts, rep_parts->nelts,
1450 /* processed all items */
1451 apr_array_clear(selected);
1452 apr_array_clear(node_parts);
1453 apr_array_clear(rep_parts);
1455 svn_pool_clear(iterpool);
1458 /* flush noderevs container to disk */
1459 if (nodes_in_container->nelts)
1460 SVN_ERR(write_nodes_container(context, &nodes_container,
1461 nodes_in_container, container_pool,
1464 /* copy all items in strict order */
1465 SVN_ERR(store_items(context, temp_file, reps, initial_reps_count,
1468 /* vaccum ENTRIES array: eliminate NULL entries */
1469 for (i = 0, k = 0; i < reps->nelts; ++i)
1471 svn_fs_x__p2l_entry_t *entry
1472 = APR_ARRAY_IDX(reps, i, svn_fs_x__p2l_entry_t *);
1475 APR_ARRAY_IDX(reps, k, svn_fs_x__p2l_entry_t *) = entry;
1481 svn_pool_destroy(iterpool);
1482 svn_pool_destroy(container_pool);
1484 return SVN_NO_ERROR;
1487 /* Finalize CONTAINER and write it to CONTEXT's pack file.
1488 * Append an P2L entry containing the given SUB_ITEMS to NEW_ENTRIES.
1489 * Use SCRATCH_POOL for temporary allocations.
1491 static svn_error_t *
1492 write_changes_container(pack_context_t *context,
1493 svn_fs_x__changes_t *container,
1494 apr_array_header_t *sub_items,
1495 apr_array_header_t *new_entries,
1496 apr_pool_t *scratch_pool)
1498 apr_off_t offset = 0;
1499 svn_fs_x__p2l_entry_t container_entry;
1501 svn_stream_t *pack_stream
1502 = svn_checksum__wrap_write_stream_fnv1a_32x4
1503 (&container_entry.fnv1_checksum,
1504 svn_stream_from_aprfile2(context->pack_file,
1505 TRUE, scratch_pool),
1508 SVN_ERR(svn_fs_x__write_changes_container(pack_stream,
1511 SVN_ERR(svn_stream_close(pack_stream));
1512 SVN_ERR(svn_io_file_seek(context->pack_file, APR_CUR, &offset,
1515 container_entry.offset = context->pack_offset;
1516 container_entry.size = offset - container_entry.offset;
1517 container_entry.type = SVN_FS_X__ITEM_TYPE_CHANGES_CONT;
1518 container_entry.item_count = sub_items->nelts;
1519 container_entry.items = (svn_fs_x__id_t *)sub_items->elts;
1521 context->pack_offset = offset;
1522 APR_ARRAY_PUSH(new_entries, svn_fs_x__p2l_entry_t *)
1523 = svn_fs_x__p2l_entry_dup(&container_entry, context->info_pool);
1525 SVN_ERR(svn_fs_x__p2l_proto_index_add_entry
1526 (context->proto_p2l_index, &container_entry, scratch_pool));
1528 return SVN_NO_ERROR;
1531 /* Read the change lists identified by svn_fs_x__p2l_entry_t * elements
1532 * in ENTRIES strictly in from TEMP_FILE, aggregate them and write them
1533 * into CONTEXT->PACK_FILE. Use SCRATCH_POOL for temporary allocations.
1535 static svn_error_t *
1536 write_changes_containers(pack_context_t *context,
1537 apr_array_header_t *entries,
1538 apr_file_t *temp_file,
1539 apr_pool_t *scratch_pool)
1541 apr_pool_t *iterpool = svn_pool_create(scratch_pool);
1542 apr_pool_t *container_pool = svn_pool_create(scratch_pool);
1545 apr_ssize_t block_left = get_block_left(context);
1546 apr_ssize_t estimated_addition = 0;
1548 svn_fs_x__changes_t *container
1549 = svn_fs_x__changes_create(1000, container_pool);
1550 apr_array_header_t *sub_items
1551 = apr_array_make(scratch_pool, 64, sizeof(svn_fs_x__id_t));
1552 apr_array_header_t *new_entries
1553 = apr_array_make(context->info_pool, 16, entries->elt_size);
1554 svn_stream_t *temp_stream
1555 = svn_stream_from_aprfile2(temp_file, TRUE, scratch_pool);
1557 /* copy all items in strict order */
1558 for (i = entries->nelts-1; i >= 0; --i)
1560 apr_array_header_t *changes;
1561 apr_size_t list_index;
1562 svn_fs_x__p2l_entry_t *entry
1563 = APR_ARRAY_IDX(entries, i, svn_fs_x__p2l_entry_t *);
1565 /* zip compression alone will significantly reduce the size of large
1566 * change lists. So, we will probably need even less than this estimate.
1568 apr_ssize_t estimated_size = (entry->size / 5) + 250;
1570 /* If necessary and enough data has been added to the container since
1571 * the last test, try harder by actually serializing the container and
1572 * determine current savings due to compression. */
1573 if (block_left < estimated_size && estimated_addition > 2000)
1575 svn_stringbuf_t *serialized
1576 = svn_stringbuf_create_ensure(get_block_left(context), iterpool);
1577 svn_stream_t *memory_stream
1578 = svn_stream_from_stringbuf(serialized, iterpool);
1580 SVN_ERR(svn_fs_x__write_changes_container(memory_stream,
1581 container, iterpool));
1582 SVN_ERR(svn_stream_close(temp_stream));
1584 block_left = get_block_left(context) - serialized->len;
1585 estimated_addition = 0;
1588 if ((block_left < estimated_size) && sub_items->nelts)
1590 SVN_ERR(write_changes_container(context, container, sub_items,
1591 new_entries, iterpool));
1593 apr_array_clear(sub_items);
1594 svn_pool_clear(container_pool);
1595 container = svn_fs_x__changes_create(1000, container_pool);
1596 block_left = get_block_left(context);
1597 estimated_addition = 0;
1600 /* still enough space in current block? */
1601 if (block_left < estimated_size)
1603 SVN_ERR(auto_pad_block(context, iterpool));
1604 block_left = get_block_left(context);
1607 /* select the change list in the source file, parse it and add it to
1609 SVN_ERR(svn_io_file_seek(temp_file, APR_SET, &entry->offset,
1611 SVN_ERR(svn_fs_x__read_changes(&changes, temp_stream, scratch_pool,
1613 SVN_ERR(svn_fs_x__changes_append_list(&list_index, container, changes));
1614 SVN_ERR_ASSERT(list_index == sub_items->nelts);
1615 block_left -= estimated_size;
1616 estimated_addition += estimated_size;
1618 APR_ARRAY_PUSH(sub_items, svn_fs_x__id_t) = entry->items[0];
1620 svn_pool_clear(iterpool);
1623 if (sub_items->nelts)
1624 SVN_ERR(write_changes_container(context, container, sub_items,
1625 new_entries, iterpool));
1627 *entries = *new_entries;
1628 svn_pool_destroy(iterpool);
1629 svn_pool_destroy(container_pool);
1631 return SVN_NO_ERROR;
1634 /* Read the (property) representations identified by svn_fs_x__p2l_entry_t
1635 * elements in ENTRIES from TEMP_FILE, aggregate them and write them into
1636 * CONTEXT->PACK_FILE. Use SCRATCH_POOL for temporary allocations.
1638 static svn_error_t *
1639 write_property_containers(pack_context_t *context,
1640 apr_array_header_t *entries,
1641 apr_file_t *temp_file,
1642 apr_pool_t *scratch_pool)
1644 apr_array_header_t *new_entries
1645 = apr_array_make(context->info_pool, 16, entries->elt_size);
1647 SVN_ERR(write_reps_containers(context, entries, temp_file, new_entries,
1650 *entries = *new_entries;
1652 return SVN_NO_ERROR;
1655 /* Append all entries of svn_fs_x__p2l_entry_t * array TO_APPEND to
1656 * svn_fs_x__p2l_entry_t * array DEST.
1659 append_entries(apr_array_header_t *dest,
1660 apr_array_header_t *to_append)
1663 for (i = 0; i < to_append->nelts; ++i)
1664 APR_ARRAY_PUSH(dest, svn_fs_x__p2l_entry_t *)
1665 = APR_ARRAY_IDX(to_append, i, svn_fs_x__p2l_entry_t *);
1668 /* Write the log-to-phys proto index file for CONTEXT and use POOL for
1669 * temporary allocations. All items in all buckets must have been placed
1672 static svn_error_t *
1673 write_l2p_index(pack_context_t *context,
1676 apr_pool_t *scratch_pool = svn_pool_create(pool);
1677 const char *temp_name;
1678 const char *proto_index;
1679 apr_off_t offset = 0;
1681 /* lump all items into one bucket. As target, use the bucket that
1682 * probably has the most entries already. */
1683 append_entries(context->reps, context->changes);
1684 append_entries(context->reps, context->file_props);
1685 append_entries(context->reps, context->dir_props);
1687 /* Let the index code do the expensive L2P -> P2L transformation. */
1688 SVN_ERR(svn_fs_x__l2p_index_from_p2l_entries(&temp_name,
1691 pool, scratch_pool));
1693 /* Append newly written segment to exisiting proto index file. */
1694 SVN_ERR(svn_io_file_name_get(&proto_index, context->proto_l2p_index,
1697 SVN_ERR(svn_io_file_flush(context->proto_l2p_index, scratch_pool));
1698 SVN_ERR(svn_io_append_file(temp_name, proto_index, scratch_pool));
1699 SVN_ERR(svn_io_remove_file2(temp_name, FALSE, scratch_pool));
1700 SVN_ERR(svn_io_file_seek(context->proto_l2p_index, APR_END, &offset,
1704 svn_pool_destroy(scratch_pool);
1706 return SVN_NO_ERROR;
1709 /* Pack the current revision range of CONTEXT, i.e. this covers phases 2
1710 * to 4. Use SCRATCH_POOL for temporary allocations.
1712 static svn_error_t *
1713 pack_range(pack_context_t *context,
1714 apr_pool_t *scratch_pool)
1716 svn_fs_x__data_t *ffd = context->fs->fsap_data;
1717 apr_pool_t *revpool = svn_pool_create(scratch_pool);
1718 apr_pool_t *iterpool = svn_pool_create(scratch_pool);
1720 /* Phase 2: Copy items into various buckets and build tracking info */
1721 svn_revnum_t revision;
1722 for (revision = context->start_rev; revision < context->end_rev; ++revision)
1724 apr_off_t offset = 0;
1725 svn_fs_x__revision_file_t *rev_file;
1727 /* Get the rev file dimensions (mainly index locations). */
1728 SVN_ERR(svn_fs_x__open_pack_or_rev_file(&rev_file, context->fs,
1729 revision, revpool, iterpool));
1730 SVN_ERR(svn_fs_x__auto_read_footer(rev_file));
1732 /* store the indirect array index */
1733 APR_ARRAY_PUSH(context->rev_offsets, int) = context->reps->nelts;
1735 /* read the phys-to-log index file until we covered the whole rev file.
1736 * That index contains enough info to build both target indexes from it. */
1737 while (offset < rev_file->l2p_offset)
1739 /* read one cluster */
1741 apr_array_header_t *entries;
1742 svn_pool_clear(iterpool);
1744 SVN_ERR(svn_fs_x__p2l_index_lookup(&entries, context->fs,
1745 rev_file, revision, offset,
1746 ffd->p2l_page_size, iterpool,
1749 for (i = 0; i < entries->nelts; ++i)
1751 svn_fs_x__p2l_entry_t *entry
1752 = &APR_ARRAY_IDX(entries, i, svn_fs_x__p2l_entry_t);
1754 /* skip first entry if that was duplicated due crossing a
1756 if (offset > entry->offset)
1759 /* process entry while inside the rev file */
1760 offset = entry->offset;
1761 if (offset < rev_file->l2p_offset)
1763 SVN_ERR(svn_io_file_seek(rev_file->file, APR_SET, &offset,
1766 if (entry->type == SVN_FS_X__ITEM_TYPE_CHANGES)
1767 SVN_ERR(copy_item_to_temp(context,
1769 context->changes_file,
1770 rev_file, entry, iterpool));
1771 else if (entry->type == SVN_FS_X__ITEM_TYPE_FILE_PROPS)
1772 SVN_ERR(copy_item_to_temp(context,
1773 context->file_props,
1774 context->file_props_file,
1775 rev_file, entry, iterpool));
1776 else if (entry->type == SVN_FS_X__ITEM_TYPE_DIR_PROPS)
1777 SVN_ERR(copy_item_to_temp(context,
1779 context->dir_props_file,
1780 rev_file, entry, iterpool));
1781 else if ( entry->type == SVN_FS_X__ITEM_TYPE_FILE_REP
1782 || entry->type == SVN_FS_X__ITEM_TYPE_DIR_REP)
1783 SVN_ERR(copy_rep_to_temp(context, rev_file, entry,
1785 else if (entry->type == SVN_FS_X__ITEM_TYPE_NODEREV)
1786 SVN_ERR(copy_node_to_temp(context, rev_file, entry,
1789 SVN_ERR_ASSERT(entry->type == SVN_FS_X__ITEM_TYPE_UNUSED);
1791 offset += entry->size;
1795 if (context->cancel_func)
1796 SVN_ERR(context->cancel_func(context->cancel_baton));
1799 svn_pool_clear(revpool);
1802 svn_pool_destroy(iterpool);
1804 /* phase 3: placement.
1805 * Use "newest first" placement for simple items. */
1806 sort_items(context->changes);
1807 sort_items(context->file_props);
1808 sort_items(context->dir_props);
1810 /* follow dependencies recursively for noderevs and data representations */
1813 /* phase 4: copy bucket data to pack file. Write P2L index. */
1814 SVN_ERR(write_changes_containers(context, context->changes,
1815 context->changes_file, revpool));
1816 svn_pool_clear(revpool);
1817 SVN_ERR(write_property_containers(context, context->file_props,
1818 context->file_props_file, revpool));
1819 svn_pool_clear(revpool);
1820 SVN_ERR(write_property_containers(context, context->dir_props,
1821 context->dir_props_file, revpool));
1822 svn_pool_clear(revpool);
1823 SVN_ERR(copy_reps_from_temp(context, context->reps_file, revpool));
1824 svn_pool_clear(revpool);
1826 /* write L2P index as well (now that we know all target offsets) */
1827 SVN_ERR(write_l2p_index(context, revpool));
1829 svn_pool_destroy(revpool);
1831 return SVN_NO_ERROR;
1834 /* Append CONTEXT->START_REV to the context's pack file with no re-ordering.
1835 * This function will only be used for very large revisions (>>100k changes).
1836 * Use SCRATCH_POOL for temporary allocations.
1838 static svn_error_t *
1839 append_revision(pack_context_t *context,
1840 apr_pool_t *scratch_pool)
1842 svn_fs_x__data_t *ffd = context->fs->fsap_data;
1843 apr_off_t offset = 0;
1844 apr_pool_t *iterpool = svn_pool_create(scratch_pool);
1845 svn_fs_x__revision_file_t *rev_file;
1848 /* Get the size of the file. */
1849 const char *path = svn_dirent_join(context->shard_dir,
1850 apr_psprintf(iterpool, "%ld",
1851 context->start_rev),
1853 SVN_ERR(svn_io_stat(&finfo, path, APR_FINFO_SIZE, scratch_pool));
1855 /* Copy all the bits from the rev file to the end of the pack file. */
1856 SVN_ERR(svn_fs_x__open_pack_or_rev_file(&rev_file, context->fs,
1857 context->start_rev, scratch_pool,
1859 SVN_ERR(copy_file_data(context, context->pack_file, rev_file->file,
1860 finfo.size, iterpool));
1862 /* mark the start of a new revision */
1863 SVN_ERR(svn_fs_x__l2p_proto_index_add_revision(context->proto_l2p_index,
1866 /* read the phys-to-log index file until we covered the whole rev file.
1867 * That index contains enough info to build both target indexes from it. */
1868 while (offset < finfo.size)
1870 /* read one cluster */
1872 apr_array_header_t *entries;
1873 SVN_ERR(svn_fs_x__p2l_index_lookup(&entries, context->fs, rev_file,
1874 context->start_rev, offset,
1875 ffd->p2l_page_size, iterpool,
1878 for (i = 0; i < entries->nelts; ++i)
1880 svn_fs_x__p2l_entry_t *entry
1881 = &APR_ARRAY_IDX(entries, i, svn_fs_x__p2l_entry_t);
1883 /* skip first entry if that was duplicated due crossing a
1885 if (offset > entry->offset)
1888 /* process entry while inside the rev file */
1889 offset = entry->offset;
1890 if (offset < finfo.size)
1892 /* there should be true containers */
1893 SVN_ERR_ASSERT(entry->item_count == 1);
1895 entry->offset += context->pack_offset;
1896 offset += entry->size;
1897 SVN_ERR(svn_fs_x__l2p_proto_index_add_entry
1898 (context->proto_l2p_index, entry->offset, 0,
1899 entry->items[0].number, iterpool));
1900 SVN_ERR(svn_fs_x__p2l_proto_index_add_entry
1901 (context->proto_p2l_index, entry, iterpool));
1905 svn_pool_clear(iterpool);
1908 svn_pool_destroy(iterpool);
1909 context->pack_offset += finfo.size;
1911 return SVN_NO_ERROR;
1914 /* Format 7 packing logic.
1916 * Pack the revision shard starting at SHARD_REV in filesystem FS from
1917 * SHARD_DIR into the PACK_FILE_DIR, using SCRATCH_POOL for temporary
1918 * allocations. Limit the extra memory consumption to MAX_MEM bytes.
1919 * CANCEL_FUNC and CANCEL_BATON are what you think they are.
1921 static svn_error_t *
1922 pack_log_addressed(svn_fs_t *fs,
1923 const char *pack_file_dir,
1924 const char *shard_dir,
1925 svn_revnum_t shard_rev,
1927 svn_cancel_func_t cancel_func,
1929 apr_pool_t *scratch_pool)
1933 /* estimated amount of memory used to represent one item in memory
1934 * during rev file packing */
1935 PER_ITEM_MEM = APR_ALIGN_DEFAULT(sizeof(path_order_t))
1936 + APR_ALIGN_DEFAULT(2 *sizeof(void*))
1937 + APR_ALIGN_DEFAULT(sizeof(reference_t))
1938 + APR_ALIGN_DEFAULT(sizeof(svn_fs_x__p2l_entry_t))
1942 int max_items = max_mem / PER_ITEM_MEM > INT_MAX
1944 : (int)(max_mem / PER_ITEM_MEM);
1945 apr_array_header_t *max_ids;
1946 pack_context_t context = { 0 };
1948 apr_size_t item_count = 0;
1949 apr_pool_t *iterpool = svn_pool_create(scratch_pool);
1951 /* set up a pack context */
1952 SVN_ERR(initialize_pack_context(&context, fs, pack_file_dir, shard_dir,
1953 shard_rev, max_items, cancel_func,
1954 cancel_baton, scratch_pool));
1956 /* phase 1: determine the size of the revisions to pack */
1957 SVN_ERR(svn_fs_x__l2p_get_max_ids(&max_ids, fs, shard_rev,
1958 context.shard_end_rev - shard_rev,
1959 scratch_pool, scratch_pool));
1961 /* pack revisions in ranges that don't exceed MAX_MEM */
1962 for (i = 0; i < max_ids->nelts; ++i)
1963 if (APR_ARRAY_IDX(max_ids, i, apr_uint64_t) + item_count <= max_items)
1969 /* some unpacked revisions before this one? */
1970 if (context.start_rev < context.end_rev)
1972 /* pack them intelligently (might be just 1 rev but still ...) */
1973 SVN_ERR(pack_range(&context, iterpool));
1974 SVN_ERR(reset_pack_context(&context, iterpool));
1978 /* next revision range is to start with the current revision */
1979 context.start_rev = i + context.shard_rev;
1980 context.end_rev = context.start_rev + 1;
1982 /* if this is a very large revision, we must place it as is */
1983 if (APR_ARRAY_IDX(max_ids, i, apr_uint64_t) > max_items)
1985 SVN_ERR(append_revision(&context, iterpool));
1986 context.start_rev++;
1989 item_count += (apr_size_t)APR_ARRAY_IDX(max_ids, i, apr_uint64_t);
1991 svn_pool_clear(iterpool);
1994 /* non-empty revision range at the end? */
1995 if (context.start_rev < context.end_rev)
1996 SVN_ERR(pack_range(&context, iterpool));
1998 /* last phase: finalize indexes and clean up */
1999 SVN_ERR(reset_pack_context(&context, iterpool));
2000 SVN_ERR(close_pack_context(&context, iterpool));
2001 svn_pool_destroy(iterpool);
2003 return SVN_NO_ERROR;
2006 /* Given REV in FS, set *REV_OFFSET to REV's offset in the packed file.
2007 Use SCRATCH_POOL for temporary allocations. */
2009 svn_fs_x__get_packed_offset(apr_off_t *rev_offset,
2012 apr_pool_t *scratch_pool)
2014 svn_fs_x__data_t *ffd = fs->fsap_data;
2015 svn_stream_t *manifest_stream;
2016 svn_boolean_t is_cached;
2018 apr_int64_t shard_pos;
2019 apr_array_header_t *manifest;
2020 apr_pool_t *iterpool;
2022 shard = rev / ffd->max_files_per_dir;
2024 /* position of the shard within the manifest */
2025 shard_pos = rev % ffd->max_files_per_dir;
2027 /* fetch exactly that element into *rev_offset, if the manifest is found
2029 SVN_ERR(svn_cache__get_partial((void **) rev_offset, &is_cached,
2030 ffd->packed_offset_cache, &shard,
2031 svn_fs_x__get_sharded_offset, &shard_pos,
2035 return SVN_NO_ERROR;
2037 /* Open the manifest file. */
2038 SVN_ERR(svn_stream_open_readonly(&manifest_stream,
2039 svn_fs_x__path_rev_packed(fs, rev, PATH_MANIFEST,
2041 scratch_pool, scratch_pool));
2043 /* While we're here, let's just read the entire manifest file into an array,
2044 so we can cache the entire thing. */
2045 iterpool = svn_pool_create(scratch_pool);
2046 manifest = apr_array_make(scratch_pool, ffd->max_files_per_dir,
2053 svn_pool_clear(iterpool);
2054 SVN_ERR(svn_fs_x__read_number_from_stream(&val, &eof, manifest_stream,
2059 APR_ARRAY_PUSH(manifest, apr_off_t) = (apr_off_t)val;
2061 svn_pool_destroy(iterpool);
2063 *rev_offset = APR_ARRAY_IDX(manifest, rev % ffd->max_files_per_dir,
2066 /* Close up shop and cache the array. */
2067 SVN_ERR(svn_stream_close(manifest_stream));
2068 return svn_cache__set(ffd->packed_offset_cache, &shard, manifest,
2072 /* In filesystem FS, pack the revision SHARD containing exactly
2073 * MAX_FILES_PER_DIR revisions from SHARD_PATH into the PACK_FILE_DIR,
2074 * using SCRATCH_POOL for temporary allocations. Try to limit the amount of
2075 * temporary memory needed to MAX_MEM bytes. CANCEL_FUNC and CANCEL_BATON
2076 * are what you think they are.
2078 * If for some reason we detect a partial packing already performed, we
2079 * remove the pack file and start again.
2081 * The actual packing will be done in a format-specific sub-function.
2083 static svn_error_t *
2084 pack_rev_shard(svn_fs_t *fs,
2085 const char *pack_file_dir,
2086 const char *shard_path,
2088 int max_files_per_dir,
2090 svn_cancel_func_t cancel_func,
2092 apr_pool_t *scratch_pool)
2094 const char *pack_file_path;
2095 svn_revnum_t shard_rev = (svn_revnum_t) (shard * max_files_per_dir);
2097 /* Some useful paths. */
2098 pack_file_path = svn_dirent_join(pack_file_dir, PATH_PACKED, scratch_pool);
2100 /* Remove any existing pack file for this shard, since it is incomplete. */
2101 SVN_ERR(svn_io_remove_dir2(pack_file_dir, TRUE, cancel_func, cancel_baton,
2104 /* Create the new directory and pack file. */
2105 SVN_ERR(svn_io_dir_make(pack_file_dir, APR_OS_DEFAULT, scratch_pool));
2107 /* Index information files */
2108 SVN_ERR(pack_log_addressed(fs, pack_file_dir, shard_path, shard_rev,
2109 max_mem, cancel_func, cancel_baton,
2112 SVN_ERR(svn_io_copy_perms(shard_path, pack_file_dir, scratch_pool));
2113 SVN_ERR(svn_io_set_file_read_only(pack_file_path, FALSE, scratch_pool));
2115 return SVN_NO_ERROR;
2118 /* In the file system at FS_PATH, pack the SHARD in REVS_DIR and
2119 * REVPROPS_DIR containing exactly MAX_FILES_PER_DIR revisions, using
2120 * SCRATCH_POOL temporary for allocations. REVPROPS_DIR will be NULL if
2121 * revprop packing is not supported. COMPRESSION_LEVEL and MAX_PACK_SIZE
2122 * will be ignored in that case.
2124 * CANCEL_FUNC and CANCEL_BATON are what you think they are; similarly
2125 * NOTIFY_FUNC and NOTIFY_BATON.
2127 * If for some reason we detect a partial packing already performed, we
2128 * remove the pack file and start again.
2130 static svn_error_t *
2131 pack_shard(const char *revs_dir,
2132 const char *revsprops_dir,
2135 int max_files_per_dir,
2136 apr_off_t max_pack_size,
2137 int compression_level,
2138 svn_fs_pack_notify_t notify_func,
2140 svn_cancel_func_t cancel_func,
2142 apr_pool_t *scratch_pool)
2144 svn_fs_x__data_t *ffd = fs->fsap_data;
2145 const char *rev_shard_path, *rev_pack_file_dir;
2146 const char *revprops_shard_path, *revprops_pack_file_dir;
2148 /* Notify caller we're starting to pack this shard. */
2150 SVN_ERR(notify_func(notify_baton, shard, svn_fs_pack_notify_start,
2153 /* Some useful paths. */
2154 rev_pack_file_dir = svn_dirent_join(revs_dir,
2155 apr_psprintf(scratch_pool,
2156 "%" APR_INT64_T_FMT PATH_EXT_PACKED_SHARD,
2159 rev_shard_path = svn_dirent_join(revs_dir,
2160 apr_psprintf(scratch_pool, "%" APR_INT64_T_FMT, shard),
2163 /* pack the revision content */
2164 SVN_ERR(pack_rev_shard(fs, rev_pack_file_dir, rev_shard_path,
2165 shard, max_files_per_dir, DEFAULT_MAX_MEM,
2166 cancel_func, cancel_baton, scratch_pool));
2168 /* if enabled, pack the revprops in an equivalent way */
2171 revprops_pack_file_dir = svn_dirent_join(revsprops_dir,
2172 apr_psprintf(scratch_pool,
2173 "%" APR_INT64_T_FMT PATH_EXT_PACKED_SHARD,
2176 revprops_shard_path = svn_dirent_join(revsprops_dir,
2177 apr_psprintf(scratch_pool, "%" APR_INT64_T_FMT, shard),
2180 SVN_ERR(svn_fs_x__pack_revprops_shard(revprops_pack_file_dir,
2181 revprops_shard_path,
2182 shard, max_files_per_dir,
2183 (int)(0.9 * max_pack_size),
2185 cancel_func, cancel_baton,
2189 /* Update the min-unpacked-rev file to reflect our newly packed shard. */
2190 SVN_ERR(svn_fs_x__write_min_unpacked_rev(fs,
2191 (svn_revnum_t)((shard + 1) * max_files_per_dir),
2193 ffd->min_unpacked_rev = (svn_revnum_t)((shard + 1) * max_files_per_dir);
2195 /* Finally, remove the existing shard directories.
2196 * For revprops, clean up older obsolete shards as well as they might
2197 * have been left over from an interrupted FS upgrade. */
2198 SVN_ERR(svn_io_remove_dir2(rev_shard_path, TRUE,
2199 cancel_func, cancel_baton, scratch_pool));
2202 svn_node_kind_t kind = svn_node_dir;
2203 apr_int64_t to_cleanup = shard;
2206 SVN_ERR(svn_fs_x__delete_revprops_shard(revprops_shard_path,
2209 cancel_func, cancel_baton,
2212 /* If the previous shard exists, clean it up as well.
2213 Don't try to clean up shard 0 as it we can't tell quickly
2214 whether it actually needs cleaning up. */
2215 revprops_shard_path = svn_dirent_join(revsprops_dir,
2216 apr_psprintf(scratch_pool,
2217 "%" APR_INT64_T_FMT,
2220 SVN_ERR(svn_io_check_path(revprops_shard_path, &kind, scratch_pool));
2222 while (kind == svn_node_dir && to_cleanup > 0);
2225 /* Notify caller we're starting to pack this shard. */
2227 SVN_ERR(notify_func(notify_baton, shard, svn_fs_pack_notify_end,
2230 return SVN_NO_ERROR;
2233 typedef struct pack_baton_t
2236 svn_fs_pack_notify_t notify_func;
2238 svn_cancel_func_t cancel_func;
2243 /* The work-horse for svn_fs_x__pack, called with the FS write lock.
2244 This implements the svn_fs_x__with_write_lock() 'body' callback
2245 type. BATON is a 'pack_baton_t *'.
2247 WARNING: if you add a call to this function, please note:
2248 The code currently assumes that any piece of code running with
2249 the write-lock set can rely on the ffd->min_unpacked_rev and
2250 ffd->min_unpacked_revprop caches to be up-to-date (and, by
2251 extension, on not having to use a retry when calling
2252 svn_fs_x__path_rev_absolute() and friends). If you add a call
2253 to this function, consider whether you have to call
2254 update_min_unpacked_rev().
2255 See this thread: http://thread.gmane.org/1291206765.3782.3309.camel@edith
2257 static svn_error_t *
2258 pack_body(void *baton,
2259 apr_pool_t *scratch_pool)
2261 pack_baton_t *pb = baton;
2262 svn_fs_x__data_t *ffd = pb->fs->fsap_data;
2263 apr_int64_t completed_shards;
2265 svn_revnum_t youngest;
2266 apr_pool_t *iterpool;
2267 const char *rev_data_path;
2268 const char *revprops_data_path = NULL;
2270 /* If we aren't using sharding, we can't do any packing, so quit. */
2271 SVN_ERR(svn_fs_x__read_min_unpacked_rev(&ffd->min_unpacked_rev, pb->fs,
2274 SVN_ERR(svn_fs_x__youngest_rev(&youngest, pb->fs, scratch_pool));
2275 completed_shards = (youngest + 1) / ffd->max_files_per_dir;
2277 /* See if we've already completed all possible shards thus far. */
2278 if (ffd->min_unpacked_rev == (completed_shards * ffd->max_files_per_dir))
2279 return SVN_NO_ERROR;
2281 rev_data_path = svn_dirent_join(pb->fs->path, PATH_REVS_DIR, scratch_pool);
2282 revprops_data_path = svn_dirent_join(pb->fs->path, PATH_REVPROPS_DIR,
2285 iterpool = svn_pool_create(scratch_pool);
2286 for (i = ffd->min_unpacked_rev / ffd->max_files_per_dir;
2287 i < completed_shards;
2290 svn_pool_clear(iterpool);
2292 if (pb->cancel_func)
2293 SVN_ERR(pb->cancel_func(pb->cancel_baton));
2295 SVN_ERR(pack_shard(rev_data_path, revprops_data_path,
2296 pb->fs, i, ffd->max_files_per_dir,
2297 ffd->revprop_pack_size,
2298 ffd->compress_packed_revprops
2299 ? SVN__COMPRESSION_ZLIB_DEFAULT
2300 : SVN__COMPRESSION_NONE,
2301 pb->notify_func, pb->notify_baton,
2302 pb->cancel_func, pb->cancel_baton, iterpool));
2305 svn_pool_destroy(iterpool);
2306 return SVN_NO_ERROR;
2310 svn_fs_x__pack(svn_fs_t *fs,
2311 svn_fs_pack_notify_t notify_func,
2313 svn_cancel_func_t cancel_func,
2315 apr_pool_t *scratch_pool)
2317 pack_baton_t pb = { 0 };
2319 pb.notify_func = notify_func;
2320 pb.notify_baton = notify_baton;
2321 pb.cancel_func = cancel_func;
2322 pb.cancel_baton = cancel_baton;
2323 return svn_fs_x__with_pack_lock(fs, pack_body, &pb, scratch_pool);