1 /* pack.c --- FSX shard packing functionality
3 * ====================================================================
4 * Licensed to the Apache Software Foundation (ASF) under one
5 * or more contributor license agreements. See the NOTICE file
6 * distributed with this work for additional information
7 * regarding copyright ownership. The ASF licenses this file
8 * to you under the Apache License, Version 2.0 (the
9 * "License"); you may not use this file except in compliance
10 * with the License. You may obtain a copy of the License at
12 * http://www.apache.org/licenses/LICENSE-2.0
14 * Unless required by applicable law or agreed to in writing,
15 * software distributed under the License is distributed on an
16 * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
17 * KIND, either express or implied. See the License for the
18 * specific language governing permissions and limitations
20 * ====================================================================
24 #include "svn_pools.h"
25 #include "svn_dirent_uri.h"
26 #include "svn_sorts.h"
27 #include "private/svn_sorts_private.h"
28 #include "private/svn_subr_private.h"
29 #include "private/svn_string_private.h"
30 #include "private/svn_temp_serializer.h"
36 #include "transaction.h"
38 #include "low_level.h"
39 #include "cached_data.h"
44 #include "../libsvn_fs/fs-loader.h"
46 #include "svn_private_config.h"
47 #include "temp_serializer.h"
51 * We pack files on a pack file basis (e.g. 1000 revs) without changing
52 * existing pack files nor the revision files outside the range to pack.
54 * First, we will scan the revision file indexes to determine the number
55 * of items to "place" (i.e. determine their optimal position within the
56 * future pack file). For each item, we will need a constant amount of
57 * memory to track it. A MAX_MEM parameter sets a limit to the number of
58 * items we may place in one go. That means, we may not be able to add
59 * all revisions at once. Instead, we will run the placement for a subset
60 * of revisions at a time. The very unlikely worst case will simply append
61 * all revision data with just a little reshuffling inside each revision.
63 * In a second step, we read all revisions in the selected range, build
64 * the item tracking information and copy the items themselves from the
65 * revision files to temporary files. The latter serve as buckets for a
66 * very coarse bucket presort: Separate change lists, file properties,
67 * directory properties and noderevs + representations from one another.
69 * The third step will determine an optimized placement for the items in
70 * each of the 4 buckets separately. The first three will simply order
71 * their items by revision, starting with the newest once. Placing rep
72 * and noderev items is a more elaborate process documented in the code.
74 * In short, we store items in the following order:
75 * - changed paths lists
77 * - directory properties
78 * - directory representations corresponding noderevs, lexical path order
79 * with special treatment of "trunk" and "branches"
80 * - same for file representations
82 * Step 4 copies the items from the temporary buckets into the final
83 * pack file and writes the temporary index files.
85 * Finally, after the last range of revisions, create the final indexes.
88 /* Maximum amount of memory we allocate for placement information during
91 #define DEFAULT_MAX_MEM (64 * 1024 * 1024)
93 /* Data structure describing a node change at PATH, REVISION.
94 * We will sort these instances by PATH and NODE_ID such that we can combine
95 * similar nodes in the same reps container and store containers in path
98 typedef struct path_order_t
101 svn_prefix_string__t *path;
103 /* node ID for this PATH in REVISION */
104 svn_fs_x__id_t node_id;
106 /* when this change happened */
107 svn_revnum_t revision;
109 /* length of the expanded representation content */
110 apr_int64_t expanded_size;
112 /* item ID of the noderev linked to the change. May be (0, 0). */
113 svn_fs_x__id_t noderev_id;
115 /* item ID of the representation containing the new data. May be (0, 0). */
116 svn_fs_x__id_t rep_id;
119 /* Represents a reference from item FROM to item TO. FROM may be a noderev
120 * or rep_id while TO is (currently) always a representation. We will sort
121 * them by TO which allows us to collect all dependent items.
123 typedef struct reference_t
129 /* This structure keeps track of all the temporary data and status that
130 * needs to be kept around during the creation of one pack file. After
131 * each revision range (in case we can't process all revs at once due to
132 * memory restrictions), parts of the data will get re-initialized.
134 typedef struct pack_context_t
136 /* file system that we operate on */
139 /* cancel function to invoke at regular intervals. May be NULL */
140 svn_cancel_func_t cancel_func;
142 /* baton to pass to CANCEL_FUNC */
145 /* first revision in the shard (and future pack file) */
146 svn_revnum_t shard_rev;
148 /* first revision in the range to process (>= SHARD_REV) */
149 svn_revnum_t start_rev;
151 /* first revision after the range to process (<= SHARD_END_REV) */
152 svn_revnum_t end_rev;
154 /* first revision after the current shard */
155 svn_revnum_t shard_end_rev;
157 /* log-to-phys proto index for the whole pack file */
158 apr_file_t *proto_l2p_index;
160 /* phys-to-log proto index for the whole pack file */
161 apr_file_t *proto_p2l_index;
163 /* full shard directory path (containing the unpacked revisions) */
164 const char *shard_dir;
166 /* full packed shard directory path (containing the pack file + indexes) */
167 const char *pack_file_dir;
169 /* full pack file path (including PACK_FILE_DIR) */
170 const char *pack_file_path;
172 /* current write position (i.e. file length) in the pack file */
173 apr_off_t pack_offset;
175 /* the pack file to ultimately write all data to */
176 apr_file_t *pack_file;
178 /* array of svn_fs_x__p2l_entry_t *, all referring to change lists.
179 * Will be filled in phase 2 and be cleared after each revision range. */
180 apr_array_header_t *changes;
182 /* temp file receiving all change list items (referenced by CHANGES).
183 * Will be filled in phase 2 and be cleared after each revision range. */
184 apr_file_t *changes_file;
186 /* array of svn_fs_x__p2l_entry_t *, all referring to file properties.
187 * Will be filled in phase 2 and be cleared after each revision range. */
188 apr_array_header_t *file_props;
190 /* temp file receiving all file prop items (referenced by FILE_PROPS).
191 * Will be filled in phase 2 and be cleared after each revision range.*/
192 apr_file_t *file_props_file;
194 /* array of svn_fs_x__p2l_entry_t *, all referring to directory properties.
195 * Will be filled in phase 2 and be cleared after each revision range. */
196 apr_array_header_t *dir_props;
198 /* temp file receiving all directory prop items (referenced by DIR_PROPS).
199 * Will be filled in phase 2 and be cleared after each revision range.*/
200 apr_file_t *dir_props_file;
202 /* container for all PATH members in PATH_ORDER. */
203 svn_prefix_tree__t *paths;
205 /* array of path_order_t *. Will be filled in phase 2 and be cleared
206 * after each revision range. Sorted by PATH, NODE_ID. */
207 apr_array_header_t *path_order;
209 /* array of reference_t* linking representations to their delta bases.
210 * Will be filled in phase 2 and be cleared after each revision range.
211 * It will be sorted by the FROM members (for rep->base rep lookup). */
212 apr_array_header_t *references;
214 /* array of svn_fs_x__p2l_entry_t*. Will be filled in phase 2 and be
215 * cleared after each revision range. During phase 3, we will set items
216 * to NULL that we already processed. */
217 apr_array_header_t *reps;
219 /* array of int, marking for each revision, at which offset their items
220 * begin in REPS. Will be filled in phase 2 and be cleared after
221 * each revision range. */
222 apr_array_header_t *rev_offsets;
224 /* temp file receiving all items referenced by REPS.
225 * Will be filled in phase 2 and be cleared after each revision range.*/
226 apr_file_t *reps_file;
228 /* pool used for temporary data structures that will be cleaned up when
229 * the next range of revisions is being processed */
230 apr_pool_t *info_pool;
233 /* Create and initialize a new pack context for packing shard SHARD_REV in
234 * SHARD_DIR into PACK_FILE_DIR within filesystem FS. Allocate it in POOL
235 * and return the structure in *CONTEXT.
237 * Limit the number of items being copied per iteration to MAX_ITEMS.
238 * Set CANCEL_FUNC and CANCEL_BATON as well.
241 initialize_pack_context(pack_context_t *context,
243 const char *pack_file_dir,
244 const char *shard_dir,
245 svn_revnum_t shard_rev,
247 svn_fs_x__batch_fsync_t *batch,
248 svn_cancel_func_t cancel_func,
252 svn_fs_x__data_t *ffd = fs->fsap_data;
253 const char *temp_dir;
254 int max_revs = MIN(ffd->max_files_per_dir, max_items);
256 SVN_ERR_ASSERT(shard_rev % ffd->max_files_per_dir == 0);
258 /* where we will place our various temp files */
259 SVN_ERR(svn_io_temp_dir(&temp_dir, pool));
261 /* store parameters */
263 context->cancel_func = cancel_func;
264 context->cancel_baton = cancel_baton;
266 context->shard_rev = shard_rev;
267 context->start_rev = shard_rev;
268 context->end_rev = shard_rev;
269 context->shard_end_rev = shard_rev + ffd->max_files_per_dir;
271 /* Create the new directory and pack file. */
272 context->shard_dir = shard_dir;
273 context->pack_file_dir = pack_file_dir;
274 context->pack_file_path
275 = svn_dirent_join(pack_file_dir, PATH_PACKED, pool);
277 SVN_ERR(svn_fs_x__batch_fsync_open_file(&context->pack_file, batch,
278 context->pack_file_path, pool));
280 /* Proto index files */
281 SVN_ERR(svn_fs_x__l2p_proto_index_open(
282 &context->proto_l2p_index,
283 svn_dirent_join(pack_file_dir,
284 PATH_INDEX PATH_EXT_L2P_INDEX,
287 SVN_ERR(svn_fs_x__p2l_proto_index_open(
288 &context->proto_p2l_index,
289 svn_dirent_join(pack_file_dir,
290 PATH_INDEX PATH_EXT_P2L_INDEX,
294 /* item buckets: one item info array and one temp file per bucket */
295 context->changes = apr_array_make(pool, max_items,
296 sizeof(svn_fs_x__p2l_entry_t *));
297 SVN_ERR(svn_io_open_unique_file3(&context->changes_file, NULL, temp_dir,
298 svn_io_file_del_on_close, pool, pool));
299 context->file_props = apr_array_make(pool, max_items,
300 sizeof(svn_fs_x__p2l_entry_t *));
301 SVN_ERR(svn_io_open_unique_file3(&context->file_props_file, NULL, temp_dir,
302 svn_io_file_del_on_close, pool, pool));
303 context->dir_props = apr_array_make(pool, max_items,
304 sizeof(svn_fs_x__p2l_entry_t *));
305 SVN_ERR(svn_io_open_unique_file3(&context->dir_props_file, NULL, temp_dir,
306 svn_io_file_del_on_close, pool, pool));
308 /* noderev and representation item bucket */
309 context->rev_offsets = apr_array_make(pool, max_revs, sizeof(int));
310 context->path_order = apr_array_make(pool, max_items,
311 sizeof(path_order_t *));
312 context->references = apr_array_make(pool, max_items,
313 sizeof(reference_t *));
314 context->reps = apr_array_make(pool, max_items,
315 sizeof(svn_fs_x__p2l_entry_t *));
316 SVN_ERR(svn_io_open_unique_file3(&context->reps_file, NULL, temp_dir,
317 svn_io_file_del_on_close, pool, pool));
319 /* the pool used for temp structures */
320 context->info_pool = svn_pool_create(pool);
321 context->paths = svn_prefix_tree__create(context->info_pool);
326 /* Clean up / free all revision range specific data and files in CONTEXT.
327 * Use SCRATCH_POOL for temporary allocations.
330 reset_pack_context(pack_context_t *context,
331 apr_pool_t *scratch_pool)
333 apr_array_clear(context->changes);
334 SVN_ERR(svn_io_file_trunc(context->changes_file, 0, scratch_pool));
335 apr_array_clear(context->file_props);
336 SVN_ERR(svn_io_file_trunc(context->file_props_file, 0, scratch_pool));
337 apr_array_clear(context->dir_props);
338 SVN_ERR(svn_io_file_trunc(context->dir_props_file, 0, scratch_pool));
340 apr_array_clear(context->rev_offsets);
341 apr_array_clear(context->path_order);
342 apr_array_clear(context->references);
343 apr_array_clear(context->reps);
344 SVN_ERR(svn_io_file_trunc(context->reps_file, 0, scratch_pool));
346 svn_pool_clear(context->info_pool);
347 context->paths = svn_prefix_tree__create(context->info_pool);
352 /* Call this after the last revision range. It will finalize all index files
353 * for CONTEXT and close any open files.
354 * Use SCRATCH_POOL for temporary allocations.
357 close_pack_context(pack_context_t *context,
358 apr_pool_t *scratch_pool)
360 const char *proto_l2p_index_path;
361 const char *proto_p2l_index_path;
363 /* need the file names for the actual index creation call further down */
364 SVN_ERR(svn_io_file_name_get(&proto_l2p_index_path,
365 context->proto_l2p_index, scratch_pool));
366 SVN_ERR(svn_io_file_name_get(&proto_p2l_index_path,
367 context->proto_p2l_index, scratch_pool));
369 /* finalize proto index files */
370 SVN_ERR(svn_io_file_close(context->proto_l2p_index, scratch_pool));
371 SVN_ERR(svn_io_file_close(context->proto_p2l_index, scratch_pool));
373 /* Append the actual index data to the pack file. */
374 SVN_ERR(svn_fs_x__add_index_data(context->fs, context->pack_file,
375 proto_l2p_index_path,
376 proto_p2l_index_path,
380 /* remove proto index files */
381 SVN_ERR(svn_io_remove_file2(proto_l2p_index_path, FALSE, scratch_pool));
382 SVN_ERR(svn_io_remove_file2(proto_p2l_index_path, FALSE, scratch_pool));
387 /* Efficiently copy SIZE bytes from SOURCE to DEST. Invoke the CANCEL_FUNC
388 * from CONTEXT at regular intervals.
389 * Use SCRATCH_POOL for temporary allocations.
392 copy_file_data(pack_context_t *context,
396 apr_pool_t *scratch_pool)
398 /* most non-representation items will be small. Minimize the buffer
399 * and infrastructure overhead in that case. */
400 enum { STACK_BUFFER_SIZE = 1024 };
402 if (size < STACK_BUFFER_SIZE)
404 /* copy small data using a fixed-size buffer on stack */
405 char buffer[STACK_BUFFER_SIZE];
406 SVN_ERR(svn_io_file_read_full2(source, buffer, (apr_size_t)size,
407 NULL, NULL, scratch_pool));
408 SVN_ERR(svn_io_file_write_full(dest, buffer, (apr_size_t)size,
409 NULL, scratch_pool));
413 /* use streaming copies for larger data blocks. That may require
414 * the allocation of larger buffers and we should make sure that
415 * this extra memory is released asap. */
416 svn_fs_x__data_t *ffd = context->fs->fsap_data;
417 apr_pool_t *copypool = svn_pool_create(scratch_pool);
418 char *buffer = apr_palloc(copypool, ffd->block_size);
422 apr_size_t to_copy = (apr_size_t)(MIN(size, ffd->block_size));
423 if (context->cancel_func)
424 SVN_ERR(context->cancel_func(context->cancel_baton));
426 SVN_ERR(svn_io_file_read_full2(source, buffer, to_copy,
427 NULL, NULL, scratch_pool));
428 SVN_ERR(svn_io_file_write_full(dest, buffer, to_copy,
429 NULL, scratch_pool));
434 svn_pool_destroy(copypool);
440 /* Writes SIZE bytes, all 0, to DEST.
441 * Use SCRATCH_POOL for temporary allocations.
444 write_null_bytes(apr_file_t *dest,
446 apr_pool_t *scratch_pool)
448 /* Have a collection of high-quality, easy to access NUL bytes handy. */
449 enum { BUFFER_SIZE = 1024 };
450 static const char buffer[BUFFER_SIZE] = { 0 };
452 /* copy SIZE of them into the file's buffer */
455 apr_size_t to_write = MIN(size, BUFFER_SIZE);
456 SVN_ERR(svn_io_file_write_full(dest, buffer, to_write, NULL,
464 /* Copy the "simple" item (changed paths list or property representation)
465 * from the current position in REV_FILE to TEMP_FILE using CONTEXT. Add
466 * a copy of ENTRY to ENTRIES but with an updated offset value that points
467 * to the copy destination in TEMP_FILE.
468 * Use SCRATCH_POOL for temporary allocations.
471 copy_item_to_temp(pack_context_t *context,
472 apr_array_header_t *entries,
473 apr_file_t *temp_file,
474 svn_fs_x__revision_file_t *rev_file,
475 svn_fs_x__p2l_entry_t *entry,
476 apr_pool_t *scratch_pool)
479 svn_fs_x__p2l_entry_t *new_entry
480 = svn_fs_x__p2l_entry_dup(entry, context->info_pool);
482 SVN_ERR(svn_io_file_get_offset(&new_entry->offset, temp_file,
484 APR_ARRAY_PUSH(entries, svn_fs_x__p2l_entry_t *) = new_entry;
486 SVN_ERR(svn_fs_x__rev_file_get(&file, rev_file));
487 SVN_ERR(copy_file_data(context, temp_file, file, entry->size,
493 /* Return the offset within CONTEXT->REPS that corresponds to item
494 * ITEM_INDEX in REVISION.
497 get_item_array_index(pack_context_t *context,
498 svn_revnum_t revision,
499 apr_int64_t item_index)
501 assert(revision >= context->start_rev);
502 return (int)item_index + APR_ARRAY_IDX(context->rev_offsets,
503 revision - context->start_rev,
507 /* Write INFO to the correct position in CONTEXT->REPS. The latter may
508 * need auto-expanding. Overwriting an array element is not allowed.
511 add_item_rep_mapping(pack_context_t *context,
512 svn_fs_x__p2l_entry_t *entry)
515 assert(entry->item_count == 1);
518 idx = get_item_array_index(context,
519 entry->items[0].change_set,
520 entry->items[0].number);
522 /* make sure the index exists in the array */
523 while (context->reps->nelts <= idx)
524 APR_ARRAY_PUSH(context->reps, void *) = NULL;
526 /* set the element. If there is already an entry, there are probably
527 * two items claiming to be the same -> bail out */
528 assert(!APR_ARRAY_IDX(context->reps, idx, void *));
529 APR_ARRAY_IDX(context->reps, idx, void *) = entry;
532 /* Return the P2L entry from CONTEXT->REPS for the given ID. If there is
533 * none (or not anymore), return NULL. If RESET has been specified, set
534 * the array entry to NULL after returning the entry.
536 static svn_fs_x__p2l_entry_t *
537 get_item(pack_context_t *context,
538 const svn_fs_x__id_t *id,
541 svn_fs_x__p2l_entry_t *result = NULL;
542 svn_revnum_t revision = svn_fs_x__get_revnum(id->change_set);
543 if (id->number && revision >= context->start_rev)
545 int idx = get_item_array_index(context, revision, id->number);
546 if (context->reps->nelts > idx)
548 result = APR_ARRAY_IDX(context->reps, idx, void *);
550 APR_ARRAY_IDX(context->reps, idx, void *) = NULL;
557 /* Copy representation item identified by ENTRY from the current position
558 * in REV_FILE into CONTEXT->REPS_FILE. Add all tracking into needed by
559 * our placement algorithm to CONTEXT.
560 * Use SCRATCH_POOL for temporary allocations.
563 copy_rep_to_temp(pack_context_t *context,
564 svn_fs_x__revision_file_t *rev_file,
565 svn_fs_x__p2l_entry_t *entry,
566 apr_pool_t *scratch_pool)
568 svn_fs_x__rep_header_t *rep_header;
569 svn_stream_t *stream;
571 apr_off_t source_offset = entry->offset;
573 /* create a copy of ENTRY, make it point to the copy destination and
574 * store it in CONTEXT */
575 entry = svn_fs_x__p2l_entry_dup(entry, context->info_pool);
576 SVN_ERR(svn_io_file_get_offset(&entry->offset, context->reps_file,
578 add_item_rep_mapping(context, entry);
580 /* read & parse the representation header */
581 SVN_ERR(svn_fs_x__rev_file_stream(&stream, rev_file));
582 SVN_ERR(svn_fs_x__read_rep_header(&rep_header, stream,
583 scratch_pool, scratch_pool));
585 /* if the representation is a delta against some other rep, link the two */
586 if ( rep_header->type == svn_fs_x__rep_delta
587 && rep_header->base_revision >= context->start_rev)
589 reference_t *reference = apr_pcalloc(context->info_pool,
591 reference->from = entry->items[0];
592 reference->to.change_set
593 = svn_fs_x__change_set_by_rev(rep_header->base_revision);
594 reference->to.number = rep_header->base_item_index;
595 APR_ARRAY_PUSH(context->references, reference_t *) = reference;
598 /* copy the whole rep (including header!) to our temp file */
599 SVN_ERR(svn_fs_x__rev_file_seek(rev_file, NULL, source_offset));
600 SVN_ERR(svn_fs_x__rev_file_get(&file, rev_file));
601 SVN_ERR(copy_file_data(context, context->reps_file, file, entry->size,
607 /* Directories first, dirs / files sorted by name in reverse lexical order.
608 * This maximizes the chance of two items being located close to one another
609 * in *all* pack files independent of their change order. It also groups
610 * multi-project repos nicely according to their sub-projects. The reverse
611 * order aspect gives "trunk" preference over "tags" and "branches", so
612 * trunk-related items are more likely to be contiguous.
615 compare_dir_entries(const svn_sort__item_t *a,
616 const svn_sort__item_t *b)
618 const svn_fs_dirent_t *lhs = (const svn_fs_dirent_t *) a->value;
619 const svn_fs_dirent_t *rhs = (const svn_fs_dirent_t *) b->value;
621 return strcmp(lhs->name, rhs->name);
625 svn_fs_x__order_dir_entries(svn_fs_t *fs,
626 apr_hash_t *directory,
627 apr_pool_t *result_pool,
628 apr_pool_t *scratch_pool)
630 apr_array_header_t *ordered
631 = svn_sort__hash(directory, compare_dir_entries, scratch_pool);
633 apr_array_header_t *result
634 = apr_array_make(result_pool, ordered->nelts, sizeof(svn_fs_dirent_t *));
637 for (i = 0; i < ordered->nelts; ++i)
638 APR_ARRAY_PUSH(result, svn_fs_dirent_t *)
639 = APR_ARRAY_IDX(ordered, i, svn_sort__item_t).value;
644 /* Return a duplicate of the ORIGINAL path and with special sub-strings
645 * (e.g. "trunk") modified in such a way that have a lower lexicographic
646 * value than any other "normal" file name.
649 tweak_path_for_ordering(const char *original,
652 /* We may add further special cases as needed. */
653 enum {SPECIAL_COUNT = 2};
654 static const char *special[SPECIAL_COUNT] = {"trunk", "branch"};
656 char *path = apr_pstrdup(pool, original);
659 /* Replace the first char of any "special" sub-string we find by
660 * a control char, i.e. '\1' .. '\31'. In the rare event that this
661 * would clash with existing paths, no data will be lost but merely
662 * the node ordering will be sub-optimal.
664 for (i = 0; i < SPECIAL_COUNT; ++i)
665 for (pos = strstr(path, special[i]);
667 pos = strstr(pos + 1, special[i]))
669 *pos = (char)(i + '\1');
675 /* Copy node revision item identified by ENTRY from the current position
676 * in REV_FILE into CONTEXT->REPS_FILE. Add all tracking into needed by
677 * our placement algorithm to CONTEXT.
678 * Use SCRATCH_POOL for temporary allocations.
681 copy_node_to_temp(pack_context_t *context,
682 svn_fs_x__revision_file_t *rev_file,
683 svn_fs_x__p2l_entry_t *entry,
684 apr_pool_t *scratch_pool)
686 path_order_t *path_order = apr_pcalloc(context->info_pool,
687 sizeof(*path_order));
688 svn_fs_x__noderev_t *noderev;
689 svn_stream_t *stream;
691 const char *sort_path;
692 apr_off_t source_offset = entry->offset;
694 /* read & parse noderev */
695 SVN_ERR(svn_fs_x__rev_file_stream(&stream, rev_file));
696 SVN_ERR(svn_fs_x__read_noderev(&noderev, stream, scratch_pool,
699 /* create a copy of ENTRY, make it point to the copy destination and
700 * store it in CONTEXT */
701 entry = svn_fs_x__p2l_entry_dup(entry, context->info_pool);
702 SVN_ERR(svn_io_file_get_offset(&entry->offset, context->reps_file,
704 add_item_rep_mapping(context, entry);
706 /* copy the noderev to our temp file */
707 SVN_ERR(svn_fs_x__rev_file_seek(rev_file, NULL, source_offset));
708 SVN_ERR(svn_fs_x__rev_file_get(&file, rev_file));
709 SVN_ERR(copy_file_data(context, context->reps_file, file, entry->size,
712 /* if the node has a data representation, make that the node's "base".
713 * This will (often) cause the noderev to be placed right in front of
714 * its data representation. */
716 if (noderev->data_rep
717 && svn_fs_x__get_revnum(noderev->data_rep->id.change_set)
718 >= context->start_rev)
720 reference_t *reference = apr_pcalloc(context->info_pool,
722 reference->from = entry->items[0];
723 reference->to.change_set = noderev->data_rep->id.change_set;
724 reference->to.number = noderev->data_rep->id.number;
725 APR_ARRAY_PUSH(context->references, reference_t *) = reference;
727 path_order->rep_id = reference->to;
728 path_order->expanded_size = noderev->data_rep->expanded_size;
731 /* Sort path is the key used for ordering noderevs and associated reps.
732 * It will not be stored in the final pack file. */
733 sort_path = tweak_path_for_ordering(noderev->created_path, scratch_pool);
734 path_order->path = svn_prefix_string__create(context->paths, sort_path);
735 path_order->node_id = noderev->node_id;
736 path_order->revision = svn_fs_x__get_revnum(noderev->noderev_id.change_set);
737 path_order->noderev_id = noderev->noderev_id;
738 APR_ARRAY_PUSH(context->path_order, path_order_t *) = path_order;
743 /* implements compare_fn_t. Place LHS before RHS, if the latter is older.
746 compare_p2l_info(const svn_fs_x__p2l_entry_t * const * lhs,
747 const svn_fs_x__p2l_entry_t * const * rhs)
749 assert(*lhs != *rhs);
750 if ((*lhs)->item_count == 0)
751 return (*lhs)->item_count == 0 ? 0 : -1;
752 if ((*lhs)->item_count == 0)
755 if ((*lhs)->items[0].change_set == (*rhs)->items[0].change_set)
756 return (*lhs)->items[0].number > (*rhs)->items[0].number ? -1 : 1;
758 return (*lhs)->items[0].change_set > (*rhs)->items[0].change_set ? -1 : 1;
761 /* Sort svn_fs_x__p2l_entry_t * array ENTRIES by age. Place the latest
765 sort_items(apr_array_header_t *entries)
767 svn_sort__array(entries,
768 (int (*)(const void *, const void *))compare_p2l_info);
771 /* implements compare_fn_t. Sort descending by PATH, NODE_ID and REVISION.
774 compare_path_order(const path_order_t * const * lhs_p,
775 const path_order_t * const * rhs_p)
777 const path_order_t * lhs = *lhs_p;
778 const path_order_t * rhs = *rhs_p;
780 /* lexicographic order on path and node (i.e. latest first) */
781 int diff = svn_prefix_string__compare(lhs->path, rhs->path);
785 /* reverse order on node (i.e. latest first) */
786 diff = svn_fs_x__id_compare(&rhs->node_id, &lhs->node_id);
790 /* reverse order on revision (i.e. latest first) */
791 if (lhs->revision != rhs->revision)
792 return lhs->revision < rhs->revision ? 1 : -1;
797 /* implements compare_fn_t. Sort ascending by TO, FROM.
800 compare_references(const reference_t * const * lhs_p,
801 const reference_t * const * rhs_p)
803 const reference_t * lhs = *lhs_p;
804 const reference_t * rhs = *rhs_p;
806 int diff = svn_fs_x__id_compare(&lhs->to, &rhs->to);
807 return diff ? diff : svn_fs_x__id_compare(&lhs->from, &rhs->from);
810 /* Order the data collected in CONTEXT such that we can place them in the
814 sort_reps(pack_context_t *context)
816 svn_sort__array(context->path_order,
817 (int (*)(const void *, const void *))compare_path_order);
818 svn_sort__array(context->references,
819 (int (*)(const void *, const void *))compare_references);
822 /* Return the remaining unused bytes in the current block in CONTEXT's
826 get_block_left(pack_context_t *context)
828 svn_fs_x__data_t *ffd = context->fs->fsap_data;
829 return ffd->block_size - (context->pack_offset % ffd->block_size);
832 /* To prevent items from overlapping a block boundary, we will usually
833 * put them into the next block and top up the old one with NUL bytes.
834 * Pad CONTEXT's pack file to the end of the current block, if that padding
835 * is short enough. Use SCRATCH_POOL for temporary allocations.
838 auto_pad_block(pack_context_t *context,
839 apr_pool_t *scratch_pool)
841 svn_fs_x__data_t *ffd = context->fs->fsap_data;
843 /* This is the maximum number of bytes "wasted" that way per block.
844 * Larger items will cross the block boundaries. */
845 const apr_off_t max_padding = MAX(ffd->block_size / 50, 512);
847 /* Is wasted space small enough to align the current item to the next
849 apr_off_t padding = get_block_left(context);
851 if (padding < max_padding)
853 /* Yes. To up with NUL bytes and don't forget to create
854 * an P2L index entry marking this section as unused. */
855 svn_fs_x__p2l_entry_t null_entry;
857 null_entry.offset = context->pack_offset;
858 null_entry.size = padding;
859 null_entry.type = SVN_FS_X__ITEM_TYPE_UNUSED;
860 null_entry.fnv1_checksum = 0;
861 null_entry.item_count = 0;
862 null_entry.items = NULL;
864 SVN_ERR(write_null_bytes(context->pack_file, padding, scratch_pool));
865 SVN_ERR(svn_fs_x__p2l_proto_index_add_entry
866 (context->proto_p2l_index, &null_entry, scratch_pool));
867 context->pack_offset += padding;
873 /* Return the index of the first entry in CONTEXT->REFERENCES that
874 * references ITEM->ITEMS[0] if such entries exist. All matching items
875 * will be consecutive.
878 find_first_reference(pack_context_t *context,
879 svn_fs_x__p2l_entry_t *item)
882 int upper = context->references->nelts - 1;
884 while (lower <= upper)
886 int current = lower + (upper - lower) / 2;
887 reference_t *reference
888 = APR_ARRAY_IDX(context->references, current, reference_t *);
890 if (svn_fs_x__id_compare(&reference->to, item->items) < 0)
899 /* Check whether entry number IDX in CONTEXT->REFERENCES references ITEM.
902 is_reference_match(pack_context_t *context,
904 svn_fs_x__p2l_entry_t *item)
906 reference_t *reference;
907 if (context->references->nelts <= idx)
910 reference = APR_ARRAY_IDX(context->references, idx, reference_t *);
911 return svn_fs_x__id_eq(&reference->to, item->items);
914 /* Starting at IDX in CONTEXT->PATH_ORDER, select all representations and
915 * noderevs that should be placed into the same container, respectively.
916 * Append the path_order_t * elements encountered in SELECTED, the
917 * svn_fs_x__p2l_entry_t * of the representations that should be placed
918 * into the same reps container will be appended to REP_PARTS and the
919 * svn_fs_x__p2l_entry_t * of the noderevs referencing those reps will
920 * be appended to NODE_PARTS.
922 * Remove all returned items from the CONTEXT->REPS container and prevent
923 * them from being placed a second time later on. That also means that the
924 * caller has to place all items returned.
927 select_reps(pack_context_t *context,
929 apr_array_header_t *selected,
930 apr_array_header_t *node_parts,
931 apr_array_header_t *rep_parts)
933 apr_array_header_t *path_order = context->path_order;
934 path_order_t *start_path = APR_ARRAY_IDX(path_order, idx, path_order_t *);
936 svn_fs_x__p2l_entry_t *node_part;
937 svn_fs_x__p2l_entry_t *rep_part;
938 svn_fs_x__p2l_entry_t *depending;
941 /* collect all path_order records as well as rep and noderev items
942 * that occupy the same path with the same node. */
943 for (; idx < path_order->nelts; ++idx)
945 path_order_t *current_path
946 = APR_ARRAY_IDX(path_order, idx, path_order_t *);
948 if (!svn_fs_x__id_eq(&start_path->node_id, ¤t_path->node_id))
951 APR_ARRAY_IDX(path_order, idx, path_order_t *) = NULL;
952 node_part = get_item(context, ¤t_path->noderev_id, TRUE);
953 rep_part = get_item(context, ¤t_path->rep_id, TRUE);
955 if (node_part && rep_part)
956 APR_ARRAY_PUSH(selected, path_order_t *) = current_path;
959 APR_ARRAY_PUSH(node_parts, svn_fs_x__p2l_entry_t *) = node_part;
961 APR_ARRAY_PUSH(rep_parts, svn_fs_x__p2l_entry_t *) = rep_part;
964 /* collect depending reps and noderevs that reference any of the collected
966 for (i = 0; i < rep_parts->nelts; ++i)
968 rep_part = APR_ARRAY_IDX(rep_parts, i, svn_fs_x__p2l_entry_t*);
969 for (k = find_first_reference(context, rep_part);
970 is_reference_match(context, k, rep_part);
973 reference_t *reference
974 = APR_ARRAY_IDX(context->references, k, reference_t *);
976 depending = get_item(context, &reference->from, TRUE);
980 if (depending->type == SVN_FS_X__ITEM_TYPE_NODEREV)
981 APR_ARRAY_PUSH(node_parts, svn_fs_x__p2l_entry_t *) = depending;
983 APR_ARRAY_PUSH(rep_parts, svn_fs_x__p2l_entry_t *) = depending;
990 /* Return TRUE, if all path_order_t * in SELECTED reference contents that is
991 * not longer than LIMIT.
994 reps_fit_into_containers(apr_array_header_t *selected,
998 for (i = 0; i < selected->nelts; ++i)
999 if (APR_ARRAY_IDX(selected, i, path_order_t *)->expanded_size > limit)
1005 /* Write the *CONTAINER containing the noderevs described by the
1006 * svn_fs_x__p2l_entry_t * in ITEMS to the pack file on CONTEXT.
1007 * Append a P2L entry for the container to CONTAINER->REPS.
1008 * Afterwards, clear ITEMS and re-allocate *CONTAINER in CONTAINER_POOL
1009 * so the caller may fill them again.
1010 * Use SCRATCH_POOL for temporary allocations.
1012 static svn_error_t *
1013 write_nodes_container(pack_context_t *context,
1014 svn_fs_x__noderevs_t **container,
1015 apr_array_header_t *items,
1016 apr_pool_t *container_pool,
1017 apr_pool_t *scratch_pool)
1020 apr_off_t offset = 0;
1021 svn_fs_x__p2l_entry_t *container_entry;
1022 svn_stream_t *pack_stream;
1024 if (items->nelts == 0)
1025 return SVN_NO_ERROR;
1027 /* serialize container */
1028 container_entry = apr_palloc(context->info_pool, sizeof(*container_entry));
1029 pack_stream = svn_checksum__wrap_write_stream_fnv1a_32x4
1030 (&container_entry->fnv1_checksum,
1031 svn_stream_from_aprfile2(context->pack_file,
1032 TRUE, scratch_pool),
1034 SVN_ERR(svn_fs_x__write_noderevs_container(pack_stream, *container,
1036 SVN_ERR(svn_stream_close(pack_stream));
1037 SVN_ERR(svn_io_file_seek(context->pack_file, APR_CUR, &offset,
1040 /* replace first noderev item in ENTRIES with the container
1041 and set all others to NULL */
1042 container_entry->offset = context->pack_offset;
1043 container_entry->size = offset - container_entry->offset;
1044 container_entry->type = SVN_FS_X__ITEM_TYPE_NODEREVS_CONT;
1045 container_entry->item_count = items->nelts;
1046 container_entry->items = apr_palloc(context->info_pool,
1047 sizeof(svn_fs_x__id_t) * container_entry->item_count);
1049 for (i = 0; i < items->nelts; ++i)
1050 container_entry->items[i]
1051 = APR_ARRAY_IDX(items, i, svn_fs_x__p2l_entry_t *)->items[0];
1053 context->pack_offset = offset;
1054 APR_ARRAY_PUSH(context->reps, svn_fs_x__p2l_entry_t *)
1057 /* Write P2L index for copied items, i.e. the 1 container */
1058 SVN_ERR(svn_fs_x__p2l_proto_index_add_entry
1059 (context->proto_p2l_index, container_entry, scratch_pool));
1061 svn_pool_clear(container_pool);
1062 *container = svn_fs_x__noderevs_create(16, container_pool);
1063 apr_array_clear(items);
1065 return SVN_NO_ERROR;
1068 /* Read the noderevs given by the svn_fs_x__p2l_entry_t * in NODE_PARTS
1069 * from TEMP_FILE and add them to *CONTAINER and NODES_IN_CONTAINER.
1070 * Whenever the container grows bigger than the current block in CONTEXT,
1071 * write the data to disk and continue in the next block.
1073 * Use CONTAINER_POOL to re-allocate the *CONTAINER as necessary and
1074 * SCRATCH_POOL to temporary allocations.
1076 static svn_error_t *
1077 store_nodes(pack_context_t *context,
1078 apr_file_t *temp_file,
1079 apr_array_header_t *node_parts,
1080 svn_fs_x__noderevs_t **container,
1081 apr_array_header_t *nodes_in_container,
1082 apr_pool_t *container_pool,
1083 apr_pool_t *scratch_pool)
1087 apr_pool_t *iterpool = svn_pool_create(scratch_pool);
1088 svn_stream_t *stream
1089 = svn_stream_from_aprfile2(temp_file, TRUE, scratch_pool);
1091 /* number of bytes in the current block not being spent on fixed-size
1092 items (i.e. those not put into the container). */
1093 apr_size_t capacity_left = get_block_left(context);
1095 /* Estimated noderev container size */
1096 apr_size_t last_container_size = 0, container_size = 0;
1098 /* Estimate extra capacity we will gain from container compression. */
1099 apr_size_t pack_savings = 0;
1100 for (i = 0; i < node_parts->nelts; ++i)
1102 svn_fs_x__noderev_t *noderev;
1103 svn_fs_x__p2l_entry_t *entry
1104 = APR_ARRAY_IDX(node_parts, i, svn_fs_x__p2l_entry_t *);
1106 /* if we reached the limit, check whether we saved some space
1107 through the container. */
1108 if (capacity_left + pack_savings < container_size + entry->size)
1109 container_size = svn_fs_x__noderevs_estimate_size(*container);
1111 /* If necessary and the container is large enough, try harder
1112 by actually serializing the container and determine current
1113 savings due to compression. */
1114 if ( capacity_left + pack_savings < container_size + entry->size
1115 && container_size > last_container_size + 2000)
1117 svn_stringbuf_t *serialized
1118 = svn_stringbuf_create_ensure(container_size, iterpool);
1119 svn_stream_t *temp_stream
1120 = svn_stream_from_stringbuf(serialized, iterpool);
1122 SVN_ERR(svn_fs_x__write_noderevs_container(temp_stream, *container,
1124 SVN_ERR(svn_stream_close(temp_stream));
1126 last_container_size = container_size;
1127 pack_savings = container_size - serialized->len;
1130 /* still doesn't fit? -> block is full. Flush */
1131 if ( capacity_left + pack_savings < container_size + entry->size
1132 && nodes_in_container->nelts < 2)
1134 SVN_ERR(auto_pad_block(context, iterpool));
1135 capacity_left = get_block_left(context);
1138 /* still doesn't fit? -> block is full. Flush */
1139 if (capacity_left + pack_savings < container_size + entry->size)
1141 SVN_ERR(write_nodes_container(context, container,
1142 nodes_in_container, container_pool,
1145 capacity_left = get_block_left(context);
1150 /* item will fit into the block. */
1151 SVN_ERR(svn_io_file_seek(temp_file, APR_SET, &entry->offset, iterpool));
1152 SVN_ERR(svn_fs_x__read_noderev(&noderev, stream, iterpool, iterpool));
1153 svn_fs_x__noderevs_add(*container, noderev);
1155 container_size += entry->size;
1156 APR_ARRAY_PUSH(nodes_in_container, svn_fs_x__p2l_entry_t *) = entry;
1158 svn_pool_clear(iterpool);
1161 svn_pool_destroy(iterpool);
1163 return SVN_NO_ERROR;
1167 /* Finalize CONTAINER and write it to CONTEXT's pack file.
1168 * Append an P2L entry containing the given SUB_ITEMS to NEW_ENTRIES.
1169 * Use SCRATCH_POOL for temporary allocations.
1171 static svn_error_t *
1172 write_reps_container(pack_context_t *context,
1173 svn_fs_x__reps_builder_t *container,
1174 apr_array_header_t *sub_items,
1175 apr_array_header_t *new_entries,
1176 apr_pool_t *scratch_pool)
1178 apr_off_t offset = 0;
1179 svn_fs_x__p2l_entry_t container_entry;
1181 svn_stream_t *pack_stream
1182 = svn_checksum__wrap_write_stream_fnv1a_32x4
1183 (&container_entry.fnv1_checksum,
1184 svn_stream_from_aprfile2(context->pack_file,
1185 TRUE, scratch_pool),
1188 SVN_ERR(svn_fs_x__write_reps_container(pack_stream, container,
1190 SVN_ERR(svn_stream_close(pack_stream));
1191 SVN_ERR(svn_io_file_seek(context->pack_file, APR_CUR, &offset,
1194 container_entry.offset = context->pack_offset;
1195 container_entry.size = offset - container_entry.offset;
1196 container_entry.type = SVN_FS_X__ITEM_TYPE_REPS_CONT;
1197 container_entry.item_count = sub_items->nelts;
1198 container_entry.items = (svn_fs_x__id_t *)sub_items->elts;
1200 context->pack_offset = offset;
1201 APR_ARRAY_PUSH(new_entries, svn_fs_x__p2l_entry_t *)
1202 = svn_fs_x__p2l_entry_dup(&container_entry, context->info_pool);
1204 SVN_ERR(svn_fs_x__p2l_proto_index_add_entry
1205 (context->proto_p2l_index, &container_entry, scratch_pool));
1207 return SVN_NO_ERROR;
1210 /* Read the (property) representations identified by svn_fs_x__p2l_entry_t
1211 * elements in ENTRIES from TEMP_FILE, aggregate them and write them into
1212 * CONTEXT->PACK_FILE. Use SCRATCH_POOL for temporary allocations.
1214 static svn_error_t *
1215 write_reps_containers(pack_context_t *context,
1216 apr_array_header_t *entries,
1217 apr_file_t *temp_file,
1218 apr_array_header_t *new_entries,
1219 apr_pool_t *scratch_pool)
1221 apr_pool_t *iterpool = svn_pool_create(scratch_pool);
1222 apr_pool_t *container_pool = svn_pool_create(scratch_pool);
1225 apr_ssize_t block_left = get_block_left(context);
1227 svn_fs_x__reps_builder_t *container
1228 = svn_fs_x__reps_builder_create(context->fs, container_pool);
1229 apr_array_header_t *sub_items
1230 = apr_array_make(scratch_pool, 64, sizeof(svn_fs_x__id_t));
1231 svn_fs_x__revision_file_t *file;
1233 SVN_ERR(svn_fs_x__rev_file_wrap_temp(&file, context->fs, temp_file,
1236 /* copy all items in strict order */
1237 for (i = entries->nelts-1; i >= 0; --i)
1239 svn_fs_x__representation_t representation = { 0 };
1240 svn_stringbuf_t *contents;
1241 svn_stream_t *stream;
1242 apr_size_t list_index;
1243 svn_fs_x__p2l_entry_t *entry
1244 = APR_ARRAY_IDX(entries, i, svn_fs_x__p2l_entry_t *);
1246 if ((block_left < entry->size) && sub_items->nelts)
1248 block_left = get_block_left(context)
1249 - svn_fs_x__reps_estimate_size(container);
1252 if ((block_left < entry->size) && sub_items->nelts)
1254 SVN_ERR(write_reps_container(context, container, sub_items,
1255 new_entries, iterpool));
1257 apr_array_clear(sub_items);
1258 svn_pool_clear(container_pool);
1259 container = svn_fs_x__reps_builder_create(context->fs,
1261 block_left = get_block_left(context);
1264 /* still enough space in current block? */
1265 if (block_left < entry->size)
1267 SVN_ERR(auto_pad_block(context, iterpool));
1268 block_left = get_block_left(context);
1271 assert(entry->item_count == 1);
1272 representation.id = entry->items[0];
1274 /* select the change list in the source file, parse it and add it to
1276 SVN_ERR(svn_io_file_seek(temp_file, APR_SET, &entry->offset,
1278 SVN_ERR(svn_fs_x__get_representation_length(&representation.size,
1279 &representation.expanded_size,
1282 SVN_ERR(svn_fs_x__get_contents(&stream, context->fs, &representation,
1284 contents = svn_stringbuf_create_ensure(representation.expanded_size,
1286 contents->len = representation.expanded_size;
1288 /* The representation is immutable. Read it normally. */
1289 SVN_ERR(svn_stream_read_full(stream, contents->data, &contents->len));
1290 SVN_ERR(svn_stream_close(stream));
1292 SVN_ERR(svn_fs_x__reps_add(&list_index, container,
1293 svn_stringbuf__morph_into_string(contents)));
1294 SVN_ERR_ASSERT(list_index == sub_items->nelts);
1295 block_left -= entry->size;
1297 APR_ARRAY_PUSH(sub_items, svn_fs_x__id_t) = entry->items[0];
1299 svn_pool_clear(iterpool);
1302 if (sub_items->nelts)
1303 SVN_ERR(write_reps_container(context, container, sub_items,
1304 new_entries, iterpool));
1306 svn_pool_destroy(iterpool);
1307 svn_pool_destroy(container_pool);
1309 return SVN_NO_ERROR;
1312 /* Return TRUE if the estimated size of the NODES_IN_CONTAINER plus the
1313 * representations given as svn_fs_x__p2l_entry_t * in ENTRIES may exceed
1314 * the space left in the current block.
1316 static svn_boolean_t
1317 should_flush_nodes_container(pack_context_t *context,
1318 svn_fs_x__noderevs_t *nodes_container,
1319 apr_array_header_t *entries)
1321 apr_ssize_t block_left = get_block_left(context);
1322 apr_ssize_t rep_sum = 0;
1323 apr_ssize_t container_size
1324 = svn_fs_x__noderevs_estimate_size(nodes_container);
1327 for (i = 0; i < entries->nelts; ++i)
1329 svn_fs_x__p2l_entry_t *entry
1330 = APR_ARRAY_IDX(entries, i, svn_fs_x__p2l_entry_t *);
1331 rep_sum += entry->size;
1334 return block_left < rep_sum + container_size;
1337 /* Read the contents of the first COUNT non-NULL, non-empty items in ITEMS
1338 * from TEMP_FILE and write them to CONTEXT->PACK_FILE.
1339 * Use SCRATCH_POOL for temporary allocations.
1341 static svn_error_t *
1342 store_items(pack_context_t *context,
1343 apr_file_t *temp_file,
1344 apr_array_header_t *items,
1346 apr_pool_t *scratch_pool)
1349 apr_pool_t *iterpool = svn_pool_create(scratch_pool);
1351 /* copy all items in strict order */
1352 for (i = 0; i < count; ++i)
1354 svn_fs_x__p2l_entry_t *entry
1355 = APR_ARRAY_IDX(items, i, svn_fs_x__p2l_entry_t *);
1357 || entry->type == SVN_FS_X__ITEM_TYPE_UNUSED
1358 || entry->item_count == 0)
1361 /* select the item in the source file and copy it into the target
1363 SVN_ERR(svn_io_file_seek(temp_file, APR_SET, &entry->offset,
1365 SVN_ERR(copy_file_data(context, context->pack_file, temp_file,
1366 entry->size, iterpool));
1368 /* write index entry and update current position */
1369 entry->offset = context->pack_offset;
1370 context->pack_offset += entry->size;
1372 SVN_ERR(svn_fs_x__p2l_proto_index_add_entry
1373 (context->proto_p2l_index, entry, iterpool));
1375 APR_ARRAY_PUSH(context->reps, svn_fs_x__p2l_entry_t *) = entry;
1376 svn_pool_clear(iterpool);
1379 svn_pool_destroy(iterpool);
1381 return SVN_NO_ERROR;
1384 /* Copy (append) the items identified by svn_fs_x__p2l_entry_t * elements
1385 * in ENTRIES strictly in order from TEMP_FILE into CONTEXT->PACK_FILE.
1386 * Use SCRATCH_POOL for temporary allocations.
1388 static svn_error_t *
1389 copy_reps_from_temp(pack_context_t *context,
1390 apr_file_t *temp_file,
1391 apr_pool_t *scratch_pool)
1393 svn_fs_x__data_t *ffd = context->fs->fsap_data;
1395 apr_pool_t *iterpool = svn_pool_create(scratch_pool);
1396 apr_pool_t *container_pool = svn_pool_create(scratch_pool);
1397 apr_array_header_t *path_order = context->path_order;
1398 apr_array_header_t *reps = context->reps;
1399 apr_array_header_t *selected = apr_array_make(scratch_pool, 16,
1400 path_order->elt_size);
1401 apr_array_header_t *node_parts = apr_array_make(scratch_pool, 16,
1403 apr_array_header_t *rep_parts = apr_array_make(scratch_pool, 16,
1405 apr_array_header_t *nodes_in_container = apr_array_make(scratch_pool, 16,
1408 int initial_reps_count = reps->nelts;
1410 /* 1 container for all noderevs in the current block. We will try to
1411 * not write it to disk until the current block fills up, i.e. aim for
1412 * a single noderevs container per block. */
1413 svn_fs_x__noderevs_t *nodes_container
1414 = svn_fs_x__noderevs_create(16, container_pool);
1416 /* copy items in path order. Create block-sized containers. */
1417 for (i = 0; i < path_order->nelts; ++i)
1419 if (APR_ARRAY_IDX(path_order, i, path_order_t *) == NULL)
1422 /* Collect reps to combine and all noderevs referencing them */
1423 SVN_ERR(select_reps(context, i, selected, node_parts, rep_parts));
1425 /* store the noderevs container in front of the reps */
1426 SVN_ERR(store_nodes(context, temp_file, node_parts, &nodes_container,
1427 nodes_in_container, container_pool, iterpool));
1429 /* actually flush the noderevs to disk if the reps container is likely
1430 * to fill the block, i.e. no further noderevs will be added to the
1431 * nodes container. */
1432 if (should_flush_nodes_container(context, nodes_container, node_parts))
1433 SVN_ERR(write_nodes_container(context, &nodes_container,
1434 nodes_in_container, container_pool,
1437 /* if all reps are short enough put them into one container.
1438 * Otherwise, just store all containers here. */
1439 if (reps_fit_into_containers(selected, 2 * ffd->block_size))
1440 SVN_ERR(write_reps_containers(context, rep_parts, temp_file,
1441 context->reps, iterpool));
1443 SVN_ERR(store_items(context, temp_file, rep_parts, rep_parts->nelts,
1446 /* processed all items */
1447 apr_array_clear(selected);
1448 apr_array_clear(node_parts);
1449 apr_array_clear(rep_parts);
1451 svn_pool_clear(iterpool);
1454 /* flush noderevs container to disk */
1455 if (nodes_in_container->nelts)
1456 SVN_ERR(write_nodes_container(context, &nodes_container,
1457 nodes_in_container, container_pool,
1460 /* copy all items in strict order */
1461 SVN_ERR(store_items(context, temp_file, reps, initial_reps_count,
1464 /* vaccum ENTRIES array: eliminate NULL entries */
1465 for (i = 0, k = 0; i < reps->nelts; ++i)
1467 svn_fs_x__p2l_entry_t *entry
1468 = APR_ARRAY_IDX(reps, i, svn_fs_x__p2l_entry_t *);
1471 APR_ARRAY_IDX(reps, k, svn_fs_x__p2l_entry_t *) = entry;
1477 svn_pool_destroy(iterpool);
1478 svn_pool_destroy(container_pool);
1480 return SVN_NO_ERROR;
1483 /* Finalize CONTAINER and write it to CONTEXT's pack file.
1484 * Append an P2L entry containing the given SUB_ITEMS to NEW_ENTRIES.
1485 * Use SCRATCH_POOL for temporary allocations.
1487 static svn_error_t *
1488 write_changes_container(pack_context_t *context,
1489 svn_fs_x__changes_t *container,
1490 apr_array_header_t *sub_items,
1491 apr_array_header_t *new_entries,
1492 apr_pool_t *scratch_pool)
1494 apr_off_t offset = 0;
1495 svn_fs_x__p2l_entry_t container_entry;
1497 svn_stream_t *pack_stream
1498 = svn_checksum__wrap_write_stream_fnv1a_32x4
1499 (&container_entry.fnv1_checksum,
1500 svn_stream_from_aprfile2(context->pack_file,
1501 TRUE, scratch_pool),
1504 SVN_ERR(svn_fs_x__write_changes_container(pack_stream,
1507 SVN_ERR(svn_stream_close(pack_stream));
1508 SVN_ERR(svn_io_file_seek(context->pack_file, APR_CUR, &offset,
1511 container_entry.offset = context->pack_offset;
1512 container_entry.size = offset - container_entry.offset;
1513 container_entry.type = SVN_FS_X__ITEM_TYPE_CHANGES_CONT;
1514 container_entry.item_count = sub_items->nelts;
1515 container_entry.items = (svn_fs_x__id_t *)sub_items->elts;
1517 context->pack_offset = offset;
1518 APR_ARRAY_PUSH(new_entries, svn_fs_x__p2l_entry_t *)
1519 = svn_fs_x__p2l_entry_dup(&container_entry, context->info_pool);
1521 SVN_ERR(svn_fs_x__p2l_proto_index_add_entry
1522 (context->proto_p2l_index, &container_entry, scratch_pool));
1524 return SVN_NO_ERROR;
1527 /* Read the change lists identified by svn_fs_x__p2l_entry_t * elements
1528 * in ENTRIES strictly in from TEMP_FILE, aggregate them and write them
1529 * into CONTEXT->PACK_FILE. Use SCRATCH_POOL for temporary allocations.
1531 static svn_error_t *
1532 write_changes_containers(pack_context_t *context,
1533 apr_array_header_t *entries,
1534 apr_file_t *temp_file,
1535 apr_pool_t *scratch_pool)
1537 apr_pool_t *iterpool = svn_pool_create(scratch_pool);
1538 apr_pool_t *container_pool = svn_pool_create(scratch_pool);
1541 apr_ssize_t block_left = get_block_left(context);
1542 apr_ssize_t estimated_addition = 0;
1544 svn_fs_x__changes_t *container
1545 = svn_fs_x__changes_create(1000, container_pool);
1546 apr_array_header_t *sub_items
1547 = apr_array_make(scratch_pool, 64, sizeof(svn_fs_x__id_t));
1548 apr_array_header_t *new_entries
1549 = apr_array_make(context->info_pool, 16, entries->elt_size);
1550 svn_stream_t *temp_stream
1551 = svn_stream_from_aprfile2(temp_file, TRUE, scratch_pool);
1553 /* copy all items in strict order */
1554 for (i = entries->nelts-1; i >= 0; --i)
1556 apr_array_header_t *changes;
1557 apr_size_t list_index;
1558 svn_fs_x__p2l_entry_t *entry
1559 = APR_ARRAY_IDX(entries, i, svn_fs_x__p2l_entry_t *);
1561 /* zip compression alone will significantly reduce the size of large
1562 * change lists. So, we will probably need even less than this estimate.
1564 apr_ssize_t estimated_size = (entry->size / 5) + 250;
1566 /* If necessary and enough data has been added to the container since
1567 * the last test, try harder by actually serializing the container and
1568 * determine current savings due to compression. */
1569 if (block_left < estimated_size && estimated_addition > 2000)
1571 svn_stringbuf_t *serialized
1572 = svn_stringbuf_create_ensure(get_block_left(context), iterpool);
1573 svn_stream_t *memory_stream
1574 = svn_stream_from_stringbuf(serialized, iterpool);
1576 SVN_ERR(svn_fs_x__write_changes_container(memory_stream,
1577 container, iterpool));
1578 SVN_ERR(svn_stream_close(temp_stream));
1580 block_left = get_block_left(context) - serialized->len;
1581 estimated_addition = 0;
1584 if ((block_left < estimated_size) && sub_items->nelts)
1586 SVN_ERR(write_changes_container(context, container, sub_items,
1587 new_entries, iterpool));
1589 apr_array_clear(sub_items);
1590 svn_pool_clear(container_pool);
1591 container = svn_fs_x__changes_create(1000, container_pool);
1592 block_left = get_block_left(context);
1593 estimated_addition = 0;
1596 /* still enough space in current block? */
1597 if (block_left < estimated_size)
1599 SVN_ERR(auto_pad_block(context, iterpool));
1600 block_left = get_block_left(context);
1603 /* select the change list in the source file, parse it and add it to
1605 SVN_ERR(svn_io_file_seek(temp_file, APR_SET, &entry->offset,
1607 SVN_ERR(svn_fs_x__read_changes(&changes, temp_stream, INT_MAX,
1608 scratch_pool, iterpool));
1609 SVN_ERR(svn_fs_x__changes_append_list(&list_index, container, changes));
1610 SVN_ERR_ASSERT(list_index == sub_items->nelts);
1611 block_left -= estimated_size;
1612 estimated_addition += estimated_size;
1614 APR_ARRAY_PUSH(sub_items, svn_fs_x__id_t) = entry->items[0];
1616 svn_pool_clear(iterpool);
1619 if (sub_items->nelts)
1620 SVN_ERR(write_changes_container(context, container, sub_items,
1621 new_entries, iterpool));
1623 *entries = *new_entries;
1624 svn_pool_destroy(iterpool);
1625 svn_pool_destroy(container_pool);
1627 return SVN_NO_ERROR;
1630 /* Read the (property) representations identified by svn_fs_x__p2l_entry_t
1631 * elements in ENTRIES from TEMP_FILE, aggregate them and write them into
1632 * CONTEXT->PACK_FILE. Use SCRATCH_POOL for temporary allocations.
1634 static svn_error_t *
1635 write_property_containers(pack_context_t *context,
1636 apr_array_header_t *entries,
1637 apr_file_t *temp_file,
1638 apr_pool_t *scratch_pool)
1640 apr_array_header_t *new_entries
1641 = apr_array_make(context->info_pool, 16, entries->elt_size);
1643 SVN_ERR(write_reps_containers(context, entries, temp_file, new_entries,
1646 *entries = *new_entries;
1648 return SVN_NO_ERROR;
1651 /* Append all entries of svn_fs_x__p2l_entry_t * array TO_APPEND to
1652 * svn_fs_x__p2l_entry_t * array DEST.
1655 append_entries(apr_array_header_t *dest,
1656 apr_array_header_t *to_append)
1659 for (i = 0; i < to_append->nelts; ++i)
1660 APR_ARRAY_PUSH(dest, svn_fs_x__p2l_entry_t *)
1661 = APR_ARRAY_IDX(to_append, i, svn_fs_x__p2l_entry_t *);
1664 /* Write the log-to-phys proto index file for CONTEXT and use POOL for
1665 * temporary allocations. All items in all buckets must have been placed
1668 static svn_error_t *
1669 write_l2p_index(pack_context_t *context,
1672 apr_pool_t *scratch_pool = svn_pool_create(pool);
1673 const char *temp_name;
1674 const char *proto_index;
1675 apr_off_t offset = 0;
1677 /* lump all items into one bucket. As target, use the bucket that
1678 * probably has the most entries already. */
1679 append_entries(context->reps, context->changes);
1680 append_entries(context->reps, context->file_props);
1681 append_entries(context->reps, context->dir_props);
1683 /* Let the index code do the expensive L2P -> P2L transformation. */
1684 SVN_ERR(svn_fs_x__l2p_index_from_p2l_entries(&temp_name,
1687 pool, scratch_pool));
1689 /* Append newly written segment to exisiting proto index file. */
1690 SVN_ERR(svn_io_file_name_get(&proto_index, context->proto_l2p_index,
1693 SVN_ERR(svn_io_file_flush(context->proto_l2p_index, scratch_pool));
1694 SVN_ERR(svn_io_append_file(temp_name, proto_index, scratch_pool));
1695 SVN_ERR(svn_io_remove_file2(temp_name, FALSE, scratch_pool));
1696 SVN_ERR(svn_io_file_seek(context->proto_l2p_index, APR_END, &offset,
1700 svn_pool_destroy(scratch_pool);
1702 return SVN_NO_ERROR;
1705 /* Pack the current revision range of CONTEXT, i.e. this covers phases 2
1706 * to 4. Use SCRATCH_POOL for temporary allocations.
1708 static svn_error_t *
1709 pack_range(pack_context_t *context,
1710 apr_pool_t *scratch_pool)
1712 svn_fs_x__data_t *ffd = context->fs->fsap_data;
1713 apr_pool_t *revpool = svn_pool_create(scratch_pool);
1714 apr_pool_t *iterpool = svn_pool_create(scratch_pool);
1716 /* Phase 2: Copy items into various buckets and build tracking info */
1717 svn_revnum_t revision;
1718 for (revision = context->start_rev; revision < context->end_rev; ++revision)
1720 apr_off_t offset = 0;
1721 svn_fs_x__revision_file_t *rev_file;
1722 svn_fs_x__index_info_t l2p_index_info;
1724 /* Get the rev file dimensions (mainly index locations). */
1725 SVN_ERR(svn_fs_x__rev_file_init(&rev_file, context->fs, revision,
1727 SVN_ERR(svn_fs_x__rev_file_l2p_info(&l2p_index_info, rev_file));
1729 /* store the indirect array index */
1730 APR_ARRAY_PUSH(context->rev_offsets, int) = context->reps->nelts;
1732 /* read the phys-to-log index file until we covered the whole rev file.
1733 * That index contains enough info to build both target indexes from it. */
1734 while (offset < l2p_index_info.start)
1736 /* read one cluster */
1738 apr_array_header_t *entries;
1739 svn_pool_clear(iterpool);
1741 SVN_ERR(svn_fs_x__p2l_index_lookup(&entries, context->fs,
1742 rev_file, revision, offset,
1743 ffd->p2l_page_size, iterpool,
1746 for (i = 0; i < entries->nelts; ++i)
1748 svn_fs_x__p2l_entry_t *entry
1749 = &APR_ARRAY_IDX(entries, i, svn_fs_x__p2l_entry_t);
1751 /* skip first entry if that was duplicated due crossing a
1753 if (offset > entry->offset)
1756 /* process entry while inside the rev file */
1757 offset = entry->offset;
1758 if (offset < l2p_index_info.start)
1760 SVN_ERR(svn_fs_x__rev_file_seek(rev_file, NULL, offset));
1762 if (entry->type == SVN_FS_X__ITEM_TYPE_CHANGES)
1763 SVN_ERR(copy_item_to_temp(context,
1765 context->changes_file,
1766 rev_file, entry, iterpool));
1767 else if (entry->type == SVN_FS_X__ITEM_TYPE_FILE_PROPS)
1768 SVN_ERR(copy_item_to_temp(context,
1769 context->file_props,
1770 context->file_props_file,
1771 rev_file, entry, iterpool));
1772 else if (entry->type == SVN_FS_X__ITEM_TYPE_DIR_PROPS)
1773 SVN_ERR(copy_item_to_temp(context,
1775 context->dir_props_file,
1776 rev_file, entry, iterpool));
1777 else if ( entry->type == SVN_FS_X__ITEM_TYPE_FILE_REP
1778 || entry->type == SVN_FS_X__ITEM_TYPE_DIR_REP)
1779 SVN_ERR(copy_rep_to_temp(context, rev_file, entry,
1781 else if (entry->type == SVN_FS_X__ITEM_TYPE_NODEREV)
1782 SVN_ERR(copy_node_to_temp(context, rev_file, entry,
1785 SVN_ERR_ASSERT(entry->type == SVN_FS_X__ITEM_TYPE_UNUSED);
1787 offset += entry->size;
1791 if (context->cancel_func)
1792 SVN_ERR(context->cancel_func(context->cancel_baton));
1795 svn_pool_clear(revpool);
1798 svn_pool_destroy(iterpool);
1800 /* phase 3: placement.
1801 * Use "newest first" placement for simple items. */
1802 sort_items(context->changes);
1803 sort_items(context->file_props);
1804 sort_items(context->dir_props);
1806 /* follow dependencies recursively for noderevs and data representations */
1809 /* phase 4: copy bucket data to pack file. Write P2L index. */
1810 SVN_ERR(write_changes_containers(context, context->changes,
1811 context->changes_file, revpool));
1812 svn_pool_clear(revpool);
1813 SVN_ERR(write_property_containers(context, context->file_props,
1814 context->file_props_file, revpool));
1815 svn_pool_clear(revpool);
1816 SVN_ERR(write_property_containers(context, context->dir_props,
1817 context->dir_props_file, revpool));
1818 svn_pool_clear(revpool);
1819 SVN_ERR(copy_reps_from_temp(context, context->reps_file, revpool));
1820 svn_pool_clear(revpool);
1822 /* write L2P index as well (now that we know all target offsets) */
1823 SVN_ERR(write_l2p_index(context, revpool));
1825 svn_pool_destroy(revpool);
1827 return SVN_NO_ERROR;
1830 /* Append CONTEXT->START_REV to the context's pack file with no re-ordering.
1831 * This function will only be used for very large revisions (>>100k changes).
1832 * Use SCRATCH_POOL for temporary allocations.
1834 static svn_error_t *
1835 append_revision(pack_context_t *context,
1836 apr_pool_t *scratch_pool)
1838 svn_fs_x__data_t *ffd = context->fs->fsap_data;
1839 apr_off_t offset = 0;
1840 apr_pool_t *iterpool = svn_pool_create(scratch_pool);
1841 svn_fs_x__revision_file_t *rev_file;
1843 svn_filesize_t revdata_size;
1845 /* Copy all non-index contents the rev file to the end of the pack file. */
1846 SVN_ERR(svn_fs_x__rev_file_init(&rev_file, context->fs, context->start_rev,
1848 SVN_ERR(svn_fs_x__rev_file_data_size(&revdata_size, rev_file));
1850 SVN_ERR(svn_fs_x__rev_file_get(&file, rev_file));
1851 SVN_ERR(svn_io_file_aligned_seek(file, ffd->block_size, NULL, 0,
1853 SVN_ERR(copy_file_data(context, context->pack_file, file, revdata_size,
1856 /* mark the start of a new revision */
1857 SVN_ERR(svn_fs_x__l2p_proto_index_add_revision(context->proto_l2p_index,
1860 /* read the phys-to-log index file until we covered the whole rev file.
1861 * That index contains enough info to build both target indexes from it. */
1862 while (offset < revdata_size)
1864 /* read one cluster */
1866 apr_array_header_t *entries;
1867 SVN_ERR(svn_fs_x__p2l_index_lookup(&entries, context->fs, rev_file,
1868 context->start_rev, offset,
1869 ffd->p2l_page_size, iterpool,
1872 for (i = 0; i < entries->nelts; ++i)
1874 svn_fs_x__p2l_entry_t *entry
1875 = &APR_ARRAY_IDX(entries, i, svn_fs_x__p2l_entry_t);
1877 /* skip first entry if that was duplicated due crossing a
1879 if (offset > entry->offset)
1882 /* process entry while inside the rev file */
1883 offset = entry->offset;
1884 if (offset < revdata_size)
1886 /* there should be true containers */
1887 SVN_ERR_ASSERT(entry->item_count == 1);
1889 entry->offset += context->pack_offset;
1890 offset += entry->size;
1891 SVN_ERR(svn_fs_x__l2p_proto_index_add_entry
1892 (context->proto_l2p_index, entry->offset, 0,
1893 entry->items[0].number, iterpool));
1894 SVN_ERR(svn_fs_x__p2l_proto_index_add_entry
1895 (context->proto_p2l_index, entry, iterpool));
1899 svn_pool_clear(iterpool);
1902 svn_pool_destroy(iterpool);
1903 context->pack_offset += revdata_size;
1905 return SVN_NO_ERROR;
1908 /* Format 7 packing logic.
1910 * Pack the revision shard starting at SHARD_REV in filesystem FS from
1911 * SHARD_DIR into the PACK_FILE_DIR, using SCRATCH_POOL for temporary
1912 * allocations. Limit the extra memory consumption to MAX_MEM bytes.
1913 * CANCEL_FUNC and CANCEL_BATON are what you think they are.
1914 * Schedule necessary fsync calls in BATCH.
1916 static svn_error_t *
1917 pack_log_addressed(svn_fs_t *fs,
1918 const char *pack_file_dir,
1919 const char *shard_dir,
1920 svn_revnum_t shard_rev,
1922 svn_fs_x__batch_fsync_t *batch,
1923 svn_cancel_func_t cancel_func,
1925 apr_pool_t *scratch_pool)
1929 /* estimated amount of memory used to represent one item in memory
1930 * during rev file packing */
1931 PER_ITEM_MEM = APR_ALIGN_DEFAULT(sizeof(path_order_t))
1932 + APR_ALIGN_DEFAULT(2 *sizeof(void*))
1933 + APR_ALIGN_DEFAULT(sizeof(reference_t))
1934 + APR_ALIGN_DEFAULT(sizeof(svn_fs_x__p2l_entry_t))
1938 int max_items = max_mem / PER_ITEM_MEM > INT_MAX
1940 : (int)(max_mem / PER_ITEM_MEM);
1941 apr_array_header_t *max_ids;
1942 pack_context_t context = { 0 };
1944 apr_size_t item_count = 0;
1945 apr_pool_t *iterpool = svn_pool_create(scratch_pool);
1947 /* set up a pack context */
1948 SVN_ERR(initialize_pack_context(&context, fs, pack_file_dir, shard_dir,
1949 shard_rev, max_items, batch, cancel_func,
1950 cancel_baton, scratch_pool));
1952 /* phase 1: determine the size of the revisions to pack */
1953 SVN_ERR(svn_fs_x__l2p_get_max_ids(&max_ids, fs, shard_rev,
1954 context.shard_end_rev - shard_rev,
1955 scratch_pool, scratch_pool));
1957 /* pack revisions in ranges that don't exceed MAX_MEM */
1958 for (i = 0; i < max_ids->nelts; ++i)
1959 if ( APR_ARRAY_IDX(max_ids, i, apr_uint64_t)
1960 <= (apr_uint64_t)max_items - item_count)
1962 item_count += APR_ARRAY_IDX(max_ids, i, apr_uint64_t);
1967 /* some unpacked revisions before this one? */
1968 if (context.start_rev < context.end_rev)
1970 /* pack them intelligently (might be just 1 rev but still ...) */
1971 SVN_ERR(pack_range(&context, iterpool));
1972 SVN_ERR(reset_pack_context(&context, iterpool));
1976 /* next revision range is to start with the current revision */
1977 context.start_rev = i + context.shard_rev;
1978 context.end_rev = context.start_rev + 1;
1980 /* if this is a very large revision, we must place it as is */
1981 if (APR_ARRAY_IDX(max_ids, i, apr_uint64_t) > max_items)
1983 SVN_ERR(append_revision(&context, iterpool));
1984 context.start_rev++;
1987 item_count += (apr_size_t)APR_ARRAY_IDX(max_ids, i, apr_uint64_t);
1989 svn_pool_clear(iterpool);
1992 /* non-empty revision range at the end? */
1993 if (context.start_rev < context.end_rev)
1994 SVN_ERR(pack_range(&context, iterpool));
1996 /* last phase: finalize indexes and clean up */
1997 SVN_ERR(reset_pack_context(&context, iterpool));
1998 SVN_ERR(close_pack_context(&context, iterpool));
1999 svn_pool_destroy(iterpool);
2001 return SVN_NO_ERROR;
2004 /* In filesystem FS, pack the revision SHARD containing exactly
2005 * MAX_FILES_PER_DIR revisions from SHARD_PATH into the PACK_FILE_DIR,
2006 * using SCRATCH_POOL for temporary allocations. Try to limit the amount of
2007 * temporary memory needed to MAX_MEM bytes. CANCEL_FUNC and CANCEL_BATON
2008 * are what you think they are. Schedule necessary fsync calls in BATCH.
2010 * If for some reason we detect a partial packing already performed, we
2011 * remove the pack file and start again.
2013 * The actual packing will be done in a format-specific sub-function.
2015 static svn_error_t *
2016 pack_rev_shard(svn_fs_t *fs,
2017 const char *pack_file_dir,
2018 const char *shard_path,
2020 int max_files_per_dir,
2022 svn_fs_x__batch_fsync_t *batch,
2023 svn_cancel_func_t cancel_func,
2025 apr_pool_t *scratch_pool)
2027 const char *pack_file_path;
2028 svn_revnum_t shard_rev = (svn_revnum_t) (shard * max_files_per_dir);
2030 /* Some useful paths. */
2031 pack_file_path = svn_dirent_join(pack_file_dir, PATH_PACKED, scratch_pool);
2033 /* Remove any existing pack file for this shard, since it is incomplete. */
2034 SVN_ERR(svn_io_remove_dir2(pack_file_dir, TRUE, cancel_func, cancel_baton,
2037 /* Create the new directory and pack file. */
2038 SVN_ERR(svn_io_dir_make(pack_file_dir, APR_OS_DEFAULT, scratch_pool));
2039 SVN_ERR(svn_fs_x__batch_fsync_new_path(batch, pack_file_dir, scratch_pool));
2041 /* Index information files */
2042 SVN_ERR(pack_log_addressed(fs, pack_file_dir, shard_path, shard_rev,
2043 max_mem, batch, cancel_func, cancel_baton,
2046 SVN_ERR(svn_io_copy_perms(shard_path, pack_file_dir, scratch_pool));
2047 SVN_ERR(svn_io_set_file_read_only(pack_file_path, FALSE, scratch_pool));
2049 return SVN_NO_ERROR;
2052 /* In the file system at FS_PATH, pack the SHARD in DIR containing exactly
2053 * MAX_FILES_PER_DIR revisions, using SCRATCH_POOL temporary for allocations.
2054 * COMPRESSION_LEVEL and MAX_PACK_SIZE will be ignored in that case.
2055 * An attempt will be made to keep memory usage below MAX_MEM.
2057 * CANCEL_FUNC and CANCEL_BATON are what you think they are; similarly
2058 * NOTIFY_FUNC and NOTIFY_BATON.
2060 * If for some reason we detect a partial packing already performed, we
2061 * remove the pack file and start again.
2063 static svn_error_t *
2064 pack_shard(const char *dir,
2067 int max_files_per_dir,
2068 apr_off_t max_pack_size,
2069 int compression_level,
2071 svn_fs_pack_notify_t notify_func,
2073 svn_cancel_func_t cancel_func,
2075 apr_pool_t *scratch_pool)
2077 svn_fs_x__data_t *ffd = fs->fsap_data;
2078 const char *shard_path, *pack_file_dir;
2079 svn_fs_x__batch_fsync_t *batch;
2081 /* Notify caller we're starting to pack this shard. */
2083 SVN_ERR(notify_func(notify_baton, shard, svn_fs_pack_notify_start,
2086 /* Perform all fsyncs through this instance. */
2087 SVN_ERR(svn_fs_x__batch_fsync_create(&batch, ffd->flush_to_disk,
2090 /* Some useful paths. */
2091 pack_file_dir = svn_dirent_join(dir,
2092 apr_psprintf(scratch_pool,
2093 "%" APR_INT64_T_FMT PATH_EXT_PACKED_SHARD,
2096 shard_path = svn_dirent_join(dir,
2097 apr_psprintf(scratch_pool, "%" APR_INT64_T_FMT, shard),
2100 /* pack the revision content */
2101 SVN_ERR(pack_rev_shard(fs, pack_file_dir, shard_path,
2102 shard, max_files_per_dir, max_mem, batch,
2103 cancel_func, cancel_baton, scratch_pool));
2105 /* pack the revprops in an equivalent way */
2106 SVN_ERR(svn_fs_x__pack_revprops_shard(fs,
2109 shard, max_files_per_dir,
2110 (int)(0.9 * max_pack_size),
2111 compression_level, batch,
2112 cancel_func, cancel_baton,
2115 /* Update the min-unpacked-rev file to reflect our newly packed shard. */
2116 SVN_ERR(svn_fs_x__write_min_unpacked_rev(fs,
2117 (svn_revnum_t)((shard + 1) * max_files_per_dir),
2119 ffd->min_unpacked_rev = (svn_revnum_t)((shard + 1) * max_files_per_dir);
2121 /* Ensure that packed file is written to disk.*/
2122 SVN_ERR(svn_fs_x__batch_fsync_run(batch, scratch_pool));
2124 /* Finally, remove the existing shard directories. */
2125 SVN_ERR(svn_io_remove_dir2(shard_path, TRUE,
2126 cancel_func, cancel_baton, scratch_pool));
2128 /* Notify caller we're starting to pack this shard. */
2130 SVN_ERR(notify_func(notify_baton, shard, svn_fs_pack_notify_end,
2133 return SVN_NO_ERROR;
2136 /* Read the youngest rev and the first non-packed rev info for FS from disk.
2137 Set *FULLY_PACKED when there is no completed unpacked shard.
2138 Use SCRATCH_POOL for temporary allocations.
2140 static svn_error_t *
2141 get_pack_status(svn_boolean_t *fully_packed,
2143 apr_pool_t *scratch_pool)
2145 svn_fs_x__data_t *ffd = fs->fsap_data;
2146 apr_int64_t completed_shards;
2147 svn_revnum_t youngest;
2149 SVN_ERR(svn_fs_x__read_min_unpacked_rev(&ffd->min_unpacked_rev, fs,
2152 SVN_ERR(svn_fs_x__youngest_rev(&youngest, fs, scratch_pool));
2153 completed_shards = (youngest + 1) / ffd->max_files_per_dir;
2155 /* See if we've already completed all possible shards thus far. */
2156 if (ffd->min_unpacked_rev == (completed_shards * ffd->max_files_per_dir))
2157 *fully_packed = TRUE;
2159 *fully_packed = FALSE;
2161 return SVN_NO_ERROR;
2164 typedef struct pack_baton_t
2168 svn_fs_pack_notify_t notify_func;
2170 svn_cancel_func_t cancel_func;
2175 /* The work-horse for svn_fs_x__pack, called with the FS write lock.
2176 This implements the svn_fs_x__with_write_lock() 'body' callback
2177 type. BATON is a 'pack_baton_t *'.
2179 WARNING: if you add a call to this function, please note:
2180 The code currently assumes that any piece of code running with
2181 the write-lock set can rely on the ffd->min_unpacked_rev and
2182 ffd->min_unpacked_revprop caches to be up-to-date (and, by
2183 extension, on not having to use a retry when calling
2184 svn_fs_x__path_rev_absolute() and friends). If you add a call
2185 to this function, consider whether you have to call
2186 update_min_unpacked_rev().
2187 See this thread: http://thread.gmane.org/1291206765.3782.3309.camel@edith
2189 static svn_error_t *
2190 pack_body(void *baton,
2191 apr_pool_t *scratch_pool)
2193 pack_baton_t *pb = baton;
2194 svn_fs_x__data_t *ffd = pb->fs->fsap_data;
2195 apr_int64_t completed_shards;
2197 apr_pool_t *iterpool;
2198 const char *data_path;
2199 svn_boolean_t fully_packed;
2201 /* Since another process might have already packed the repo,
2202 we need to re-read the pack status. */
2203 SVN_ERR(get_pack_status(&fully_packed, pb->fs, scratch_pool));
2206 if (pb->notify_func)
2207 (*pb->notify_func)(pb->notify_baton,
2208 ffd->min_unpacked_rev / ffd->max_files_per_dir,
2209 svn_fs_pack_notify_noop, scratch_pool);
2211 return SVN_NO_ERROR;
2214 completed_shards = (ffd->youngest_rev_cache + 1) / ffd->max_files_per_dir;
2215 data_path = svn_dirent_join(pb->fs->path, PATH_REVS_DIR, scratch_pool);
2217 iterpool = svn_pool_create(scratch_pool);
2218 for (i = ffd->min_unpacked_rev / ffd->max_files_per_dir;
2219 i < completed_shards;
2222 svn_pool_clear(iterpool);
2224 if (pb->cancel_func)
2225 SVN_ERR(pb->cancel_func(pb->cancel_baton));
2227 SVN_ERR(pack_shard(data_path,
2228 pb->fs, i, ffd->max_files_per_dir,
2229 ffd->revprop_pack_size,
2230 ffd->compress_packed_revprops
2231 ? SVN__COMPRESSION_ZLIB_DEFAULT
2232 : SVN__COMPRESSION_NONE,
2234 pb->notify_func, pb->notify_baton,
2235 pb->cancel_func, pb->cancel_baton, iterpool));
2238 svn_pool_destroy(iterpool);
2239 return SVN_NO_ERROR;
2243 svn_fs_x__pack(svn_fs_t *fs,
2245 svn_fs_pack_notify_t notify_func,
2247 svn_cancel_func_t cancel_func,
2249 apr_pool_t *scratch_pool)
2251 pack_baton_t pb = { 0 };
2252 svn_boolean_t fully_packed;
2254 /* Is there we even anything to do?. */
2255 SVN_ERR(get_pack_status(&fully_packed, fs, scratch_pool));
2258 svn_fs_x__data_t *ffd = fs->fsap_data;
2261 (*notify_func)(notify_baton,
2262 ffd->min_unpacked_rev / ffd->max_files_per_dir,
2263 svn_fs_pack_notify_noop, scratch_pool);
2265 return SVN_NO_ERROR;
2268 /* Lock the repo and start the pack process. */
2270 pb.notify_func = notify_func;
2271 pb.notify_baton = notify_baton;
2272 pb.cancel_func = cancel_func;
2273 pb.cancel_baton = cancel_baton;
2274 pb.max_mem = max_mem ? max_mem : DEFAULT_MAX_MEM;
2276 return svn_fs_x__with_pack_lock(fs, pack_body, &pb, scratch_pool);