1 /* cached_data.c --- cached (read) access to FSX data
3 * ====================================================================
4 * Licensed to the Apache Software Foundation (ASF) under one
5 * or more contributor license agreements. See the NOTICE file
6 * distributed with this work for additional information
7 * regarding copyright ownership. The ASF licenses this file
8 * to you under the Apache License, Version 2.0 (the
9 * "License"); you may not use this file except in compliance
10 * with the License. You may obtain a copy of the License at
12 * http://www.apache.org/licenses/LICENSE-2.0
14 * Unless required by applicable law or agreed to in writing,
15 * software distributed under the License is distributed on an
16 * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
17 * KIND, either express or implied. See the License for the
18 * specific language governing permissions and limitations
20 * ====================================================================
23 #include "cached_data.h"
28 #include "svn_ctype.h"
29 #include "svn_sorts.h"
31 #include "private/svn_io_private.h"
32 #include "private/svn_sorts_private.h"
33 #include "private/svn_string_private.h"
34 #include "private/svn_subr_private.h"
35 #include "private/svn_temp_serializer.h"
38 #include "low_level.h"
41 #include "temp_serializer.h"
47 #include "../libsvn_fs/fs-loader.h"
48 #include "../libsvn_delta/delta.h" /* for SVN_DELTA_WINDOW_SIZE */
50 #include "svn_private_config.h"
52 /* forward-declare. See implementation for the docstring */
54 block_read(void **result,
56 const svn_fs_x__id_t *id,
57 svn_fs_x__revision_file_t *revision_file,
59 apr_pool_t *result_pool,
60 apr_pool_t *scratch_pool);
63 /* Define this to enable access logging via dbg__log_access
64 #define SVN_FS_X__LOG_ACCESS
67 /* When SVN_FS_X__LOG_ACCESS has been defined, write a line to console
68 * showing where ID is located in FS and use ITEM to show details on its
69 * contents if not NULL. Use SCRATCH_POOL for temporary allocations.
72 dbg__log_access(svn_fs_t *fs,
73 const svn_fs_x__id_t *id,
75 apr_uint32_t item_type,
76 apr_pool_t *scratch_pool)
78 /* no-op if this macro is not defined */
79 #ifdef SVN_FS_X__LOG_ACCESS
80 svn_fs_x__data_t *ffd = fs->fsap_data;
81 apr_off_t offset = -1;
82 apr_off_t end_offset = 0;
83 apr_uint32_t sub_item = 0;
84 svn_fs_x__p2l_entry_t *entry = NULL;
85 static const char *types[] = {"<n/a>", "frep ", "drep ", "fprop", "dprop",
86 "node ", "chgs ", "rep ", "c:", "n:", "r:"};
87 const char *description = "";
88 const char *type = types[item_type];
89 const char *pack = "";
90 svn_revnum_t revision = svn_fs_x__get_revnum(id->change_set);
92 /* determine rev / pack file offset */
93 SVN_ERR(svn_fs_x__item_offset(&offset, &sub_item, fs, id, scratch_pool));
95 /* constructing the pack file description */
96 if (revision < ffd->min_unpacked_rev)
97 pack = apr_psprintf(scratch_pool, "%4ld|",
98 revision / ffd->max_files_per_dir);
100 /* construct description if possible */
101 if (item_type == SVN_FS_X__ITEM_TYPE_NODEREV && item != NULL)
103 svn_fs_x__noderev_t *node = item;
106 ? apr_psprintf(scratch_pool, " d=%ld/%" APR_UINT64_T_FMT,
107 svn_fs_x__get_revnum(node->data_rep->id.change_set),
108 node->data_rep->id.number)
112 ? apr_psprintf(scratch_pool, " p=%ld/%" APR_UINT64_T_FMT,
113 svn_fs_x__get_revnum(node->prop_rep->id.change_set),
114 node->prop_rep->id.number)
116 description = apr_psprintf(scratch_pool, "%s (pc=%d%s%s)",
118 node->predecessor_count,
122 else if (item_type == SVN_FS_X__ITEM_TYPE_ANY_REP)
124 svn_fs_x__rep_header_t *header = item;
126 description = " (txdelta window)";
127 else if (header->type == svn_fs_x__rep_self_delta)
128 description = " DELTA";
130 description = apr_psprintf(scratch_pool,
131 " DELTA against %ld/%" APR_UINT64_T_FMT,
132 header->base_revision,
133 header->base_item_index);
135 else if (item_type == SVN_FS_X__ITEM_TYPE_CHANGES && item != NULL)
137 apr_array_header_t *changes = item;
138 switch (changes->nelts)
140 case 0: description = " no change";
142 case 1: description = " 1 change";
144 default: description = apr_psprintf(scratch_pool, " %d changes",
149 /* reverse index lookup: get item description in ENTRY */
150 SVN_ERR(svn_fs_x__p2l_entry_lookup(&entry, fs, revision, offset,
155 end_offset = offset + entry->size;
156 type = types[entry->type];
158 /* merge the sub-item number with the container type */
159 if ( entry->type == SVN_FS_X__ITEM_TYPE_CHANGES_CONT
160 || entry->type == SVN_FS_X__ITEM_TYPE_NODEREVS_CONT
161 || entry->type == SVN_FS_X__ITEM_TYPE_REPS_CONT)
162 type = apr_psprintf(scratch_pool, "%s%-3d", type, sub_item);
166 printf("%5s%4lx:%04lx -%4lx:%04lx %s %7ld %5"APR_UINT64_T_FMT" %s\n",
167 pack, (long)(offset / ffd->block_size),
168 (long)(offset % ffd->block_size),
169 (long)(end_offset / ffd->block_size),
170 (long)(end_offset % ffd->block_size),
171 type, revision, id->number, description);
178 /* Open the revision file for the item given by ID in filesystem FS and
179 store the newly opened file in FILE. Seek to the item's location before
182 Allocate the result in RESULT_POOL and temporaries in SCRATCH_POOL. */
184 open_and_seek_revision(svn_fs_x__revision_file_t **file,
186 const svn_fs_x__id_t *id,
187 apr_pool_t *result_pool,
188 apr_pool_t *scratch_pool)
190 svn_fs_x__revision_file_t *rev_file;
191 apr_off_t offset = -1;
192 apr_uint32_t sub_item = 0;
193 svn_revnum_t rev = svn_fs_x__get_revnum(id->change_set);
195 SVN_ERR(svn_fs_x__ensure_revision_exists(rev, fs, scratch_pool));
197 SVN_ERR(svn_fs_x__rev_file_init(&rev_file, fs, rev, result_pool));
198 SVN_ERR(svn_fs_x__item_offset(&offset, &sub_item, fs, rev_file, id,
200 SVN_ERR(svn_fs_x__rev_file_seek(rev_file, NULL, offset));
207 /* Open the representation REP for a node-revision in filesystem FS, seek
208 to its position and store the newly opened file in FILE.
210 Allocate the result in RESULT_POOL and temporaries in SCRATCH_POOL. */
212 open_and_seek_transaction(svn_fs_x__revision_file_t **file,
214 svn_fs_x__representation_t *rep,
215 apr_pool_t *result_pool,
216 apr_pool_t *scratch_pool)
219 apr_uint32_t sub_item = 0;
220 apr_int64_t txn_id = svn_fs_x__get_txn_id(rep->id.change_set);
222 SVN_ERR(svn_fs_x__rev_file_open_proto_rev(file, fs, txn_id, result_pool,
225 SVN_ERR(svn_fs_x__item_offset(&offset, &sub_item, fs, *file, &rep->id,
227 SVN_ERR(svn_fs_x__rev_file_seek(*file, NULL, offset));
232 /* Given a representation REP in filesystem FS, open
233 the correct file and seek to the correct location. Store this
236 Allocate the result in RESULT_POOL and temporaries in SCRATCH_POOL. */
238 open_and_seek_representation(svn_fs_x__revision_file_t **file_p,
240 svn_fs_x__representation_t *rep,
241 apr_pool_t *result_pool,
242 apr_pool_t *scratch_pool)
/* If REP's change set is a revision, the item is located through its
   ID; anything else is handed to the transaction variant. */
244 if (svn_fs_x__is_revision(rep->id.change_set))
245 return open_and_seek_revision(file_p, fs, &rep->id, result_pool,
248 return open_and_seek_transaction(file_p, fs, rep, result_pool,
/* Return an SVN_ERR_FS_ID_NOT_FOUND error whose message names the node
   ID and the path of filesystem FS.  The textual form of ID is allocated
   in FS->POOL. */
255 err_dangling_id(svn_fs_t *fs,
256 const svn_fs_x__id_t *id)
258 svn_string_t *id_str = svn_fs_x__id_unparse(id, fs->pool);
259 return svn_error_createf
260 (SVN_ERR_FS_ID_NOT_FOUND, 0,
261 _("Reference to non-existent node '%s' in filesystem '%s'"),
262 id_str->data, fs->path);
265 /* Get the node-revision for the node ID in FS.
266 Set *NODEREV_P to the new node-revision structure, allocated in RESULT_POOL.
267 See svn_fs_x__get_node_revision, which wraps this and adds another
270 get_node_revision_body(svn_fs_x__noderev_t **noderev_p,
272 const svn_fs_x__id_t *id,
273 apr_pool_t *result_pool,
274 apr_pool_t *scratch_pool)
277 svn_boolean_t is_cached = FALSE;
278 svn_fs_x__data_t *ffd = fs->fsap_data;
280 if (svn_fs_x__is_txn(id->change_set))
283 svn_stream_t *stream;
285 /* This is a transaction node-rev. Its storage logic is very
286 different from that of rev / pack files. */
287 err = svn_io_file_open(&file,
288 svn_fs_x__path_txn_node_rev(fs, id,
291 APR_READ | APR_BUFFERED, APR_OS_DEFAULT,
293 if (err && APR_STATUS_IS_ENOENT(err->apr_err))
295 svn_error_clear(err);
296 return svn_error_trace(err_dangling_id(fs, id));
300 return svn_error_trace(err);
303 /* Be sure to close the file ASAP. */
304 stream = svn_stream_from_aprfile2(file, FALSE, scratch_pool);
305 SVN_ERR(svn_fs_x__read_noderev(noderev_p, stream,
306 result_pool, scratch_pool));
310 svn_fs_x__revision_file_t *revision_file;
312 /* noderevs in rev / pack files can be cached */
313 svn_revnum_t revision = svn_fs_x__get_revnum(id->change_set);
314 svn_fs_x__pair_cache_key_t key;
316 SVN_ERR(svn_fs_x__rev_file_init(&revision_file, fs, revision,
319 /* First, try a noderevs container cache lookup. */
320 if ( svn_fs_x__is_packed_rev(fs, revision)
321 && ffd->noderevs_container_cache)
324 apr_uint32_t sub_item;
325 SVN_ERR(svn_fs_x__item_offset(&offset, &sub_item, fs, revision_file,
327 key.revision = svn_fs_x__packed_base_rev(fs, revision);
330 SVN_ERR(svn_cache__get_partial((void **)noderev_p, &is_cached,
331 ffd->noderevs_container_cache, &key,
332 svn_fs_x__noderevs_get_func,
333 &sub_item, result_pool));
338 key.revision = revision;
339 key.second = id->number;
341 /* Not found or not applicable. Try a noderev cache lookup.
342 * If that succeeds, we are done here. */
343 SVN_ERR(svn_cache__get((void **) noderev_p,
345 ffd->node_revision_cache,
351 /* block-read will parse the whole block and will also return
352 the one noderev that we need right now. */
353 SVN_ERR(block_read((void **)noderev_p, fs,
359 SVN_ERR(svn_fs_x__close_revision_file(revision_file));
/* Fetch the node-revision for ID in FS through get_node_revision_body(),
   handing NODEREV_P, RESULT_POOL and SCRATCH_POOL straight on.
   An SVN_ERR_FS_CORRUPT failure gets wrapped into another
   SVN_ERR_FS_CORRUPT error whose message includes the unparsed ID.
   In all other cases, the access is passed to dbg__log_access() and the
   result of get_node_revision_body() is returned via svn_error_trace(). */
366 svn_fs_x__get_node_revision(svn_fs_x__noderev_t **noderev_p,
368 const svn_fs_x__id_t *id,
369 apr_pool_t *result_pool,
370 apr_pool_t *scratch_pool)
372 svn_error_t *err = get_node_revision_body(noderev_p, fs, id,
373 result_pool, scratch_pool);
/* corruption reports shall tell the user which ID is affected */
374 if (err && err->apr_err == SVN_ERR_FS_CORRUPT)
376 svn_string_t *id_string = svn_fs_x__id_unparse(id, scratch_pool);
377 return svn_error_createf(SVN_ERR_FS_CORRUPT, err,
378 "Corrupt node-revision '%s'",
382 SVN_ERR(dbg__log_access(fs, id, *noderev_p,
383 SVN_FS_X__ITEM_TYPE_NODEREV, scratch_pool));
385 return svn_error_trace(err);
/* Set *COUNT to the mergeinfo count of the node-revision ID.
   Unless SVN_FS_X__LOG_ACCESS is defined, non-txn IDs in packed
   revisions are first looked up in the noderevs container cache (if there
   is one) using svn_fs_x__mergeinfo_count_get_func.  The fallback reads
   the whole node-revision with svn_fs_x__get_node_revision() and copies
   its MERGEINFO_COUNT member.  All allocations use SCRATCH_POOL. */
390 svn_fs_x__get_mergeinfo_count(apr_int64_t *count,
392 const svn_fs_x__id_t *id,
393 apr_pool_t *scratch_pool)
395 svn_fs_x__noderev_t *noderev;
397 /* If we want a full access log, we need to provide full data and
398 cannot take shortcuts here. */
399 #if !defined(SVN_FS_X__LOG_ACCESS)
401 /* First, try a noderevs container cache lookup. */
402 if (! svn_fs_x__is_txn(id->change_set))
404 /* noderevs in rev / pack files can be cached */
405 svn_fs_x__data_t *ffd = fs->fsap_data;
406 svn_revnum_t revision = svn_fs_x__get_revnum(id->change_set);
408 svn_fs_x__revision_file_t *rev_file;
409 SVN_ERR(svn_fs_x__rev_file_init(&rev_file, fs, revision,
412 if ( svn_fs_x__is_packed_rev(fs, revision)
413 && ffd->noderevs_container_cache)
415 svn_fs_x__pair_cache_key_t key;
417 apr_uint32_t sub_item;
418 svn_boolean_t is_cached;
420 SVN_ERR(svn_fs_x__item_offset(&offset, &sub_item, fs, rev_file,
422 key.revision = svn_fs_x__packed_base_rev(fs, revision);
425 SVN_ERR(svn_cache__get_partial((void **)count, &is_cached,
426 ffd->noderevs_container_cache, &key,
427 svn_fs_x__mergeinfo_count_get_func,
428 &sub_item, scratch_pool));
435 /* fallback to the naive implementation handling all edge cases */
436 SVN_ERR(svn_fs_x__get_node_revision(&noderev, fs, id, scratch_pool,
438 *count = noderev->mergeinfo_count;
443 /* Describes a lazily opened rev / pack file. Instances will be shared
444 between multiple instances of rep_state_t. */
445 typedef struct shared_file_t
447 /* The opened file. NULL while file is not open, yet. */
448 svn_fs_x__revision_file_t *rfile;
450 /* file system to open the file in */
453 /* a revision contained in the FILE. Since this file may be shared,
454 that value may be different from REP_STATE_T->REVISION. */
455 svn_revnum_t revision;
457 /* pool to use when creating the FILE. This guarantees that the file
458 remains open / valid beyond the respective local context that required
459 the file to be opened eventually. */
463 /* Represents where in the current svndiff data block each
464 representation is. */
465 typedef struct rep_state_t
467 /* shared lazy-open rev/pack file structure */
468 shared_file_t *sfile;
469 /* The txdelta window cache to use or NULL. */
470 svn_cache__t *window_cache;
471 /* Caches un-deltified windows. May be NULL. */
472 svn_cache__t *combined_cache;
473 /* ID addressing the representation */
474 svn_fs_x__id_t rep_id;
475 /* length of the header at the start of the rep.
476 0 iff this rep is stored in a container
477 (i.e. does not have a header) */
478 apr_size_t header_size;
479 apr_off_t start; /* The starting offset for the raw
480 svndiff data minus header.
481 -1 if the offset is yet unknown. */
482 /* sub-item index in case the rep is containered */
483 apr_uint32_t sub_item;
484 apr_off_t current;/* The current offset relative to START. */
485 apr_off_t size; /* The on-disk size of the representation. */
486 int ver; /* If a delta, what svndiff version?
487 -1 for unknown delta version. */
488 int chunk_index; /* number of the window to read */
491 /* If FILE->RFILE is still NULL, initialize it for FILE->REVISION in
   FILE->FS, allocating it in FILE->POOL.  Otherwise, do nothing. */
493 auto_open_shared_file(shared_file_t *file)
495 if (file->rfile == NULL)
496 SVN_ERR(svn_fs_x__rev_file_init(&file->rfile, file->fs,
497 file->revision, file->pool));
502 /* Set RS->START to the begin of the representation raw in RS->SFILE->RFILE,
503 if that hasn't been done yet. Use SCRATCH_POOL for temporary allocations.
506 auto_set_start_offset(rep_state_t *rs,
507 apr_pool_t *scratch_pool)
511 SVN_ERR(svn_fs_x__item_offset(&rs->start, &rs->sub_item,
512 rs->sfile->fs, rs->sfile->rfile,
513 &rs->rep_id, scratch_pool));
514 rs->start += rs->header_size;
520 /* Set RS->VER depending on what is found in the already open RS->SFILE->RFILE
521 if the diff version is still unknown. Use SCRATCH_POOL for temporary
525 auto_read_diff_version(rep_state_t *rs,
526 apr_pool_t *scratch_pool)
531 SVN_ERR(svn_fs_x__rev_file_seek(rs->sfile->rfile, NULL, rs->start));
532 SVN_ERR(svn_fs_x__rev_file_read(rs->sfile->rfile, buf, sizeof(buf)));
534 /* ### Layering violation */
535 if (! ((buf[0] == 'S') && (buf[1] == 'V') && (buf[2] == 'N')))
536 return svn_error_create
537 (SVN_ERR_FS_CORRUPT, NULL,
538 _("Malformed svndiff data in representation"));
548 /* See create_rep_state, which wraps this and adds another error. */
550 create_rep_state_body(rep_state_t **rep_state,
551 svn_fs_x__rep_header_t **rep_header,
552 shared_file_t **shared_file,
553 svn_fs_x__representation_t *rep,
555 apr_pool_t *result_pool,
556 apr_pool_t *scratch_pool)
558 svn_fs_x__data_t *ffd = fs->fsap_data;
559 rep_state_t *rs = apr_pcalloc(result_pool, sizeof(*rs));
560 svn_fs_x__rep_header_t *rh;
561 svn_boolean_t is_cached = FALSE;
562 svn_revnum_t revision = svn_fs_x__get_revnum(rep->id.change_set);
563 apr_uint64_t estimated_window_storage;
567 * - refers to a valid revision,
568 * - refers to a packed revision,
569 * - as does the rep we want to read, and
570 * - refers to the same pack file as the rep
571 * we can re-use the same, already open file object
573 svn_boolean_t reuse_shared_file
574 = shared_file && *shared_file && (*shared_file)->rfile
575 && SVN_IS_VALID_REVNUM((*shared_file)->revision)
576 && (*shared_file)->revision < ffd->min_unpacked_rev
577 && revision < ffd->min_unpacked_rev
578 && ( ((*shared_file)->revision / ffd->max_files_per_dir)
579 == (revision / ffd->max_files_per_dir));
581 svn_fs_x__representation_cache_key_t key = { 0 };
582 key.revision = revision;
583 key.is_packed = revision < ffd->min_unpacked_rev;
584 key.item_index = rep->id.number;
586 /* continue constructing RS and RA */
587 rs->size = rep->size;
588 rs->rep_id = rep->id;
592 /* Very long files stored as self-delta will produce a huge number of
593 delta windows. Don't cache them lest we thrash the cache.
594 Since we don't know the depth of the delta chain, let's assume, the
595 whole contents get rewritten 3 times.
597 estimated_window_storage
598 = 4 * ( (rep->expanded_size ? rep->expanded_size : rep->size)
599 + SVN_DELTA_WINDOW_SIZE);
600 estimated_window_storage = MIN(estimated_window_storage, APR_SIZE_MAX);
602 rs->window_cache = ffd->txdelta_window_cache
603 && svn_cache__is_cachable(ffd->txdelta_window_cache,
604 (apr_size_t)estimated_window_storage)
605 ? ffd->txdelta_window_cache
607 rs->combined_cache = ffd->combined_window_cache
608 && svn_cache__is_cachable(ffd->combined_window_cache,
609 (apr_size_t)estimated_window_storage)
610 ? ffd->combined_window_cache
613 /* cache lookup, i.e. skip reading the rep header if possible */
614 if (SVN_IS_VALID_REVNUM(revision))
615 SVN_ERR(svn_cache__get((void **) &rh, &is_cached,
616 ffd->rep_header_cache, &key, result_pool));
618 /* initialize the (shared) FILE member in RS */
619 if (reuse_shared_file)
621 rs->sfile = *shared_file;
625 shared_file_t *file = apr_pcalloc(result_pool, sizeof(*file));
626 file->revision = revision;
627 file->pool = result_pool;
631 /* remember the current file, if suggested by the caller */
636 /* read rep header, if necessary */
639 svn_stream_t *stream;
641 /* we will need the on-disk location for non-txn reps */
643 svn_boolean_t in_container = TRUE;
645 /* ensure file is open and navigate to the start of rep header */
646 if (reuse_shared_file)
648 /* ... we can re-use the same, already open file object.
649 * This implies that we don't read from a txn.
651 rs->sfile = *shared_file;
652 SVN_ERR(auto_open_shared_file(rs->sfile));
656 /* otherwise, create a new file object. May or may not be
659 SVN_ERR(open_and_seek_representation(&rs->sfile->rfile, fs, rep,
660 result_pool, scratch_pool));
663 if (SVN_IS_VALID_REVNUM(revision))
665 apr_uint32_t sub_item;
667 SVN_ERR(svn_fs_x__item_offset(&offset, &sub_item, fs,
668 rs->sfile->rfile, &rep->id,
671 /* is rep stored in some star-deltified container? */
674 svn_fs_x__p2l_entry_t *entry;
675 SVN_ERR(svn_fs_x__p2l_entry_lookup(&entry, fs, rs->sfile->rfile,
677 scratch_pool, scratch_pool));
678 in_container = entry->type == SVN_FS_X__ITEM_TYPE_REPS_CONT;
683 /* construct a container rep header */
684 *rep_header = apr_pcalloc(result_pool, sizeof(**rep_header));
685 (*rep_header)->type = svn_fs_x__rep_container;
692 SVN_ERR(svn_fs_x__rev_file_seek(rs->sfile->rfile, NULL, offset));
695 SVN_ERR(svn_fs_x__rev_file_stream(&stream, rs->sfile->rfile));
696 SVN_ERR(svn_fs_x__read_rep_header(&rh, stream,
697 result_pool, scratch_pool));
698 SVN_ERR(svn_fs_x__rev_file_offset(&rs->start, rs->sfile->rfile));
700 /* populate the cache if appropriate */
701 if (SVN_IS_VALID_REVNUM(revision))
703 SVN_ERR(block_read(NULL, fs, &rs->rep_id, rs->sfile->rfile, NULL,
704 result_pool, scratch_pool));
705 SVN_ERR(svn_cache__set(ffd->rep_header_cache, &key, rh,
711 SVN_ERR(dbg__log_access(fs, &rs->rep_id, rh, SVN_FS_X__ITEM_TYPE_ANY_REP,
714 rs->header_size = rh->header_size;
720 /* skip "SVNx" diff marker */
726 /* Read the rep args for REP in filesystem FS and create a rep_state
727 for reading the representation. Return the rep_state in *REP_STATE
728 and the rep header in *REP_HEADER, both allocated in RESULT_POOL.
730 When reading multiple reps, i.e. a skip delta chain, you may provide
731 non-NULL SHARED_FILE. (If SHARED_FILE is not NULL, in the first
732 call it should be a pointer to NULL.) The function will use this
733 variable to store the previous call results and tries to re-use it.
734 This may result in significant savings in I/O for packed files and
735 number of open file handles.
738 create_rep_state(rep_state_t **rep_state,
739 svn_fs_x__rep_header_t **rep_header,
740 shared_file_t **shared_file,
741 svn_fs_x__representation_t *rep,
743 apr_pool_t *result_pool,
744 apr_pool_t *scratch_pool)
746 svn_error_t *err = create_rep_state_body(rep_state, rep_header,
747 shared_file, rep, fs,
748 result_pool, scratch_pool);
749 if (err && err->apr_err == SVN_ERR_FS_CORRUPT)
751 /* ### This always returns "-1" for transaction reps, because
752 ### this particular bit of code doesn't know if the rep is
753 ### stored in the protorev or in the mutable area (for props
754 ### or dir contents). It is pretty rare for FSX to *read*
755 ### from the protorev file, though, so this is probably OK.
756 ### And anyone going to debug corruption errors is probably
757 ### going to jump straight to this comment anyway! */
758 return svn_error_createf(SVN_ERR_FS_CORRUPT, err,
759 "Corrupt representation '%s'",
761 ? svn_fs_x__unparse_representation
762 (rep, TRUE, scratch_pool,
766 /* ### Call representation_string() ? */
767 return svn_error_trace(err);
/* Verify that REP->ID addresses an item whose P2L entry type is one of
   the representation types: file rep, dir rep, file props, dir props or
   reps container.  Return an SVN_ERR_FS_CORRUPT error naming the offset,
   the item and the revision otherwise.
   Use SCRATCH_POOL for temporary allocations. */
771 svn_fs_x__check_rep(svn_fs_x__representation_t *rep,
773 apr_pool_t *scratch_pool)
776 apr_uint32_t sub_item;
777 svn_fs_x__p2l_entry_t *entry;
778 svn_revnum_t revision = svn_fs_x__get_revnum(rep->id.change_set);
780 svn_fs_x__revision_file_t *rev_file;
781 SVN_ERR(svn_fs_x__rev_file_init(&rev_file, fs, revision, scratch_pool));
783 /* Does REP->ID refer to an actual item? Which one is it? */
784 SVN_ERR(svn_fs_x__item_offset(&offset, &sub_item, fs, rev_file, &rep->id,
787 /* What is the type of that item? */
788 SVN_ERR(svn_fs_x__p2l_entry_lookup(&entry, fs, rev_file, revision, offset,
789 scratch_pool, scratch_pool));
791 /* Verify that we've got an item that is actually a representation. */
793 || ( entry->type != SVN_FS_X__ITEM_TYPE_FILE_REP
794 && entry->type != SVN_FS_X__ITEM_TYPE_DIR_REP
795 && entry->type != SVN_FS_X__ITEM_TYPE_FILE_PROPS
796 && entry->type != SVN_FS_X__ITEM_TYPE_DIR_PROPS
797 && entry->type != SVN_FS_X__ITEM_TYPE_REPS_CONT))
798 return svn_error_createf(SVN_ERR_FS_CORRUPT, NULL,
799 _("No representation found at offset %s "
800 "for item %s in revision %ld"),
801 apr_off_t_toa(scratch_pool, offset),
802 apr_psprintf(scratch_pool, "%" APR_UINT64_T_FMT,
810 Do any allocations in POOL. */
812 svn_fs_x__rep_chain_length(int *chain_length,
814 svn_fs_x__representation_t *rep,
816 apr_pool_t *scratch_pool)
818 svn_fs_x__data_t *ffd = fs->fsap_data;
819 svn_revnum_t shard_size = ffd->max_files_per_dir;
820 svn_boolean_t is_delta = FALSE;
823 svn_revnum_t revision = svn_fs_x__get_revnum(rep->id.change_set);
824 svn_revnum_t last_shard = revision / shard_size;
826 /* Note that this iteration pool will be used in a non-standard way.
827 * To reuse open file handles between iterations (e.g. while within the
828 * same pack file), we only clear this pool once in a while instead of
829 * at the start of each iteration. */
830 apr_pool_t *iterpool = svn_pool_create(scratch_pool);
832 /* Check whether the length of the deltification chain is acceptable.
833 * Otherwise, shared reps may form a non-skipping delta chain in
835 svn_fs_x__representation_t base_rep = *rep;
837 /* re-use open files between iterations */
838 shared_file_t *file_hint = NULL;
840 svn_fs_x__rep_header_t *header;
842 /* follow the delta chain towards the end but for at most
843 * MAX_CHAIN_LENGTH steps. */
846 rep_state_t *rep_state;
847 revision = svn_fs_x__get_revnum(base_rep.id.change_set);
848 if (revision / shard_size != last_shard)
850 last_shard = revision / shard_size;
854 SVN_ERR(create_rep_state_body(&rep_state,
862 base_rep.id.change_set
863 = svn_fs_x__change_set_by_rev(header->base_revision);
864 base_rep.id.number = header->base_item_index;
865 base_rep.size = header->base_length;
866 is_delta = header->type == svn_fs_x__rep_delta;
868 /* Clear the ITERPOOL once in a while. Doing it too frequently
869 * renders the FILE_HINT ineffective. Doing it too infrequently may
870 * leave us with too many open file handles.
872 * Note that this is mostly about efficiency, with larger values
873 * being more efficient, and any non-zero value is legal here. When
874 * reading deltified contents, we may keep 10s of rev files open at
875 * the same time and the system has to cope with that. Thus, the
876 * limit of 16 chosen below is in the same ballpark.
882 svn_pool_clear(iterpool);
885 while (is_delta && base_rep.id.change_set);
887 *chain_length = count;
888 *shard_count = shards;
889 svn_pool_destroy(iterpool);
895 typedef struct rep_read_baton_t
897 /* The FS from which we're reading. */
900 /* Representation to read. */
901 svn_fs_x__representation_t rep;
903 /* If not NULL, this is the base for the first delta window in rs_list */
904 svn_stringbuf_t *base_window;
906 /* The state of all prior delta representations. */
907 apr_array_header_t *rs_list;
909 /* The plaintext state, if there is a plaintext. */
910 rep_state_t *src_state;
912 /* The index of the current delta chunk, if we are reading a delta. */
915 /* The buffer where we store undeltified data. */
920 /* A checksum context for summing the data read in order to verify it.
921 Note: we don't need to use the sha1 checksum because we're only doing
922 data verification, for which md5 is perfectly safe. */
923 svn_checksum_ctx_t *md5_checksum_ctx;
925 svn_boolean_t checksum_finalized;
927 /* The stored checksum of the representation we are reading, its
928 length, and the amount we've read so far. Some of this
929 information is redundant with rs_list and src_state, but it's
930 convenient for the checksumming code to have it here. */
931 unsigned char md5_digest[APR_MD5_DIGESTSIZE];
936 /* The key for the fulltext cache for this rep, if there is a
938 svn_fs_x__pair_cache_key_t fulltext_cache_key;
939 /* The text we've been reading, if we're going to cache it. */
940 svn_stringbuf_t *current_fulltext;
942 /* If not NULL, attempt to read the data from this cache.
943 Once that lookup fails, reset it to NULL. */
944 svn_cache__t *fulltext_cache;
946 /* Bytes delivered from the FULLTEXT_CACHE so far. If the next
947 lookup fails, we need to skip that much data from the reconstructed
948 window stream before we continue normal operation. */
949 svn_filesize_t fulltext_delivered;
951 /* Used for temporary allocations during the read. */
952 apr_pool_t *scratch_pool;
954 /* Pool used to store file handles and other data that is persistent
955 for the entire stream read. */
956 apr_pool_t *filehandle_pool;
959 /* Set window key in *KEY to address the window described by RS.
960 For convenience, return the KEY. */
961 static svn_fs_x__window_cache_key_t *
962 get_window_key(svn_fs_x__window_cache_key_t *key,
965 svn_revnum_t revision = svn_fs_x__get_revnum(rs->rep_id.change_set);
966 assert(revision <= APR_UINT32_MAX);
968 key->revision = (apr_uint32_t)revision;
969 key->item_index = rs->rep_id.number;
970 key->chunk_index = rs->chunk_index;
975 /* Read the WINDOW_P number CHUNK_INDEX for the representation given in
976 * rep state RS from the current FSX session's cache. This will be a
977 * no-op and IS_CACHED will be set to FALSE if no cache has been given.
978 * If a cache is available IS_CACHED will inform the caller about the
979 * success of the lookup. Allocations (of the window in particular) will
982 * If the information could be found, put RS to CHUNK_INDEX.
985 /* Return data type for get_cached_window_sizes_func.
987 typedef struct window_sizes_t
989 /* length of the txdelta window in its on-disk format */
990 svn_filesize_t packed_len;
992 /* expanded (and combined) window length */
993 svn_filesize_t target_len;
996 /* Implements svn_cache__partial_getter_func_t extracting the packed
997 * and expanded window sizes from a cached window and return the size
998 * info as a window_sizes_t* in *OUT.
1000 static svn_error_t *
1001 get_cached_window_sizes_func(void **out,
1003 apr_size_t data_len,
1007 const svn_fs_x__txdelta_cached_window_t *window = data;
1008 const svn_txdelta_window_t *txdelta_window
1009 = svn_temp_deserializer__ptr(window, (const void **)&window->window);
1011 window_sizes_t *result = apr_palloc(pool, sizeof(*result));
1012 result->packed_len = window->end_offset - window->start_offset;
1013 result->target_len = txdelta_window->tview_len;
1017 return SVN_NO_ERROR;
1020 /* Read the WINDOW_P number CHUNK_INDEX for the representation given in
1021 * rep state RS from the current FSX session's cache. This will be a
1022 * no-op and IS_CACHED will be set to FALSE if no cache has been given.
1023 * If a cache is available IS_CACHED will inform the caller about the
1024 * success of the lookup. Allocations of the window will be made
1025 * from RESULT_POOL. Use SCRATCH_POOL for temporary allocations.
1027 * If the information could be found, put RS to CHUNK_INDEX.
1029 static svn_error_t *
1030 get_cached_window_sizes(window_sizes_t **sizes,
1032 svn_boolean_t *is_cached,
1035 svn_fs_x__window_cache_key_t key = { 0 };
1036 SVN_ERR(svn_cache__get_partial((void **)sizes,
1039 get_window_key(&key, rs),
1040 get_cached_window_sizes_func,
1044 return SVN_NO_ERROR;
1047 static svn_error_t *
1048 get_cached_window(svn_txdelta_window_t **window_p,
1051 svn_boolean_t *is_cached,
1052 apr_pool_t *result_pool,
1053 apr_pool_t *scratch_pool)
1055 /* ask the cache for the desired txdelta window */
1056 svn_fs_x__txdelta_cached_window_t *cached_window;
1057 svn_fs_x__window_cache_key_t key = { 0 };
1058 get_window_key(&key, rs);
1059 key.chunk_index = chunk_index;
1060 SVN_ERR(svn_cache__get((void **) &cached_window,
1068 /* found it. Pass it back to the caller. */
1069 *window_p = cached_window->window;
1071 /* manipulate the RS as if we just read the data */
1072 rs->current = cached_window->end_offset;
1073 rs->chunk_index = chunk_index;
1076 return SVN_NO_ERROR;
1079 /* Store the WINDOW read for the rep state RS with the given START_OFFSET
1080 * within the pack / rev file in the current FSX session's cache. This
1081 * will be a no-op if no cache has been given.
1082 * Temporary allocations will be made from SCRATCH_POOL. */
1083 static svn_error_t *
1084 set_cached_window(svn_txdelta_window_t *window,
1086 apr_off_t start_offset,
1087 apr_pool_t *scratch_pool)
1089 /* store the window and the first offset _past_ it */
1090 svn_fs_x__txdelta_cached_window_t cached_window;
1091 svn_fs_x__window_cache_key_t key = {0};
1093 cached_window.window = window;
1094 cached_window.start_offset = start_offset - rs->start;
1095 cached_window.end_offset = rs->current;
1097 /* but key it with the start offset because that is the known state
1098 * when we will look it up */
1099 SVN_ERR(svn_cache__set(rs->window_cache,
1100 get_window_key(&key, rs),
1104 return SVN_NO_ERROR;
1107 /* Read the WINDOW_P for the rep state RS from the current FSX session's
1108 * cache. This will be a no-op and IS_CACHED will be set to FALSE if no
1109 * cache has been given. If a cache is available IS_CACHED will inform
1110 * the caller about the success of the lookup. Allocations (of the window
1111 * in particular) will be made from POOL.
1113 static svn_error_t *
1114 get_cached_combined_window(svn_stringbuf_t **window_p,
1116 svn_boolean_t *is_cached,
1119 /* ask the cache for the desired txdelta window */
1120 svn_fs_x__window_cache_key_t key = { 0 };
1121 return svn_cache__get((void **)window_p,
1124 get_window_key(&key, rs),
1128 /* Store the WINDOW read for the rep state RS in the current FSX session's
1129 * cache. This will be a no-op if no cache has been given.
1130 * Temporary allocations will be made from SCRATCH_POOL. */
1131 static svn_error_t *
1132 set_cached_combined_window(svn_stringbuf_t *window,
1134 apr_pool_t *scratch_pool)
1136 /* but key it with the start offset because that is the known state
1137 * when we will look it up */
1138 svn_fs_x__window_cache_key_t key = { 0 };
1139 return svn_cache__set(rs->combined_cache,
1140 get_window_key(&key, rs),
1145 /* Build an array of rep_state structures in *LIST giving the delta
1146 reps from first_rep to a self-compressed rep. Set *SRC_STATE to
1147 the container rep we find at the end of the chain, or to NULL if
1148 the final delta representation is self-compressed.
1149 The representation to start from is designated by filesystem FS, id
1150 ID, and representation REP.
1151 Also, set *WINDOW_P to the base window content for *LIST, if it
1152 could be found in cache. Otherwise, *LIST will contain the base
1153 representation for the whole delta chain.
/* Implementation notes, limited to what is visible below:
 * - *LIST is created in RESULT_POOL as an array of rep_state_t pointers.
 * - ITERPOOL is a subpool of SCRATCH_POOL; it is cleared with
 *   svn_pool_clear() and destroyed before the successful return.
 * - Rep states come from create_rep_state(), which is given RESULT_POOL
 *   and ITERPOOL.
 * NOTE(review): the loop header and several short statements are not
 * visible in this listing; the comments only describe visible lines. */
1155 static svn_error_t *
1156 build_rep_list(apr_array_header_t **list,
1157 svn_stringbuf_t **window_p,
1158 rep_state_t **src_state,
1160 svn_fs_x__representation_t *first_rep,
1161 apr_pool_t *result_pool,
1162 apr_pool_t *scratch_pool)
1164 svn_fs_x__representation_t rep;
1165 rep_state_t *rs = NULL;
1166 svn_fs_x__rep_header_t *rep_header;
1167 svn_boolean_t is_cached = FALSE;
1168 shared_file_t *shared_file = NULL;
1169 apr_pool_t *iterpool = svn_pool_create(scratch_pool);
1171 *list = apr_array_make(result_pool, 1, sizeof(rep_state_t *));
1174 /* for the top-level rep, we need the rep_args */
1175 SVN_ERR(create_rep_state(&rs, &rep_header, &shared_file, &rep, fs,
1176 result_pool, iterpool));
1180 svn_pool_clear(iterpool);
1182 /* fetch state, if that has not been done already */
1184 SVN_ERR(create_rep_state(&rs, &rep_header, &shared_file,
1185 &rep, fs, result_pool, iterpool));
1187 /* for txn reps and containered reps, there won't be a cached
1188 * combined window */
1189 if (svn_fs_x__is_revision(rep.id.change_set)
1190 && rep_header->type != svn_fs_x__rep_container
1191 && rs->combined_cache)
1192 SVN_ERR(get_cached_combined_window(window_p, rs, &is_cached,
1197 /* We already have a reconstructed window in our cache.
1198 Write a pseudo rep_state with the full length. */
1201 rs->size = (*window_p)->len;
1206 if (rep_header->type == svn_fs_x__rep_container)
1208 /* This is a container item, so just return the current rep_state. */
1213 /* Push this rep onto the list. If it's self-compressed, we're done. */
1214 APR_ARRAY_PUSH(*list, rep_state_t *) = rs;
1215 if (rep_header->type == svn_fs_x__rep_self_delta)
/* Re-target REP at the delta base named in the header: base revision
 * (converted by svn_fs_x__change_set_by_rev()), base item index and base
 * length.  NOTE(review): the left-hand side of the first assignment is not
 * visible here; presumably it is rep.id.change_set -- confirm. */
1222 = svn_fs_x__change_set_by_rev(rep_header->base_revision);
1223 rep.id.number = rep_header->base_item_index;
1224 rep.size = rep_header->base_length;
1228 svn_pool_destroy(iterpool);
1230 return SVN_NO_ERROR;
1234 /* Create a rep_read_baton structure for node revision NODEREV in
1235 filesystem FS and store it in *RB_P. If FULLTEXT_CACHE_KEY is not
1236 NULL, it is the rep's key in the fulltext cache, and a stringbuf
1237 must be allocated to store the text. If rep is mutable, it must
1238 refer to file contents.
1240 Allocate the result in RESULT_POOL. This includes the pools within *RB_P.
/* The baton is allocated with apr_pcalloc() from RESULT_POOL.  Visible
 * initialization: the expected MD5 digest and the expanded length are
 * copied from REP, FULLTEXT_CACHE_KEY is stored by value, and two subpools
 * of RESULT_POOL are created for later use by the stream.
 * NOTE(review): some member assignments and the store to *RB_P are not
 * visible in this listing. */
1242 static svn_error_t *
1243 rep_read_get_baton(rep_read_baton_t **rb_p,
1245 svn_fs_x__representation_t *rep,
1246 svn_fs_x__pair_cache_key_t fulltext_cache_key,
1247 apr_pool_t *result_pool)
1249 rep_read_baton_t *b;
1251 b = apr_pcalloc(result_pool, sizeof(*b));
1254 b->base_window = NULL;
1257 b->md5_checksum_ctx = svn_checksum_ctx_create(svn_checksum_md5,
1259 b->checksum_finalized = FALSE;
/* Remember the digest recorded for REP; rep_read_contents() compares the
 * computed MD5 against B->MD5_DIGEST once all data has been read. */
1260 memcpy(b->md5_digest, rep->md5_digest, sizeof(rep->md5_digest));
1261 b->len = rep->expanded_size;
1263 b->fulltext_cache_key = fulltext_cache_key;
1265 /* Clearable sub-pools. Since they have to remain valid for as long as B
1266 lives, we can't take them from some scratch pool. The caller of this
1267 function will have no control over how those subpools will be used. */
1268 b->scratch_pool = svn_pool_create(result_pool);
1269 b->filehandle_pool = svn_pool_create(result_pool);
1270 b->fulltext_cache = NULL;
1271 b->fulltext_delivered = 0;
1272 b->current_fulltext = NULL;
1274 /* Save our output baton. */
1277 return SVN_NO_ERROR;
1280 /* Skip forwards to THIS_CHUNK in REP_STATE and then read the next delta
1281 window into *NWIN. */
/* Outline of the visible steps:
 *  1. Try the window cache (get_cached_window()).
 *  2. Open the shared rev file, run block_read() and retry the cache.
 *  3. Otherwise seek to RS->START + RS->CURRENT, skip svndiff windows while
 *     RS->CHUNK_INDEX < THIS_CHUNK, read one window and derive the new
 *     RS->CURRENT from the file offset reached.
 *  4. Pass the freshly read window to set_cached_window().
 * NOTE(review): the conditions guarding steps 1, 2 and 4 and the update of
 * RS->CHUNK_INDEX are not visible in this listing; presumably the guards
 * test CACHEABLE / IS_CACHED -- confirm. */
1282 static svn_error_t *
1283 read_delta_window(svn_txdelta_window_t **nwin, int this_chunk,
1284 rep_state_t *rs, apr_pool_t *result_pool,
1285 apr_pool_t *scratch_pool)
1287 svn_boolean_t is_cached;
1288 apr_off_t start_offset;
1289 apr_off_t end_offset;
1290 apr_pool_t *iterpool;
1291 svn_stream_t *stream;
1292 svn_fs_x__revision_file_t *file;
/* CACHEABLE: RS still is at chunk 0, its change set is a revision and a
 * window cache is available. */
1293 svn_boolean_t cacheable = rs->chunk_index == 0
1294 && svn_fs_x__is_revision(rs->rep_id.change_set)
1295 && rs->window_cache;
1297 SVN_ERR_ASSERT(rs->chunk_index <= this_chunk);
1299 SVN_ERR(dbg__log_access(rs->sfile->fs, &rs->rep_id, NULL,
1300 SVN_FS_X__ITEM_TYPE_ANY_REP, scratch_pool));
1302 /* Read the next window. But first, try to find it in the cache. */
1305 SVN_ERR(get_cached_window(nwin, rs, this_chunk, &is_cached,
1306 result_pool, scratch_pool));
1308 return SVN_NO_ERROR;
1311 /* someone has to actually read the data from file. Open it */
1312 SVN_ERR(auto_open_shared_file(rs->sfile));
1313 file = rs->sfile->rfile;
1315 /* invoke the 'block-read' feature for non-txn data.
1316 However, don't do that if we are in the middle of some representation,
1317 because the block is unlikely to contain other data. */
1320 SVN_ERR(block_read(NULL, rs->sfile->fs, &rs->rep_id, file, NULL,
1321 result_pool, scratch_pool));
1323 /* reading the whole block probably also provided us with the
1324 desired txdelta window */
1325 SVN_ERR(get_cached_window(nwin, rs, this_chunk, &is_cached,
1326 result_pool, scratch_pool));
1328 return SVN_NO_ERROR;
1331 /* data is still not cached -> we need to read it.
1332 Make sure we have all the necessary info. */
1333 SVN_ERR(auto_set_start_offset(rs, scratch_pool));
1334 SVN_ERR(auto_read_diff_version(rs, scratch_pool));
1336 /* RS->FILE may be shared between RS instances -> make sure we point
1337 * to the right data. */
1338 start_offset = rs->start + rs->current;
1339 SVN_ERR(svn_fs_x__rev_file_seek(file, NULL, start_offset));
1341 /* Skip windows to reach the current chunk if we aren't there yet. */
1342 iterpool = svn_pool_create(scratch_pool);
1343 while (rs->chunk_index < this_chunk)
1345 apr_file_t *apr_file;
1346 svn_pool_clear(iterpool);
1348 SVN_ERR(svn_fs_x__rev_file_get(&apr_file, file));
1349 SVN_ERR(svn_txdelta_skip_svndiff_window(apr_file, rs->ver, iterpool));
/* START_OFFSET now is the file position after the skipped window, i.e. the
 * position at which the next window begins. */
1351 SVN_ERR(svn_io_file_get_offset(&start_offset, apr_file, iterpool));
1353 rs->current = start_offset - rs->start;
/* Note the `>=': unlike the check after the actual read further down,
 * reaching RS->SIZE exactly is already reported as corruption here. */
1354 if (rs->current >= rs->size)
1355 return svn_error_create(SVN_ERR_FS_CORRUPT, NULL,
1356 _("Reading one svndiff window read "
1357 "beyond the end of the "
1360 svn_pool_destroy(iterpool);
1362 /* Actually read the next window. */
1363 SVN_ERR(svn_fs_x__rev_file_stream(&stream, file));
1364 SVN_ERR(svn_txdelta_read_svndiff_window(nwin, stream, rs->ver,
1366 SVN_ERR(svn_fs_x__rev_file_offset(&end_offset, file));
1367 rs->current = end_offset - rs->start;
1368 if (rs->current > rs->size)
1369 return svn_error_create(SVN_ERR_FS_CORRUPT, NULL,
1370 _("Reading one svndiff window read beyond "
1371 "the end of the representation"));
1373 /* the window has not been cached before, thus cache it now
1374 * (if caching is used for them at all) */
1376 SVN_ERR(set_cached_window(*nwin, rs, start_offset, scratch_pool));
1378 return SVN_NO_ERROR;
1381 /* Read the whole representation RS and return it in *NWIN. */
/* The extractor is looked up in FFD->REPS_CONTAINER_CACHE under the key
 * (packed base revision of RS's revision, RS->START); if EXTRACTOR still is
 * NULL afterwards, block_read() is asked to deliver it.  The data is then
 * produced by svn_fs_x__extractor_drive() starting at RS->CURRENT, and
 * RS->CURRENT is advanced by SIZE.
 * NOTE(review): SIZE is used below but its parameter declaration is not
 * visible in this listing. */
1382 static svn_error_t *
1383 read_container_window(svn_stringbuf_t **nwin,
1386 apr_pool_t *result_pool,
1387 apr_pool_t *scratch_pool)
1389 svn_fs_x__rep_extractor_t *extractor = NULL;
1390 svn_fs_t *fs = rs->sfile->fs;
1391 svn_fs_x__data_t *ffd = fs->fsap_data;
1392 svn_fs_x__pair_cache_key_t key;
1393 svn_revnum_t revision = svn_fs_x__get_revnum(rs->rep_id.change_set);
1394 svn_boolean_t is_cached = FALSE;
1395 svn_fs_x__reps_baton_t baton;
/* RS->START must be known before it can serve as the second key part. */
1397 SVN_ERR(auto_set_start_offset(rs, scratch_pool));
1398 key.revision = svn_fs_x__packed_base_rev(fs, revision);
1399 key.second = rs->start;
1401 /* already in cache? */
1403 baton.idx = rs->sub_item;
1405 SVN_ERR(svn_cache__get_partial((void**)&extractor, &is_cached,
1406 ffd->reps_container_cache, &key,
1407 svn_fs_x__reps_get_func, &baton,
1410 /* read from disk, if necessary */
1411 if (extractor == NULL)
1413 SVN_ERR(auto_open_shared_file(rs->sfile));
1414 SVN_ERR(block_read((void **)&extractor, fs, &rs->rep_id,
1415 rs->sfile->rfile, NULL,
1416 result_pool, scratch_pool));
1419 SVN_ERR(svn_fs_x__extractor_drive(nwin, extractor, rs->current, size,
1420 result_pool, scratch_pool));
1423 rs->current += (apr_off_t)size;
1425 return SVN_NO_ERROR;
1428 /* Get the undeltified window that is a result of combining all deltas
1429 from the current desired representation identified in *RB with its
1430 base representation. Store the window in *RESULT. */
/* Pool usage visible below: WINDOW_POOL holds the window array and the
 * windows read by read_delta_window(); ITERPOOL is cleared once per loop
 * pass; a fresh NEW_POOL is created for every combined buffer.  All pools
 * created here are subpools of RB->SCRATCH_POOL.
 * NOTE(review): the pool rotation statements, the `break', the updates of
 * SOURCE / *RESULT and some declarations are not visible in this listing. */
1431 static svn_error_t *
1432 get_combined_window(svn_stringbuf_t **result,
1433 rep_read_baton_t *rb)
1435 apr_pool_t *pool, *new_pool, *window_pool;
1437 apr_array_header_t *windows;
1438 svn_stringbuf_t *source, *buf = rb->base_window;
1440 apr_pool_t *iterpool;
1442 /* Read all windows that we need to combine. This is fine because
1443 the size of each window is relatively small (100kB) and skip-
1444 delta limits the number of deltas in a chain to well under 100.
1445 Stop early if one of them does not depend on its predecessors. */
1446 window_pool = svn_pool_create(rb->scratch_pool);
1447 windows = apr_array_make(window_pool, 0, sizeof(svn_txdelta_window_t *));
1448 iterpool = svn_pool_create(rb->scratch_pool);
1449 for (i = 0; i < rb->rs_list->nelts; ++i)
1451 svn_txdelta_window_t *window;
1453 svn_pool_clear(iterpool);
1455 rs = APR_ARRAY_IDX(rb->rs_list, i, rep_state_t *);
1456 SVN_ERR(read_delta_window(&window, rb->chunk_index, rs, window_pool,
1459 APR_ARRAY_PUSH(windows, svn_txdelta_window_t *) = window;
1460 if (window->src_ops == 0)
1467 /* Combine in the windows from the other delta reps. */
1468 pool = svn_pool_create(rb->scratch_pool);
/* Walk the collected windows backwards, down to index 0. */
1469 for (--i; i >= 0; --i)
1471 svn_txdelta_window_t *window;
1473 svn_pool_clear(iterpool);
1475 rs = APR_ARRAY_IDX(rb->rs_list, i, rep_state_t *);
1476 window = APR_ARRAY_IDX(windows, i, svn_txdelta_window_t *);
1478 /* Maybe, we've got a start representation in a container. If we do,
1479 read as much data from it as the needed for the txdelta window's
1481 Note that BUF / SOURCE may only be NULL in the first iteration. */
1483 if (source == NULL && rb->src_state != NULL)
1484 SVN_ERR(read_container_window(&source, rb->src_state,
1485 window->sview_len, pool, iterpool));
1487 /* Combine this window with the current one. */
1488 new_pool = svn_pool_create(rb->scratch_pool);
1489 buf = svn_stringbuf_create_ensure(window->tview_len, new_pool);
/* BUF->LEN is handed to svn_txdelta_apply_instructions() by address and
 * compared against TVIEW_LEN right afterwards. */
1490 buf->len = window->tview_len;
1492 svn_txdelta_apply_instructions(window, source ? source->data : NULL,
1493 buf->data, &buf->len);
1494 if (buf->len != window->tview_len)
1495 return svn_error_create(SVN_ERR_FS_CORRUPT, NULL,
1496 _("svndiff window length is "
1499 /* Cache windows only if the whole rep content could be read as a
1500 single chunk. Only then will no other chunk need a deeper RS
1501 list than the cached chunk. */
1502 if ( (rb->chunk_index == 0) && (rs->current == rs->size)
1503 && svn_fs_x__is_revision(rs->rep_id.change_set)
1504 && rs->combined_cache)
1505 SVN_ERR(set_cached_combined_window(buf, rs, new_pool));
1509 /* Cycle pools so that we only need to hold three windows at a time. */
1510 svn_pool_destroy(pool);
1513 svn_pool_destroy(iterpool);
1515 svn_pool_destroy(window_pool);
1518 return SVN_NO_ERROR;
1521 /* Returns whether or not the expanded fulltext of the file is cachable
1522 * based on its size SIZE. The decision depends on the cache used by FFD.
/* SIZE is only handed to svn_cache__is_cachable() -- after narrowing it to
 * apr_size_t -- when the first test (SIZE < APR_SIZE_MAX) has passed; the
 * `&&' short-circuits otherwise.  The cache consulted is
 * FFD->FULLTEXT_CACHE. */
1524 static svn_boolean_t
1525 fulltext_size_is_cachable(svn_fs_x__data_t *ffd,
1526 svn_filesize_t size)
1528 return (size < APR_SIZE_MAX)
1529 && svn_cache__is_cachable(ffd->fulltext_cache, (apr_size_t)size);
1532 /* Close method used on streams returned by read_representation().
/* BATON is the stream's rep_read_baton_t.  Closing destroys its two pools,
 * SCRATCH_POOL and FILEHANDLE_POOL, and always reports success. */
1534 static svn_error_t *
1535 rep_read_contents_close(void *baton)
1537 rep_read_baton_t *rb = baton;
1539 svn_pool_destroy(rb->scratch_pool);
1540 svn_pool_destroy(rb->filehandle_pool);
1542 return SVN_NO_ERROR;
1545 /* Initialize the representation read state RS for the given REP_HEADER and
1546 * p2l index ENTRY. If not NULL, assign FILE and STREAM to RS.
1547 * Allocate all sub-structures of RS in RESULT_POOL.
/* A new shared_file_t is allocated (zeroed) from RESULT_POOL and wired to
 * REV_FILE, FS and the revision of ENTRY's first item.  RS then receives
 * its id, header size, start offset, size, chunk index and the two window
 * caches found in FS's private data.
 * NOTE(review): the FS parameter declaration and possibly further member
 * assignments are not visible in this listing. */
1549 static svn_error_t *
1550 init_rep_state(rep_state_t *rs,
1551 svn_fs_x__rep_header_t *rep_header,
1553 svn_fs_x__revision_file_t *rev_file,
1554 svn_fs_x__p2l_entry_t* entry,
1555 apr_pool_t *result_pool)
1557 svn_fs_x__data_t *ffd = fs->fsap_data;
1558 shared_file_t *shared_file = apr_pcalloc(result_pool, sizeof(*shared_file));
1560 /* this function does not apply to representation containers */
1561 SVN_ERR_ASSERT(entry->type >= SVN_FS_X__ITEM_TYPE_FILE_REP
1562 && entry->type <= SVN_FS_X__ITEM_TYPE_DIR_PROPS);
1563 SVN_ERR_ASSERT(entry->item_count == 1);
1565 shared_file->rfile = rev_file;
1566 shared_file->fs = fs;
1567 shared_file->revision = svn_fs_x__get_revnum(entry->items[0].change_set);
1568 shared_file->pool = result_pool;
1570 rs->sfile = shared_file;
1571 rs->rep_id = entry->items[0];
1572 rs->header_size = rep_header->header_size;
/* The rep's data starts directly behind its header. */
1573 rs->start = entry->offset + rs->header_size;
/* NOTE(review): the constant 7 presumably accounts for a fixed-size
 * trailer that follows the rep data inside the entry -- confirm. */
1575 rs->size = entry->size - rep_header->header_size - 7;
1577 rs->chunk_index = 0;
1578 rs->window_cache = ffd->txdelta_window_cache;
1579 rs->combined_cache = ffd->combined_window_cache;
1581 return SVN_NO_ERROR;
1584 /* Walk through all windows in the representation addressed by RS in FS
1585 * (excluding the delta bases) and put those not already cached into the
1586 * window caches. If MAX_OFFSET is not -1, don't read windows that start
1587 * at or beyond that offset. As a side effect, return the total sum of all
1588 * expanded window sizes in *FULLTEXT_LEN.
1589 * Use SCRATCH_POOL for temporary allocations.
/* Loops while RS->CURRENT < RS->SIZE.  A pass either takes the window
 * sizes from get_cached_window_sizes() or reads the window from the rev
 * file and passes it to set_cached_window(); in both visible variants
 * *FULLTEXT_LEN grows by the window's target length and RS->CURRENT moves
 * behind the window.  ITERPOOL is cleared per pass and destroyed on both
 * visible successful exits.
 * NOTE(review): the branch conditions, the parameter declarations for FS
 * and RS, and any initialization of *FULLTEXT_LEN are not visible in this
 * listing. */
1591 static svn_error_t *
1592 cache_windows(svn_filesize_t *fulltext_len,
1595 apr_off_t max_offset,
1596 apr_pool_t *scratch_pool)
1598 apr_pool_t *iterpool = svn_pool_create(scratch_pool);
1601 while (rs->current < rs->size)
1603 svn_boolean_t is_cached = FALSE;
1604 window_sizes_t *window_sizes;
1606 svn_pool_clear(iterpool);
/* Stop as soon as the next window would start at or beyond MAX_OFFSET
 * (absolute file offset); -1 disables this limit. */
1607 if (max_offset != -1 && rs->start + rs->current >= max_offset)
1609 svn_pool_destroy(iterpool);
1610 return SVN_NO_ERROR;
1613 /* efficiently skip windows that are still being cached instead
1614 * of fully decoding them */
1615 SVN_ERR(get_cached_window_sizes(&window_sizes, rs, &is_cached,
1619 *fulltext_len += window_sizes->target_len;
1620 rs->current += window_sizes->packed_len;
1624 svn_txdelta_window_t *window;
1625 svn_fs_x__revision_file_t *file = rs->sfile->rfile;
1626 svn_stream_t *stream;
1627 apr_off_t start_offset = rs->start + rs->current;
1628 apr_off_t end_offset;
1629 apr_off_t block_start;
1631 /* navigate to & read the current window */
1632 SVN_ERR(svn_fs_x__rev_file_stream(&stream, file));
1633 SVN_ERR(svn_fs_x__rev_file_seek(file, &block_start, start_offset));
1634 SVN_ERR(svn_txdelta_read_svndiff_window(&window, stream, rs->ver,
1637 /* aggregate expanded window size */
1638 *fulltext_len += window->tview_len;
1640 /* determine on-disk window size */
1641 SVN_ERR(svn_fs_x__rev_file_offset(&end_offset, rs->sfile->rfile));
1642 rs->current = end_offset - rs->start;
1643 if (rs->current > rs->size)
1644 return svn_error_create(SVN_ERR_FS_CORRUPT, NULL,
1645 _("Reading one svndiff window read beyond "
1646 "the end of the representation"));
1648 /* if the window has not been cached before, cache it now
1649 * (if caching is used for them at all) */
1651 SVN_ERR(set_cached_window(window, rs, start_offset, iterpool));
1657 svn_pool_destroy(iterpool);
1659 return SVN_NO_ERROR;
1662 /* Try to get the representation header identified by KEY from FS's cache.
1663 * If it has not been cached, read it from the current position in STREAM
1664 * and put it into the cache (if caching has been enabled for rep headers).
1665 * Return the result in *REP_HEADER. Use POOL for allocations.
/* Flow: svn_cache__get() on the rep header cache of FS's private data
 * first; otherwise the header is parsed from FILE's stream by
 * svn_fs_x__read_rep_header() and stored in the same cache under KEY.
 * POOL serves as both pool arguments of the parser.
 * NOTE(review): the test that guards the early return (presumably on
 * IS_CACHED) and the FS / POOL parameter declarations are not visible in
 * this listing. */
1667 static svn_error_t *
1668 read_rep_header(svn_fs_x__rep_header_t **rep_header,
1670 svn_fs_x__revision_file_t *file,
1671 svn_fs_x__representation_cache_key_t *key,
1674 svn_fs_x__data_t *ffd = fs->fsap_data;
1675 svn_stream_t *stream;
1676 svn_boolean_t is_cached = FALSE;
1678 SVN_ERR(svn_cache__get((void**)rep_header, &is_cached,
1679 ffd->rep_header_cache, key, pool));
1681 return SVN_NO_ERROR;
1683 SVN_ERR(svn_fs_x__rev_file_stream(&stream, file));
1684 SVN_ERR(svn_fs_x__read_rep_header(rep_header, stream, pool, pool));
1685 SVN_ERR(svn_cache__set(ffd->rep_header_cache, key, *rep_header, pool));
1687 return SVN_NO_ERROR;
/* Reports, for the representation described by ENTRY, the size computed by
 * init_rep_state() in *PACKED_LEN and lets cache_windows() add up the
 * expanded window sizes in *EXPANDED_LEN.  ENTRY must be a single-item
 * entry whose type lies between SVN_FS_X__ITEM_TYPE_FILE_REP and
 * SVN_FS_X__ITEM_TYPE_DIR_PROPS (asserted below).
 * NOTE(review): the return type line and the FS parameter declaration are
 * not visible in this listing. */
1691 svn_fs_x__get_representation_length(svn_filesize_t *packed_len,
1692 svn_filesize_t *expanded_len,
1694 svn_fs_x__revision_file_t *rev_file,
1695 svn_fs_x__p2l_entry_t* entry,
1696 apr_pool_t *scratch_pool)
1698 svn_fs_x__representation_cache_key_t key = { 0 };
1699 rep_state_t rs = { 0 };
1700 svn_fs_x__rep_header_t *rep_header;
1702 /* this function does not apply to representation containers */
1703 SVN_ERR_ASSERT(entry->type >= SVN_FS_X__ITEM_TYPE_FILE_REP
1704 && entry->type <= SVN_FS_X__ITEM_TYPE_DIR_PROPS);
1705 SVN_ERR_ASSERT(entry->item_count == 1);
1707 /* get / read the representation header */
1708 key.revision = svn_fs_x__get_revnum(entry->items[0].change_set);
1709 key.is_packed = svn_fs_x__is_packed_rev(fs, key.revision);
1710 key.item_index = entry->items[0].number;
1711 SVN_ERR(read_rep_header(&rep_header, fs, rev_file, &key, scratch_pool));
1713 /* prepare representation reader state (rs) structure */
1714 SVN_ERR(init_rep_state(&rs, rep_header, fs, rev_file, entry,
1717 /* RS->SFILE may be shared between RS instances -> make sure we point
1718 * to the right data. */
1719 *packed_len = rs.size;
/* -1 as MAX_OFFSET: no offset limit for the window walk. */
1720 SVN_ERR(cache_windows(expanded_len, fs, &rs, -1, scratch_pool));
1722 return SVN_NO_ERROR;
1725 /* Return the next *LEN bytes of the rep from our plain / delta windows
1726 and store them in *BUF. */
/* Two visible paths: a special case that copies straight out of
 * RB->BASE_WINDOW (reading it from the container first if necessary), and
 * a loop that serves data from the buffered chunk RB->BUF and refills that
 * buffer through get_combined_window().
 * NOTE(review): several parameter declarations, locals, branch keywords
 * and comment terminators are not visible in this listing. */
1727 static svn_error_t *
1728 get_contents_from_windows(rep_read_baton_t *rb,
1732 apr_size_t copy_len, remaining = *len;
1736 /* Special case for when there are no delta reps, only a
1737 containered text. */
1738 if (rb->rs_list->nelts == 0 && rb->buf == NULL)
1740 copy_len = remaining;
1743 /* reps in containers don't have a header */
1744 if (rs->header_size == 0 && rb->base_window == NULL)
1746 /* RS->SIZE is unreliable here because it is based upon
1747 * the delta rep size _before_ putting the data into a
1749 SVN_ERR(read_container_window(&rb->base_window, rs, rb->len,
1750 rb->scratch_pool, rb->scratch_pool));
1751 rs->current -= rb->base_window->len;
1754 if (rb->base_window != NULL)
1756 /* We got the desired rep directly from the cache.
1757 This is where we need the pseudo rep_state created
1758 by build_rep_list(). */
/* RS->CURRENT serves as read position within the base window; COPY_LEN is
 * clamped to the bytes available behind that position. */
1759 apr_size_t offset = (apr_size_t)rs->current;
1760 if (offset >= rb->base_window->len)
1762 else if (copy_len > rb->base_window->len - offset)
1763 copy_len = rb->base_window->len - offset;
1765 memcpy (cur, rb->base_window->data + offset, copy_len);
1768 rs->current += copy_len;
1770 return SVN_NO_ERROR;
1773 while (remaining > 0)
1775 /* If we have buffered data from a previous chunk, use that. */
1778 /* Determine how much to copy from the buffer. */
1779 copy_len = rb->buf_len - rb->buf_pos;
1780 if (copy_len > remaining)
1781 copy_len = remaining;
1783 /* Actually copy the data. */
1784 memcpy(cur, rb->buf + rb->buf_pos, copy_len);
1785 rb->buf_pos += copy_len;
1787 remaining -= copy_len;
1789 /* If the buffer is all used up, clear it and empty the
1791 if (rb->buf_pos == rb->buf_len)
1793 svn_pool_clear(rb->scratch_pool);
1799 svn_stringbuf_t *sbuf = NULL;
1801 rs = APR_ARRAY_IDX(rb->rs_list, 0, rep_state_t *);
1802 if (rs->current == rs->size)
1805 /* Get more buffered data by evaluating a chunk. */
1806 SVN_ERR(get_combined_window(&sbuf, rb));
/* Expose the combined window's bytes through RB->BUF / RB->BUF_LEN. */
1809 rb->buf_len = sbuf->len;
1810 rb->buf = sbuf->data;
1817 return SVN_NO_ERROR;
1820 /* Baton type for get_fulltext_partial. */
/* NOTE(review): the member declarations themselves are not visible in this
 * listing.  Code elsewhere in this file accesses the members `buffer',
 * `start', `len' and `read' of this type. */
1821 typedef struct fulltext_baton_t
1823 /* Target buffer to write to; of at least LEN bytes. */
1826 /* Offset within the respective fulltext at which we shall start to
1827 copy data into BUFFER. */
1830 /* Number of bytes to copy. The actual amount may be less in case
1831 the fulltext is short(er). */
1834 /* Number of bytes actually copied into BUFFER. */
1838 /* Implement svn_cache__partial_getter_func_t for fulltext caches.
1839 * From the fulltext in DATA, we copy the range specified by the
1840 * fulltext_baton_t* BATON into the buffer provided by that baton.
1841 * OUT and RESULT_POOL are not used.
/* BATON is a fulltext_baton_t.  The copy range is clipped twice: the start
 * offset is limited to the fulltext length, and the byte count to what
 * remains behind that offset; the resulting count is stored in the baton's
 * `read' member before the memcpy().
 * NOTE(review): the declarations of the DATA and BATON parameters are not
 * visible in this listing. */
1843 static svn_error_t *
1844 get_fulltext_partial(void **out,
1846 apr_size_t data_len,
1848 apr_pool_t *result_pool)
1850 fulltext_baton_t *fulltext_baton = baton;
1852 /* We cached the fulltext with a NUL appended to it. */
1853 apr_size_t fulltext_len = data_len - 1;
1855 /* Clip the copy range to what the fulltext size allows. */
1856 apr_size_t start = MIN(fulltext_baton->start, fulltext_len);
1857 fulltext_baton->read = MIN(fulltext_len - start, fulltext_baton->len);
1859 /* Copy the data to the output buffer and be done. */
1860 memcpy(fulltext_baton->buffer, (const char *)data + start,
1861 fulltext_baton->read);
1863 return SVN_NO_ERROR;
1866 /* Find the fulltext specified in BATON in the fulltext cache given
1867 * as well by BATON. If that succeeds, set *CACHED to TRUE and copy
1868 * up to the next *LEN bytes into BUFFER. Set *LEN to the actual
1869 * number of bytes copied.
/* get_fulltext_partial() does the copying; this wrapper fills its baton
 * from BATON->FULLTEXT_DELIVERED and *LEN, runs the partial cache lookup
 * with BATON->SCRATCH_POOL, then adds the number of bytes read to
 * FULLTEXT_DELIVERED and returns that number in *LEN.
 * NOTE(review): the BUFFER / LEN parameter declarations, the DUMMY local
 * and the condition guarding the two updates after the lookup (presumably
 * *CACHED) are not visible in this listing. */
1871 static svn_error_t *
1872 get_contents_from_fulltext(svn_boolean_t *cached,
1873 rep_read_baton_t *baton,
1878 fulltext_baton_t fulltext_baton;
/* FULLTEXT_DELIVERED must survive the narrowing to apr_size_t unchanged. */
1880 SVN_ERR_ASSERT((apr_size_t)baton->fulltext_delivered
1881 == baton->fulltext_delivered);
1882 fulltext_baton.buffer = buffer;
1883 fulltext_baton.start = (apr_size_t)baton->fulltext_delivered;
1884 fulltext_baton.len = *len;
1885 fulltext_baton.read = 0;
1887 SVN_ERR(svn_cache__get_partial(&dummy, cached, baton->fulltext_cache,
1888 &baton->fulltext_cache_key,
1889 get_fulltext_partial, &fulltext_baton,
1890 baton->scratch_pool));
1894 baton->fulltext_delivered += fulltext_baton.read;
1895 *len = fulltext_baton.read;
1898 return SVN_NO_ERROR;
1901 /* Determine the optimal size of a string buf that shall receive a
1902 * (full-) text of NEEDED bytes.
1904 * The critical point is that those buffers may be very large and
1905 * can cause memory fragmentation. We apply simple heuristics to
1906 * make fragmentation less likely.
/* Visible logic: two early-out tests on NEEDED, then a search that starts
 * at SVN__STREAM_CHUNK_SIZE and runs while OPTIMAL - OVERHEAD < NEEDED;
 * the result is OPTIMAL - OVERHEAD.
 * NOTE(review): the return type line, the statements executed by the two
 * early-out tests and the loop body are not visible in this listing. */
1909 optimimal_allocation_size(apr_size_t needed)
1911 /* For all allocations, assume some overhead that is shared between
1912 * OS memory management, APR memory management and svn_stringbuf_t. */
1913 const apr_size_t overhead = 0x400;
1916 /* If an allocation size is safe for other ephemeral buffers, it should
1917 * be safe for ours. */
1918 if (needed <= SVN__STREAM_CHUNK_SIZE)
1921 /* Paranoia edge case:
1922 * Skip our heuristics if they created arithmetical overflow.
1923 * Beware to make this test work for NEEDED = APR_SIZE_MAX as well! */
1924 if (needed >= APR_SIZE_MAX / 2 - overhead)
1927 /* As per definition SVN__STREAM_CHUNK_SIZE is a power of two.
1928 * Since we know NEEDED to be larger than that, use it as the
1931 * Heuristics: Allocate a power-of-two number of bytes that fit
1932 * NEEDED plus some OVERHEAD. The APR allocator
1933 * will round it up to the next full page size.
1935 optimal = SVN__STREAM_CHUNK_SIZE;
1936 while (optimal - overhead < needed)
1939 /* This is above or equal to NEEDED. */
1940 return optimal - overhead;
1943 /* After a fulltext cache lookup failure, we will continue to read from
1944 * combined delta or plain windows. However, we must first make that data
1945 * stream in BATON catch up to the position LEN already delivered from the
1946 * fulltext cache. Also, we need to store the reconstructed fulltext if we
1947 * want to cache it at the end.
/* Two visible strategies, selected by whether BATON->FULLTEXT_CACHE_KEY has
 * a valid revision: read into a newly allocated BATON->CURRENT_FULLTEXT
 * buffer, or drain into a temporary SVN__STREAM_CHUNK_SIZE-byte buffer
 * taken from a subpool of BATON->SCRATCH_POOL.  Both paths update the MD5
 * context and BATON->OFF for the skipped data.  Errors reported by
 * get_contents_from_windows() are collected in ERR and returned through
 * svn_error_trace().
 * NOTE(review): the LEN parameter declaration, the loop bookkeeping and
 * the conditions around the MD5 updates are not visible in this listing. */
1949 static svn_error_t *
1950 skip_contents(rep_read_baton_t *baton,
1953 svn_error_t *err = SVN_NO_ERROR;
1955 /* Do we want to cache the reconstructed fulltext? */
1956 if (SVN_IS_VALID_REVNUM(baton->fulltext_cache_key.revision))
/* Size the buffer for the larger of LEN and the expected fulltext length. */
1959 svn_filesize_t to_alloc = MAX(len, baton->len);
1961 /* This should only be happening if BATON->LEN and LEN are
1962 * cacheable, implying they fit into memory. */
1963 SVN_ERR_ASSERT((apr_size_t)to_alloc == to_alloc);
1965 /* Allocate the fulltext buffer. */
1966 baton->current_fulltext = svn_stringbuf_create_ensure(
1967 optimimal_allocation_size((apr_size_t)to_alloc),
1968 baton->filehandle_pool);
1970 /* Read LEN bytes from the window stream and store the data
1971 * in the fulltext buffer (will be filled by further reads later). */
1972 baton->current_fulltext->len = (apr_size_t)len;
1973 baton->current_fulltext->data[(apr_size_t)len] = 0;
1975 buffer = baton->current_fulltext->data;
1976 while (len > 0 && !err)
1978 apr_size_t to_read = (apr_size_t)len;
1979 err = get_contents_from_windows(baton, buffer, &to_read);
1984 /* Make the MD5 calculation catch up with the data delivered
1985 * (we did not run MD5 on the data that we took from the cache). */
1988 SVN_ERR(svn_checksum_update(baton->md5_checksum_ctx,
1989 baton->current_fulltext->data,
1990 baton->current_fulltext->len));
1991 baton->off += baton->current_fulltext->len;
1996 /* Simply drain LEN bytes from the window stream. */
1997 apr_pool_t *subpool = svn_pool_create(baton->scratch_pool);
1998 char *buffer = apr_palloc(subpool, SVN__STREAM_CHUNK_SIZE);
2000 while (len > 0 && !err)
/* Never request more than the temporary buffer can hold. */
2002 apr_size_t to_read = len > SVN__STREAM_CHUNK_SIZE
2003 ? SVN__STREAM_CHUNK_SIZE
2006 err = get_contents_from_windows(baton, buffer, &to_read);
2009 /* Make the MD5 calculation catch up with the data delivered
2010 * (we did not run MD5 on the data that we took from the cache). */
2013 SVN_ERR(svn_checksum_update(baton->md5_checksum_ctx,
2015 baton->off += to_read;
2019 svn_pool_destroy(subpool);
2022 return svn_error_trace(err);
2025 /* BATON is of type `rep_read_baton_t'; read the next *LEN bytes of the
2026 representation and store them in *BUF. Sum as we read and verify
2027 the MD5 sum at the end. */
/* Visible order of work: fulltext cache lookup (while RB->FULLTEXT_CACHE is
 * set), lazy set-up of the window stream through build_rep_list() and
 * skip_contents(), the actual read through get_contents_from_windows(),
 * MD5 bookkeeping with a final digest comparison, and finally storing the
 * collected fulltext in the fulltext cache once RB->OFF equals RB->LEN.
 * NOTE(review): the BUF / LEN parameter declarations, several branch
 * conditions and the update of RB->OFF are not visible in this listing. */
2028 static svn_error_t *
2029 rep_read_contents(void *baton,
2033 rep_read_baton_t *rb = baton;
2035 /* Get data from the fulltext cache for as long as we can. */
2036 if (rb->fulltext_cache)
2038 svn_boolean_t cached;
2039 SVN_ERR(get_contents_from_fulltext(&cached, rb, buf, len));
2041 return SVN_NO_ERROR;
2043 /* Cache miss. From now on, we will never read from the fulltext
2044 * cache for this representation anymore. */
2045 rb->fulltext_cache = NULL;
2048 /* No fulltext cache to help us. We must read from the window stream. */
2051 /* Window stream not initialized, yet. Do it now. */
2052 SVN_ERR(build_rep_list(&rb->rs_list, &rb->base_window,
2053 &rb->src_state, rb->fs, &rb->rep,
2054 rb->filehandle_pool, rb->scratch_pool));
2056 /* In case we did read from the fulltext cache before, make the
2057 * window stream catch up. Also, initialize the fulltext buffer
2058 * if we want to cache the fulltext at the end. */
2059 SVN_ERR(skip_contents(rb, rb->fulltext_delivered));
2062 /* Get the next block of data.
2063 * Keep in mind that the representation might be empty and leave us
2064 * already positioned at the end of the rep. */
2065 if (rb->off == rb->len)
2068 SVN_ERR(get_contents_from_windows(rb, buf, len));
/* While a fulltext buffer exists, the bytes just read are appended to it;
 * the buffer is put into the fulltext cache at the end of this function. */
2070 if (rb->current_fulltext)
2071 svn_stringbuf_appendbytes(rb->current_fulltext, buf, *len);
2073 /* Perform checksumming. We want to check the checksum as soon as
2074 the last byte of data is read, in case the caller never performs
2075 a short read, but we don't want to finalize the MD5 context
2077 if (!rb->checksum_finalized)
2079 SVN_ERR(svn_checksum_update(rb->md5_checksum_ctx, buf, *len));
2081 if (rb->off == rb->len)
2083 svn_checksum_t *md5_checksum;
2084 svn_checksum_t expected;
2085 expected.kind = svn_checksum_md5;
2086 expected.digest = rb->md5_digest;
2088 rb->checksum_finalized = TRUE;
2089 SVN_ERR(svn_checksum_final(&md5_checksum, rb->md5_checksum_ctx,
2091 if (!svn_checksum_match(md5_checksum, &expected))
2092 return svn_error_create(SVN_ERR_FS_CORRUPT,
2093 svn_checksum_mismatch_err(&expected, md5_checksum,
2095 _("Checksum mismatch while reading representation")),
2100 if (rb->off == rb->len && rb->current_fulltext)
2102 svn_fs_x__data_t *ffd = rb->fs->fsap_data;
2103 SVN_ERR(svn_cache__set(ffd->fulltext_cache, &rb->fulltext_cache_key,
2104 rb->current_fulltext, rb->scratch_pool));
2105 rb->current_fulltext = NULL;
2108 return SVN_NO_ERROR;
/* Sets *CONTENTS_P either to an empty stream or to a stream backed by a
 * rep_read_baton_t created in RESULT_POOL.  For the latter, the fulltext
 * cache key is built from REP's revision and item number; the stream's
 * close handler is rep_read_contents_close().
 * NOTE(review): the return type line, the FS parameter declaration, the
 * condition selecting the empty stream and the read handler argument are
 * not visible in this listing. */
2112 svn_fs_x__get_contents(svn_stream_t **contents_p,
2114 svn_fs_x__representation_t *rep,
2115 svn_boolean_t cache_fulltext,
2116 apr_pool_t *result_pool)
2120 *contents_p = svn_stream_empty(result_pool);
2124 svn_fs_x__data_t *ffd = fs->fsap_data;
2125 svn_filesize_t len = rep->expanded_size;
2126 rep_read_baton_t *rb;
2127 svn_revnum_t revision = svn_fs_x__get_revnum(rep->id.change_set);
2129 svn_fs_x__pair_cache_key_t fulltext_cache_key = { 0 };
2130 fulltext_cache_key.revision = revision;
2131 fulltext_cache_key.second = rep->id.number;
2133 /* Initialize the reader baton. Some members may be added lazily
2134 * while reading from the stream */
2135 SVN_ERR(rep_read_get_baton(&rb, fs, rep, fulltext_cache_key,
2138 /* Make the stream attempt fulltext cache lookups if the fulltext
2139 * is cacheable. If it is not, then also don't try to buffer and
2142 && SVN_IS_VALID_REVNUM(revision)
2143 && fulltext_size_is_cachable(ffd, len))
2145 rb->fulltext_cache = ffd->fulltext_cache;
2149 /* This will also prevent the reconstructed fulltext from being
2150 put into the cache. */
2151 rb->fulltext_cache_key.revision = SVN_INVALID_REVNUM;
2154 *contents_p = svn_stream_create(rb, result_pool);
2155 svn_stream_set_read2(*contents_p, NULL /* only full read support */,
2157 svn_stream_set_close(*contents_p, rep_read_contents_close);
2160 return SVN_NO_ERROR;
/* Builds a content stream for REP whose own data is read from FILE at
 * OFFSET: a rep state RS is set up by hand around a temporary revision
 * file wrapper, the rep header is parsed, and the delta chain is either
 * just RS (self-delta) or the chain of the base rep with RS inserted in
 * front.  All allocations visible here use POOL.
 * NOTE(review): the return type line and the FS, FILE, OFFSET and POOL
 * parameter declarations are not visible in this listing. */
2164 svn_fs_x__get_contents_from_file(svn_stream_t **contents_p,
2166 svn_fs_x__representation_t *rep,
2171 rep_read_baton_t *rb;
/* The key carries an invalid revision; skip_contents() sets up fulltext
 * buffering only for keys with a valid revision. */
2172 svn_fs_x__pair_cache_key_t fulltext_cache_key = { SVN_INVALID_REVNUM, 0 };
2173 rep_state_t *rs = apr_pcalloc(pool, sizeof(*rs));
2174 svn_fs_x__rep_header_t *rh;
2175 svn_stream_t *stream;
2177 /* Initialize the reader baton. Some members may be added lazily
2178 * while reading from the stream. */
2179 SVN_ERR(rep_read_get_baton(&rb, fs, rep, fulltext_cache_key, pool));
2181 /* Continue constructing RS. Leave caches as NULL. */
2182 rs->size = rep->size;
2183 rs->rep_id = rep->id;
2187 /* Provide just enough file access info to allow for a basic read from
2188 * FILE but leave all index / footer info with empty values b/c FILE
2189 * probably is not a complete revision file. */
2190 rs->sfile = apr_pcalloc(pool, sizeof(*rs->sfile));
2191 rs->sfile->revision = SVN_INVALID_REVNUM;
2192 rs->sfile->pool = pool;
2194 SVN_ERR(svn_fs_x__rev_file_wrap_temp(&rs->sfile->rfile, fs, file, pool));
2196 /* Read the rep header. */
2197 SVN_ERR(svn_fs_x__rev_file_seek(rs->sfile->rfile, NULL, offset));
2198 SVN_ERR(svn_fs_x__rev_file_stream(&stream, rs->sfile->rfile));
2199 SVN_ERR(svn_fs_x__read_rep_header(&rh, stream, pool, pool));
/* The file position after parsing the header is where the rep data starts. */
2200 SVN_ERR(svn_fs_x__rev_file_offset(&rs->start, rs->sfile->rfile));
2201 rs->header_size = rh->header_size;
2203 /* Log the access. */
2204 SVN_ERR(dbg__log_access(fs, &rep->id, rh,
2205 SVN_FS_X__ITEM_TYPE_ANY_REP, pool));
2207 /* Build the representation list (delta chain). */
2208 if (rh->type == svn_fs_x__rep_self_delta)
2210 rb->rs_list = apr_array_make(pool, 1, sizeof(rep_state_t *));
2211 APR_ARRAY_PUSH(rb->rs_list, rep_state_t *) = rs;
2212 rb->src_state = NULL;
2216 svn_fs_x__representation_t next_rep = { 0 };
2218 /* skip "SVNx" diff marker */
2221 /* REP's base rep is inside a proper revision.
2222 * It can be reconstructed in the usual way. */
2223 next_rep.id.change_set = svn_fs_x__change_set_by_rev(rh->base_revision);
2224 next_rep.id.number = rh->base_item_index;
2225 next_rep.size = rh->base_length;
2227 SVN_ERR(build_rep_list(&rb->rs_list, &rb->base_window,
2228 &rb->src_state, rb->fs, &next_rep,
2229 rb->filehandle_pool, rb->scratch_pool));
2231 /* Insert the access to REP as the first element of the delta chain. */
2232 SVN_ERR(svn_sort__array_insert2(rb->rs_list, &rs, 0));
2235 /* Now, the baton is complete and we can assemble the stream around it. */
2236 *contents_p = svn_stream_create(rb, pool);
2237 svn_stream_set_read2(*contents_p, NULL /* only full read support */,
2239 svn_stream_set_close(*contents_p, rep_read_contents_close);
2241 return SVN_NO_ERROR;
2244 /* Baton for cache_access_wrapper. Carries the PROCESSOR callback and the
2245 * BATON that were passed to svn_fs_x__try_process_file_contents().
2247 typedef struct cache_access_wrapper_baton_t
2249 svn_fs_process_contents_func_t func;
2251 } cache_access_wrapper_baton_t;
2253 /* Wrapper to translate between svn_fs_process_contents_func_t and
2254 * svn_cache__partial_getter_func_t.
2256 static svn_error_t *
2257 cache_access_wrapper(void **out,
2259 apr_size_t data_len,
2263 cache_access_wrapper_baton_t *wrapper_baton = baton;
2265 SVN_ERR(wrapper_baton->func((const unsigned char *)data,
2266 data_len - 1, /* cache adds terminating 0 */
2267 wrapper_baton->baton,
2270 /* non-NULL value to signal the calling cache that all went well */
2273 return SVN_NO_ERROR;
2277 svn_fs_x__try_process_file_contents(svn_boolean_t *success,
2279 svn_fs_x__noderev_t *noderev,
2280 svn_fs_process_contents_func_t processor,
2282 apr_pool_t *scratch_pool)
2284 svn_fs_x__representation_t *rep = noderev->data_rep;
2287 svn_fs_x__data_t *ffd = fs->fsap_data;
2288 svn_fs_x__pair_cache_key_t fulltext_cache_key = { 0 };
2290 fulltext_cache_key.revision = svn_fs_x__get_revnum(rep->id.change_set);
2291 fulltext_cache_key.second = rep->id.number;
2292 if ( SVN_IS_VALID_REVNUM(fulltext_cache_key.revision)
2293 && fulltext_size_is_cachable(ffd, rep->expanded_size))
2295 cache_access_wrapper_baton_t wrapper_baton;
2298 wrapper_baton.func = processor;
2299 wrapper_baton.baton = baton;
2300 return svn_cache__get_partial(&dummy, success,
2301 ffd->fulltext_cache,
2302 &fulltext_cache_key,
2303 cache_access_wrapper,
2310 return SVN_NO_ERROR;
2313 /* Baton used when reading delta windows: RS is the rep state the windows are read from, MD5_DIGEST is the digest handed out by delta_read_md5_digest. */
2314 typedef struct delta_read_baton_t
2316 struct rep_state_t *rs;
2317 unsigned char md5_digest[APR_MD5_DIGESTSIZE];
2318 } delta_read_baton_t;
2320 /* This implements the svn_txdelta_next_window_fn_t interface. */
2321 static svn_error_t *
2322 delta_read_next_window(svn_txdelta_window_t **window,
2326 delta_read_baton_t *drb = baton;
2327 apr_pool_t *scratch_pool = svn_pool_create(pool);
2330 if (drb->rs->current < drb->rs->size)
2332 SVN_ERR(read_delta_window(window, drb->rs->chunk_index, drb->rs, pool,
2334 drb->rs->chunk_index++;
2337 svn_pool_destroy(scratch_pool);
2339 return SVN_NO_ERROR;
2342 /* This implements the svn_txdelta_md5_digest_fn_t interface. */
2343 static const unsigned char *
2344 delta_read_md5_digest(void *baton)
2346 delta_read_baton_t *drb = baton;
2347 return drb->md5_digest;
2350 /* Return a txdelta stream for on-disk representation REP_STATE
2351 * of TARGET. Allocate the result in RESULT_POOL.
2353 static svn_txdelta_stream_t *
2354 get_storaged_delta_stream(rep_state_t *rep_state,
2355 svn_fs_x__noderev_t *target,
2356 apr_pool_t *result_pool)
2358 /* Create the delta read baton. */
2359 delta_read_baton_t *drb = apr_pcalloc(result_pool, sizeof(*drb));
2360 drb->rs = rep_state;
2361 memcpy(drb->md5_digest, target->data_rep->md5_digest,
2362 sizeof(drb->md5_digest));
2363 return svn_txdelta_stream_create(drb, delta_read_next_window,
2364 delta_read_md5_digest, result_pool);
2368 svn_fs_x__get_file_delta_stream(svn_txdelta_stream_t **stream_p,
2370 svn_fs_x__noderev_t *source,
2371 svn_fs_x__noderev_t *target,
2372 apr_pool_t *result_pool,
2373 apr_pool_t *scratch_pool)
2375 svn_stream_t *source_stream, *target_stream;
2376 rep_state_t *rep_state;
2377 svn_fs_x__rep_header_t *rep_header;
2379 /* Try a shortcut: if the target is stored as a delta against the source,
2380 then just use that delta. NOTE(review): no fulltext cache check is
2381 visible here; the shortcut only depends on TARGET->DATA_REP and SOURCE. */
2382 if (target->data_rep && source)
2384 /* Read target's base rep if any. */
2385 SVN_ERR(create_rep_state(&rep_state, &rep_header, NULL,
2386 target->data_rep, fs, result_pool,
2389 /* Try a shortcut: if the target is stored as a delta against the source,
2390 then just use that delta. */
2391 if (source && source->data_rep && target->data_rep)
2393 /* If that matches source, then use this delta as is.
2394 Note that we want an actual delta here. E.g. a self-delta would
2395 not be good enough. */
2396 if (rep_header->type == svn_fs_x__rep_delta
2397 && rep_header->base_revision
2398 == svn_fs_x__get_revnum(source->data_rep->id.change_set)
2399 && rep_header->base_item_index == source->data_rep->id.number)
2401 *stream_p = get_storaged_delta_stream(rep_state, target,
2403 return SVN_NO_ERROR;
2408 /* We want a self-delta. There is a fair chance that TARGET got
2409 added in this revision and is already stored in the requested
2411 if (rep_header->type == svn_fs_x__rep_self_delta)
2413 *stream_p = get_storaged_delta_stream(rep_state, target,
2415 return SVN_NO_ERROR;
2419 /* Don't keep file handles open for longer than necessary. */
2420 if (rep_state->sfile->rfile)
2422 SVN_ERR(svn_fs_x__close_revision_file(rep_state->sfile->rfile));
2423 rep_state->sfile->rfile = NULL;
2427 /* No shortcut applied: read both fulltexts and construct a delta. An empty stream stands in for the source where it has none. */
2429 SVN_ERR(svn_fs_x__get_contents(&source_stream, fs, source->data_rep,
2430 TRUE, result_pool));
2432 source_stream = svn_stream_empty(result_pool);
2434 SVN_ERR(svn_fs_x__get_contents(&target_stream, fs, target->data_rep,
2435 TRUE, result_pool));
2437 /* Because source and target stream will already verify their content,
2438 * there is no need to do this once more. In particular if the stream
2439 * content is being fetched from cache. */
2440 svn_txdelta2(stream_p, source_stream, target_stream, FALSE, result_pool);
2442 return SVN_NO_ERROR;
2445 /* Return TRUE when all svn_fs_x__dirent_t* in ENTRIES are already sorted
2446 by their respective name. */
2447 static svn_boolean_t
2448 sorted(apr_array_header_t *entries)
2452 const svn_fs_x__dirent_t * const *dirents = (const void *)entries->elts;
2453 for (i = 0; i < entries->nelts-1; ++i)
2454 if (strcmp(dirents[i]->name, dirents[i+1]->name) > 0)
2460 /* Compare the names of the two dirents given in **A and **B. */
2462 compare_dirents(const void *a,
2465 const svn_fs_x__dirent_t *lhs = *((const svn_fs_x__dirent_t * const *) a);
2466 const svn_fs_x__dirent_t *rhs = *((const svn_fs_x__dirent_t * const *) b);
2468 return strcmp(lhs->name, rhs->name);
2471 /* Compare the name of the dirents given in **A with the C string in *B. */
2473 compare_dirent_name(const void *a,
2476 const svn_fs_x__dirent_t *lhs = *((const svn_fs_x__dirent_t * const *) a);
2477 const char *rhs = b;
2479 return strcmp(lhs->name, rhs);
2482 /* Into ENTRIES, parse all directories entries from the serialized form in
2483 * DATA. If INCREMENTAL is TRUE, read until the end of the STREAM and
2484 * update the data. ID is provided for nicer error messages.
2486 * The contents of DATA will be shared with the items in ENTRIES, i.e. it
2487 * must not be modified afterwards and must remain valid as long as ENTRIES
2488 * is valid. Use SCRATCH_POOL for temporary allocations.
2490 static svn_error_t *
2491 parse_dir_entries(apr_array_header_t **entries_p,
2492 const svn_stringbuf_t *data,
2493 svn_boolean_t incremental,
2494 const svn_fs_x__id_t *id,
2495 apr_pool_t *result_pool,
2496 apr_pool_t *scratch_pool)
2498 const apr_byte_t *p = (const apr_byte_t *)data->data;
2499 const apr_byte_t *end = p + data->len;
2501 apr_hash_t *hash = incremental ? svn_hash__make(scratch_pool) : NULL;
2502 apr_array_header_t *entries;
2504 /* Construct the resulting container. */
2505 p = svn__decode_uint(&count, p, end);
2506 if (count > INT_MAX)
2507 return svn_error_createf(SVN_ERR_FS_CORRUPT, NULL,
2508 _("Directory for '%s' is too large"),
2509 svn_fs_x__id_unparse(id, scratch_pool)->data);
2511 entries = apr_array_make(result_pool, (int)count,
2512 sizeof(svn_fs_x__dirent_t *));
2517 svn_fs_x__dirent_t *dirent;
2518 dirent = apr_pcalloc(result_pool, sizeof(*dirent));
2520 /* The part of the serialized entry that is not the name will be
2521 * about 6 bytes or less. Since APR allocates with an 8 byte
2522 * alignment (4 bytes loss on average per string), simply using
2523 * the name string in DATA already gives us near-optimal memory
2525 dirent->name = (const char *)p;
2526 len = strlen(dirent->name);
2529 return svn_error_createf(SVN_ERR_FS_CORRUPT, NULL,
2530 _("Directory entry missing kind in '%s'"),
2531 svn_fs_x__id_unparse(id, scratch_pool)->data);
2533 dirent->kind = (svn_node_kind_t)*(p++);
2535 return svn_error_createf(SVN_ERR_FS_CORRUPT, NULL,
2536 _("Directory entry missing change set in '%s'"),
2537 svn_fs_x__id_unparse(id, scratch_pool)->data);
2539 p = svn__decode_int(&dirent->id.change_set, p, end);
2541 return svn_error_createf(SVN_ERR_FS_CORRUPT, NULL,
2542 _("Directory entry missing item number in '%s'"),
2543 svn_fs_x__id_unparse(id, scratch_pool)->data);
2545 p = svn__decode_uint(&dirent->id.number, p, end);
2547 /* In incremental mode, update the hash; otherwise, write to the
2551 /* Insertion / update or a deletion? */
2552 if (svn_fs_x__id_used(&dirent->id))
2553 apr_hash_set(hash, dirent->name, len, dirent);
2555 apr_hash_set(hash, dirent->name, len, NULL);
2559 APR_ARRAY_PUSH(entries, svn_fs_x__dirent_t *) = dirent;
2565 /* Convert container into a sorted array. */
2566 apr_hash_index_t *hi;
2567 for (hi = apr_hash_first(scratch_pool, hash); hi; hi = apr_hash_next(hi))
2568 APR_ARRAY_PUSH(entries, svn_fs_x__dirent_t *) = apr_hash_this_val(hi);
2570 if (!sorted(entries))
2571 svn_sort__array(entries, compare_dirents);
2575 /* Check that we read the expected amount of entries. */
2576 if ((apr_uint64_t)entries->nelts != count)
2577 return svn_error_createf(SVN_ERR_FS_CORRUPT, NULL,
2578 _("Directory length mismatch in '%s'"),
2579 svn_fs_x__id_unparse(id, scratch_pool)->data);
2582 *entries_p = entries;
2584 return SVN_NO_ERROR;
2587 /* For directory NODEREV in FS, return the *FILESIZE of its in-txn
2588 * representation. If the directory representation is comitted data,
2589 * set *FILESIZE to SVN_INVALID_FILESIZE. Use SCRATCH_POOL for temporaries.
2591 static svn_error_t *
2592 get_txn_dir_info(svn_filesize_t *filesize,
2594 svn_fs_x__noderev_t *noderev,
2595 apr_pool_t *scratch_pool)
2597 if (noderev->data_rep
2598 && ! svn_fs_x__is_revision(noderev->data_rep->id.change_set))
2600 const svn_io_dirent2_t *dirent;
2601 const char *filename;
2603 filename = svn_fs_x__path_txn_node_children(fs, &noderev->noderev_id,
2604 scratch_pool, scratch_pool);
2606 SVN_ERR(svn_io_stat_dirent2(&dirent, filename, FALSE, FALSE,
2607 scratch_pool, scratch_pool));
2608 *filesize = dirent->filesize;
2612 *filesize = SVN_INVALID_FILESIZE;
2615 return SVN_NO_ERROR;
2618 /* Fetch the contents of the directory NODEREV in FS into DIR: set
2619 DIR->TXN_FILESIZE and parse the serialized entries into DIR->ENTRIES,
2620 an array of svn_fs_x__dirent_t * allocated in RESULT_POOL. */
2621 static svn_error_t *
2622 get_dir_contents(svn_fs_x__dir_data_t *dir,
2624 svn_fs_x__noderev_t *noderev,
2625 apr_pool_t *result_pool,
2626 apr_pool_t *scratch_pool)
2628 svn_stream_t *contents;
2629 const svn_fs_x__id_t *id = &noderev->noderev_id;
2631 svn_stringbuf_t *text;
2632 svn_boolean_t incremental;
2634 /* Initialize the result. */
2635 dir->txn_filesize = SVN_INVALID_FILESIZE;
2637 /* Read dir contents - unless there is none in which case we are done. */
2638 if (noderev->data_rep
2639 && ! svn_fs_x__is_revision(noderev->data_rep->id.change_set))
2641 /* Get location & current size of the directory representation. */
2642 const char *filename;
2645 filename = svn_fs_x__path_txn_node_children(fs, id, scratch_pool,
2648 /* The representation is mutable. Read the old directory
2649 contents from the mutable children file, followed by the
2650 changes we've made in this transaction. */
2651 SVN_ERR(svn_io_file_open(&file, filename, APR_READ | APR_BUFFERED,
2652 APR_OS_DEFAULT, scratch_pool));
2654 /* Obtain txn children file size. */
2655 SVN_ERR(svn_io_file_size_get(&dir->txn_filesize, file, scratch_pool));
2656 len = (apr_size_t)dir->txn_filesize;
2658 /* Finally, provide stream access to FILE. */
2659 contents = svn_stream_from_aprfile2(file, FALSE, scratch_pool);
2662 else if (noderev->data_rep)
2664 /* The representation is immutable. Read it normally. */
2665 len = noderev->data_rep->expanded_size;
2666 SVN_ERR(svn_fs_x__get_contents(&contents, fs, noderev->data_rep,
2667 FALSE, scratch_pool));
2668 incremental = FALSE;
2672 /* Empty representation == empty directory. */
2673 dir->entries = apr_array_make(result_pool, 0,
2674 sizeof(svn_fs_x__dirent_t *));
2675 return SVN_NO_ERROR;
2678 /* Read the whole stream contents into a single buffer.
2679 * Due to our LEN hint, no allocation overhead occurs.
2681 * Also, a large portion of TEXT will be file / dir names which we
2682 * directly reference from DIR->ENTRIES instead of copying them.
2683 * Hence, we need to use the RESULT_POOL here. */
2684 SVN_ERR(svn_stringbuf_from_stream(&text, contents, len, result_pool));
2685 SVN_ERR(svn_stream_close(contents));
2687 /* de-serialize the directory entries from TEXT */
2688 SVN_ERR(parse_dir_entries(&dir->entries, text, incremental, id,
2689 result_pool, scratch_pool));
2691 return SVN_NO_ERROR;
2695 /* Return the cache object in FS responsible to storing the directory the
2696 * NODEREV plus the corresponding pre-allocated *KEY.
2698 static svn_cache__t *
2699 locate_dir_cache(svn_fs_t *fs,
2700 svn_fs_x__id_t *key,
2701 svn_fs_x__noderev_t *noderev)
2703 svn_fs_x__data_t *ffd = fs->fsap_data;
2705 if (!noderev->data_rep)
2707 /* no data rep -> empty directory.
2708 Use a key that does definitely not clash with non-NULL reps. */
2709 key->change_set = SVN_FS_X__INVALID_CHANGE_SET;
2710 key->number = SVN_FS_X__ITEM_INDEX_UNUSED;
2712 else if (svn_fs_x__is_txn(noderev->noderev_id.change_set))
2714 /* data in txns must be addressed by noderev ID since the
2715 representation has not been created, yet. */
2716 *key = noderev->noderev_id;
2720 /* committed data can use simple rev,item pairs */
2721 *key = noderev->data_rep->id;
2724 return ffd->dir_cache;
2728 svn_fs_x__rep_contents_dir(apr_array_header_t **entries_p,
2730 svn_fs_x__noderev_t *noderev,
2731 apr_pool_t *result_pool,
2732 apr_pool_t *scratch_pool)
2735 svn_fs_x__dir_data_t *dir;
2737 /* find the cache we may use and the key addressing NODEREV in it */
2738 svn_cache__t *cache = locate_dir_cache(fs, &key, noderev);
2739 svn_boolean_t found;
2741 SVN_ERR(svn_cache__get((void **)&dir, &found, cache, &key, result_pool));
2744 /* Verify that the cached dir info is not stale
2745 * (no-op for committed data). */
2746 svn_filesize_t filesize;
2747 SVN_ERR(get_txn_dir_info(&filesize, fs, noderev, scratch_pool));
2749 if (filesize == dir->txn_filesize)
2751 /* Still valid. Done. */
2752 *entries_p = dir->entries;
2753 return SVN_NO_ERROR;
2757 /* Not returned from cache above: read in the directory contents. */
2758 dir = apr_pcalloc(scratch_pool, sizeof(*dir));
2759 SVN_ERR(get_dir_contents(dir, fs, noderev, result_pool, scratch_pool));
2760 *entries_p = dir->entries;
2762 /* Update the cache, if we are to use one.
2764 * Don't even attempt to serialize very large directories; it would cause
2765 * an unnecessary memory allocation peak. 100 bytes/entry is about right.
2767 if (svn_cache__is_cachable(cache, 100 * dir->entries->nelts))
2768 SVN_ERR(svn_cache__set(cache, &key, dir, scratch_pool));
2770 return SVN_NO_ERROR;
2773 svn_fs_x__dirent_t *
2774 svn_fs_x__find_dir_entry(apr_array_header_t *entries,
2778 svn_fs_x__dirent_t **result
2779 = svn_sort__array_lookup(entries, name, hint, compare_dirent_name);
2780 return result ? *result : NULL;
2784 svn_fs_x__rep_contents_dir_entry(svn_fs_x__dirent_t **dirent,
2786 svn_fs_x__noderev_t *noderev,
2789 apr_pool_t *result_pool,
2790 apr_pool_t *scratch_pool)
2792 svn_boolean_t found = FALSE;
2794 /* find the cache we may use and the key addressing NODEREV in it */
2796 svn_cache__t *cache = locate_dir_cache(fs, &key, noderev);
2797 svn_fs_x__ede_baton_t baton;
2799 svn_filesize_t filesize;
2800 SVN_ERR(get_txn_dir_info(&filesize, fs, noderev, scratch_pool));
2805 baton.txn_filesize = filesize;
2807 SVN_ERR(svn_cache__get_partial((void **)dirent,
2811 svn_fs_x__extract_dir_entry,
2815 /* Remember the new clue only if we found something at that spot. */
2819 /* fetch data from disk if we did not find it in the cache or the cached data is out of date */
2820 if (! found || baton.out_of_date)
2822 svn_fs_x__dirent_t *entry;
2823 svn_fs_x__dirent_t *entry_copy = NULL;
2824 svn_fs_x__dir_data_t dir;
2826 /* Read in the directory contents. */
2827 SVN_ERR(get_dir_contents(&dir, fs, noderev, scratch_pool,
2830 /* Update the cache, if we are to use one.
2832 * Don't even attempt to serialize very large directories; it would
2833 * cause an unnecessary memory allocation peak. 150 bytes / entry is
2835 if (cache && svn_cache__is_cachable(cache, 150 * dir.entries->nelts))
2836 SVN_ERR(svn_cache__set(cache, &key, &dir, scratch_pool));
2838 /* find desired entry and return a copy (including its name) in RESULT_POOL, if found */
2839 entry = svn_fs_x__find_dir_entry(dir.entries, name, NULL);
2842 entry_copy = apr_pmemdup(result_pool, entry, sizeof(*entry_copy));
2843 entry_copy->name = apr_pstrdup(result_pool, entry->name);
2846 *dirent = entry_copy;
2849 return SVN_NO_ERROR;
2853 svn_fs_x__get_proplist(apr_hash_t **proplist,
2855 svn_fs_x__noderev_t *noderev,
2856 apr_pool_t *result_pool,
2857 apr_pool_t *scratch_pool)
2859 svn_stream_t *stream;
2860 const svn_fs_x__id_t *noderev_id = &noderev->noderev_id;
2862 if (noderev->prop_rep
2863 && !svn_fs_x__is_revision(noderev->prop_rep->id.change_set))
2865 svn_stringbuf_t *content;
2866 svn_string_t *as_string;
2867 const char *filename = svn_fs_x__path_txn_node_props(fs, noderev_id,
2870 SVN_ERR(svn_stringbuf_from_file2(&content, filename, result_pool));
2872 as_string = svn_stringbuf__morph_into_string(content);
2873 SVN_ERR_W(svn_fs_x__parse_properties(proplist, as_string, result_pool),
2874 apr_psprintf(scratch_pool,
2875 "malformed property list for node-revision '%s' in '%s'",
2876 svn_fs_x__id_unparse(&noderev->noderev_id,
2877 scratch_pool)->data,
2880 else if (noderev->prop_rep)
2882 svn_fs_x__data_t *ffd = fs->fsap_data;
2883 svn_fs_x__representation_t *rep = noderev->prop_rep;
2884 svn_fs_x__pair_cache_key_t key = { 0 };
2885 svn_string_t *content;
2886 svn_boolean_t is_cached;
2888 key.revision = svn_fs_x__get_revnum(rep->id.change_set);
2889 key.second = rep->id.number;
2890 SVN_ERR(svn_cache__get((void **) proplist, &is_cached,
2891 ffd->properties_cache, &key, result_pool));
2893 return SVN_NO_ERROR;
2895 SVN_ERR(svn_fs_x__get_contents(&stream, fs, rep, FALSE, scratch_pool));
2896 SVN_ERR(svn_string_from_stream2(&content, stream, rep->expanded_size,
2899 SVN_ERR_W(svn_fs_x__parse_properties(proplist, content, result_pool),
2900 apr_psprintf(scratch_pool,
2901 "malformed property list for node-revision '%s'",
2902 svn_fs_x__id_unparse(&noderev->noderev_id,
2903 scratch_pool)->data));
2905 SVN_ERR(svn_cache__set(ffd->properties_cache, &key, *proplist,
2910 /* NODEREV has no prop rep: return an empty prop list allocated in RESULT_POOL */
2911 *proplist = apr_hash_make(result_pool);
2914 return SVN_NO_ERROR;
2918 svn_fs_x__create_changes_context(svn_fs_x__changes_context_t **context,
2921 apr_pool_t *result_pool,
2922 apr_pool_t *scratch_pool)
2924 svn_fs_x__changes_context_t *result = apr_pcalloc(result_pool,
2927 result->revision = rev;
2929 SVN_ERR(svn_fs_x__ensure_revision_exists(rev, fs, scratch_pool));
2930 SVN_ERR(svn_fs_x__rev_file_init(&result->revision_file, fs, rev,
2934 return SVN_NO_ERROR;
2938 svn_fs_x__get_changes(apr_array_header_t **changes,
2939 svn_fs_x__changes_context_t *context,
2940 apr_pool_t *result_pool,
2941 apr_pool_t *scratch_pool)
2943 svn_boolean_t found;
2944 svn_fs_x__data_t *ffd = context->fs->fsap_data;
2947 id.change_set = svn_fs_x__change_set_by_rev(context->revision);
2948 id.number = SVN_FS_X__ITEM_INDEX_CHANGES;
2950 /* try cache lookup first; packed revisions use the changes container cache, others the changes cache */
2952 if (svn_fs_x__is_packed_rev(context->fs, context->revision))
2955 svn_fs_x__pair_cache_key_t key;
2956 svn_fs_x__changes_get_list_baton_t baton;
2957 baton.start = (int)context->next;
2958 baton.eol = &context->eol;
2960 SVN_ERR(svn_fs_x__item_offset(&offset, &baton.sub_item, context->fs,
2961 context->revision_file,
2962 &id, scratch_pool));
2963 key.revision = svn_fs_x__packed_base_rev(context->fs,
2965 key.second = offset;
2967 SVN_ERR(svn_cache__get_partial((void **)changes, &found,
2968 ffd->changes_container_cache, &key,
2969 svn_fs_x__changes_get_list_func,
2970 &baton, result_pool));
2974 svn_fs_x__changes_list_t *changes_list;
2975 svn_fs_x__pair_cache_key_t key;
2976 key.revision = context->revision;
2977 key.second = context->next;
2979 SVN_ERR(svn_cache__get((void **)&changes_list, &found,
2980 ffd->changes_cache, &key, result_pool));
2984 /* Where to look next - if there is more data. */
2985 context->eol = changes_list->eol;
2986 context->next_offset = changes_list->end_offset;
2988 /* Return the block as a "proper" APR array whose ELTS point at the cached list's change pointers. */
2989 (*changes) = apr_array_make(result_pool, 0, sizeof(void *));
2990 (*changes)->elts = (char *)changes_list->changes;
2991 (*changes)->nelts = changes_list->count;
2992 (*changes)->nalloc = changes_list->count;
2998 /* 'block-read' will also provide us with the desired data */
2999 SVN_ERR(block_read((void **)changes, context->fs, &id,
3000 context->revision_file, context,
3001 result_pool, scratch_pool));
3004 context->next += (*changes)->nelts;
3006 SVN_ERR(dbg__log_access(context->fs, &id, *changes,
3007 SVN_FS_X__ITEM_TYPE_CHANGES, scratch_pool));
3009 return SVN_NO_ERROR;
3012 /* Fetch the representation data (header, txdelta / plain windows)
3013 * addressed by ENTRY->ITEM in FS and cache it under KEY. Read the data
3014 * from REV_FILE. If MAX_OFFSET is not -1, don't read windows that start
3015 * at or beyond that offset. Use SCRATCH_POOL for temporary allocations.
3017 static svn_error_t *
3018 block_read_contents(svn_fs_t *fs,
3019 svn_fs_x__revision_file_t *rev_file,
3020 svn_fs_x__p2l_entry_t* entry,
3021 svn_fs_x__pair_cache_key_t *key,
3022 apr_off_t max_offset,
3023 apr_pool_t *scratch_pool)
3025 svn_fs_x__representation_cache_key_t header_key = { 0 };
3026 rep_state_t rs = { 0 };
3027 svn_filesize_t fulltext_len;
3028 svn_fs_x__rep_header_t *rep_header;
3030 header_key.revision = (apr_int32_t)key->revision;
3031 header_key.is_packed = svn_fs_x__is_packed_rev(fs, header_key.revision);
3032 header_key.item_index = key->second;
3034 SVN_ERR(read_rep_header(&rep_header, fs, rev_file, &header_key,
3036 SVN_ERR(init_rep_state(&rs, rep_header, fs, rev_file, entry, scratch_pool));
3037 SVN_ERR(cache_windows(&fulltext_len, fs, &rs, max_offset, scratch_pool));
3039 return SVN_NO_ERROR;
3042 /* For the given REV_FILE in FS, in *STREAM return a stream covering the
3043 * item specified by ENTRY. Also, verify the item's content by low-level
3044 * checksum. Allocate the result in RESULT_POOL.
3046 static svn_error_t *
3047 read_item(svn_stream_t **stream,
3049 svn_fs_x__revision_file_t *rev_file,
3050 svn_fs_x__p2l_entry_t* entry,
3051 apr_pool_t *result_pool)
3053 apr_uint32_t digest;
3054 svn_checksum_t *expected, *actual;
3055 apr_uint32_t plain_digest;
3056 svn_stringbuf_t *text;
3058 /* Read item into string buffer. */
3059 text = svn_stringbuf_create_ensure(entry->size, result_pool);
3060 text->len = entry->size;
3061 text->data[text->len] = 0;
3062 SVN_ERR(svn_fs_x__rev_file_read(rev_file, text->data, text->len));
3064 /* Return (construct, calculate) stream and checksum. */
3065 *stream = svn_stream_from_stringbuf(text, result_pool);
3066 digest = svn__fnv1a_32x4(text->data, text->len);
3068 /* Checksums will match most of the time. */
3069 if (entry->fnv1_checksum == digest)
3070 return SVN_NO_ERROR;
3072 /* Construct proper checksum objects from their digests to allow for
3073 * nice error messages. */
3074 plain_digest = htonl(entry->fnv1_checksum);
3075 expected = svn_checksum__from_digest_fnv1a_32x4(
3076 (const unsigned char *)&plain_digest, result_pool);
3077 plain_digest = htonl(digest);
3078 actual = svn_checksum__from_digest_fnv1a_32x4(
3079 (const unsigned char *)&plain_digest, result_pool);
3081 /* Construct the full error message with all the info we have. */
3082 return svn_checksum_mismatch_err(expected, actual, result_pool,
3083 _("Low-level checksum mismatch while reading\n"
3084 "%s bytes of meta data at offset %s "),
3085 apr_off_t_toa(result_pool, entry->size),
3086 apr_off_t_toa(result_pool, entry->offset));
3089 /* If not already cached or if MUST_READ is set, read the changed paths
3090 * list addressed by ENTRY in FS and return it in *CHANGES. Cache the
3091 * result if caching is enabled. Read the data from REV_FILE. Trim the
3092 * data in *CHANGES to the range given by CONTEXT. Allocate *CHANGES in
3093 * RESULT_POOL and allocate temporaries in SCRATCH_POOL.
3095 static svn_error_t *
3096 block_read_changes(apr_array_header_t **changes,
3098 svn_fs_x__revision_file_t *rev_file,
3099 svn_fs_x__p2l_entry_t* entry,
3100 svn_fs_x__changes_context_t *context,
3101 svn_boolean_t must_read,
3102 apr_pool_t *result_pool,
3103 apr_pool_t *scratch_pool)
3105 svn_fs_x__data_t *ffd = fs->fsap_data;
3106 svn_stream_t *stream;
3107 svn_fs_x__pair_cache_key_t key;
3108 svn_fs_x__changes_list_t changes_list;
3110 /* If we don't have to return any data, just read and cache the first
3111 block. This means we won't cache the remaining blocks from longer
3112 lists right away but only if they are actually needed. */
3113 apr_size_t next = must_read ? context->next : 0;
3114 apr_size_t next_offset = must_read ? context->next_offset : 0;
3116 /* we don't support containers, yet */
3117 SVN_ERR_ASSERT(entry->item_count == 1);
3119 /* The item to read / write. */
3120 key.revision = svn_fs_x__get_revnum(entry->items[0].change_set);
3123 /* already in cache? */
3126 svn_boolean_t is_cached = FALSE;
3127 SVN_ERR(svn_cache__has_key(&is_cached, ffd->changes_cache, &key,
3130 return SVN_NO_ERROR;
3133 /* Verify the whole list only once. We don't use the STREAM any further. */
3134 if (!must_read || next == 0)
3135 SVN_ERR(read_item(&stream, fs, rev_file, entry, scratch_pool));
3137 /* Seek to the block to read within the changes list. */
3138 SVN_ERR(svn_fs_x__rev_file_seek(rev_file, NULL,
3139 entry->offset + next_offset));
3140 SVN_ERR(svn_fs_x__rev_file_stream(&stream, rev_file));
3142 /* read up to SVN_FS_X__CHANGES_BLOCK_SIZE changes from revision file */
3143 SVN_ERR(svn_fs_x__read_changes(changes, stream, SVN_FS_X__CHANGES_BLOCK_SIZE,
3144 result_pool, scratch_pool));
3146 SVN_ERR(svn_fs_x__rev_file_offset(&changes_list.end_offset, rev_file));
3147 changes_list.end_offset -= entry->offset;
3148 changes_list.start_offset = next_offset;
3149 changes_list.count = (*changes)->nelts;
3150 changes_list.changes = (svn_fs_x__change_t **)(*changes)->elts;
3151 changes_list.eol = (changes_list.count < SVN_FS_X__CHANGES_BLOCK_SIZE)
3152 || (changes_list.end_offset + 1 >= entry->size);
3154 /* cache for future reference */
3156 SVN_ERR(svn_cache__set(ffd->changes_cache, &key, &changes_list,
3160 * Remove the entries that already been reported. */
3163 context->next_offset = changes_list.end_offset;
3164 context->eol = changes_list.eol;
3167 return SVN_NO_ERROR;
3170 /* If not already cached or if MUST_READ is set, read the changed paths
3171 * list container addressed by ENTRY in FS. Return the changes list
3172 * identified by SUB_ITEM in *CHANGES, using CONTEXT to select a sub-range
3173 * within that list. Read the data from REV_FILE and cache the result.
3175 * Allocate *CHANGES in RESULT_POOL and everything else in SCRATCH_POOL.
3177 static svn_error_t *
3178 block_read_changes_container(apr_array_header_t **changes,
3180 svn_fs_x__revision_file_t *rev_file,
3181 svn_fs_x__p2l_entry_t* entry,
3182 apr_uint32_t sub_item,
3183 svn_fs_x__changes_context_t *context,
3184 svn_boolean_t must_read,
3185 apr_pool_t *result_pool,
3186 apr_pool_t *scratch_pool)
3188 svn_fs_x__data_t *ffd = fs->fsap_data;
3189 svn_fs_x__changes_t *container;
3190 svn_fs_x__pair_cache_key_t key;
3191 svn_stream_t *stream;
3192 svn_revnum_t revision = svn_fs_x__get_revnum(entry->items[0].change_set);
3194 key.revision = svn_fs_x__packed_base_rev(fs, revision);
3195 key.second = entry->offset;
3197 /* already in cache? */
3200 svn_boolean_t is_cached = FALSE;
3201 SVN_ERR(svn_cache__has_key(&is_cached, ffd->changes_container_cache,
3202 &key, scratch_pool));
3204 return SVN_NO_ERROR;
3207 SVN_ERR(read_item(&stream, fs, rev_file, entry, scratch_pool));
3209 /* read the changes container from the revision file */
3211 SVN_ERR(svn_fs_x__read_changes_container(&container, stream, scratch_pool,
3214 /* extract requested data */
3217 SVN_ERR(svn_fs_x__changes_get_list(changes, container, sub_item,
3218 context, result_pool));
3219 SVN_ERR(svn_cache__set(ffd->changes_container_cache, &key, container,
3222 return SVN_NO_ERROR;
3225 /* If not already cached or if MUST_READ is set, read the node revision
3226 * addressed by ENTRY in FS and return it in *NODEREV_P. Cache the
3227 * result under KEY if caching is enabled. Read the data from REV_FILE.
3228 * Allocate *NODEREV_P in RESULT_POOL and allocate temporaries in
3231 static svn_error_t *
3232 block_read_noderev(svn_fs_x__noderev_t **noderev_p,
3234 svn_fs_x__revision_file_t *rev_file,
3235 svn_fs_x__p2l_entry_t* entry,
3236 svn_fs_x__pair_cache_key_t *key,
3237 svn_boolean_t must_read,
3238 apr_pool_t *result_pool,
3239 apr_pool_t *scratch_pool)
3241 svn_fs_x__data_t *ffd = fs->fsap_data;
3242 svn_stream_t *stream;
3244 /* we don't support containers, yet */
3245 SVN_ERR_ASSERT(entry->item_count == 1);
3247 /* already in cache? */
3250 svn_boolean_t is_cached = FALSE;
3251 SVN_ERR(svn_cache__has_key(&is_cached, ffd->node_revision_cache, key,
3254 return SVN_NO_ERROR;
3257 SVN_ERR(read_item(&stream, fs, rev_file, entry, scratch_pool));
3259 /* parse the node revision from the verified item data */
3261 SVN_ERR(svn_fs_x__read_noderev(noderev_p, stream, result_pool,
3263 SVN_ERR(svn_cache__set(ffd->node_revision_cache, key, *noderev_p,
3266 return SVN_NO_ERROR;
3269 /* If not already cached or if MUST_READ is set, read the node revision
3270 * container addressed by ENTRY in FS. Return the item identified by
3271 * SUB_ITEM in *NODEREV_P. Read the data from REV_FILE and cache it.
3272 * Allocate *NODEREV_P in RESULT_POOL and allocate temporaries in
3275 static svn_error_t *
3276 block_read_noderevs_container(svn_fs_x__noderev_t **noderev_p,
3278 svn_fs_x__revision_file_t *rev_file,
3279 svn_fs_x__p2l_entry_t* entry,
3280 apr_uint32_t sub_item,
3281 svn_boolean_t must_read,
3282 apr_pool_t *result_pool,
3283 apr_pool_t *scratch_pool)
3285 svn_fs_x__data_t *ffd = fs->fsap_data;
3286 svn_fs_x__noderevs_t *container;
3287 svn_stream_t *stream;
3288 svn_fs_x__pair_cache_key_t key;
3289 svn_revnum_t revision = svn_fs_x__get_revnum(entry->items[0].change_set);
3291 key.revision = svn_fs_x__packed_base_rev(fs, revision);
3292 key.second = entry->offset;
3294 /* already in cache? */
3297 svn_boolean_t is_cached = FALSE;
3298 SVN_ERR(svn_cache__has_key(&is_cached, ffd->noderevs_container_cache,
3299 &key, scratch_pool));
3301 return SVN_NO_ERROR;
3304 SVN_ERR(read_item(&stream, fs, rev_file, entry, scratch_pool));
3306 /* read the noderevs container from the revision file */
3307 SVN_ERR(svn_fs_x__read_noderevs_container(&container, stream, scratch_pool,
3310 /* extract requested data */
3312 SVN_ERR(svn_fs_x__noderevs_get(noderev_p, container, sub_item,
3315 SVN_ERR(svn_cache__set(ffd->noderevs_container_cache, &key, container,
3318 return SVN_NO_ERROR;
3321 /* If not already cached or if MUST_READ is set, read the representation
3322 * container addressed by ENTRY in FS. Return an extractor object for the
3323 * item identified by SUB_ITEM in *EXTRACTOR. Read the data from REV_FILE
3324 * and cache it. Allocate *EXTRACTOR in RESULT_POOL and all temporaries
3327 static svn_error_t *
3328 block_read_reps_container(svn_fs_x__rep_extractor_t **extractor,
3330 svn_fs_x__revision_file_t *rev_file,
3331 svn_fs_x__p2l_entry_t* entry,
3332 apr_uint32_t sub_item,
3333 svn_boolean_t must_read,
3334 apr_pool_t *result_pool,
3335 apr_pool_t *scratch_pool)
3337 svn_fs_x__data_t *ffd = fs->fsap_data;
3338 svn_fs_x__reps_t *container;
3339 svn_stream_t *stream;
3340 svn_fs_x__pair_cache_key_t key;
3341 svn_revnum_t revision = svn_fs_x__get_revnum(entry->items[0].change_set);
3343 key.revision = svn_fs_x__packed_base_rev(fs, revision);
3344 key.second = entry->offset;
3346 /* already in cache? */
3349 svn_boolean_t is_cached = FALSE;
3350 SVN_ERR(svn_cache__has_key(&is_cached, ffd->reps_container_cache,
3351 &key, scratch_pool));
3353 return SVN_NO_ERROR;
3356 SVN_ERR(read_item(&stream, fs, rev_file, entry, scratch_pool));
3358 /* read the reps container from the revision file; unlike the other container readers, it is given RESULT_POOL */
3359 SVN_ERR(svn_fs_x__read_reps_container(&container, stream, result_pool,
3362 /* extract requested data */
3365 SVN_ERR(svn_fs_x__reps_get(extractor, fs, container, sub_item,
3368 SVN_ERR(svn_cache__set(ffd->reps_container_cache, &key, container,
3371 return SVN_NO_ERROR;
3374 /* Read the whole (e.g. 64kB) block containing the item identified by ID in
3375 * FS and put all data into cache. If necessary and depending on heuristics,
3376 * neighboring blocks may also get read. The data is being read from
3377 * already open REVISION_FILE, which must be the correct rev / pack file
3378 * w.r.t. ID->CHANGE_SET.
3380 * For noderevs and changed path lists, the item fetched can be allocated in
3381 * RESULT_POOL and returned in *RESULT. Otherwise, RESULT must be NULL.
3382 * The BATON is passed along to the extractor sub-functions and will be
3383 * used only when constructing the *RESULT. SCRATCH_POOL will be used for
3384 * all temporary allocations.
/* Visible steps: look up the offset and sub-item of ID, fetch the index
 * entries for the surrounding block, dispatch every entry to the reader
 * matching its type and finally assert that a requested *RESULT was set.
 * NOTE(review): several structural lines (loop heads, braces, the
 * assignment to *RESULT) are not visible in this excerpt. */
3386 static svn_error_t *
3387 block_read(void **result,
3389 const svn_fs_x__id_t *id,
3390 svn_fs_x__revision_file_t *revision_file,
3392 apr_pool_t *result_pool,
3393 apr_pool_t *scratch_pool)
3395 svn_fs_x__data_t *ffd = fs->fsap_data;
3396 apr_off_t offset, wanted_offset = 0;
3397 apr_off_t block_start = 0;
3398 apr_uint32_t wanted_sub_item = 0;
3399 svn_revnum_t revision = svn_fs_x__get_revnum(id->change_set);
3400 apr_array_header_t *entries;
/* sub-pool of SCRATCH_POOL; it is cleared inside the loop and destroyed
 * before returning */
3403 apr_pool_t *iterpool = svn_pool_create(scratch_pool);
3405 /* don't try this on transaction protorev files */
3406 SVN_ERR_ASSERT(SVN_IS_VALID_REVNUM(revision));
3408 /* index lookup: find the OFFSET of the item we *must* read plus (in the
3409 * "do-while" block) the list of items in the same block. */
3410 SVN_ERR(svn_fs_x__item_offset(&wanted_offset, &wanted_sub_item, fs,
3411 revision_file, id, iterpool));
/* the first pass starts at the item that was asked for */
3413 offset = wanted_offset;
3416 /* fetch list of items in the block surrounding OFFSET */
/* presumably the seek reports the start of the block containing OFFSET
 * in BLOCK_START -- confirm against svn_fs_x__rev_file_seek() */
3417 SVN_ERR(svn_fs_x__rev_file_seek(revision_file, &block_start, offset));
3418 SVN_ERR(svn_fs_x__p2l_index_lookup(&entries, fs, revision_file,
3419 revision, block_start,
3420 ffd->block_size, scratch_pool,
3423 /* read all items from the block */
3424 for (i = 0; i < entries->nelts; ++i)
3426 svn_boolean_t is_result, is_wanted;
3429 svn_fs_x__p2l_entry_t* entry
3430 = &APR_ARRAY_IDX(entries, i, svn_fs_x__p2l_entry_t);
3432 /* skip empty sections */
3433 if (entry->type == SVN_FS_X__ITEM_TYPE_UNUSED)
3436 /* the item / container we were looking for? */
/* NOTE(review): ">=" lets WANTED_SUB_ITEM equal ITEM_COUNT; if ITEMS holds
 * exactly ITEM_COUNT elements, ENTRY->ITEMS + WANTED_SUB_ITEM would then
 * point one past the last element -- confirm whether ">" was intended. */
3437 is_wanted = entry->offset == wanted_offset
3438 && entry->item_count >= wanted_sub_item
3439 && svn_fs_x__id_eq(entry->items + wanted_sub_item, id);
/* only hand data back when the caller passed a RESULT pointer */
3440 is_result = result && is_wanted;
3442 /* select the pool that we want the item to be allocated in */
3443 pool = is_result ? result_pool : iterpool;
3445 /* handle all items that start within this block and are relatively
3446 * small (i.e. < block size). Always read the item we need to return.
3448 if (is_result || ( entry->offset >= block_start
3449 && entry->size < ffd->block_size))
3452 svn_fs_x__pair_cache_key_t key = { 0 };
3453 key.revision = svn_fs_x__get_revnum(entry->items[0].change_set);
3454 key.second = entry->items[0].number;
3456 SVN_ERR(svn_fs_x__rev_file_seek(revision_file, NULL,
3458 switch (entry->type)
3460 case SVN_FS_X__ITEM_TYPE_FILE_REP:
3461 case SVN_FS_X__ITEM_TYPE_DIR_REP:
3462 case SVN_FS_X__ITEM_TYPE_FILE_PROPS:
3463 case SVN_FS_X__ITEM_TYPE_DIR_PROPS:
3464 SVN_ERR(block_read_contents(fs, revision_file,
3468 : block_start + ffd->block_size,
3472 case SVN_FS_X__ITEM_TYPE_NODEREV:
3473 SVN_ERR(block_read_noderev((svn_fs_x__noderev_t **)&item,
3475 entry, &key, is_result,
3479 case SVN_FS_X__ITEM_TYPE_CHANGES:
3480 SVN_ERR(block_read_changes((apr_array_header_t **)&item,
3482 entry, baton, is_result,
3486 case SVN_FS_X__ITEM_TYPE_CHANGES_CONT:
3487 SVN_ERR(block_read_changes_container
3488 ((apr_array_header_t **)&item,
3490 entry, wanted_sub_item,
3495 case SVN_FS_X__ITEM_TYPE_NODEREVS_CONT:
3496 SVN_ERR(block_read_noderevs_container
3497 ((svn_fs_x__noderev_t **)&item,
3499 entry, wanted_sub_item,
3500 is_result, pool, iterpool));
3503 case SVN_FS_X__ITEM_TYPE_REPS_CONT:
3504 SVN_ERR(block_read_reps_container
3505 ((svn_fs_x__rep_extractor_t **)&item,
3507 entry, wanted_sub_item,
3508 is_result, pool, iterpool));
3518 /* if we crossed a block boundary, read the remainder of
3519 * the last block as well */
/* OFFSET now marks the first byte behind ENTRY */
3520 offset = entry->offset + entry->size;
3521 if (offset - block_start > ffd->block_size)
/* drop the temporaries allocated in ITERPOOL so far */
3524 svn_pool_clear(iterpool);
3528 while(run_count++ == 1); /* can only be true once and only if a block
3529 * boundary got crossed */
3531 /* if the caller requested a result, we must have provided one by now */
3532 assert(!result || *result);
3533 svn_pool_destroy(iterpool);
3535 return SVN_NO_ERROR;