1 /* cached_data.c --- cached (read) access to FSFS data
3 * ====================================================================
4 * Licensed to the Apache Software Foundation (ASF) under one
5 * or more contributor license agreements. See the NOTICE file
6 * distributed with this work for additional information
7 * regarding copyright ownership. The ASF licenses this file
8 * to you under the Apache License, Version 2.0 (the
9 * "License"); you may not use this file except in compliance
10 * with the License. You may obtain a copy of the License at
12 * http://www.apache.org/licenses/LICENSE-2.0
14 * Unless required by applicable law or agreed to in writing,
15 * software distributed under the License is distributed on an
16 * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
17 * KIND, either express or implied. See the License for the
18 * specific language governing permissions and limitations
20 * ====================================================================
23 #include "cached_data.h"
28 #include "svn_ctype.h"
29 #include "svn_sorts.h"
30 #include "private/svn_delta_private.h"
31 #include "private/svn_io_private.h"
32 #include "private/svn_sorts_private.h"
33 #include "private/svn_subr_private.h"
34 #include "private/svn_temp_serializer.h"
39 #include "low_level.h"
42 #include "temp_serializer.h"
44 #include "../libsvn_fs/fs-loader.h"
45 #include "../libsvn_delta/delta.h" /* for SVN_DELTA_WINDOW_SIZE */
47 #include "svn_private_config.h"
49 /* forward-declare. See implementation for the docstring */
51 block_read(void **result,
53 svn_revnum_t revision,
54 apr_uint64_t item_index,
55 svn_fs_fs__revision_file_t *revision_file,
56 apr_pool_t *result_pool,
57 apr_pool_t *scratch_pool);
60 /* Define this to enable access logging via dbg_log_access
61 #define SVN_FS_FS__LOG_ACCESS
64 /* When SVN_FS_FS__LOG_ACCESS has been defined, write a line to console
65 * showing where REVISION, ITEM_INDEX is located in FS and use ITEM to
66 * show details on its contents if not NULL. To support format 6 and
67 * earlier repos, ITEM_TYPE (SVN_FS_FS__ITEM_TYPE_*) must match ITEM.
68 * Use SCRATCH_POOL for temporary allocations.
70 * For pre-format7 repos, the display will be restricted.
73 dbg_log_access(svn_fs_t *fs,
74 svn_revnum_t revision,
75 apr_uint64_t item_index,
77 apr_uint32_t item_type,
78 apr_pool_t *scratch_pool)
80 /* no-op if this macro is not defined */
81 #ifdef SVN_FS_FS__LOG_ACCESS
82 fs_fs_data_t *ffd = fs->fsap_data;
83 apr_off_t end_offset = 0;
84 svn_fs_fs__p2l_entry_t *entry = NULL;
85 static const char *types[] = {"<n/a>", "frep ", "drep ", "fprop", "dprop",
86 "node ", "chgs ", "rep "};
87 const char *description = "";
88 const char *type = types[item_type];
89 const char *pack = "";
91 svn_fs_fs__revision_file_t *rev_file;
93 SVN_ERR(svn_fs_fs__open_pack_or_rev_file(&rev_file, fs, revision,
96 /* determine rev / pack file offset */
97 SVN_ERR(svn_fs_fs__item_offset(&offset, fs, rev_file, revision, NULL,
98 item_index, scratch_pool));
100 /* constructing the pack file description */
101 if (revision < ffd->min_unpacked_rev)
102 pack = apr_psprintf(scratch_pool, "%4ld|",
103 revision / ffd->max_files_per_dir);
105 /* construct description if possible */
106 if (item_type == SVN_FS_FS__ITEM_TYPE_NODEREV && item != NULL)
108 node_revision_t *node = item;
111 ? apr_psprintf(scratch_pool, " d=%ld/%" APR_UINT64_T_FMT,
112 node->data_rep->revision,
113 node->data_rep->item_index)
117 ? apr_psprintf(scratch_pool, " p=%ld/%" APR_UINT64_T_FMT,
118 node->prop_rep->revision,
119 node->prop_rep->item_index)
121 description = apr_psprintf(scratch_pool, "%s (pc=%d%s%s)",
123 node->predecessor_count,
127 else if (item_type == SVN_FS_FS__ITEM_TYPE_ANY_REP)
129 svn_fs_fs__rep_header_t *header = item;
131 description = " (txdelta window)";
132 else if (header->type == svn_fs_fs__rep_plain)
133 description = " PLAIN";
134 else if (header->type == svn_fs_fs__rep_self_delta)
135 description = " DELTA";
137 description = apr_psprintf(scratch_pool,
138 " DELTA against %ld/%" APR_UINT64_T_FMT,
139 header->base_revision,
140 header->base_item_index);
142 else if (item_type == SVN_FS_FS__ITEM_TYPE_CHANGES && item != NULL)
144 apr_array_header_t *changes = item;
145 switch (changes->nelts)
147 case 0: description = " no change";
149 case 1: description = " 1 change";
151 default: description = apr_psprintf(scratch_pool, " %d changes",
156 /* some info is only available in format7 repos */
157 if (svn_fs_fs__use_log_addressing(fs))
159 /* reverse index lookup: get item description in ENTRY */
160 SVN_ERR(svn_fs_fs__p2l_entry_lookup(&entry, fs, rev_file, revision,
161 offset, scratch_pool));
165 end_offset = offset + entry->size;
166 type = types[entry->type];
170 printf("%5s%4lx:%04lx -%4lx:%04lx %s %7ld %5"APR_UINT64_T_FMT" %s\n",
171 pack, (long)(offset / ffd->block_size),
172 (long)(offset % ffd->block_size),
173 (long)(end_offset / ffd->block_size),
174 (long)(end_offset % ffd->block_size),
175 type, revision, item_index, description);
179 /* reduced logging for format 6 and earlier */
180 printf("%5s%10" APR_UINT64_T_HEX_FMT " %s %7ld %7" APR_UINT64_T_FMT \
182 pack, (apr_uint64_t)(offset), type, revision, item_index,
191 /* Convenience wrapper around svn_io_file_aligned_seek, taking filesystem
192 FS instead of a block size. */
194 aligned_seek(svn_fs_t *fs,
196 apr_off_t *buffer_start,
200 fs_fs_data_t *ffd = fs->fsap_data;
201 return svn_error_trace(svn_io_file_aligned_seek(file, ffd->block_size,
202 buffer_start, offset,
206 /* Open the revision file for revision REV in filesystem FS and store
207 the newly opened file in FILE. Seek to the location of item ITEM before
208 returning. Perform temporary allocations in POOL. */
210 open_and_seek_revision(svn_fs_fs__revision_file_t **file,
216 svn_fs_fs__revision_file_t *rev_file;
217 apr_off_t offset = -1;
219 SVN_ERR(svn_fs_fs__ensure_revision_exists(rev, fs, pool));
221 SVN_ERR(svn_fs_fs__open_pack_or_rev_file(&rev_file, fs, rev, pool, pool));
222 SVN_ERR(svn_fs_fs__item_offset(&offset, fs, rev_file, rev, NULL, item,
225 SVN_ERR(aligned_seek(fs, rev_file->file, NULL, offset, pool));
232 /* Open the representation REP for a node-revision in filesystem FS, seek
233 to its position and store the newly opened file in FILE. Perform
234 temporary allocations in POOL. */
236 open_and_seek_transaction(svn_fs_fs__revision_file_t **file,
238 representation_t *rep,
243 SVN_ERR(svn_fs_fs__open_proto_rev_file(file, fs, &rep->txn_id, pool, pool));
245 SVN_ERR(svn_fs_fs__item_offset(&offset, fs, NULL, SVN_INVALID_REVNUM,
246 &rep->txn_id, rep->item_index, pool));
247 SVN_ERR(aligned_seek(fs, (*file)->file, NULL, offset, pool));
252 /* Given a node-id ID, and a representation REP in filesystem FS, open
253 the correct file and seek to the correct location. Store this
254 file in *FILE_P. Perform any allocations in POOL. */
256 open_and_seek_representation(svn_fs_fs__revision_file_t **file_p,
258 representation_t *rep,
261 if (! svn_fs_fs__id_txn_used(&rep->txn_id))
262 return open_and_seek_revision(file_p, fs, rep->revision, rep->item_index,
265 return open_and_seek_transaction(file_p, fs, rep, pool);
/* Return an SVN_ERR_FS_ID_NOT_FOUND error stating that ID refers to a
   non-existent node in filesystem FS. The string form of ID used in the
   message is allocated in FS->POOL. */
271 err_dangling_id(svn_fs_t *fs, const svn_fs_id_t *id)
273 svn_string_t *id_str = svn_fs_fs__id_unparse(id, fs->pool);
274 return svn_error_createf
275 (SVN_ERR_FS_ID_NOT_FOUND, 0,
276 _("Reference to non-existent node '%s' in filesystem '%s'"),
277 id_str->data, fs->path);
280 /* Return TRUE, if FS is of a format that supports block-read and the
281 feature has been enabled. */
283 use_block_read(svn_fs_t *fs)
285 fs_fs_data_t *ffd = fs->fsap_data;
286 return svn_fs_fs__use_log_addressing(fs) && ffd->use_block_read;
289 /* Get the node-revision for the node ID in FS.
290 Set *NODEREV_P to the new node-revision structure, allocated in POOL.
291 See svn_fs_fs__get_node_revision, which wraps this and adds another
294 get_node_revision_body(node_revision_t **noderev_p,
296 const svn_fs_id_t *id,
297 apr_pool_t *result_pool,
298 apr_pool_t *scratch_pool)
301 svn_boolean_t is_cached = FALSE;
302 fs_fs_data_t *ffd = fs->fsap_data;
304 if (svn_fs_fs__id_is_txn(id))
308 /* This is a transaction node-rev. Its storage logic is very
309 different from that of rev / pack files. */
310 err = svn_io_file_open(&file,
311 svn_fs_fs__path_txn_node_rev(fs, id,
313 APR_READ | APR_BUFFERED, APR_OS_DEFAULT,
317 if (APR_STATUS_IS_ENOENT(err->apr_err))
319 svn_error_clear(err);
320 return svn_error_trace(err_dangling_id(fs, id));
323 return svn_error_trace(err);
326 SVN_ERR(svn_fs_fs__read_noderev(noderev_p,
327 svn_stream_from_aprfile2(file,
330 result_pool, scratch_pool));
334 svn_fs_fs__revision_file_t *revision_file;
336 /* noderevs in rev / pack files can be cached */
337 const svn_fs_fs__id_part_t *rev_item = svn_fs_fs__id_rev_item(id);
338 pair_cache_key_t key = { 0 };
339 key.revision = rev_item->revision;
340 key.second = rev_item->number;
342 /* Not found or not applicable. Try a noderev cache lookup.
343 * If that succeeds, we are done here. */
344 if (ffd->node_revision_cache)
346 SVN_ERR(svn_cache__get((void **) noderev_p,
348 ffd->node_revision_cache,
355 /* read the data from disk */
356 SVN_ERR(open_and_seek_revision(&revision_file, fs,
361 if (use_block_read(fs))
363 /* block-read will parse the whole block and will also return
364 the one noderev that we need right now. */
365 SVN_ERR(block_read((void **)noderev_p, fs,
374 /* physical addressing mode reading, parsing and caching */
375 SVN_ERR(svn_fs_fs__read_noderev(noderev_p,
376 revision_file->stream,
380 /* Workaround issue #4031: is-fresh-txn-root in revision files. */
381 (*noderev_p)->is_fresh_txn_root = FALSE;
383 /* The noderev is not in cache, yet. Add it, if caching has been enabled. */
384 if (ffd->node_revision_cache)
385 SVN_ERR(svn_cache__set(ffd->node_revision_cache,
391 SVN_ERR(svn_fs_fs__close_revision_file(revision_file));
398 svn_fs_fs__get_node_revision(node_revision_t **noderev_p,
400 const svn_fs_id_t *id,
401 apr_pool_t *result_pool,
402 apr_pool_t *scratch_pool)
404 const svn_fs_fs__id_part_t *rev_item = svn_fs_fs__id_rev_item(id);
406 svn_error_t *err = get_node_revision_body(noderev_p, fs, id,
407 result_pool, scratch_pool);
408 if (err && err->apr_err == SVN_ERR_FS_CORRUPT)
410 svn_string_t *id_string = svn_fs_fs__id_unparse(id, scratch_pool);
411 return svn_error_createf(SVN_ERR_FS_CORRUPT, err,
412 "Corrupt node-revision '%s'",
416 SVN_ERR(dbg_log_access(fs,
420 SVN_FS_FS__ITEM_TYPE_NODEREV,
423 return svn_error_trace(err);
427 /* Given a revision file REV_FILE, opened to REV in FS, find the Node-ID
428 of the header located at OFFSET and store it in *ID_P. Allocate
429 temporary variables from POOL. */
431 get_fs_id_at_offset(svn_fs_id_t **id_p,
432 svn_fs_fs__revision_file_t *rev_file,
438 node_revision_t *noderev;
440 SVN_ERR(aligned_seek(fs, rev_file->file, NULL, offset, pool));
441 SVN_ERR(svn_fs_fs__read_noderev(&noderev,
445 /* noderev->id is const, get rid of that */
446 *id_p = svn_fs_fs__id_copy(noderev->id, pool);
448 /* assert that the revision of the ID is REV
449 * (asserting on offset would be harder because the rev_offset is not
451 assert(svn_fs_fs__id_rev(*id_p) == rev);
457 /* Given an open revision file REV_FILE in FS for REV, locate the trailer that
458 specifies the offset to the root node-id and to the changed path
459 information. Store the root node offset in *ROOT_OFFSET and the
460 changed path offset in *CHANGES_OFFSET. If either of these
461 pointers is NULL, do nothing with it.
463 Allocate temporary variables from POOL. */
465 get_root_changes_offset(apr_off_t *root_offset,
466 apr_off_t *changes_offset,
467 svn_fs_fs__revision_file_t *rev_file,
472 fs_fs_data_t *ffd = fs->fsap_data;
473 apr_off_t rev_offset;
474 apr_seek_where_t seek_relative;
475 svn_stringbuf_t *trailer;
481 /* Determine where to seek to in the file.
483 If we've got a pack file, we want to seek to the end of the desired
484 revision. But we don't track that, so we seek to the beginning of the
487 Unless the next revision is in a different file, in which case, we can
488 just seek to the end of the pack file -- just like we do in the
490 if (rev_file->is_packed && ((rev + 1) % ffd->max_files_per_dir != 0))
492 SVN_ERR(svn_fs_fs__get_packed_offset(&end, fs, rev + 1, pool));
493 seek_relative = APR_SET;
497 seek_relative = APR_END;
501 /* Offset of the revision from the start of the pack file, if applicable. */
502 if (rev_file->is_packed)
503 SVN_ERR(svn_fs_fs__get_packed_offset(&rev_offset, fs, rev, pool));
507 /* We will assume that the last line containing the two offsets
508 will never be longer than 64 characters. */
509 SVN_ERR(svn_io_file_seek(rev_file->file, seek_relative, &end, pool));
511 if (end < sizeof(buffer))
513 len = (apr_size_t)end;
518 len = sizeof(buffer);
519 start = end - sizeof(buffer);
522 /* Read in this last block, from which we will identify the last line. */
523 SVN_ERR(aligned_seek(fs, rev_file->file, NULL, start, pool));
524 SVN_ERR(svn_io_file_read_full2(rev_file->file, buffer, len, NULL, NULL,
527 /* Parse the last line. */
528 trailer = svn_stringbuf_ncreate(buffer, len, pool);
529 SVN_ERR(svn_fs_fs__parse_revision_trailer(root_offset,
534 /* return absolute offsets */
536 *root_offset += rev_offset;
538 *changes_offset += rev_offset;
544 svn_fs_fs__rev_get_root(svn_fs_id_t **root_id_p,
547 apr_pool_t *result_pool,
548 apr_pool_t *scratch_pool)
550 fs_fs_data_t *ffd = fs->fsap_data;
551 SVN_ERR(svn_fs_fs__ensure_revision_exists(rev, fs, scratch_pool));
553 if (svn_fs_fs__use_log_addressing(fs))
555 *root_id_p = svn_fs_fs__id_create_root(rev, result_pool);
559 svn_fs_fs__revision_file_t *revision_file;
560 apr_off_t root_offset;
561 svn_fs_id_t *root_id = NULL;
562 svn_boolean_t is_cached;
564 SVN_ERR(svn_cache__get((void **) root_id_p, &is_cached,
565 ffd->rev_root_id_cache, &rev, result_pool));
569 SVN_ERR(svn_fs_fs__open_pack_or_rev_file(&revision_file, fs, rev,
570 scratch_pool, scratch_pool));
571 SVN_ERR(get_root_changes_offset(&root_offset, NULL,
572 revision_file, fs, rev,
575 SVN_ERR(get_fs_id_at_offset(&root_id, revision_file, fs, rev,
576 root_offset, result_pool));
578 SVN_ERR(svn_fs_fs__close_revision_file(revision_file));
580 SVN_ERR(svn_cache__set(ffd->rev_root_id_cache, &rev, root_id,
583 *root_id_p = root_id;
589 /* Describes a lazily opened rev / pack file. Instances will be shared
590 between multiple instances of rep_state_t. */
591 typedef struct shared_file_t
593 /* The opened file. NULL while file is not open, yet. */
594 svn_fs_fs__revision_file_t *rfile;
596 /* file system to open the file in */
599 /* a revision contained in the FILE. Since this file may be shared,
600 that value may be different from REP_STATE_T->REVISION. */
601 svn_revnum_t revision;
603 /* pool to use when creating the FILE. This guarantees that the file
604 remains open / valid beyond the respective local context that required
605 the file to be opened eventually. */
609 /* Represents where in the current svndiff data block each
610 representation is. */
611 typedef struct rep_state_t
613 /* shared lazy-open rev/pack file structure */
614 shared_file_t *sfile;
615 /* Caches raw (unparsed) windows. May be NULL. */
616 svn_cache__t *raw_window_cache;
617 /* The (parsed) txdelta window cache to use or NULL. */
618 svn_cache__t *window_cache;
619 /* Caches un-deltified (combined) windows. May be NULL. */
620 svn_cache__t *combined_cache;
621 /* revision containing the representation */
622 svn_revnum_t revision;
623 /* representation's item index in REVISION */
624 apr_uint64_t item_index;
625 /* length of the header at the start of the rep.
626 0 iff this rep is stored in a container
627 (i.e. does not have a header) */
628 apr_size_t header_size;
629 apr_off_t start; /* The starting offset for the raw
630 svndiff/plaintext data minus header.
631 -1 if the offset is yet unknown. */
632 apr_off_t current;/* The current offset relative to START. */
633 apr_off_t size; /* The on-disk size of the representation. */
634 int ver; /* If a delta, what svndiff version?
635 -1 for unknown delta version. */
636 int chunk_index; /* number of the window to read */
639 /* Simple wrapper around svn_fs_fs__get_file_offset to simplify callers. */
641 get_file_offset(apr_off_t *offset,
645 return svn_error_trace(svn_fs_fs__get_file_offset(offset,
646 rs->sfile->rfile->file,
650 /* Simple wrapper around svn_io_file_aligned_seek to simplify callers. */
652 rs_aligned_seek(rep_state_t *rs,
653 apr_off_t *buffer_start,
657 fs_fs_data_t *ffd = rs->sfile->fs->fsap_data;
658 return svn_error_trace(svn_io_file_aligned_seek(rs->sfile->rfile->file,
660 buffer_start, offset,
664 /* Open the rev / pack file FILE->RFILE if it hasn't been opened, yet. */
666 auto_open_shared_file(shared_file_t *file)
668 if (file->rfile == NULL)
669 SVN_ERR(svn_fs_fs__open_pack_or_rev_file(&file->rfile, file->fs,
670 file->revision, file->pool,
676 /* Set RS->START to the start of the raw representation data in RS->SFILE->RFILE,
677 if that hasn't been done yet. Use POOL for temporary allocations. */
679 auto_set_start_offset(rep_state_t *rs, apr_pool_t *pool)
683 SVN_ERR(svn_fs_fs__item_offset(&rs->start, rs->sfile->fs,
684 rs->sfile->rfile, rs->revision, NULL,
685 rs->item_index, pool));
686 rs->start += rs->header_size;
692 /* Set RS->VER depending on what is found in the already open RS->SFILE->RFILE
693 if the diff version is still unknown. Use POOL for temporary allocations.
696 auto_read_diff_version(rep_state_t *rs, apr_pool_t *pool)
701 SVN_ERR(rs_aligned_seek(rs, NULL, rs->start, pool));
702 SVN_ERR(svn_io_file_read_full2(rs->sfile->rfile->file, buf,
703 sizeof(buf), NULL, NULL, pool));
705 /* ### Layering violation */
706 if (! ((buf[0] == 'S') && (buf[1] == 'V') && (buf[2] == 'N')))
707 return svn_error_create
708 (SVN_ERR_FS_CORRUPT, NULL,
709 _("Malformed svndiff data in representation"));
719 /* See create_rep_state, which wraps this and adds another error. */
721 create_rep_state_body(rep_state_t **rep_state,
722 svn_fs_fs__rep_header_t **rep_header,
723 shared_file_t **shared_file,
724 representation_t *rep,
726 apr_pool_t *result_pool,
727 apr_pool_t *scratch_pool)
729 fs_fs_data_t *ffd = fs->fsap_data;
730 rep_state_t *rs = apr_pcalloc(result_pool, sizeof(*rs));
731 svn_fs_fs__rep_header_t *rh;
732 svn_boolean_t is_cached = FALSE;
733 apr_uint64_t estimated_window_storage;
737 * - refers to a valid revision,
738 * - refers to a packed revision,
739 * - as does the rep we want to read, and
740 * - refers to the same pack file as the rep
741 * we can re-use the same, already open file object
743 svn_boolean_t reuse_shared_file
744 = shared_file && *shared_file && (*shared_file)->rfile
745 && SVN_IS_VALID_REVNUM((*shared_file)->revision)
746 && (*shared_file)->revision < ffd->min_unpacked_rev
747 && rep->revision < ffd->min_unpacked_rev
748 && ( ((*shared_file)->revision / ffd->max_files_per_dir)
749 == (rep->revision / ffd->max_files_per_dir));
751 pair_cache_key_t key;
752 key.revision = rep->revision;
753 key.second = rep->item_index;
755 /* continue constructing RS */
756 rs->size = rep->size;
757 rs->revision = rep->revision;
758 rs->item_index = rep->item_index;
759 rs->raw_window_cache = ffd->raw_window_cache;
763 /* Very long files stored as self-delta will produce a huge number of
764 delta windows. Don't cache them lest we thrash the cache.
765 Since we don't know the depth of the delta chain, let's assume the
766 whole contents get rewritten 3 times (i.e. 4x the size in total).
768 estimated_window_storage
769 = 4 * ( (rep->expanded_size ? rep->expanded_size : rep->size)
770 + SVN_DELTA_WINDOW_SIZE);
771 estimated_window_storage = MIN(estimated_window_storage, APR_SIZE_MAX);
773 rs->window_cache = ffd->txdelta_window_cache
774 && svn_cache__is_cachable(ffd->txdelta_window_cache,
775 (apr_size_t)estimated_window_storage)
776 ? ffd->txdelta_window_cache
778 rs->combined_cache = ffd->combined_window_cache
779 && svn_cache__is_cachable(ffd->combined_window_cache,
780 (apr_size_t)estimated_window_storage)
781 ? ffd->combined_window_cache
784 /* cache lookup, i.e. skip reading the rep header if possible */
785 if (ffd->rep_header_cache && !svn_fs_fs__id_txn_used(&rep->txn_id))
786 SVN_ERR(svn_cache__get((void **) &rh, &is_cached,
787 ffd->rep_header_cache, &key, result_pool));
789 /* initialize the (shared) FILE member in RS */
790 if (reuse_shared_file)
792 rs->sfile = *shared_file;
796 shared_file_t *file = apr_pcalloc(result_pool, sizeof(*file));
797 file->revision = rep->revision;
798 file->pool = result_pool;
802 /* remember the current file, if suggested by the caller */
807 /* read rep header, if necessary */
810 /* ensure file is open and navigate to the start of rep header */
811 if (reuse_shared_file)
815 /* ... we can re-use the same, already open file object.
816 * This implies that we don't read from a txn.
818 rs->sfile = *shared_file;
819 SVN_ERR(auto_open_shared_file(rs->sfile));
820 SVN_ERR(svn_fs_fs__item_offset(&offset, fs, rs->sfile->rfile,
821 rep->revision, NULL, rep->item_index,
823 SVN_ERR(rs_aligned_seek(rs, NULL, offset, scratch_pool));
827 /* otherwise, create a new file object. May or may not be
830 SVN_ERR(open_and_seek_representation(&rs->sfile->rfile, fs, rep,
834 SVN_ERR(svn_fs_fs__read_rep_header(&rh, rs->sfile->rfile->stream,
835 result_pool, scratch_pool));
836 SVN_ERR(get_file_offset(&rs->start, rs, result_pool));
838 /* populate the cache if appropriate */
839 if (! svn_fs_fs__id_txn_used(&rep->txn_id))
841 if (use_block_read(fs))
842 SVN_ERR(block_read(NULL, fs, rep->revision, rep->item_index,
843 rs->sfile->rfile, result_pool, scratch_pool));
845 if (ffd->rep_header_cache)
846 SVN_ERR(svn_cache__set(ffd->rep_header_cache, &key, rh,
852 SVN_ERR(dbg_log_access(fs, rep->revision, rep->item_index, rh,
853 SVN_FS_FS__ITEM_TYPE_ANY_REP, scratch_pool));
855 rs->header_size = rh->header_size;
859 if (rh->type == svn_fs_fs__rep_plain)
860 /* This is a plaintext, so just return the current rep_state. */
863 /* skip "SVNx" diff marker */
869 /* Read the rep header for REP in filesystem FS and create a rep_state
870 for reading the representation. Return the rep_state in *REP_STATE
871 and the rep header in *REP_HEADER, both allocated in RESULT_POOL.
873 When reading multiple reps, i.e. a skip delta chain, you may provide
874 non-NULL SHARED_FILE. (If SHARED_FILE is not NULL, in the first
875 call it should be a pointer to NULL.) The function will use this
876 variable to store the previous call results and tries to re-use it.
877 This may result in significant savings in I/O for packed files and
878 number of open file handles.
881 create_rep_state(rep_state_t **rep_state,
882 svn_fs_fs__rep_header_t **rep_header,
883 shared_file_t **shared_file,
884 representation_t *rep,
886 apr_pool_t *result_pool,
887 apr_pool_t *scratch_pool)
889 svn_error_t *err = create_rep_state_body(rep_state, rep_header,
890 shared_file, rep, fs,
891 result_pool, scratch_pool);
892 if (err && err->apr_err == SVN_ERR_FS_CORRUPT)
894 fs_fs_data_t *ffd = fs->fsap_data;
897 /* ### This always returns "-1" for transaction reps, because
898 ### this particular bit of code doesn't know if the rep is
899 ### stored in the protorev or in the mutable area (for props
900 ### or dir contents). It is pretty rare for FSFS to *read*
901 ### from the protorev file, though, so this is probably OK.
902 ### And anyone going to debug corruption errors is probably
903 ### going to jump straight to this comment anyway! */
905 ? svn_fs_fs__unparse_representation
906 (rep, ffd->format, TRUE, scratch_pool, scratch_pool)->data
909 return svn_error_createf(SVN_ERR_FS_CORRUPT, err,
910 "Corrupt representation '%s'",
913 /* ### Call representation_string() ? */
914 return svn_error_trace(err);
918 svn_fs_fs__check_rep(representation_t *rep,
921 apr_pool_t *scratch_pool)
923 if (svn_fs_fs__use_log_addressing(fs))
926 svn_fs_fs__p2l_entry_t *entry;
927 svn_fs_fs__revision_file_t *rev_file = NULL;
929 /* Reuse the revision file provided by *HINT, if it is given and
930 * actually the rev / pack file that we want. */
931 svn_revnum_t start_rev = svn_fs_fs__packed_base_rev(fs, rep->revision);
933 rev_file = *(svn_fs_fs__revision_file_t **)hint;
935 if (rev_file == NULL || rev_file->start_revision != start_rev)
936 SVN_ERR(svn_fs_fs__open_pack_or_rev_file(&rev_file, fs, rep->revision,
937 scratch_pool, scratch_pool));
942 /* This will auto-retry if there was a background pack. */
943 SVN_ERR(svn_fs_fs__item_offset(&offset, fs, rev_file, rep->revision,
944 NULL, rep->item_index, scratch_pool));
946 /* This may fail if there is a background pack operation (can't auto-
947 retry because the item offset lookup has to be redone as well). */
948 SVN_ERR(svn_fs_fs__p2l_entry_lookup(&entry, fs, rev_file,
949 rep->revision, offset,
950 scratch_pool, scratch_pool));
953 || entry->type < SVN_FS_FS__ITEM_TYPE_FILE_REP
954 || entry->type > SVN_FS_FS__ITEM_TYPE_DIR_PROPS)
955 return svn_error_createf(SVN_ERR_FS_CORRUPT, NULL,
956 _("No representation found at offset %s "
957 "for item %s in revision %ld"),
958 apr_off_t_toa(scratch_pool, offset),
959 apr_psprintf(scratch_pool,
960 "%" APR_UINT64_T_FMT,
967 svn_fs_fs__rep_header_t *rep_header;
969 /* ### Should this be using read_rep_line() directly? */
970 SVN_ERR(create_rep_state(&rs, &rep_header, (shared_file_t**)hint,
971 rep, fs, scratch_pool, scratch_pool));
978 svn_fs_fs__rep_chain_length(int *chain_length,
980 representation_t *rep,
982 apr_pool_t *scratch_pool)
984 fs_fs_data_t *ffd = fs->fsap_data;
985 svn_revnum_t shard_size = ffd->max_files_per_dir
986 ? ffd->max_files_per_dir
988 apr_pool_t *subpool = svn_pool_create(scratch_pool);
989 apr_pool_t *iterpool = svn_pool_create(scratch_pool);
990 svn_boolean_t is_delta = FALSE;
993 svn_revnum_t last_shard = rep->revision / shard_size;
995 /* Check whether the length of the deltification chain is acceptable.
996 * Otherwise, shared reps may form a non-skipping delta chain in
998 representation_t base_rep = *rep;
1000 /* re-use open files between iterations */
1001 shared_file_t *file_hint = NULL;
1003 svn_fs_fs__rep_header_t *header;
1005 /* follow the delta chain towards the end but for at most
1006 * MAX_CHAIN_LENGTH steps. */
1009 rep_state_t *rep_state;
1011 svn_pool_clear(iterpool);
1013 if (base_rep.revision / shard_size != last_shard)
1015 last_shard = base_rep.revision / shard_size;
1019 SVN_ERR(create_rep_state_body(&rep_state,
1027 base_rep.revision = header->base_revision;
1028 base_rep.item_index = header->base_item_index;
1029 base_rep.size = header->base_length;
1030 svn_fs_fs__id_txn_reset(&base_rep.txn_id);
1031 is_delta = header->type == svn_fs_fs__rep_delta;
1033 /* Clear the SUBPOOL once in a while. Doing it too frequently
1034 * renders the FILE_HINT ineffective. Doing it too infrequently may
1035 * leave us with too many open file handles.
1037 * Note that this is mostly about efficiency, with larger values
1038 * being more efficient, and any non-zero value is legal here. When
1039 * reading deltified contents, we may keep 10s of rev files open at
1040 * the same time and the system has to cope with that. Thus, the
1041 * limit of 16 chosen below is in the same ballpark.
1044 if (count % 16 == 0)
1047 svn_pool_clear(subpool);
1050 while (is_delta && base_rep.revision);
1052 *chain_length = count;
1053 *shard_count = shards;
1054 svn_pool_destroy(subpool);
1055 svn_pool_destroy(iterpool);
1057 return SVN_NO_ERROR;
1060 struct rep_read_baton
1062 /* The FS from which we're reading. */
1065 /* Representation to read. */
1066 representation_t rep;
1068 /* If not NULL, this is the base for the first delta window in rs_list */
1069 svn_stringbuf_t *base_window;
1071 /* The state of all prior delta representations. */
1072 apr_array_header_t *rs_list;
1074 /* The plaintext state, if there is a plaintext. */
1075 rep_state_t *src_state;
1077 /* The index of the current delta chunk, if we are reading a delta. */
1080 /* The buffer where we store undeltified data. */
1085 /* A checksum context for summing the data read in order to verify it.
1086 Note: we don't need to use the sha1 checksum because we're only doing
1087 data verification, for which md5 is perfectly safe. */
1088 svn_checksum_ctx_t *md5_checksum_ctx;
1090 svn_boolean_t checksum_finalized;
1092 /* The stored checksum of the representation we are reading, its
1093 length, and the amount we've read so far. Some of this
1094 information is redundant with rs_list and src_state, but it's
1095 convenient for the checksumming code to have it here. */
1096 unsigned char md5_digest[APR_MD5_DIGESTSIZE];
1101 /* The key for the fulltext cache for this rep, if there is a
1103 pair_cache_key_t fulltext_cache_key;
1104 /* The text we've been reading, if we're going to cache it. */
1105 svn_stringbuf_t *current_fulltext;
1107 /* If not NULL, attempt to read the data from this cache.
1108 Once that lookup fails, reset it to NULL. */
1109 svn_cache__t *fulltext_cache;
1111 /* Bytes delivered from the FULLTEXT_CACHE so far. If the next
1112 lookup fails, we need to skip that much data from the reconstructed
1113 window stream before we continue normal operation. */
1114 svn_filesize_t fulltext_delivered;
1116 /* Used for temporary allocations during the read. */
1119 /* Pool used to store file handles and other data that is persistent
1120 for the entire stream read. */
1121 apr_pool_t *filehandle_pool;
1124 /* Set window key in *KEY to address the window described by RS.
1125 For convenience, return the KEY. */
1126 static window_cache_key_t *
1127 get_window_key(window_cache_key_t *key, rep_state_t *rs)
1129 assert(rs->revision <= APR_UINT32_MAX);
1130 key->revision = (apr_uint32_t)rs->revision;
1131 key->item_index = rs->item_index;
1132 key->chunk_index = rs->chunk_index;
1137 /* Implement svn_cache__partial_getter_func_t for raw txdelta windows.
1138 * Parse the raw data and return a svn_fs_fs__txdelta_cached_window_t.
1140 static svn_error_t *
1141 parse_raw_window(void **out,
1143 apr_size_t data_len,
1145 apr_pool_t *result_pool)
1147 svn_string_t raw_window;
1148 svn_stream_t *stream;
1150 /* unparsed and parsed window */
1151 const svn_fs_fs__raw_cached_window_t *window
1152 = (const svn_fs_fs__raw_cached_window_t *)data;
1153 svn_fs_fs__txdelta_cached_window_t *result
1154 = apr_pcalloc(result_pool, sizeof(*result));
1156 /* create a read stream taking the raw window as input */
1157 raw_window.data = svn_temp_deserializer__ptr(window,
1158 (const void * const *)&window->window.data);
1159 raw_window.len = window->window.len;
1160 stream = svn_stream_from_string(&raw_window, result_pool);
1163 SVN_ERR(svn_txdelta_read_svndiff_window(&result->window, stream, 1,
1166 /* complete the window and return it */
1167 result->end_offset = window->end_offset;
1170 return SVN_NO_ERROR;
1174 /* Read the WINDOW_P number CHUNK_INDEX for the representation given in
1175 * rep state RS from the current FSFS session's cache. This will be a
1176 * no-op and IS_CACHED will be set to FALSE if no cache has been given.
1177 * If a cache is available IS_CACHED will inform the caller about the
1178 * success of the lookup. Allocations of the window will be made
1179 * from RESULT_POOL. Use SCRATCH_POOL for temporary allocations.
1181 * If the information could be found, put RS to CHUNK_INDEX.
1183 static svn_error_t *
1184 get_cached_window(svn_txdelta_window_t **window_p,
1187 svn_boolean_t *is_cached,
1188 apr_pool_t *result_pool,
1189 apr_pool_t *scratch_pool)
1191 if (! rs->window_cache)
1193 /* txdelta window cache has not been enabled */
1198 /* ask the cache for the desired txdelta window */
1199 svn_fs_fs__txdelta_cached_window_t *cached_window;
1200 window_cache_key_t key = { 0 };
1201 get_window_key(&key, rs);
1202 key.chunk_index = chunk_index;
1203 SVN_ERR(svn_cache__get((void **) &cached_window,
1209 /* If we did not find a parsed txdelta window, we might have a raw
1210 version of it in our cache. If so, read, parse and re-cache it. */
1211 if (!*is_cached && rs->raw_window_cache)
1213 SVN_ERR(svn_cache__get_partial((void **) &cached_window, is_cached,
1214 rs->raw_window_cache, &key,
1215 parse_raw_window, NULL, result_pool));
1217 SVN_ERR(svn_cache__set(rs->window_cache, &key, cached_window,
1221 /* Return cached information. */
1224 /* found it. Pass it back to the caller. */
1225 *window_p = cached_window->window;
1227 /* manipulate the RS as if we just read the data */
1228 rs->current = cached_window->end_offset;
1229 rs->chunk_index = chunk_index;
1233 return SVN_NO_ERROR;
1236 /* Store the WINDOW read for the rep state RS in the current FSFS
1237 * session's cache. This will be a no-op if no cache has been given.
1238 * Temporary allocations will be made from SCRATCH_POOL. */
1239 static svn_error_t *
1240 set_cached_window(svn_txdelta_window_t *window,
1242 apr_pool_t *scratch_pool)
1244 if (rs->window_cache)
1246 /* store the window and the first offset _past_ it */
1247 svn_fs_fs__txdelta_cached_window_t cached_window;
1248 window_cache_key_t key = {0};
1250 cached_window.window = window;
1251 cached_window.end_offset = rs->current;
1253 /* but key it with the start offset because that is the known state
1254 * when we will look it up */
1255 SVN_ERR(svn_cache__set(rs->window_cache,
1256 get_window_key(&key, rs),
1261 return SVN_NO_ERROR;
1264 /* Read the WINDOW_P for the rep state RS from the current FSFS session's
1265 * cache. This will be a no-op and IS_CACHED will be set to FALSE if no
1266 * cache has been given. If a cache is available IS_CACHED will inform
1267 * the caller about the success of the lookup. Allocations (of the window
1268 * in particular) will be made from POOL.
1270 static svn_error_t *
1271 get_cached_combined_window(svn_stringbuf_t **window_p,
1273 svn_boolean_t *is_cached,
1276 if (! rs->combined_cache)
1278 /* txdelta window has not been enabled */
1283 /* ask the cache for the desired txdelta window */
1284 window_cache_key_t key = { 0 };
1285 return svn_cache__get((void **)window_p,
1288 get_window_key(&key, rs),
1292 return SVN_NO_ERROR;
1295 /* Store the WINDOW read for the rep state RS in the current FSFS session's
1296 * cache. This will be a no-op if no cache has been given.
1297 * Temporary allocations will be made from SCRATCH_POOL. */
1298 static svn_error_t *
1299 set_cached_combined_window(svn_stringbuf_t *window,
1301 apr_pool_t *scratch_pool)
1303 if (rs->combined_cache)
1305 /* but key it with the start offset because that is the known state
1306 * when we will look it up */
1307 window_cache_key_t key = { 0 };
1308 return svn_cache__set(rs->combined_cache,
1309 get_window_key(&key, rs),
1314 return SVN_NO_ERROR;
1317 /* Build an array of rep_state structures in *LIST giving the delta
1318 reps from first_rep to a plain-text or self-compressed rep. Set
1319 *SRC_STATE to the plain-text rep we find at the end of the chain,
1320 or to NULL if the final delta representation is self-compressed.
1321 The representation to start from is designated by filesystem FS, id
1322 ID, and representation REP.
1323 Also, set *WINDOW_P to the base window content for *LIST, if it
1324 could be found in cache. Otherwise, *LIST will contain the base
1325 representation for the whole delta chain.
1326 Finally, return the expanded size of the representation in
1327 *EXPANDED_SIZE. It will take care of cases where only the on-disk
1329 static svn_error_t *
1330 build_rep_list(apr_array_header_t **list,
1331 svn_stringbuf_t **window_p,
1332 rep_state_t **src_state,
1333 svn_filesize_t *expanded_size,
1335 representation_t *first_rep,
1338 representation_t rep;
1339 rep_state_t *rs = NULL;
1340 svn_fs_fs__rep_header_t *rep_header;
1341 svn_boolean_t is_cached = FALSE;
1342 shared_file_t *shared_file = NULL;
1343 apr_pool_t *iterpool = svn_pool_create(pool);
1345 *list = apr_array_make(pool, 1, sizeof(rep_state_t *));
1348 /* The value as stored in the data struct.
1349 0 is either for unknown length or actually zero length. */
1350 *expanded_size = first_rep->expanded_size;
1352 /* for the top-level rep, we need the rep_args */
1353 SVN_ERR(create_rep_state(&rs, &rep_header, &shared_file, &rep, fs, pool,
1356 /* Unknown size or empty representation?
1357 That implies the this being the first iteration.
1358 Usually size equals on-disk size, except for empty,
1359 compressed representations (delta, size = 4).
1360 Please note that for all non-empty deltas have
1361 a 4-byte header _plus_ some data. */
1362 if (*expanded_size == 0)
1363 if (rep_header->type == svn_fs_fs__rep_plain || first_rep->size != 4)
1364 *expanded_size = first_rep->size;
1368 svn_pool_clear(iterpool);
1370 /* fetch state, if that has not been done already */
1372 SVN_ERR(create_rep_state(&rs, &rep_header, &shared_file,
1373 &rep, fs, pool, iterpool));
1375 /* for txn reps, there won't be a cached combined window */
1376 if (!svn_fs_fs__id_txn_used(&rep.txn_id))
1377 SVN_ERR(get_cached_combined_window(window_p, rs, &is_cached, pool));
1381 /* We already have a reconstructed window in our cache.
1382 Write a pseudo rep_state with the full length. */
1385 rs->size = (*window_p)->len;
1390 if (rep_header->type == svn_fs_fs__rep_plain)
1392 /* This is a plaintext, so just return the current rep_state. */
1397 /* Push this rep onto the list. If it's self-compressed, we're done. */
1398 APR_ARRAY_PUSH(*list, rep_state_t *) = rs;
1399 if (rep_header->type == svn_fs_fs__rep_self_delta)
1405 rep.revision = rep_header->base_revision;
1406 rep.item_index = rep_header->base_item_index;
1407 rep.size = rep_header->base_length;
1408 svn_fs_fs__id_txn_reset(&rep.txn_id);
1412 svn_pool_destroy(iterpool);
1414 return SVN_NO_ERROR;
1418 /* Create a rep_read_baton structure for node revision NODEREV in
1419 filesystem FS and store it in *RB_P. Perform all allocations in
1420 POOL. If rep is mutable, it must be for file contents. */
1421 static svn_error_t *
1422 rep_read_get_baton(struct rep_read_baton **rb_p,
1424 representation_t *rep,
1425 pair_cache_key_t fulltext_cache_key,
1428 struct rep_read_baton *b;
1430 b = apr_pcalloc(pool, sizeof(*b));
1433 b->base_window = NULL;
1436 b->md5_checksum_ctx = svn_checksum_ctx_create(svn_checksum_md5, pool);
1437 b->checksum_finalized = FALSE;
1438 memcpy(b->md5_digest, rep->md5_digest, sizeof(rep->md5_digest));
1439 b->len = rep->expanded_size;
1441 b->fulltext_cache_key = fulltext_cache_key;
1442 b->pool = svn_pool_create(pool);
1443 b->filehandle_pool = svn_pool_create(pool);
1444 b->fulltext_cache = NULL;
1445 b->fulltext_delivered = 0;
1446 b->current_fulltext = NULL;
1448 /* Save our output baton. */
1451 return SVN_NO_ERROR;
1454 /* Skip forwards to THIS_CHUNK in REP_STATE and then read the next delta
1455 window into *NWIN. Note that RS->CHUNK_INDEX will be THIS_CHUNK rather
1456 than THIS_CHUNK + 1 when this function returns. */
1457 static svn_error_t *
1458 read_delta_window(svn_txdelta_window_t **nwin, int this_chunk,
1459 rep_state_t *rs, apr_pool_t *result_pool,
1460 apr_pool_t *scratch_pool)
1462 svn_boolean_t is_cached;
1463 apr_off_t start_offset;
1464 apr_off_t end_offset;
1465 apr_pool_t *iterpool;
1467 SVN_ERR_ASSERT(rs->chunk_index <= this_chunk);
1469 SVN_ERR(dbg_log_access(rs->sfile->fs, rs->revision, rs->item_index,
1470 NULL, SVN_FS_FS__ITEM_TYPE_ANY_REP, scratch_pool));
1472 /* Read the next window. But first, try to find it in the cache. */
1473 SVN_ERR(get_cached_window(nwin, rs, this_chunk, &is_cached,
1474 result_pool, scratch_pool));
1476 return SVN_NO_ERROR;
1478 /* someone has to actually read the data from file. Open it */
1479 SVN_ERR(auto_open_shared_file(rs->sfile));
1481 /* invoke the 'block-read' feature for non-txn data.
1482 However, don't do that if we are in the middle of some representation,
1483 because the block is unlikely to contain other data. */
1484 if ( rs->chunk_index == 0
1485 && SVN_IS_VALID_REVNUM(rs->revision)
1486 && use_block_read(rs->sfile->fs)
1487 && rs->raw_window_cache)
1489 SVN_ERR(block_read(NULL, rs->sfile->fs, rs->revision, rs->item_index,
1490 rs->sfile->rfile, result_pool, scratch_pool));
1492 /* reading the whole block probably also provided us with the
1493 desired txdelta window */
1494 SVN_ERR(get_cached_window(nwin, rs, this_chunk, &is_cached,
1495 result_pool, scratch_pool));
1497 return SVN_NO_ERROR;
1500 /* data is still not cached -> we need to read it.
1501 Make sure we have all the necessary info. */
1502 SVN_ERR(auto_set_start_offset(rs, scratch_pool));
1503 SVN_ERR(auto_read_diff_version(rs, scratch_pool));
1505 /* RS->FILE may be shared between RS instances -> make sure we point
1506 * to the right data. */
1507 start_offset = rs->start + rs->current;
1508 SVN_ERR(rs_aligned_seek(rs, NULL, start_offset, scratch_pool));
1510 /* Skip windows to reach the current chunk if we aren't there yet. */
1511 iterpool = svn_pool_create(scratch_pool);
1512 while (rs->chunk_index < this_chunk)
1514 svn_pool_clear(iterpool);
1515 SVN_ERR(svn_txdelta_skip_svndiff_window(rs->sfile->rfile->file,
1516 rs->ver, iterpool));
1518 SVN_ERR(get_file_offset(&start_offset, rs, iterpool));
1519 rs->current = start_offset - rs->start;
1520 if (rs->current >= rs->size)
1521 return svn_error_create(SVN_ERR_FS_CORRUPT, NULL,
1522 _("Reading one svndiff window read "
1523 "beyond the end of the "
1526 svn_pool_destroy(iterpool);
1528 /* Actually read the next window. */
1529 SVN_ERR(svn_txdelta_read_svndiff_window(nwin, rs->sfile->rfile->stream,
1530 rs->ver, result_pool));
1531 SVN_ERR(get_file_offset(&end_offset, rs, scratch_pool));
1532 rs->current = end_offset - rs->start;
1533 if (rs->current > rs->size)
1534 return svn_error_create(SVN_ERR_FS_CORRUPT, NULL,
1535 _("Reading one svndiff window read beyond "
1536 "the end of the representation"));
1538 /* the window has not been cached before, thus cache it now
1539 * (if caching is used for them at all) */
1540 if (SVN_IS_VALID_REVNUM(rs->revision))
1541 SVN_ERR(set_cached_window(*nwin, rs, scratch_pool));
1543 return SVN_NO_ERROR;
1546 /* Read SIZE bytes from the representation RS and return it in *NWIN. */
1547 static svn_error_t *
1548 read_plain_window(svn_stringbuf_t **nwin, rep_state_t *rs,
1549 apr_size_t size, apr_pool_t *result_pool,
1550 apr_pool_t *scratch_pool)
1554 /* RS->FILE may be shared between RS instances -> make sure we point
1555 * to the right data. */
1556 SVN_ERR(auto_open_shared_file(rs->sfile));
1557 SVN_ERR(auto_set_start_offset(rs, scratch_pool));
1559 offset = rs->start + rs->current;
1560 SVN_ERR(rs_aligned_seek(rs, NULL, offset, scratch_pool));
1562 /* Read the plain data. */
1563 *nwin = svn_stringbuf_create_ensure(size, result_pool);
1564 SVN_ERR(svn_io_file_read_full2(rs->sfile->rfile->file, (*nwin)->data, size,
1565 NULL, NULL, result_pool));
1566 (*nwin)->data[size] = 0;
1569 rs->current += (apr_off_t)size;
1571 return SVN_NO_ERROR;
1574 /* Skip SIZE bytes from the PLAIN representation RS. */
1575 static svn_error_t *
1576 skip_plain_window(rep_state_t *rs,
1580 rs->current += (apr_off_t)size;
1582 return SVN_NO_ERROR;
1585 /* Get the undeltified window that is a result of combining all deltas
1586 from the current desired representation identified in *RB with its
1587 base representation. Store the window in *RESULT. */
1588 static svn_error_t *
1589 get_combined_window(svn_stringbuf_t **result,
1590 struct rep_read_baton *rb)
1592 apr_pool_t *pool, *new_pool, *window_pool;
1594 apr_array_header_t *windows;
1595 svn_stringbuf_t *source, *buf = rb->base_window;
1597 apr_pool_t *iterpool;
1599 /* Read all windows that we need to combine. This is fine because
1600 the size of each window is relatively small (100kB) and skip-
1601 delta limits the number of deltas in a chain to well under 100.
1602 Stop early if one of them does not depend on its predecessors. */
1603 window_pool = svn_pool_create(rb->pool);
1604 windows = apr_array_make(window_pool, 0, sizeof(svn_txdelta_window_t *));
1605 iterpool = svn_pool_create(rb->pool);
1606 for (i = 0; i < rb->rs_list->nelts; ++i)
1608 svn_txdelta_window_t *window;
1610 svn_pool_clear(iterpool);
1612 rs = APR_ARRAY_IDX(rb->rs_list, i, rep_state_t *);
1613 SVN_ERR(read_delta_window(&window, rb->chunk_index, rs, window_pool,
1616 APR_ARRAY_PUSH(windows, svn_txdelta_window_t *) = window;
1617 if (window->src_ops == 0)
1624 /* Combine in the windows from the other delta reps. */
1625 pool = svn_pool_create(rb->pool);
1626 for (--i; i >= 0; --i)
1628 svn_txdelta_window_t *window;
1630 svn_pool_clear(iterpool);
1632 rs = APR_ARRAY_IDX(rb->rs_list, i, rep_state_t *);
1633 window = APR_ARRAY_IDX(windows, i, svn_txdelta_window_t *);
1635 /* Maybe, we've got a PLAIN start representation. If we do, read
1636 as much data from it as the needed for the txdelta window's source
1638 Note that BUF / SOURCE may only be NULL in the first iteration.
1639 Also note that we may have short-cut reading the delta chain --
1640 in which case SRC_OPS is 0 and it might not be a PLAIN rep. */
1642 if (source == NULL && rb->src_state != NULL)
1644 /* Even if we don't need the source rep now, we still must keep
1645 * its read offset in sync with what we might need for the next
1647 if (window->src_ops)
1648 SVN_ERR(read_plain_window(&source, rb->src_state,
1652 SVN_ERR(skip_plain_window(rb->src_state, window->sview_len));
1655 /* Combine this window with the current one. */
1656 new_pool = svn_pool_create(rb->pool);
1657 buf = svn_stringbuf_create_ensure(window->tview_len, new_pool);
1658 buf->len = window->tview_len;
1660 svn_txdelta_apply_instructions(window, source ? source->data : NULL,
1661 buf->data, &buf->len);
1662 if (buf->len != window->tview_len)
1663 return svn_error_create(SVN_ERR_FS_CORRUPT, NULL,
1664 _("svndiff window length is "
1667 /* Cache windows only if the whole rep content could be read as a
1668 single chunk. Only then will no other chunk need a deeper RS
1669 list than the cached chunk. */
1670 if ( (rb->chunk_index == 0) && (rs->current == rs->size)
1671 && SVN_IS_VALID_REVNUM(rs->revision))
1672 SVN_ERR(set_cached_combined_window(buf, rs, new_pool));
1676 /* Cycle pools so that we only need to hold three windows at a time. */
1677 svn_pool_destroy(pool);
1680 svn_pool_destroy(iterpool);
1682 svn_pool_destroy(window_pool);
1685 return SVN_NO_ERROR;
1688 /* Returns whether or not the expanded fulltext of the file is cachable
1689 * based on its size SIZE. The decision depends on the cache used by RB.
1691 static svn_boolean_t
1692 fulltext_size_is_cachable(fs_fs_data_t *ffd, svn_filesize_t size)
1694 return (size < APR_SIZE_MAX)
1695 && svn_cache__is_cachable(ffd->fulltext_cache, (apr_size_t)size);
1698 /* Close method used on streams returned by read_representation().
1700 static svn_error_t *
1701 rep_read_contents_close(void *baton)
1703 struct rep_read_baton *rb = baton;
1705 svn_pool_destroy(rb->pool);
1706 svn_pool_destroy(rb->filehandle_pool);
1708 return SVN_NO_ERROR;
1711 /* Return the next *LEN bytes of the rep from our plain / delta windows
1712 and store them in *BUF. */
1713 static svn_error_t *
1714 get_contents_from_windows(struct rep_read_baton *rb,
1718 apr_size_t copy_len, remaining = *len;
1722 /* Special case for when there are no delta reps, only a plain
1724 if (rb->rs_list->nelts == 0)
1726 copy_len = remaining;
1729 if (rb->base_window != NULL)
1731 /* We got the desired rep directly from the cache.
1732 This is where we need the pseudo rep_state created
1733 by build_rep_list(). */
1734 apr_size_t offset = (apr_size_t)rs->current;
1735 if (copy_len + offset > rb->base_window->len)
1736 copy_len = offset < rb->base_window->len
1737 ? rb->base_window->len - offset
1740 memcpy (cur, rb->base_window->data + offset, copy_len);
1745 if (((apr_off_t) copy_len) > rs->size - rs->current)
1746 copy_len = (apr_size_t) (rs->size - rs->current);
1748 SVN_ERR(auto_open_shared_file(rs->sfile));
1749 SVN_ERR(auto_set_start_offset(rs, rb->pool));
1751 offset = rs->start + rs->current;
1752 SVN_ERR(rs_aligned_seek(rs, NULL, offset, rb->pool));
1753 SVN_ERR(svn_io_file_read_full2(rs->sfile->rfile->file, cur,
1754 copy_len, NULL, NULL, rb->pool));
1757 rs->current += copy_len;
1759 return SVN_NO_ERROR;
1762 while (remaining > 0)
1764 /* If we have buffered data from a previous chunk, use that. */
1767 /* Determine how much to copy from the buffer. */
1768 copy_len = rb->buf_len - rb->buf_pos;
1769 if (copy_len > remaining)
1770 copy_len = remaining;
1772 /* Actually copy the data. */
1773 memcpy(cur, rb->buf + rb->buf_pos, copy_len);
1774 rb->buf_pos += copy_len;
1776 remaining -= copy_len;
1778 /* If the buffer is all used up, clear it and empty the
1780 if (rb->buf_pos == rb->buf_len)
1782 svn_pool_clear(rb->pool);
1788 svn_stringbuf_t *sbuf = NULL;
1790 rs = APR_ARRAY_IDX(rb->rs_list, 0, rep_state_t *);
1791 if (rs->current == rs->size)
1794 /* Get more buffered data by evaluating a chunk. */
1795 SVN_ERR(get_combined_window(&sbuf, rb));
1798 rb->buf_len = sbuf->len;
1799 rb->buf = sbuf->data;
1806 return SVN_NO_ERROR;
1809 /* Baton type for get_fulltext_partial. */
1810 typedef struct fulltext_baton_t
1812 /* Target buffer to write to; of at least LEN bytes. */
1815 /* Offset within the respective fulltext at which we shall start to
1816 copy data into BUFFER. */
1819 /* Number of bytes to copy. The actual amount may be less in case
1820 the fulltext is short(er). */
1823 /* Number of bytes actually copied into BUFFER. */
1827 /* Implement svn_cache__partial_getter_func_t for fulltext caches.
1828 * From the fulltext in DATA, we copy the range specified by the
1829 * fulltext_baton_t* BATON into the buffer provided by that baton.
1830 * OUT and RESULT_POOL are not used.
1832 static svn_error_t *
1833 get_fulltext_partial(void **out,
1835 apr_size_t data_len,
1837 apr_pool_t *result_pool)
1839 fulltext_baton_t *fulltext_baton = baton;
1841 /* We cached the fulltext with an NUL appended to it. */
1842 apr_size_t fulltext_len = data_len - 1;
1844 /* Clip the copy range to what the fulltext size allows. */
1845 apr_size_t start = MIN(fulltext_baton->start, fulltext_len);
1846 fulltext_baton->read = MIN(fulltext_len - start, fulltext_baton->len);
1848 /* Copy the data to the output buffer and be done. */
1849 memcpy(fulltext_baton->buffer, (const char *)data + start,
1850 fulltext_baton->read);
1852 return SVN_NO_ERROR;
1855 /* Find the fulltext specified in BATON in the fulltext cache given
1856 * as well by BATON. If that succeeds, set *CACHED to TRUE and copy
1857 * up to the next *LEN bytes into BUFFER. Set *LEN to the actual
1858 * number of bytes copied.
1860 static svn_error_t *
1861 get_contents_from_fulltext(svn_boolean_t *cached,
1862 struct rep_read_baton *baton,
1867 fulltext_baton_t fulltext_baton;
1869 SVN_ERR_ASSERT((apr_size_t)baton->fulltext_delivered
1870 == baton->fulltext_delivered);
1871 fulltext_baton.buffer = buffer;
1872 fulltext_baton.start = (apr_size_t)baton->fulltext_delivered;
1873 fulltext_baton.len = *len;
1874 fulltext_baton.read = 0;
1876 SVN_ERR(svn_cache__get_partial(&dummy, cached, baton->fulltext_cache,
1877 &baton->fulltext_cache_key,
1878 get_fulltext_partial, &fulltext_baton,
1883 baton->fulltext_delivered += fulltext_baton.read;
1884 *len = fulltext_baton.read;
1887 return SVN_NO_ERROR;
1890 /* Determine the optimal size of a string buf that shall receive a
1891 * (full-) text of NEEDED bytes.
1893 * The critical point is that those buffers may be very large and
1894 * can cause memory fragmentation. We apply simple heuristics to
1895 * make fragmentation less likely.
1898 optimimal_allocation_size(apr_size_t needed)
1900 /* For all allocations, assume some overhead that is shared between
1901 * OS memory managemnt, APR memory management and svn_stringbuf_t. */
1902 const apr_size_t overhead = 0x400;
1905 /* If an allocation size if safe for other ephemeral buffers, it should
1906 * be safe for ours. */
1907 if (needed <= SVN__STREAM_CHUNK_SIZE)
1910 /* Paranoia edge case:
1911 * Skip our heuristics if they created arithmetical overflow.
1912 * Beware to make this test work for NEEDED = APR_SIZE_MAX as well! */
1913 if (needed >= APR_SIZE_MAX / 2 - overhead)
1916 /* As per definition SVN__STREAM_CHUNK_SIZE is a power of two.
1917 * Since we know NEEDED to be larger than that, use it as the
1920 * Heuristics: Allocate a power-of-two number of bytes that fit
1921 * NEEDED plus some OVERHEAD. The APR allocator
1922 * will round it up to the next full page size.
1924 optimal = SVN__STREAM_CHUNK_SIZE;
1925 while (optimal - overhead < needed)
1928 /* This is above or equal to NEEDED. */
1929 return optimal - overhead;
1932 /* After a fulltext cache lookup failure, we will continue to read from
1933 * combined delta or plain windows. However, we must first make that data
1934 * stream in BATON catch up tho the position LEN already delivered from the
1935 * fulltext cache. Also, we need to store the reconstructed fulltext if we
1936 * want to cache it at the end.
1938 static svn_error_t *
1939 skip_contents(struct rep_read_baton *baton,
1942 svn_error_t *err = SVN_NO_ERROR;
1944 /* Do we want to cache the reconstructed fulltext? */
1945 if (SVN_IS_VALID_REVNUM(baton->fulltext_cache_key.revision))
1948 svn_filesize_t to_alloc = MAX(len, baton->len);
1950 /* This should only be happening if BATON->LEN and LEN are
1951 * cacheable, implying they fit into memory. */
1952 SVN_ERR_ASSERT((apr_size_t)to_alloc == to_alloc);
1954 /* Allocate the fulltext buffer. */
1955 baton->current_fulltext = svn_stringbuf_create_ensure(
1956 optimimal_allocation_size((apr_size_t)to_alloc),
1957 baton->filehandle_pool);
1959 /* Read LEN bytes from the window stream and store the data
1960 * in the fulltext buffer (will be filled by further reads later). */
1961 baton->current_fulltext->len = (apr_size_t)len;
1962 baton->current_fulltext->data[(apr_size_t)len] = 0;
1964 buffer = baton->current_fulltext->data;
1965 while (len > 0 && !err)
1967 apr_size_t to_read = (apr_size_t)len;
1968 err = get_contents_from_windows(baton, buffer, &to_read);
1975 /* Simply drain LEN bytes from the window stream. */
1976 apr_pool_t *subpool = subpool = svn_pool_create(baton->pool);
1977 char *buffer = apr_palloc(subpool, SVN__STREAM_CHUNK_SIZE);
1979 while (len > 0 && !err)
1981 apr_size_t to_read = len > SVN__STREAM_CHUNK_SIZE
1982 ? SVN__STREAM_CHUNK_SIZE
1985 err = get_contents_from_windows(baton, buffer, &to_read);
1989 svn_pool_destroy(subpool);
1992 return svn_error_trace(err);
1995 /* BATON is of type `rep_read_baton'; read the next *LEN bytes of the
1996 representation and store them in *BUF. Sum as we read and verify
1997 the MD5 sum at the end. */
1998 static svn_error_t *
1999 rep_read_contents(void *baton,
2003 struct rep_read_baton *rb = baton;
2005 /* Get data from the fulltext cache for as long as we can. */
2006 if (rb->fulltext_cache)
2008 svn_boolean_t cached;
2009 SVN_ERR(get_contents_from_fulltext(&cached, rb, buf, len));
2011 return SVN_NO_ERROR;
2013 /* Cache miss. From now on, we will never read from the fulltext
2014 * cache for this representation anymore. */
2015 rb->fulltext_cache = NULL;
2018 /* No fulltext cache to help us. We must read from the window stream. */
2021 /* Window stream not initialized, yet. Do it now. */
2022 SVN_ERR(build_rep_list(&rb->rs_list, &rb->base_window,
2023 &rb->src_state, &rb->len, rb->fs, &rb->rep,
2024 rb->filehandle_pool));
2026 /* In case we did read from the fulltext cache before, make the
2027 * window stream catch up. Also, initialize the fulltext buffer
2028 * if we want to cache the fulltext at the end. */
2029 SVN_ERR(skip_contents(rb, rb->fulltext_delivered));
2032 /* Get the next block of data. */
2033 SVN_ERR(get_contents_from_windows(rb, buf, len));
2035 if (rb->current_fulltext)
2036 svn_stringbuf_appendbytes(rb->current_fulltext, buf, *len);
2038 /* Perform checksumming. We want to check the checksum as soon as
2039 the last byte of data is read, in case the caller never performs
2040 a short read, but we don't want to finalize the MD5 context
2042 if (!rb->checksum_finalized)
2044 SVN_ERR(svn_checksum_update(rb->md5_checksum_ctx, buf, *len));
2046 if (rb->off == rb->len)
2048 svn_checksum_t *md5_checksum;
2049 svn_checksum_t expected;
2050 expected.kind = svn_checksum_md5;
2051 expected.digest = rb->md5_digest;
2053 rb->checksum_finalized = TRUE;
2054 SVN_ERR(svn_checksum_final(&md5_checksum, rb->md5_checksum_ctx,
2056 if (!svn_checksum_match(md5_checksum, &expected))
2057 return svn_error_create(SVN_ERR_FS_CORRUPT,
2058 svn_checksum_mismatch_err(&expected, md5_checksum,
2060 _("Checksum mismatch while reading representation")),
2065 if (rb->off == rb->len && rb->current_fulltext)
2067 fs_fs_data_t *ffd = rb->fs->fsap_data;
2068 SVN_ERR(svn_cache__set(ffd->fulltext_cache, &rb->fulltext_cache_key,
2069 rb->current_fulltext, rb->pool));
2070 rb->current_fulltext = NULL;
2073 return SVN_NO_ERROR;
2077 svn_fs_fs__get_contents(svn_stream_t **contents_p,
2079 representation_t *rep,
2080 svn_boolean_t cache_fulltext,
2085 *contents_p = svn_stream_empty(pool);
2089 fs_fs_data_t *ffd = fs->fsap_data;
2090 svn_filesize_t len = rep->expanded_size ? rep->expanded_size : rep->size;
2091 struct rep_read_baton *rb;
2093 pair_cache_key_t fulltext_cache_key = { 0 };
2094 fulltext_cache_key.revision = rep->revision;
2095 fulltext_cache_key.second = rep->item_index;
2097 /* Initialize the reader baton. Some members may added lazily
2098 * while reading from the stream */
2099 SVN_ERR(rep_read_get_baton(&rb, fs, rep, fulltext_cache_key, pool));
2101 /* Make the stream attempt fulltext cache lookups if the fulltext
2102 * is cacheable. If it is not, then also don't try to buffer and
2104 if (ffd->fulltext_cache && cache_fulltext
2105 && SVN_IS_VALID_REVNUM(rep->revision)
2106 && fulltext_size_is_cachable(ffd, len))
2108 rb->fulltext_cache = ffd->fulltext_cache;
2112 /* This will also prevent the reconstructed fulltext from being
2113 put into the cache. */
2114 rb->fulltext_cache_key.revision = SVN_INVALID_REVNUM;
2117 *contents_p = svn_stream_create(rb, pool);
2118 svn_stream_set_read2(*contents_p, NULL /* only full read support */,
2120 svn_stream_set_close(*contents_p, rep_read_contents_close);
2123 return SVN_NO_ERROR;
2126 /* Baton for cache_access_wrapper. Wraps the original parameters of
2127 * svn_fs_fs__try_process_file_content().
2129 typedef struct cache_access_wrapper_baton_t
2131 svn_fs_process_contents_func_t func;
2133 } cache_access_wrapper_baton_t;
2135 /* Wrapper to translate between svn_fs_process_contents_func_t and
2136 * svn_cache__partial_getter_func_t.
2138 static svn_error_t *
2139 cache_access_wrapper(void **out,
2141 apr_size_t data_len,
2145 cache_access_wrapper_baton_t *wrapper_baton = baton;
2147 SVN_ERR(wrapper_baton->func((const unsigned char *)data,
2148 data_len - 1, /* cache adds terminating 0 */
2149 wrapper_baton->baton,
2152 /* non-NULL value to signal the calling cache that all went well */
2155 return SVN_NO_ERROR;
2159 svn_fs_fs__try_process_file_contents(svn_boolean_t *success,
2161 node_revision_t *noderev,
2162 svn_fs_process_contents_func_t processor,
2166 representation_t *rep = noderev->data_rep;
2169 fs_fs_data_t *ffd = fs->fsap_data;
2170 pair_cache_key_t fulltext_cache_key = { 0 };
2172 fulltext_cache_key.revision = rep->revision;
2173 fulltext_cache_key.second = rep->item_index;
2174 if (ffd->fulltext_cache && SVN_IS_VALID_REVNUM(rep->revision)
2175 && fulltext_size_is_cachable(ffd, rep->expanded_size))
2177 cache_access_wrapper_baton_t wrapper_baton;
2180 wrapper_baton.func = processor;
2181 wrapper_baton.baton = baton;
2182 return svn_cache__get_partial(&dummy, success,
2183 ffd->fulltext_cache,
2184 &fulltext_cache_key,
2185 cache_access_wrapper,
2192 return SVN_NO_ERROR;
2196 /* Baton used when reading delta windows. */
2197 struct delta_read_baton
2200 unsigned char md5_digest[APR_MD5_DIGESTSIZE];
2203 /* This implements the svn_txdelta_next_window_fn_t interface. */
2204 static svn_error_t *
2205 delta_read_next_window(svn_txdelta_window_t **window, void *baton,
2208 struct delta_read_baton *drb = baton;
2209 apr_pool_t *scratch_pool = svn_pool_create(pool);
2212 if (drb->rs->current < drb->rs->size)
2214 SVN_ERR(read_delta_window(window, drb->rs->chunk_index, drb->rs, pool,
2216 drb->rs->chunk_index++;
2219 svn_pool_destroy(scratch_pool);
2221 return SVN_NO_ERROR;
2224 /* This implements the svn_txdelta_md5_digest_fn_t interface. */
2225 static const unsigned char *
2226 delta_read_md5_digest(void *baton)
2228 struct delta_read_baton *drb = baton;
2229 return drb->md5_digest;
2232 /* Return a txdelta stream for on-disk representation REP_STATE
2233 * of TARGET. Allocate the result in POOL.
2235 static svn_txdelta_stream_t *
2236 get_storaged_delta_stream(rep_state_t *rep_state,
2237 node_revision_t *target,
2240 /* Create the delta read baton. */
2241 struct delta_read_baton *drb = apr_pcalloc(pool, sizeof(*drb));
2242 drb->rs = rep_state;
2243 memcpy(drb->md5_digest, target->data_rep->md5_digest,
2244 sizeof(drb->md5_digest));
2245 return svn_txdelta_stream_create(drb, delta_read_next_window,
2246 delta_read_md5_digest, pool);
2250 svn_fs_fs__get_file_delta_stream(svn_txdelta_stream_t **stream_p,
2252 node_revision_t *source,
2253 node_revision_t *target,
2256 svn_stream_t *source_stream, *target_stream;
2257 rep_state_t *rep_state;
2258 svn_fs_fs__rep_header_t *rep_header;
2259 fs_fs_data_t *ffd = fs->fsap_data;
2261 /* Try a shortcut: if the target is stored as a delta against the source,
2262 then just use that delta. However, prefer using the fulltext cache
2263 whenever that is available. */
2264 if (target->data_rep && (source || ! ffd->fulltext_cache))
2266 /* Read target's base rep if any. */
2267 SVN_ERR(create_rep_state(&rep_state, &rep_header, NULL,
2268 target->data_rep, fs, pool, pool));
2270 if (source && source->data_rep && target->data_rep)
2272 /* If that matches source, then use this delta as is.
2273 Note that we want an actual delta here. E.g. a self-delta would
2274 not be good enough. */
2275 if (rep_header->type == svn_fs_fs__rep_delta
2276 && rep_header->base_revision == source->data_rep->revision
2277 && rep_header->base_item_index == source->data_rep->item_index)
2279 *stream_p = get_storaged_delta_stream(rep_state, target, pool);
2280 return SVN_NO_ERROR;
2285 /* We want a self-delta. There is a fair chance that TARGET got
2286 added in this revision and is already stored in the requested
2288 if (rep_header->type == svn_fs_fs__rep_self_delta)
2290 *stream_p = get_storaged_delta_stream(rep_state, target, pool);
2291 return SVN_NO_ERROR;
2295 /* Don't keep file handles open for longer than necessary. */
2296 if (rep_state->sfile->rfile)
2298 SVN_ERR(svn_fs_fs__close_revision_file(rep_state->sfile->rfile));
2299 rep_state->sfile->rfile = NULL;
2303 /* Read both fulltexts and construct a delta. */
2305 SVN_ERR(svn_fs_fs__get_contents(&source_stream, fs, source->data_rep,
2308 source_stream = svn_stream_empty(pool);
2309 SVN_ERR(svn_fs_fs__get_contents(&target_stream, fs, target->data_rep,
2312 /* Because source and target stream will already verify their content,
2313 * there is no need to do this once more. In particular if the stream
2314 * content is being fetched from cache. */
2315 svn_txdelta2(stream_p, source_stream, target_stream, FALSE, pool);
2317 return SVN_NO_ERROR;
2320 /* Return TRUE when all svn_fs_dirent_t* in ENTRIES are already sorted
2321 by their respective name. */
2322 static svn_boolean_t
2323 sorted(apr_array_header_t *entries)
2327 const svn_fs_dirent_t * const *dirents = (const void *)entries->elts;
2328 for (i = 0; i < entries->nelts-1; ++i)
2329 if (strcmp(dirents[i]->name, dirents[i+1]->name) > 0)
2335 /* Compare the names of the two dirents given in **A and **B.
A and B each point to an svn_fs_dirent_t * array element. The result
is that of strcmp() on the two names. read_dir_entries() passes this
function to svn_sort__array(). */
2337 compare_dirents(const void *a, const void *b)
2339 const svn_fs_dirent_t *lhs = *((const svn_fs_dirent_t * const *) a);
2340 const svn_fs_dirent_t *rhs = *((const svn_fs_dirent_t * const *) b);
2342 return strcmp(lhs->name, rhs->name);
2345 /* Compare the name of the dirent given in **A with the C string in *B.
A points to an svn_fs_dirent_t * array element while B is the plain
name to compare against. The result is that of strcmp().
svn_fs_fs__find_dir_entry() passes this function to
svn_sort__array_lookup(). */
2347 compare_dirent_name(const void *a, const void *b)
2349 const svn_fs_dirent_t *lhs = *((const svn_fs_dirent_t * const *) a);
2350 const char *rhs = b;
2352 return strcmp(lhs->name, rhs);
2355 /* Into ENTRIES, read all directory entries from the key-value text in
2356 * STREAM. If INCREMENTAL is TRUE, read until the end of the STREAM and
2357 * update the data. ID is provided for nicer error messages.
 *
 * Each value is split at spaces into a node kind token (compared against
 * SVN_FS_FS__KIND_FILE and SVN_FS_FS__KIND_DIR) followed by a node-rev ID
 * token. A missing token or an unknown kind yields SVN_ERR_FS_CORRUPT.
 * The svn_fs_dirent_t structs, their names and their IDs are allocated
 * in RESULT_POOL. SCRATCH_POOL is used for temporaries. Before
 * returning, ENTRIES gets sorted by name unless it is sorted already.
2359 static svn_error_t *
2360 read_dir_entries(apr_array_header_t *entries,
2361 svn_stream_t *stream,
2362 svn_boolean_t incremental,
2363 const svn_fs_id_t *id,
2364 apr_pool_t *result_pool,
2365 apr_pool_t *scratch_pool)
2367 apr_pool_t *iterpool = svn_pool_create(scratch_pool);
2368 apr_hash_t *hash = incremental ? svn_hash__make(scratch_pool) : NULL;
2369 const char *terminator = SVN_HASH_TERMINATOR;
2371 /* Read until the terminator (non-incremental) or the end of STREAM
2372 (incremental mode). In the latter mode, we use a temporary HASH
2373 to make updating and removing entries cheaper. */
2376 svn_hash__entry_t entry;
2377 svn_fs_dirent_t *dirent;
2380 svn_pool_clear(iterpool);
2381 SVN_ERR(svn_hash__read_entry(&entry, stream, terminator,
2382 incremental, iterpool));
2384 /* End of directory? */
2385 if (entry.key == NULL)
2387 /* In incremental mode, we skip the terminator and read the
2388 increments following it until the end of the stream. */
2389 if (incremental && terminator)
2395 /* Deleted entry? */
2396 if (entry.val == NULL)
2398 /* We must be in incremental mode */
2400 apr_hash_set(hash, entry.key, entry.keylen, NULL);
2404 /* Add a new directory entry. */
2405 dirent = apr_pcalloc(result_pool, sizeof(*dirent));
2406 dirent->name = apr_pstrmemdup(result_pool, entry.key, entry.keylen);
2408 str = svn_cstring_tokenize(" ", &entry.val);
2410 return svn_error_createf(SVN_ERR_FS_CORRUPT, NULL,
2411 _("Directory entry corrupt in '%s'"),
2412 svn_fs_fs__id_unparse(id, scratch_pool)->data);
2414 if (strcmp(str, SVN_FS_FS__KIND_FILE) == 0)
2416 dirent->kind = svn_node_file;
2418 else if (strcmp(str, SVN_FS_FS__KIND_DIR) == 0)
2420 dirent->kind = svn_node_dir;
2424 return svn_error_createf(SVN_ERR_FS_CORRUPT, NULL,
2425 _("Directory entry corrupt in '%s'"),
2426 svn_fs_fs__id_unparse(id, scratch_pool)->data);
2429 str = svn_cstring_tokenize(" ", &entry.val);
2431 return svn_error_createf(SVN_ERR_FS_CORRUPT, NULL,
2432 _("Directory entry corrupt in '%s'"),
2433 svn_fs_fs__id_unparse(id, scratch_pool)->data);
2435 SVN_ERR(svn_fs_fs__id_parse(&dirent->id, str, result_pool));
2437 /* In incremental mode, update the hash; otherwise, write to the
2438 * final array. Be sure to use hash keys that survive this iteration.
 * That is why the key is DIRENT->NAME (a copy in RESULT_POOL) instead
 * of ENTRY.KEY, which was read using ITERPOOL.
2441 apr_hash_set(hash, dirent->name, entry.keylen, dirent);
2443 APR_ARRAY_PUSH(entries, svn_fs_dirent_t *) = dirent;
2446 /* Convert container to a sorted array. */
2449 apr_hash_index_t *hi;
2450 for (hi = apr_hash_first(iterpool, hash); hi; hi = apr_hash_next(hi))
2451 APR_ARRAY_PUSH(entries, svn_fs_dirent_t *) = apr_hash_this_val(hi);
2454 if (!sorted(entries))
2455 svn_sort__array(entries, compare_dirents);
2457 svn_pool_destroy(iterpool);
2459 return SVN_NO_ERROR;
2462 /* Fetch the contents of the directory NODEREV in FS into *ENTRIES, a new
2463 array of svn_fs_dirent_t * that is allocated in RESULT_POOL and filled
2464 by read_dir_entries(). If NODEREV has no data rep, the array remains
empty. Use SCRATCH_POOL for temporary allocations. */
2465 static svn_error_t *
2466 get_dir_contents(apr_array_header_t **entries,
2468 node_revision_t *noderev,
2469 apr_pool_t *result_pool,
2470 apr_pool_t *scratch_pool)
2472 svn_stream_t *contents;
2474 *entries = apr_array_make(result_pool, 16, sizeof(svn_fs_dirent_t *));
2475 if (noderev->data_rep && svn_fs_fs__id_txn_used(&noderev->data_rep->txn_id))
2477 const char *filename
2478 = svn_fs_fs__path_txn_node_children(fs, noderev->id, scratch_pool);
2480 /* The representation is mutable. Read the old directory
2481 contents from the mutable children file, followed by the
2482 changes we've made in this transaction. */
2483 SVN_ERR(svn_stream_open_readonly(&contents, filename, scratch_pool,
2485 SVN_ERR(read_dir_entries(*entries, contents, TRUE, noderev->id,
2486 result_pool, scratch_pool));
2487 SVN_ERR(svn_stream_close(contents));
2489 else if (noderev->data_rep)
2491 /* Undeltify content before parsing it. Otherwise, we could only
2492 * parse it byte-by-byte.
 * LEN is the expected text length: the rep's EXPANDED_SIZE or, if
 * that is 0, its SIZE.
2494 apr_size_t len = noderev->data_rep->expanded_size
2495 ? (apr_size_t)noderev->data_rep->expanded_size
2496 : (apr_size_t)noderev->data_rep->size;
2497 svn_stringbuf_t *text;
2499 /* The representation is immutable. Read it normally. */
2500 SVN_ERR(svn_fs_fs__get_contents(&contents, fs, noderev->data_rep,
2501 FALSE, scratch_pool));
2502 SVN_ERR(svn_stringbuf_from_stream(&text, contents, len, scratch_pool));
2503 SVN_ERR(svn_stream_close(contents));
2505 /* de-serialize hash */
2506 contents = svn_stream_from_stringbuf(text, scratch_pool);
2507 SVN_ERR(read_dir_entries(*entries, contents, FALSE, noderev->id,
2508 result_pool, scratch_pool));
2511 return SVN_NO_ERROR;
2515 /* Return the cache object in FS responsible for storing the directory of
2516 * NODEREV plus the corresponding *KEY. If no cache exists, return NULL.
2517 * PAIR_KEY must point to some key struct, which does not need to be
2518 * initialized. We use it to avoid dynamic allocation.
 *
 * For NODEREVs inside a transaction, the txn directory cache is returned
 * and *KEY is the unparsed node-rev ID string, allocated in POOL.
 * Otherwise the regular directory cache is returned and PAIR_KEY is
 * filled from the revision and item index of NODEREV's data rep.
2520 static svn_cache__t *
2521 locate_dir_cache(svn_fs_t *fs,
2523 pair_cache_key_t *pair_key,
2524 node_revision_t *noderev,
2527 fs_fs_data_t *ffd = fs->fsap_data;
2528 if (svn_fs_fs__id_is_txn(noderev->id))
2530 /* data in txns requires the expensive fs_id-based addressing mode */
2531 *key = svn_fs_fs__id_unparse(noderev->id, pool)->data;
2532 return ffd->txn_dir_cache;
2536 /* committed data can use simple rev,item pairs */
2537 if (noderev->data_rep)
2539 pair_key->revision = noderev->data_rep->revision;
2540 pair_key->second = noderev->data_rep->item_index;
2545 /* no data rep -> empty directory.
2546 A NULL key causes a cache miss. */
2550 return ffd->dir_cache;
/* Return in *ENTRIES_P the directory entries of NODEREV in FS as an array
 * of svn_fs_dirent_t *. The cache chosen by locate_dir_cache() is
 * consulted first. Otherwise, the contents are read via
 * get_dir_contents() with RESULT_POOL and then stored in that cache,
 * provided there is one and svn_cache__is_cachable() accepts the
 * estimated size. */
2555 svn_fs_fs__rep_contents_dir(apr_array_header_t **entries_p,
2557 node_revision_t *noderev,
2558 apr_pool_t *result_pool,
2559 apr_pool_t *scratch_pool)
2561 pair_cache_key_t pair_key = { 0 };
2564 /* find the cache we may use */
2565 svn_cache__t *cache = locate_dir_cache(fs, &key, &pair_key, noderev,
2569 svn_boolean_t found;
2571 SVN_ERR(svn_cache__get((void **)entries_p, &found, cache, key,
2574 return SVN_NO_ERROR;
2577 /* Read in the directory contents. */
2578 SVN_ERR(get_dir_contents(entries_p, fs, noderev, result_pool,
2581 /* Update the cache, if we are to use one.
2583 * Don't even attempt to serialize very large directories; it would cause
2584 * an unnecessary memory allocation peak. 150 bytes/entry is about right.
2586 if (cache && svn_cache__is_cachable(cache, 150 * (*entries_p)->nelts))
2587 SVN_ERR(svn_cache__set(cache, key, *entries_p, scratch_pool));
2589 return SVN_NO_ERROR;
/* Look up NAME in ENTRIES by calling svn_sort__array_lookup() with HINT
 * and compare_dirent_name() as the comparison function. Return the
 * svn_fs_dirent_t * found, or NULL if the lookup returned no element.
 * NOTE(review): the lookup presumably requires ENTRIES to be sorted by
 * name, as read_dir_entries() leaves it -- confirm against
 * svn_sort__array_lookup(). */
2593 svn_fs_fs__find_dir_entry(apr_array_header_t *entries,
2597 svn_fs_dirent_t **result
2598 = svn_sort__array_lookup(entries, name, hint, compare_dirent_name);
2599 return result ? *result : NULL;
/* Set *DIRENT to the entry called NAME in the directory NODEREV in FS.
 * A partial getter (svn_fs_fs__extract_dir_entry) is tried on the
 * directory cache first. Otherwise the whole directory is read via
 * svn_fs_fs__rep_contents_dir() into SCRATCH_POOL and the matching entry,
 * if any, is copied into RESULT_POOL. In that fallback path, *DIRENT is
 * NULL when there is no entry called NAME. */
2603 svn_fs_fs__rep_contents_dir_entry(svn_fs_dirent_t **dirent,
2605 node_revision_t *noderev,
2607 apr_pool_t *result_pool,
2608 apr_pool_t *scratch_pool)
2610 svn_boolean_t found = FALSE;
2612 /* find the cache we may use */
2613 pair_cache_key_t pair_key = { 0 };
2615 svn_cache__t *cache = locate_dir_cache(fs, &key, &pair_key, noderev,
2620 SVN_ERR(svn_cache__get_partial((void **)dirent,
2624 svn_fs_fs__extract_dir_entry,
2629 /* fetch data from disk if we did not find it in the cache */
2632 apr_array_header_t *entries;
2633 svn_fs_dirent_t *entry;
2634 svn_fs_dirent_t *entry_copy = NULL;
2636 /* read the dir from the file system. It will probably be put
2637 into the cache for faster lookup in future calls. */
2638 SVN_ERR(svn_fs_fs__rep_contents_dir(&entries, fs, noderev,
2639 scratch_pool, scratch_pool));
2641 /* find desired entry and return a copy in RESULT_POOL, if found */
2642 entry = svn_fs_fs__find_dir_entry(entries, name, NULL);
2645 entry_copy = apr_palloc(result_pool, sizeof(*entry_copy));
2646 entry_copy->name = apr_pstrdup(result_pool, entry->name);
2647 entry_copy->id = svn_fs_fs__id_copy(entry->id, result_pool);
2648 entry_copy->kind = entry->kind;
2651 *dirent = entry_copy;
2654 return SVN_NO_ERROR;
/* Set *PROPLIST_P to the property list of NODEREV in FS, using POOL for
 * all allocations. Three cases are handled:
 *  - The prop rep belongs to a transaction: parse the node-props file
 *    of that transaction.
 *  - A committed prop rep exists: look into FS's properties cache (only
 *    when there is one and the rep's revision number is valid); else
 *    parse the rep contents and put the result into that cache.
 *  - There is no prop rep: return an empty hash.
 * If parsing fails, the stream is closed (ignoring errors from that) and
 * the parser error is returned wrapped in a "malformed property list"
 * message naming the node-revision. */
2658 svn_fs_fs__get_proplist(apr_hash_t **proplist_p,
2660 node_revision_t *noderev,
2663 apr_hash_t *proplist;
2664 svn_stream_t *stream;
2666 if (noderev->prop_rep && svn_fs_fs__id_txn_used(&noderev->prop_rep->txn_id))
2669 const char *filename
2670 = svn_fs_fs__path_txn_node_props(fs, noderev->id, pool);
2671 proplist = apr_hash_make(pool);
2673 SVN_ERR(svn_stream_open_readonly(&stream, filename, pool, pool));
2674 err = svn_hash_read2(proplist, stream, SVN_HASH_TERMINATOR, pool);
2677 svn_string_t *id_str = svn_fs_fs__id_unparse(noderev->id, pool);
2679 svn_error_clear(svn_stream_close(stream));
2680 return svn_error_quick_wrapf(err,
2681 _("malformed property list for node-revision '%s' in '%s'"),
2682 id_str->data, filename);
2684 SVN_ERR(svn_stream_close(stream));
2686 else if (noderev->prop_rep)
2689 fs_fs_data_t *ffd = fs->fsap_data;
2690 representation_t *rep = noderev->prop_rep;
2691 pair_cache_key_t key = { 0 };
2693 key.revision = rep->revision;
2694 key.second = rep->item_index;
2695 if (ffd->properties_cache && SVN_IS_VALID_REVNUM(rep->revision))
2697 svn_boolean_t is_cached;
2698 SVN_ERR(svn_cache__get((void **) proplist_p, &is_cached,
2699 ffd->properties_cache, &key, pool));
2701 return SVN_NO_ERROR;
2704 proplist = apr_hash_make(pool);
2705 SVN_ERR(svn_fs_fs__get_contents(&stream, fs, noderev->prop_rep, FALSE,
2707 err = svn_hash_read2(proplist, stream, SVN_HASH_TERMINATOR, pool);
2710 svn_string_t *id_str = svn_fs_fs__id_unparse(noderev->id, pool);
2712 svn_error_clear(svn_stream_close(stream));
2713 return svn_error_quick_wrapf(err,
2714 _("malformed property list for node-revision '%s'"),
2717 SVN_ERR(svn_stream_close(stream));
2719 if (ffd->properties_cache && SVN_IS_VALID_REVNUM(rep->revision))
2720 SVN_ERR(svn_cache__set(ffd->properties_cache, &key, proplist, pool));
2724 /* return an empty prop list if the node doesn't have any props */
2725 proplist = apr_hash_make(pool);
2728 *proplist_p = proplist;
2730 return SVN_NO_ERROR;
/* Set *CHANGES to the changed paths list of revision REV in FS, allocated
 * in RESULT_POOL. FS's changes cache, if there is one, is consulted
 * first. Otherwise, the rev / pack file gets opened and the list is
 * either fetched through block_read() or by seeking to the changes offset
 * and parsing with svn_fs_fs__read_changes(). In the latter case, the
 * result is put into the changes cache if its estimated size is deemed
 * cachable. A subpool of RESULT_POOL serves as scratch pool and is
 * destroyed before the regular return. */
2734 svn_fs_fs__get_changes(apr_array_header_t **changes,
2737 apr_pool_t *result_pool)
2739 apr_off_t changes_offset = SVN_FS_FS__ITEM_INDEX_CHANGES;
2740 svn_fs_fs__revision_file_t *revision_file;
2741 svn_boolean_t found;
2742 fs_fs_data_t *ffd = fs->fsap_data;
2743 apr_pool_t *scratch_pool = svn_pool_create(result_pool);
2745 /* try cache lookup first */
2747 if (ffd->changes_cache)
2749 SVN_ERR(svn_cache__get((void **) changes, &found, ffd->changes_cache,
2750 &rev, result_pool));
2759 /* read changes from revision file */
2761 SVN_ERR(svn_fs_fs__ensure_revision_exists(rev, fs, scratch_pool));
2762 SVN_ERR(svn_fs_fs__open_pack_or_rev_file(&revision_file, fs, rev,
2763 scratch_pool, scratch_pool));
2765 if (use_block_read(fs))
2767 /* 'block-read' will also provide us with the desired data */
2768 SVN_ERR(block_read((void **)changes, fs,
2769 rev, SVN_FS_FS__ITEM_INDEX_CHANGES,
2770 revision_file, result_pool, scratch_pool));
2774 /* Addressing is very different for old formats
2775 * (needs to read the revision trailer). */
2776 if (svn_fs_fs__use_log_addressing(fs))
2777 SVN_ERR(svn_fs_fs__item_offset(&changes_offset, fs,
2778 revision_file, rev, NULL,
2779 SVN_FS_FS__ITEM_INDEX_CHANGES,
2782 SVN_ERR(get_root_changes_offset(NULL, &changes_offset,
2783 revision_file, fs, rev,
2786 /* Actual reading and parsing are the same, though. */
2787 SVN_ERR(aligned_seek(fs, revision_file->file, NULL, changes_offset,
2789 SVN_ERR(svn_fs_fs__read_changes(changes, revision_file->stream,
2790 result_pool, scratch_pool));
2792 /* cache for future reference */
2794 if (ffd->changes_cache)
2796 /* Guesstimate for the size of the in-cache representation. */
2797 apr_size_t estimated_size = (apr_size_t)250 * (*changes)->nelts;
2799 /* Don't even serialize data that probably won't fit into the
2800 * cache. This often implies that either CHANGES is very
2801 * large, memory is scarce or both. Having a huge temporary
2802 * copy would not be a good thing in either case. */
2803 if (svn_cache__is_cachable(ffd->changes_cache, estimated_size))
2804 SVN_ERR(svn_cache__set(ffd->changes_cache, &rev, *changes,
2809 SVN_ERR(svn_fs_fs__close_revision_file(revision_file));
2812 SVN_ERR(dbg_log_access(fs, rev, changes_offset, *changes,
2813 SVN_FS_FS__ITEM_TYPE_CHANGES, scratch_pool));
2815 svn_pool_destroy(scratch_pool);
2816 return SVN_NO_ERROR;
2819 /* Initialize the representation read state RS for the given REP_HEADER and
2820 * p2l index ENTRY in FS. Store FILE as the revision file of the shared
2821 * file struct that gets newly allocated for RS. Use RESULT_POOL for allocations.
 *
 * ENTRY must describe a file / dir representation or property item;
 * anything else trips the assertion below. RS->START is set to the
 * offset right behind the rep header and RS->CURRENT to 0 for plain
 * reps and to 4 for all others. RS's cache members are copied from FS.
2823 static svn_error_t *
2824 init_rep_state(rep_state_t *rs,
2825 svn_fs_fs__rep_header_t *rep_header,
2827 svn_fs_fs__revision_file_t *file,
2828 svn_fs_fs__p2l_entry_t* entry,
2829 apr_pool_t *result_pool)
2831 fs_fs_data_t *ffd = fs->fsap_data;
2832 shared_file_t *shared_file = apr_pcalloc(result_pool, sizeof(*shared_file));
2834 /* this function does not apply to representation containers */
2835 SVN_ERR_ASSERT(entry->type >= SVN_FS_FS__ITEM_TYPE_FILE_REP
2836 && entry->type <= SVN_FS_FS__ITEM_TYPE_DIR_PROPS);
2838 shared_file->rfile = file;
2839 shared_file->fs = fs;
2840 shared_file->revision = entry->item.revision;
2841 shared_file->pool = result_pool;
2843 rs->sfile = shared_file;
2844 rs->revision = entry->item.revision;
2845 rs->item_index = entry->item.number;
2846 rs->header_size = rep_header->header_size;
2847 rs->start = entry->offset + rs->header_size;
2848 rs->current = rep_header->type == svn_fs_fs__rep_plain ? 0 : 4;
2849 rs->size = entry->size - rep_header->header_size - 7;
2851 rs->chunk_index = 0;
2852 rs->raw_window_cache = ffd->raw_window_cache;
2853 rs->window_cache = ffd->txdelta_window_cache;
2854 rs->combined_cache = ffd->combined_window_cache;
2856 return SVN_NO_ERROR;
2859 /* Implement svn_cache__partial_getter_func_t for txdelta windows.
2860 * Instead of the whole window data, return only END_OFFSET member.
 * DATA is interpreted as svn_fs_fs__txdelta_cached_window_t. Note that
 * OUT is treated as a pointer to an apr_off_t that receives the value,
 * not as a pointer to a pointer. DATA_LEN and RESULT_POOL are not used
 * in the statements visible here.
2862 static svn_error_t *
2863 get_txdelta_window_end(void **out,
2865 apr_size_t data_len,
2867 apr_pool_t *result_pool)
2869 const svn_fs_fs__txdelta_cached_window_t *window
2870 = (const svn_fs_fs__txdelta_cached_window_t *)data;
2871 *(apr_off_t*)out = window->end_offset;
2873 return SVN_NO_ERROR;
2876 /* Implement svn_cache__partial_getter_func_t for raw windows.
2877 * Instead of the whole window data, return only END_OFFSET member.
 * DATA is interpreted as svn_fs_fs__raw_cached_window_t. Note that
 * OUT is treated as a pointer to an apr_off_t that receives the value,
 * not as a pointer to a pointer. DATA_LEN and RESULT_POOL are not used
 * in the statements visible here.
2879 static svn_error_t *
2880 get_raw_window_end(void **out,
2882 apr_size_t data_len,
2884 apr_pool_t *result_pool)
2886 const svn_fs_fs__raw_cached_window_t *window
2887 = (const svn_fs_fs__raw_cached_window_t *)data;
2888 *(apr_off_t*)out = window->end_offset;
2890 return SVN_NO_ERROR;
2893 /* Walk through all windows in the representation addressed by RS in FS
2894 * (excluding the delta bases) and put those not already cached into the
2895 * window caches. If MAX_OFFSET is not -1, don't read windows that start
2896 * at or beyond that offset. Use POOL for temporary allocations.
2898 * This function requires RS->RAW_WINDOW_CACHE and RS->WINDOW_CACHE to
 * be non-NULL as both get passed to svn_cache__get_partial().
 *
 * RS->CURRENT is advanced past every window visited. Windows that had
 * to be read from the file are stored in RS->RAW_WINDOW_CACHE only.
 * SVN_ERR_FS_CORRUPT is returned if a window extends beyond RS->SIZE.
2901 static svn_error_t *
2902 cache_windows(svn_fs_t *fs,
2904 apr_off_t max_offset,
2907 apr_pool_t *iterpool = svn_pool_create(pool);
2908 while (rs->current < rs->size)
2910 apr_off_t end_offset;
2911 svn_boolean_t found = FALSE;
2912 window_cache_key_t key = { 0 };
2914 svn_pool_clear(iterpool);
2916 if (max_offset != -1 && rs->start + rs->current >= max_offset)
2918 svn_pool_destroy(iterpool);
2919 return SVN_NO_ERROR;
2922 /* We don't need to read the data again if it is already in cache.
2923 * It might be cached as either raw or parsed window.
2925 SVN_ERR(svn_cache__get_partial((void **) &end_offset, &found,
2926 rs->raw_window_cache,
2927 get_window_key(&key, rs),
2928 get_raw_window_end, NULL,
2931 SVN_ERR(svn_cache__get_partial((void **) &end_offset, &found,
2932 rs->window_cache, &key,
2933 get_txdelta_window_end, NULL,
2938 rs->current = end_offset;
2942 /* Read, decode and cache the window. */
2943 svn_fs_fs__raw_cached_window_t window;
2944 apr_off_t start_offset = rs->start + rs->current;
2945 apr_size_t window_len;
2948 /* navigate to the current window */
2949 SVN_ERR(rs_aligned_seek(rs, NULL, start_offset, iterpool));
2950 SVN_ERR(svn_txdelta__read_raw_window_len(&window_len,
2951 rs->sfile->rfile->stream,
2954 /* Read the raw window. */
2955 buf = apr_palloc(iterpool, window_len + 1);
2956 SVN_ERR(rs_aligned_seek(rs, NULL, start_offset, iterpool));
2957 SVN_ERR(svn_io_file_read_full2(rs->sfile->rfile->file, buf,
2958 window_len, NULL, NULL, iterpool));
2959 buf[window_len] = 0;
2961 /* update relative offset in representation */
2962 rs->current += window_len;
2964 /* Construct the cachable raw window object. */
2965 window.end_offset = rs->current;
2966 window.window.len = window_len;
2967 window.window.data = buf;
2969 /* cache the window now */
2970 SVN_ERR(svn_cache__set(rs->raw_window_cache, &key, &window,
2974 if (rs->current > rs->size)
2975 return svn_error_create(SVN_ERR_FS_CORRUPT, NULL,
2976 _("Reading one svndiff window read beyond "
2977 "the end of the representation"));
2982 svn_pool_destroy(iterpool);
2983 return SVN_NO_ERROR;
2986 /* Read all txdelta / plain windows following REP_HEADER in FS as described
2987 * by ENTRY. Read the data from the already open REV_FILE.
2988 * If MAX_OFFSET is not -1, don't read windows that start
2989 * at or beyond that offset. Use SCRATCH_POOL for temporary allocations.
2990 * If caching is not enabled, this is a no-op.
 *
 * "Caching enabled" means: the combined window cache exists for plain
 * reps; both the txdelta window and the raw window cache exist for all
 * other reps. A plain rep is read as a whole into a buffer allocated
 * in RESULT_POOL and cached as a single combined window. MAX_OFFSET is
 * only passed on to cache_windows(), i.e. only applies to non-plain reps.
2992 static svn_error_t *
2993 block_read_windows(svn_fs_fs__rep_header_t *rep_header,
2995 svn_fs_fs__revision_file_t *rev_file,
2996 svn_fs_fs__p2l_entry_t* entry,
2997 apr_off_t max_offset,
2998 apr_pool_t *result_pool,
2999 apr_pool_t *scratch_pool)
3001 fs_fs_data_t *ffd = fs->fsap_data;
3002 rep_state_t rs = { 0 };
3004 window_cache_key_t key = { 0 };
3006 if ( (rep_header->type != svn_fs_fs__rep_plain
3007 && (!ffd->txdelta_window_cache || !ffd->raw_window_cache))
3008 || (rep_header->type == svn_fs_fs__rep_plain
3009 && !ffd->combined_window_cache))
3010 return SVN_NO_ERROR;
3012 SVN_ERR(init_rep_state(&rs, rep_header, fs, rev_file, entry,
3015 /* RS->FILE may be shared between RS instances -> make sure we point
3016 * to the right data. */
3017 offset = rs.start + rs.current;
3018 if (rep_header->type == svn_fs_fs__rep_plain)
3020 svn_stringbuf_t *plaintext;
3021 svn_boolean_t is_cached;
3023 /* already in cache? */
3024 SVN_ERR(svn_cache__has_key(&is_cached, rs.combined_cache,
3025 get_window_key(&key, &rs),
3028 return SVN_NO_ERROR;
3030 /* for larger reps, the header may have crossed a block boundary.
3031 * make sure we still read blocks properly aligned, i.e. don't use
3032 * plain seek here. */
3033 SVN_ERR(aligned_seek(fs, rev_file->file, NULL, offset, scratch_pool));
3035 plaintext = svn_stringbuf_create_ensure(rs.size, result_pool);
3036 SVN_ERR(svn_io_file_read_full2(rev_file->file, plaintext->data,
3037 rs.size, &plaintext->len, NULL,
3039 plaintext->data[plaintext->len] = 0;
3040 rs.current += rs.size;
3042 SVN_ERR(set_cached_combined_window(plaintext, &rs, scratch_pool));
3046 SVN_ERR(cache_windows(fs, &rs, max_offset, scratch_pool));
3049 return SVN_NO_ERROR;
3052 /* Try to get the representation header identified by KEY from FS's cache.
3053 * If it has not been cached, read it from the current position in STREAM
3054 * and put it into the cache (if caching has been enabled for rep headers).
3055 * Return the result in *REP_HEADER. A header parsed from STREAM is
 * allocated in RESULT_POOL. SCRATCH_POOL is presumably used for the
 * temporary allocations (the respective arguments are not visible in
 * this excerpt -- verify).
3057 static svn_error_t *
3058 read_rep_header(svn_fs_fs__rep_header_t **rep_header,
3060 svn_stream_t *stream,
3061 pair_cache_key_t *key,
3062 apr_pool_t *result_pool,
3063 apr_pool_t *scratch_pool)
3065 fs_fs_data_t *ffd = fs->fsap_data;
3066 svn_boolean_t is_cached = FALSE;
3068 if (ffd->rep_header_cache)
3070 SVN_ERR(svn_cache__get((void**)rep_header, &is_cached,
3071 ffd->rep_header_cache, key,
3074 return SVN_NO_ERROR;
3077 SVN_ERR(svn_fs_fs__read_rep_header(rep_header, stream, result_pool,
3080 if (ffd->rep_header_cache)
3081 SVN_ERR(svn_cache__set(ffd->rep_header_cache, key, *rep_header,
3084 return SVN_NO_ERROR;
3087 /* Fetch the representation data (header, txdelta / plain windows)
3088 * addressed by ENTRY->ITEM in FS and cache it if caches are enabled.
3089 * Read the data from the already open REV_FILE, starting with the
3090 * rep header at the current position of REV_FILE->STREAM. If MAX_OFFSET is not -1, don't read windows that start
3091 * at or beyond that offset.
3092 * Use SCRATCH_POOL for temporary allocations.
 * The rep header is looked up / cached by read_rep_header() under the
 * key (ENTRY->ITEM.REVISION, ENTRY->ITEM.NUMBER). The windows are then
 * handled by block_read_windows(). Both receive RESULT_POOL.
3094 static svn_error_t *
3095 block_read_contents(svn_fs_t *fs,
3096 svn_fs_fs__revision_file_t *rev_file,
3097 svn_fs_fs__p2l_entry_t* entry,
3098 apr_off_t max_offset,
3099 apr_pool_t *result_pool,
3100 apr_pool_t *scratch_pool)
3102 pair_cache_key_t header_key = { 0 };
3103 svn_fs_fs__rep_header_t *rep_header;
3105 header_key.revision = (apr_int32_t)entry->item.revision;
3106 header_key.second = entry->item.number;
3108 SVN_ERR(read_rep_header(&rep_header, fs, rev_file->stream, &header_key,
3109 result_pool, scratch_pool));
3110 SVN_ERR(block_read_windows(rep_header, fs, rev_file, entry, max_offset,
3111 result_pool, scratch_pool));
3113 return SVN_NO_ERROR;
3116 /* For the given REV_FILE in FS, in *STREAM return a stream covering the
3117 * item specified by ENTRY. Also, verify the item's content by low-level
3118 * checksum. Allocate the result in POOL.
 *
 * ENTRY->SIZE bytes are read from the current position of
 * REV_FILE->FILE into a string buffer that backs *STREAM. The FNV-1a
 * digest computed by svn__fnv1a_32x4() over that buffer is compared
 * with ENTRY->FNV1_CHECKSUM. If they differ, a checksum mismatch error
 * naming size, offset, item number and revision is returned. Note that
 * *STREAM has already been set at that point.
3120 static svn_error_t *
3121 read_item(svn_stream_t **stream,
3123 svn_fs_fs__revision_file_t *rev_file,
3124 svn_fs_fs__p2l_entry_t* entry,
3127 apr_uint32_t digest;
3128 svn_checksum_t *expected, *actual;
3129 apr_uint32_t plain_digest;
3131 /* Read item into string buffer. */
3132 svn_stringbuf_t *text = svn_stringbuf_create_ensure(entry->size, pool);
3133 text->len = entry->size;
3134 text->data[text->len] = 0;
3135 SVN_ERR(svn_io_file_read_full2(rev_file->file, text->data, text->len,
3138 /* Return (construct, calculate) stream and checksum. */
3139 *stream = svn_stream_from_stringbuf(text, pool);
3140 digest = svn__fnv1a_32x4(text->data, text->len);
3142 /* Checksums will match most of the time. */
3143 if (entry->fnv1_checksum == digest)
3144 return SVN_NO_ERROR;
3146 /* Construct proper checksum objects from their digests to allow for
3147 * nice error messages. The digests are converted to network byte
 * order (htonl) before being handed over as raw bytes. */
3148 plain_digest = htonl(entry->fnv1_checksum);
3149 expected = svn_checksum__from_digest_fnv1a_32x4(
3150 (const unsigned char *)&plain_digest, pool);
3151 plain_digest = htonl(digest);
3152 actual = svn_checksum__from_digest_fnv1a_32x4(
3153 (const unsigned char *)&plain_digest, pool);
3155 /* Construct the full error message with all the info we have. */
3156 return svn_checksum_mismatch_err(expected, actual, pool,
3157 _("Low-level checksum mismatch while reading\n"
3158 "%s bytes of meta data at offset %s "
3159 "for item %s in revision %ld"),
3160 apr_psprintf(pool, "%" APR_OFF_T_FMT, entry->size),
3161 apr_psprintf(pool, "%" APR_OFF_T_FMT, entry->offset),
3162 apr_psprintf(pool, "%" APR_UINT64_T_FMT, entry->item.number),
3163 entry->item.revision);
3166 /* If not already cached or if MUST_READ is set, read the changed paths
3167 * list addressed by ENTRY in FS and return it in *CHANGES. Cache the
3168 * result if caching is enabled. Read the data from the already open
3169 * REV_FILE. Allocate *CHANGES in RESULT_POOL and use SCRATCH_POOL for temporaries.
 *
 * If MUST_READ is not set, this returns without touching *CHANGES when
 * FS has no changes cache. The cache key is ENTRY->ITEM.REVISION.
 * The item is read via read_item(), i.e. including checksum verification.
3171 static svn_error_t *
3172 block_read_changes(apr_array_header_t **changes,
3174 svn_fs_fs__revision_file_t *rev_file,
3175 svn_fs_fs__p2l_entry_t *entry,
3176 svn_boolean_t must_read,
3177 apr_pool_t *result_pool,
3178 apr_pool_t *scratch_pool)
3180 fs_fs_data_t *ffd = fs->fsap_data;
3181 svn_stream_t *stream;
3182 if (!must_read && !ffd->changes_cache)
3183 return SVN_NO_ERROR;
3185 /* already in cache? */
3186 if (!must_read && ffd->changes_cache)
3188 svn_boolean_t is_cached;
3189 SVN_ERR(svn_cache__has_key(&is_cached, ffd->changes_cache,
3190 &entry->item.revision,
3193 return SVN_NO_ERROR;
3196 SVN_ERR(read_item(&stream, fs, rev_file, entry, scratch_pool));
3198 /* read changes from revision file */
3199 SVN_ERR(svn_fs_fs__read_changes(changes, stream, result_pool,
3202 /* cache for future reference */
3203 if (ffd->changes_cache)
3204 SVN_ERR(svn_cache__set(ffd->changes_cache, &entry->item.revision,
3205 *changes, scratch_pool));
3207 return SVN_NO_ERROR;
3210 /* If not already cached or if MUST_READ is set, read the node revision
3211 * addressed by ENTRY in FS and return it in *NODEREV_P. Cache the
3212 * result if caching is enabled. Read the data from the already open
3213 * REV_FILE. Allocate *NODEREV_P in RESULT_POOL. Use SCRATCH_POOL for temporary allocations.
 *
 * If MUST_READ is not set, this returns without touching *NODEREV_P
 * when FS has no node revision cache. The cache key is the pair
 * (ENTRY->ITEM.REVISION, ENTRY->ITEM.NUMBER). The item is read via
 * read_item(), i.e. including checksum verification.
3215 static svn_error_t *
3216 block_read_noderev(node_revision_t **noderev_p,
3218 svn_fs_fs__revision_file_t *rev_file,
3219 svn_fs_fs__p2l_entry_t *entry,
3220 svn_boolean_t must_read,
3221 apr_pool_t *result_pool,
3222 apr_pool_t *scratch_pool)
3224 fs_fs_data_t *ffd = fs->fsap_data;
3225 svn_stream_t *stream;
3227 pair_cache_key_t key = { 0 };
3228 key.revision = entry->item.revision;
3229 key.second = entry->item.number;
3231 if (!must_read && !ffd->node_revision_cache)
3232 return SVN_NO_ERROR;
3234 /* already in cache? */
3235 if (!must_read && ffd->node_revision_cache)
3237 svn_boolean_t is_cached;
3238 SVN_ERR(svn_cache__has_key(&is_cached, ffd->node_revision_cache,
3239 &key, scratch_pool));
3241 return SVN_NO_ERROR;
3244 SVN_ERR(read_item(&stream, fs, rev_file, entry, scratch_pool));
3246 /* read node rev from revision file */
3247 SVN_ERR(svn_fs_fs__read_noderev(noderev_p, stream,
3248 result_pool, scratch_pool));
3250 /* Workaround issue #4031: is-fresh-txn-root in revision files. */
3251 (*noderev_p)->is_fresh_txn_root = FALSE;
3253 if (ffd->node_revision_cache)
3254 SVN_ERR(svn_cache__set(ffd->node_revision_cache, &key, *noderev_p,
3257 return SVN_NO_ERROR;
3260 /* Read the whole (e.g. 64kB) block containing ITEM_INDEX of REVISION in FS
3261 * and put all data into cache. If necessary and depending on heuristics,
3262 * neighboring blocks may also get read. The data is being read from
3263 * already open REVISION_FILE, which must be the correct rev / pack file
 * for REVISION. REVISION must be a valid revision number.
3266 * For noderevs and changed path lists, the item fetched is allocated in
3267 * RESULT_POOL and returned in *RESULT. Otherwise, RESULT must be NULL.
 * All other items are read using a sub-pool of SCRATCH_POOL that gets
 * cleared for every item in the block.
3269 static svn_error_t *
3270 block_read(void **result,
3272 svn_revnum_t revision,
3273 apr_uint64_t item_index,
3274 svn_fs_fs__revision_file_t *revision_file,
3275 apr_pool_t *result_pool,
3276 apr_pool_t *scratch_pool)
3278 fs_fs_data_t *ffd = fs->fsap_data;
3279 apr_off_t offset, wanted_offset = 0;
3280 apr_off_t block_start = 0;
3281 apr_array_header_t *entries;
3284 apr_pool_t *iterpool;
3286 /* Block read is an optional feature. If the caller does not want anything
3287 * specific we may not have to read anything. */
3289 return SVN_NO_ERROR;
3291 iterpool = svn_pool_create(scratch_pool);
3293 /* don't try this on transaction protorev files */
3294 SVN_ERR_ASSERT(SVN_IS_VALID_REVNUM(revision));
3296 /* index lookup: find the OFFSET of the item we *must* read plus (in the
3297 * "do-while" block) the list of items in the same block. */
3298 SVN_ERR(svn_fs_fs__item_offset(&wanted_offset, fs, revision_file,
3299 revision, NULL, item_index, iterpool));
3301 offset = wanted_offset;
3305 * Read this block. If the last item crosses the block boundary, read
3306 * the next block but stop there. Because cross-boundary items cause
3307 * blocks to be read twice, this heuristics will limit this effect to
3308 * approx. 50% of blocks, probably less, while providing a sensible
3309 * amount of read-ahead.
3313 /* fetch list of items in the block surrounding OFFSET */
3314 block_start = offset - (offset % ffd->block_size);
3315 SVN_ERR(svn_fs_fs__p2l_index_lookup(&entries, fs, revision_file,
3316 revision, block_start,
3317 ffd->block_size, scratch_pool,
3320 SVN_ERR(aligned_seek(fs, revision_file->file, &block_start, offset,
3323 /* read all items from the block */
3324 for (i = 0; i < entries->nelts; ++i)
3326 svn_boolean_t is_result, is_wanted;
3328 svn_fs_fs__p2l_entry_t* entry;
3330 svn_pool_clear(iterpool);
3332 /* skip empty sections */
3333 entry = &APR_ARRAY_IDX(entries, i, svn_fs_fs__p2l_entry_t);
3334 if (entry->type == SVN_FS_FS__ITEM_TYPE_UNUSED)
3337 /* the item / container we were looking for? */
3338 is_wanted = entry->offset == wanted_offset
3339 && entry->item.revision == revision
3340 && entry->item.number == item_index;
3341 is_result = result && is_wanted;
3343 /* select the pool that we want the item to be allocated in */
3344 pool = is_result ? result_pool : iterpool;
3346 /* handle all items that start within this block and are relatively
3347 * small (i.e. < block size). Always read the item we need to return.
3349 if (is_result || ( entry->offset >= block_start
3350 && entry->size < ffd->block_size))
3353 SVN_ERR(svn_io_file_seek(revision_file->file, APR_SET,
3354 &entry->offset, iterpool));
3355 switch (entry->type)
3357 case SVN_FS_FS__ITEM_TYPE_FILE_REP:
3358 case SVN_FS_FS__ITEM_TYPE_DIR_REP:
3359 case SVN_FS_FS__ITEM_TYPE_FILE_PROPS:
3360 case SVN_FS_FS__ITEM_TYPE_DIR_PROPS:
3361 SVN_ERR(block_read_contents(fs, revision_file, entry,
3364 : block_start + ffd->block_size,
3368 case SVN_FS_FS__ITEM_TYPE_NODEREV:
3369 if (ffd->node_revision_cache || is_result)
3370 SVN_ERR(block_read_noderev((node_revision_t **)&item,
3372 entry, is_result, pool,
3376 case SVN_FS_FS__ITEM_TYPE_CHANGES:
3377 SVN_ERR(block_read_changes((apr_array_header_t **)&item,
3390 /* if we crossed a block boundary, read the remainder of
3391 * the last block as well */
3392 offset = entry->offset + entry->size;
3393 if (offset > block_start + ffd->block_size)
3399 while(run_count++ == 1); /* can only be true once and only if a block
3400 * boundary got crossed */
3402 /* if the caller requested a result, we must have provided one by now */
3403 assert(!result || *result);
3404 svn_pool_destroy(iterpool);
3406 return SVN_NO_ERROR;