1 /* cached_data.c --- cached (read) access to FSFS data
3 * ====================================================================
4 * Licensed to the Apache Software Foundation (ASF) under one
5 * or more contributor license agreements. See the NOTICE file
6 * distributed with this work for additional information
7 * regarding copyright ownership. The ASF licenses this file
8 * to you under the Apache License, Version 2.0 (the
9 * "License"); you may not use this file except in compliance
10 * with the License. You may obtain a copy of the License at
12 * http://www.apache.org/licenses/LICENSE-2.0
14 * Unless required by applicable law or agreed to in writing,
15 * software distributed under the License is distributed on an
16 * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
17 * KIND, either express or implied. See the License for the
18 * specific language governing permissions and limitations
20 * ====================================================================
23 #include "cached_data.h"
28 #include "svn_ctype.h"
29 #include "svn_sorts.h"
30 #include "private/svn_delta_private.h"
31 #include "private/svn_io_private.h"
32 #include "private/svn_sorts_private.h"
33 #include "private/svn_subr_private.h"
34 #include "private/svn_temp_serializer.h"
39 #include "low_level.h"
42 #include "temp_serializer.h"
44 #include "../libsvn_fs/fs-loader.h"
45 #include "../libsvn_delta/delta.h" /* for SVN_DELTA_WINDOW_SIZE */
47 #include "svn_private_config.h"
49 /* forward-declare. See implementation for the docstring */
/* Declared ahead of its definition because get_node_revision_body() and
 * create_rep_state_body() further down already call block_read(). */
51 block_read(void **result,
53 svn_revnum_t revision,
54 apr_uint64_t item_index,
55 svn_fs_fs__revision_file_t *revision_file,
56 apr_pool_t *result_pool,
57 apr_pool_t *scratch_pool);
60 /* Define this to enable access logging via dbg_log_access
61 #define SVN_FS_FS__LOG_ACCESS
64 /* When SVN_FS_FS__LOG_ACCESS has been defined, write a line to console
65 * showing where REVISION, ITEM_INDEX is located in FS and use ITEM to
66 * show details on its contents if not NULL. To support format 6 and
67 * earlier repos, ITEM_TYPE (SVN_FS_FS__ITEM_TYPE_*) must match ITEM.
68 * Use SCRATCH_POOL for temporary allocations.
70 * For pre-format7 repos, the display will be restricted.
/* Debug aid: print one line to stdout describing where item ITEM_INDEX of
 * REVISION is stored in FS (block / offset within the rev or pack file),
 * together with a type tag and an optional description derived from ITEM.
 * Everything below the #ifdef is compiled only when SVN_FS_FS__LOG_ACCESS
 * is defined.  The rev / pack file is opened for the lookup and closed
 * again before returning. */
73 dbg_log_access(svn_fs_t *fs,
74 svn_revnum_t revision,
75 apr_uint64_t item_index,
77 apr_uint32_t item_type,
78 apr_pool_t *scratch_pool)
80 /* no-op if this macro is not defined */
81 #ifdef SVN_FS_FS__LOG_ACCESS
82 fs_fs_data_t *ffd = fs->fsap_data;
83 apr_off_t end_offset = 0;
84 svn_fs_fs__p2l_entry_t *entry = NULL;
85 static const char *types[] = {"<n/a>", "frep ", "drep ", "fprop", "dprop",
86 "node ", "chgs ", "rep "};
87 const char *description = "";
/* NOTE(review): ITEM_TYPE is used as an index into TYPES without a range
 * check -- callers must pass one of the SVN_FS_FS__ITEM_TYPE_* values that
 * the table covers. */
88 const char *type = types[item_type];
89 const char *pack = "";
91 svn_fs_fs__revision_file_t *rev_file;
93 SVN_ERR(svn_fs_fs__open_pack_or_rev_file(&rev_file, fs, revision,
94 scratch_pool, scratch_pool));
96 /* determine rev / pack file offset */
97 SVN_ERR(svn_fs_fs__item_offset(&offset, fs, rev_file, revision, NULL,
98 item_index, scratch_pool));
100 /* constructing the pack file description */
101 if (revision < ffd->min_unpacked_rev)
102 pack = apr_psprintf(scratch_pool, "%4ld|",
103 revision / ffd->max_files_per_dir);
105 /* construct description if possible */
106 if (item_type == SVN_FS_FS__ITEM_TYPE_NODEREV && item != NULL)
108 node_revision_t *node = item;
111 ? apr_psprintf(scratch_pool, " d=%ld/%" APR_UINT64_T_FMT,
112 node->data_rep->revision,
113 node->data_rep->item_index)
117 ? apr_psprintf(scratch_pool, " p=%ld/%" APR_UINT64_T_FMT,
118 node->prop_rep->revision,
119 node->prop_rep->item_index)
121 description = apr_psprintf(scratch_pool, "%s (pc=%d%s%s)",
123 node->predecessor_count,
127 else if (item_type == SVN_FS_FS__ITEM_TYPE_ANY_REP)
129 svn_fs_fs__rep_header_t *header = item;
131 description = " (txdelta window)";
132 else if (header->type == svn_fs_fs__rep_plain)
133 description = " PLAIN";
134 else if (header->type == svn_fs_fs__rep_self_delta)
135 description = " DELTA";
137 description = apr_psprintf(scratch_pool,
138 " DELTA against %ld/%" APR_UINT64_T_FMT,
139 header->base_revision,
140 header->base_item_index);
142 else if (item_type == SVN_FS_FS__ITEM_TYPE_CHANGES && item != NULL)
144 apr_array_header_t *changes = item;
145 switch (changes->nelts)
147 case 0: description = " no change";
149 case 1: description = " 1 change";
151 default: description = apr_psprintf(scratch_pool, " %d changes",
156 /* some info is only available in format7 repos */
157 if (svn_fs_fs__use_log_addressing(fs))
159 /* reverse index lookup: get item description in ENTRY */
160 SVN_ERR(svn_fs_fs__p2l_entry_lookup(&entry, fs, rev_file, revision,
161 offset, scratch_pool,
/* With a P2L entry at hand, the item's end offset is known and the type
 * tag recorded in the index replaces the one derived from ITEM_TYPE. */
166 end_offset = offset + entry->size;
167 type = types[entry->type];
171 printf("%5s%4lx:%04lx -%4lx:%04lx %s %7ld %5"APR_UINT64_T_FMT" %s\n",
172 pack, (long)(offset / ffd->block_size),
173 (long)(offset % ffd->block_size),
174 (long)(end_offset / ffd->block_size),
175 (long)(end_offset % ffd->block_size),
176 type, revision, item_index, description);
180 /* reduced logging for format 6 and earlier */
181 printf("%5s%10" APR_UINT64_T_HEX_FMT " %s %7ld %7" APR_UINT64_T_FMT \
183 pack, (apr_uint64_t)(offset), type, revision, item_index,
187 /* We don't know when SCRATCH_POOL will be cleared, so close the rev file
189 SVN_ERR(svn_fs_fs__close_revision_file(rev_file));
196 /* Convenience wrapper around svn_io_file_aligned_seek, taking filesystem
197 FS instead of a block size. */
/* The block size handed to svn_io_file_aligned_seek() is the BLOCK_SIZE
 * member of FS's fsap_data; BUFFER_START and OFFSET are passed through
 * unchanged and the callee's error, if any, is returned traced. */
199 aligned_seek(svn_fs_t *fs,
201 apr_off_t *buffer_start,
205 fs_fs_data_t *ffd = fs->fsap_data;
206 return svn_error_trace(svn_io_file_aligned_seek(file, ffd->block_size,
207 buffer_start, offset,
211 /* Open the revision file for revision REV in filesystem FS and store
212 the newly opened file in FILE. Seek to location OFFSET before
213 returning. Perform temporary allocations in POOL. */
/* Note: the seek target is not a parameter.  It is looked up from the item
 * index ITEM via svn_fs_fs__item_offset() once the rev / pack file is open.
 * The existence of REV is verified first. */
215 open_and_seek_revision(svn_fs_fs__revision_file_t **file,
221 svn_fs_fs__revision_file_t *rev_file;
222 apr_off_t offset = -1;
224 SVN_ERR(svn_fs_fs__ensure_revision_exists(rev, fs, pool));
226 SVN_ERR(svn_fs_fs__open_pack_or_rev_file(&rev_file, fs, rev, pool, pool));
227 SVN_ERR(svn_fs_fs__item_offset(&offset, fs, rev_file, rev, NULL, item,
230 SVN_ERR(aligned_seek(fs, rev_file->file, NULL, offset, pool));
237 /* Open the representation REP for a node-revision in filesystem FS, seek
238 to its position and store the newly opened file in FILE. Perform
239 temporary allocations in POOL. */
/* The file opened is the proto-rev file of the transaction REP->TXN_ID.
 * The offset lookup is keyed on that txn id and REP->ITEM_INDEX; no rev
 * file and no revision number (SVN_INVALID_REVNUM) are supplied. */
241 open_and_seek_transaction(svn_fs_fs__revision_file_t **file,
243 representation_t *rep,
248 SVN_ERR(svn_fs_fs__open_proto_rev_file(file, fs, &rep->txn_id, pool, pool));
250 SVN_ERR(svn_fs_fs__item_offset(&offset, fs, NULL, SVN_INVALID_REVNUM,
251 &rep->txn_id, rep->item_index, pool));
252 SVN_ERR(aligned_seek(fs, (*file)->file, NULL, offset, pool));
257 /* Given a node-id ID, and a representation REP in filesystem FS, open
258 the correct file and seek to the correct location. Store this
259 file in *FILE_P. Perform any allocations in POOL. */
/* Dispatch on REP's txn id: if it is unused, REP is committed data and is
 * read from a rev / pack file; otherwise it is read from the transaction's
 * proto-rev file. */
261 open_and_seek_representation(svn_fs_fs__revision_file_t **file_p,
263 representation_t *rep,
266 if (! svn_fs_fs__id_txn_used(&rep->txn_id))
267 return open_and_seek_revision(file_p, fs, rep->revision, rep->item_index,
270 return open_and_seek_transaction(file_p, fs, rep, pool);
/* Build and return an SVN_ERR_FS_ID_NOT_FOUND error stating that ID refers
 * to a node that does not exist in FS.  The message contains the unparsed
 * ID and FS->PATH.  Note that the ID string is allocated in FS->POOL, not
 * in a scratch pool. */
276 err_dangling_id(svn_fs_t *fs, const svn_fs_id_t *id)
278 svn_string_t *id_str = svn_fs_fs__id_unparse(id, fs->pool);
279 return svn_error_createf
280 (SVN_ERR_FS_ID_NOT_FOUND, 0,
281 _("Reference to non-existent node '%s' in filesystem '%s'"),
282 id_str->data, fs->path);
285 /* Return TRUE, if FS is of a format that supports block-read and the
286 feature has been enabled. */
/* "Supports block-read" is tested as "uses log addressing"; "enabled" is
 * the USE_BLOCK_READ flag in FS's fsap_data.  Both must hold. */
288 use_block_read(svn_fs_t *fs)
290 fs_fs_data_t *ffd = fs->fsap_data;
291 return svn_fs_fs__use_log_addressing(fs) && ffd->use_block_read;
/* Correct REP->EXPANDED_SIZE in place when it is recorded as 0 although
 * the representation is not actually empty.
 *
 * Nothing is done if REP is NULL, if EXPANDED_SIZE is already non-zero, or
 * if SIZE is 0.  REP must describe committed data (asserted below).
 *
 * The cheap checks come first (MD5, then SHA1 against the checksums of the
 * empty string, then the on-disk size); only if those are inconclusive is
 * the rep header read from the rev / pack file to distinguish PLAIN from
 * DELTA.  Temporary allocations are made in SCRATCH_POOL. */
295 svn_fs_fs__fixup_expanded_size(svn_fs_t *fs,
296 representation_t *rep,
297 apr_pool_t *scratch_pool)
299 svn_checksum_t checksum;
300 svn_checksum_t *empty_md5;
301 svn_fs_fs__revision_file_t *revision_file;
302 svn_fs_fs__rep_header_t *rep_header;
304 /* Anything to do at all?
306 * Note that a 0 SIZE is only possible for PLAIN reps due to the SVN\1
307 * magic prefix in any DELTA rep. */
308 if (!rep || rep->expanded_size != 0 || rep->size == 0)
311 /* This function may only be called for committed data. */
312 assert(!svn_fs_fs__id_txn_used(&rep->txn_id));
314 /* EXPANDED_SIZE is 0. If the MD5 does not match the one for empty
315 * contents, we know that EXPANDED_SIZE == 0 is wrong and needs to
316 * be set to the actual value given by SIZE.
318 * Using svn_checksum_match() will also accept all-zero values for
319 * the MD5 digest and only report a mismatch if the MD5 has actually
321 empty_md5 = svn_checksum_empty_checksum(svn_checksum_md5, scratch_pool);
323 checksum.digest = rep->md5_digest;
324 checksum.kind = svn_checksum_md5;
325 if (!svn_checksum_match(empty_md5, &checksum))
327 rep->expanded_size = rep->size;
331 /* Data in the rep-cache.db does not have MD5 checksums (all zero) on it.
332 * Compare SHA1 instead. */
335 svn_checksum_t *empty_sha1
336 = svn_checksum_empty_checksum(svn_checksum_sha1, scratch_pool);
338 checksum.digest = rep->sha1_digest;
339 checksum.kind = svn_checksum_sha1;
340 if (!svn_checksum_match(empty_sha1, &checksum))
342 rep->expanded_size = rep->size;
347 /* Only two cases are left here.
348 * (1) A non-empty PLAIN rep with a MD5 collision on EMPTY_MD5.
349 * (2) A DELTA rep with zero-length output. */
351 /* SVN always stores a DELTA rep with zero-length output as an empty
352 * sequence of txdelta windows, i.e. as "SVN\1". In that case, SIZE is
353 * 4 bytes. There is no other possible DELTA rep of that size and any
354 * PLAIN rep of 4 bytes would produce a different MD5. Hence, if SIZE is
355 * actually 4 here, we know that this is an empty DELTA rep.
357 * Note that it is technically legal to have DELTA reps with a 0 length
358 * output window. Their on-disk size would be longer. We handle that
359 * case later together with the equally unlikely MD5 collision. */
362 /* EXPANDED_SIZE is already 0. */
366 /* We still have the two options, PLAIN or DELTA rep. At this point, we
367 * are in an extremely unlikely case and can spend some time to figure it
368 * out. So, let's just look at the representation header. */
369 SVN_ERR(open_and_seek_revision(&revision_file, fs, rep->revision,
370 rep->item_index, scratch_pool));
371 SVN_ERR(svn_fs_fs__read_rep_header(&rep_header, revision_file->stream,
372 scratch_pool, scratch_pool));
/* Only the header was needed, so the file is closed before it is
 * evaluated. */
373 SVN_ERR(svn_fs_fs__close_revision_file(revision_file));
375 /* Only for PLAIN reps do we have to correct EXPANDED_SIZE. */
376 if (rep_header->type == svn_fs_fs__rep_plain)
377 rep->expanded_size = rep->size;
382 /* Correct known issues with committed NODEREV in FS.
383 * Uses SCRATCH_POOL for temporaries.
386 fixup_node_revision(svn_fs_t *fs,
387 node_revision_t *noderev,
388 apr_pool_t *scratch_pool)
390 /* Workaround issue #4031: is-fresh-txn-root in revision files. */
391 noderev->is_fresh_txn_root = FALSE;
393 /* Make sure EXPANDED_SIZE has the correct value for every rep. */
/* Both the data rep and the prop rep are handed over unconditionally;
 * svn_fs_fs__fixup_expanded_size() returns early for a NULL rep. */
394 SVN_ERR(svn_fs_fs__fixup_expanded_size(fs, noderev->data_rep,
396 SVN_ERR(svn_fs_fs__fixup_expanded_size(fs, noderev->prop_rep,
/* Worker behind svn_fs_fs__get_node_revision().  Two storage paths exist:
 * transaction noderevs are read from their own node-rev file, committed
 * noderevs come from a rev / pack file and go through the node revision
 * cache when one is configured.  The parsed noderev is requested in
 * RESULT_POOL; SCRATCH_POOL is for temporaries. */
402 /* Get the node-revision for the node ID in FS.
403 Set *NODEREV_P to the new node-revision structure, allocated in POOL.
404 See svn_fs_fs__get_node_revision, which wraps this and adds another
407 get_node_revision_body(node_revision_t **noderev_p,
409 const svn_fs_id_t *id,
410 apr_pool_t *result_pool,
411 apr_pool_t *scratch_pool)
414 svn_boolean_t is_cached = FALSE;
415 fs_fs_data_t *ffd = fs->fsap_data;
417 if (svn_fs_fs__id_is_txn(id))
421 /* This is a transaction node-rev. Its storage logic is very
422 different from that of rev / pack files. */
423 err = svn_io_file_open(&file,
424 svn_fs_fs__path_txn_node_rev(fs, id,
426 APR_READ | APR_BUFFERED, APR_OS_DEFAULT,
/* A missing node-rev file means that ID does not exist: translate the
 * ENOENT into a "dangling id" error.  Any other open error is returned
 * as is. */
428 if (err && APR_STATUS_IS_ENOENT(err->apr_err))
430 svn_error_clear(err);
431 return svn_error_trace(err_dangling_id(fs, id));
435 return svn_error_trace(err);
438 SVN_ERR(svn_fs_fs__read_noderev(noderev_p,
439 svn_stream_from_aprfile2(file,
442 result_pool, scratch_pool));
446 svn_fs_fs__revision_file_t *revision_file;
448 /* noderevs in rev / pack files can be cached */
449 const svn_fs_fs__id_part_t *rev_item = svn_fs_fs__id_rev_item(id);
/* The cache key is the (revision, item number) pair of ID. */
450 pair_cache_key_t key = { 0 };
451 key.revision = rev_item->revision;
452 key.second = rev_item->number;
454 /* Not found or not applicable. Try a noderev cache lookup.
455 * If that succeeds, we are done here. */
456 if (ffd->node_revision_cache)
458 SVN_ERR(svn_cache__get((void **) noderev_p,
460 ffd->node_revision_cache,
467 /* read the data from disk */
468 SVN_ERR(open_and_seek_revision(&revision_file, fs,
473 if (use_block_read(fs))
475 /* block-read will parse the whole block and will also return
476 the one noderev that we need right now. */
477 SVN_ERR(block_read((void **)noderev_p, fs,
486 /* physical addressing mode reading, parsing and caching */
487 SVN_ERR(svn_fs_fs__read_noderev(noderev_p,
488 revision_file->stream,
/* Noderevs parsed directly from a rev file get the known-issue fixups
 * applied before they are put into the cache. */
491 SVN_ERR(fixup_node_revision(fs, *noderev_p, scratch_pool));
493 /* The noderev is not in cache, yet. Add it, if caching has been enabled. */
494 if (ffd->node_revision_cache)
495 SVN_ERR(svn_cache__set(ffd->node_revision_cache,
501 SVN_ERR(svn_fs_fs__close_revision_file(revision_file));
/* Fetch the node-revision for ID in FS and return it in *NODEREV_P.
 *
 * The actual work is done by get_node_revision_body().  If that fails with
 * SVN_ERR_FS_CORRUPT, the error is wrapped in another SVN_ERR_FS_CORRUPT
 * error that names the offending ID ("Corrupt node-revision '...'").
 * The access is reported through dbg_log_access(), whose body is compiled
 * in only when SVN_FS_FS__LOG_ACCESS is defined. */
508 svn_fs_fs__get_node_revision(node_revision_t **noderev_p,
510 const svn_fs_id_t *id,
511 apr_pool_t *result_pool,
512 apr_pool_t *scratch_pool)
514 const svn_fs_fs__id_part_t *rev_item = svn_fs_fs__id_rev_item(id);
516 svn_error_t *err = get_node_revision_body(noderev_p, fs, id,
517 result_pool, scratch_pool);
518 if (err && err->apr_err == SVN_ERR_FS_CORRUPT)
520 svn_string_t *id_string = svn_fs_fs__id_unparse(id, scratch_pool);
521 return svn_error_createf(SVN_ERR_FS_CORRUPT, err,
522 "Corrupt node-revision '%s'",
526 SVN_ERR(dbg_log_access(fs,
530 SVN_FS_FS__ITEM_TYPE_NODEREV,
/* ERR is either NULL or a non-corruption error at this point. */
533 return svn_error_trace(err);
537 /* Given a revision file REV_FILE, opened to REV in FS, find the Node-ID
538 of the header located at OFFSET and store it in *ID_P. Allocate
539 temporary variables from POOL. */
/* The noderev at OFFSET is parsed only to obtain its id.  *ID_P is a copy
 * of that id made with svn_fs_fs__id_copy() in POOL.  The revision of the
 * resulting id is asserted to equal REV. */
541 get_fs_id_at_offset(svn_fs_id_t **id_p,
542 svn_fs_fs__revision_file_t *rev_file,
548 node_revision_t *noderev;
550 SVN_ERR(aligned_seek(fs, rev_file->file, NULL, offset, pool));
551 SVN_ERR(svn_fs_fs__read_noderev(&noderev,
555 /* noderev->id is const, get rid of that */
556 *id_p = svn_fs_fs__id_copy(noderev->id, pool);
558 /* assert that the txn_id is REV
559 * (asserting on offset would be harder because we the rev_offset is not
561 assert(svn_fs_fs__id_rev(*id_p) == rev);
567 /* Given an open revision file REV_FILE in FS for REV, locate the trailer that
568 specifies the offset to the root node-id and to the changed path
569 information. Store the root node offset in *ROOT_OFFSET and the
570 changed path offset in *CHANGES_OFFSET. If either of these
571 pointers is NULL, do nothing with it.
573 Allocate temporary variables from POOL. */
/* Outline: find the end of REV's data (END), read at most sizeof(buffer)
 * bytes that precede it, let svn_fs_fs__parse_revision_trailer() extract
 * the two offsets and finally add REV_OFFSET so that the results are
 * absolute positions within the (possibly packed) file. */
575 get_root_changes_offset(apr_off_t *root_offset,
576 apr_off_t *changes_offset,
577 svn_fs_fs__revision_file_t *rev_file,
582 fs_fs_data_t *ffd = fs->fsap_data;
583 apr_off_t rev_offset;
584 apr_seek_where_t seek_relative;
585 svn_stringbuf_t *trailer;
591 /* Determine where to seek to in the file.
593 If we've got a pack file, we want to seek to the end of the desired
594 revision. But we don't track that, so we seek to the beginning of the
597 Unless the next revision is in a different file, in which case, we can
598 just seek to the end of the pack file -- just like we do in the
600 if (rev_file->is_packed && ((rev + 1) % ffd->max_files_per_dir != 0))
602 SVN_ERR(svn_fs_fs__get_packed_offset(&end, fs, rev + 1, pool));
603 seek_relative = APR_SET;
607 seek_relative = APR_END;
611 /* Offset of the revision from the start of the pack file, if applicable. */
612 if (rev_file->is_packed)
613 SVN_ERR(svn_fs_fs__get_packed_offset(&rev_offset, fs, rev, pool));
617 /* We will assume that the last line containing the two offsets
618 will never be longer than 64 characters. */
619 SVN_ERR(svn_io_file_seek(rev_file->file, seek_relative, &end, pool));
/* Clamp the read window: if fewer than sizeof(buffer) bytes precede END,
 * read only that many; otherwise read the last sizeof(buffer) bytes. */
621 if (end < sizeof(buffer))
623 len = (apr_size_t)end;
628 len = sizeof(buffer);
629 start = end - sizeof(buffer);
632 /* Read in this last block, from which we will identify the last line. */
633 SVN_ERR(aligned_seek(fs, rev_file->file, NULL, start, pool));
634 SVN_ERR(svn_io_file_read_full2(rev_file->file, buffer, len, NULL, NULL,
637 /* Parse the last line. */
638 trailer = svn_stringbuf_ncreate(buffer, len, pool);
639 SVN_ERR(svn_fs_fs__parse_revision_trailer(root_offset,
644 /* return absolute offsets */
646 *root_offset += rev_offset;
648 *changes_offset += rev_offset;
/* Set *ROOT_ID_P to the id of the root node of revision REV in FS.
 *
 * REV is checked for existence first.  With log addressing, the id is
 * simply constructed by svn_fs_fs__id_create_root().  Otherwise the id is
 * looked up in FS's rev-root-id cache and -- presumably only on a cache
 * miss; the check is not visible in this excerpt -- read from the rev /
 * pack file via the revision trailer and then added to the cache.
 * The result is allocated in RESULT_POOL, temporaries in SCRATCH_POOL. */
654 svn_fs_fs__rev_get_root(svn_fs_id_t **root_id_p,
657 apr_pool_t *result_pool,
658 apr_pool_t *scratch_pool)
660 fs_fs_data_t *ffd = fs->fsap_data;
661 SVN_ERR(svn_fs_fs__ensure_revision_exists(rev, fs, scratch_pool));
663 if (svn_fs_fs__use_log_addressing(fs))
665 *root_id_p = svn_fs_fs__id_create_root(rev, result_pool);
669 svn_fs_fs__revision_file_t *revision_file;
670 apr_off_t root_offset;
671 svn_fs_id_t *root_id = NULL;
672 svn_boolean_t is_cached;
674 SVN_ERR(svn_cache__get((void **) root_id_p, &is_cached,
675 ffd->rev_root_id_cache, &rev, result_pool));
679 SVN_ERR(svn_fs_fs__open_pack_or_rev_file(&revision_file, fs, rev,
680 scratch_pool, scratch_pool));
/* Only the root offset is of interest; NULL skips the changes offset. */
681 SVN_ERR(get_root_changes_offset(&root_offset, NULL,
682 revision_file, fs, rev,
685 SVN_ERR(get_fs_id_at_offset(&root_id, revision_file, fs, rev,
686 root_offset, result_pool));
688 SVN_ERR(svn_fs_fs__close_revision_file(revision_file));
690 SVN_ERR(svn_cache__set(ffd->rev_root_id_cache, &rev, root_id,
693 *root_id_p = root_id;
699 /* Describes a lazily opened rev / pack file. Instances will be shared
700 between multiple instances of rep_state_t. */
/* "FILE" in the member comments below refers to RFILE.  The file is opened
 * on demand by auto_open_shared_file(). */
701 typedef struct shared_file_t
703 /* The opened file. NULL while file is not open, yet. */
704 svn_fs_fs__revision_file_t *rfile;
706 /* file system to open the file in */
709 /* a revision contained in the FILE. Since this file may be shared,
710 that value may be different from REP_STATE_T->REVISION. */
711 svn_revnum_t revision;
713 /* pool to use when creating the FILE. This guarantees that the file
714 remains open / valid beyond the respective local context that required
715 the file to be opened eventually. */
719 /* Represents where in the current svndiff data block each
720 representation is. */
/* Each member comment below describes the member that FOLLOWS it.  The
 * three cache members are filled in by create_rep_state_body() from the
 * equally named caches in FS's fsap_data. */
721 typedef struct rep_state_t
723 /* shared lazy-open rev/pack file structure */
724 shared_file_t *sfile;
725 /* Caches raw (unparsed) windows. May be NULL; it is only set when
   block-read is in use (see create_rep_state_body). */
726 svn_cache__t *raw_window_cache;
727 /* The txdelta window cache to use or NULL. */
728 svn_cache__t *window_cache;
729 /* The combined window cache to use or NULL. Presumably holds
   un-deltified windows, as the original comment put it. */
730 svn_cache__t *combined_cache;
731 /* revision containing the representation */
732 svn_revnum_t revision;
733 /* representation's item index in REVISION */
734 apr_uint64_t item_index;
735 /* length of the header at the start of the rep.
736 0 iff this rep is stored in a container
737 (i.e. does not have a header) */
738 apr_size_t header_size;
739 apr_off_t start; /* The starting offset for the raw
740 svndiff/plaintext data minus header.
741 -1 if the offset is yet unknown. */
742 apr_off_t current;/* The current offset relative to START. */
743 apr_off_t size; /* The on-disk size of the representation. */
744 int ver; /* If a delta, what svndiff version?
745 -1 for unknown delta version. */
746 int chunk_index; /* number of the window to read */
749 /* Simple wrapper around svn_io_file_get_offset to simplify callers. */
/* Stores the current position of RS's shared rev / pack file in *OFFSET.
 * RS->SFILE->RFILE is dereferenced without a check, so the shared file
 * must already have been opened. */
751 get_file_offset(apr_off_t *offset,
755 return svn_error_trace(svn_io_file_get_offset(offset,
756 rs->sfile->rfile->file,
760 /* Simple wrapper around svn_io_file_aligned_seek to simplify callers. */
/* Seeks within RS's shared rev / pack file; the filesystem data is taken
 * from RS->SFILE->FS.  As in get_file_offset(), the shared file must
 * already be open because RS->SFILE->RFILE is dereferenced unchecked. */
762 rs_aligned_seek(rep_state_t *rs,
763 apr_off_t *buffer_start,
767 fs_fs_data_t *ffd = rs->sfile->fs->fsap_data;
768 return svn_error_trace(svn_io_file_aligned_seek(rs->sfile->rfile->file,
770 buffer_start, offset,
774 /* Open FILE->FILE and FILE->STREAM if they haven't been opened, yet. */
/* Concretely: if FILE->RFILE is still NULL, open the rev / pack file that
 * contains FILE->REVISION in FILE->FS, using FILE->POOL.  Otherwise this
 * is a no-op. */
776 auto_open_shared_file(shared_file_t *file)
778 if (file->rfile == NULL)
779 SVN_ERR(svn_fs_fs__open_pack_or_rev_file(&file->rfile, file->fs,
780 file->revision, file->pool,
786 /* Set RS->START to the begin of the representation raw in RS->FILE->FILE,
787 if that hasn't been done yet. Use POOL for temporary allocations. */
/* The value stored is the item's offset in the rev / pack file plus
 * RS->HEADER_SIZE, i.e. START points just behind the rep header. */
789 auto_set_start_offset(rep_state_t *rs, apr_pool_t *pool)
793 SVN_ERR(svn_fs_fs__item_offset(&rs->start, rs->sfile->fs,
794 rs->sfile->rfile, rs->revision, NULL,
795 rs->item_index, pool));
796 rs->start += rs->header_size;
802 /* Set RS->VER depending on what is found in the already open RS->FILE->FILE
803 if the diff version is still unknown. Use POOL for temporary allocations.
806 auto_read_diff_version(rep_state_t *rs, apr_pool_t *pool)
811 SVN_ERR(rs_aligned_seek(rs, NULL, rs->start, pool));
812 SVN_ERR(svn_io_file_read_full2(rs->sfile->rfile->file, buf,
813 sizeof(buf), NULL, NULL, pool));
815 /* ### Layering violation */
/* The bytes read from RS->START must begin with the magic "SVN"; anything
 * else is reported as SVN_ERR_FS_CORRUPT. */
816 if (! ((buf[0] == 'S') && (buf[1] == 'V') && (buf[2] == 'N')))
817 return svn_error_create
818 (SVN_ERR_FS_CORRUPT, NULL,
819 _("Malformed svndiff data in representation"));
829 /* See create_rep_state, which wraps this and adds another error. */
/* Builds a rep_state_t (allocated in RESULT_POOL) for REP in FS: selects
 * the window caches, sets up or re-uses the shared file object, obtains
 * the rep header from the rep header cache or from disk, and populates the
 * caches for committed reps. */
831 create_rep_state_body(rep_state_t **rep_state,
832 svn_fs_fs__rep_header_t **rep_header,
833 shared_file_t **shared_file,
834 representation_t *rep,
836 apr_pool_t *result_pool,
837 apr_pool_t *scratch_pool)
839 fs_fs_data_t *ffd = fs->fsap_data;
840 rep_state_t *rs = apr_pcalloc(result_pool, sizeof(*rs));
841 svn_fs_fs__rep_header_t *rh;
842 svn_boolean_t is_cached = FALSE;
843 apr_uint64_t estimated_window_storage;
/* REUSE_SHARED_FILE below is TRUE only if the caller passed in a shared
 * file that is already open, whose revision is valid and packed, and that
 * lies in the same pack file as REP (equal REVISION / MAX_FILES_PER_DIR
 * quotient) -- with REP itself being packed as well. */
847 * - refers to a valid revision,
848 * - refers to a packed revision,
849 * - as does the rep we want to read, and
850 * - refers to the same pack file as the rep
851 * we can re-use the same, already open file object
853 svn_boolean_t reuse_shared_file
854 = shared_file && *shared_file && (*shared_file)->rfile
855 && SVN_IS_VALID_REVNUM((*shared_file)->revision)
856 && (*shared_file)->revision < ffd->min_unpacked_rev
857 && rep->revision < ffd->min_unpacked_rev
858 && ( ((*shared_file)->revision / ffd->max_files_per_dir)
859 == (rep->revision / ffd->max_files_per_dir));
/* Key for the rep header cache: (revision, item index) of REP. */
861 pair_cache_key_t key;
862 key.revision = rep->revision;
863 key.second = rep->item_index;
865 /* continue constructing RS and RA */
866 rs->size = rep->size;
867 rs->revision = rep->revision;
868 rs->item_index = rep->item_index;
869 rs->raw_window_cache = use_block_read(fs) ? ffd->raw_window_cache : NULL;
873 /* Very long files stored as self-delta will produce a huge number of
874 delta windows. Don't cache them lest we don't thrash the cache.
875 Since we don't know the depth of the delta chain, let's assume, the
876 whole contents get rewritten 3 times.
878 estimated_window_storage = 4 * (rep->expanded_size + SVN_DELTA_WINDOW_SIZE);
879 estimated_window_storage = MIN(estimated_window_storage, APR_SIZE_MAX);
881 rs->window_cache = ffd->txdelta_window_cache
882 && svn_cache__is_cachable(ffd->txdelta_window_cache,
883 (apr_size_t)estimated_window_storage)
884 ? ffd->txdelta_window_cache
886 rs->combined_cache = ffd->combined_window_cache
887 && svn_cache__is_cachable(ffd->combined_window_cache,
888 (apr_size_t)estimated_window_storage)
889 ? ffd->combined_window_cache
892 /* cache lookup, i.e. skip reading the rep header if possible */
/* Reps that belong to a transaction are never looked up in the cache. */
893 if (ffd->rep_header_cache && !svn_fs_fs__id_txn_used(&rep->txn_id))
894 SVN_ERR(svn_cache__get((void **) &rh, &is_cached,
895 ffd->rep_header_cache, &key, result_pool));
897 /* initialize the (shared) FILE member in RS */
898 if (reuse_shared_file)
900 rs->sfile = *shared_file;
904 shared_file_t *file = apr_pcalloc(result_pool, sizeof(*file));
905 file->revision = rep->revision;
906 file->pool = result_pool;
910 /* remember the current file, if suggested by the caller */
915 /* read rep header, if necessary */
918 /* ensure file is open and navigate to the start of rep header */
919 if (reuse_shared_file)
923 /* ... we can re-use the same, already open file object.
924 * This implies that we don't read from a txn.
926 rs->sfile = *shared_file;
927 SVN_ERR(auto_open_shared_file(rs->sfile));
928 SVN_ERR(svn_fs_fs__item_offset(&offset, fs, rs->sfile->rfile,
929 rep->revision, NULL, rep->item_index,
931 SVN_ERR(rs_aligned_seek(rs, NULL, offset, scratch_pool));
935 /* otherwise, create a new file object. May or may not be
938 SVN_ERR(open_and_seek_representation(&rs->sfile->rfile, fs, rep,
942 SVN_ERR(svn_fs_fs__read_rep_header(&rh, rs->sfile->rfile->stream,
943 result_pool, scratch_pool));
944 SVN_ERR(get_file_offset(&rs->start, rs, result_pool));
946 /* populate the cache if appropriate */
/* Only committed reps are cached.  With block-read enabled, block_read()
 * is called with a NULL result pointer -- presumably just for its
 * cache-filling side effect (TODO confirm against block_read's
 * docstring). */
947 if (! svn_fs_fs__id_txn_used(&rep->txn_id))
949 if (use_block_read(fs))
950 SVN_ERR(block_read(NULL, fs, rep->revision, rep->item_index,
951 rs->sfile->rfile, result_pool, scratch_pool));
953 if (ffd->rep_header_cache)
954 SVN_ERR(svn_cache__set(ffd->rep_header_cache, &key, rh,
960 SVN_ERR(dbg_log_access(fs, rep->revision, rep->item_index, rh,
961 SVN_FS_FS__ITEM_TYPE_ANY_REP, scratch_pool));
963 rs->header_size = rh->header_size;
967 if (rh->type == svn_fs_fs__rep_plain)
968 /* This is a plaintext, so just return the current rep_state. */
971 /* skip "SVNx" diff marker */
/* Error-decorating front end for create_rep_state_body(): all arguments
 * are passed through.  An SVN_ERR_FS_CORRUPT result is wrapped in a second
 * SVN_ERR_FS_CORRUPT error ("Corrupt representation '...'"); every other
 * result is returned through svn_error_trace(). */
977 /* Read the rep args for REP in filesystem FS and create a rep_state
978 for reading the representation. Return the rep_state in *REP_STATE
979 and the rep header in *REP_HEADER, both allocated in POOL.
981 When reading multiple reps, i.e. a skip delta chain, you may provide
982 non-NULL SHARED_FILE. (If SHARED_FILE is not NULL, in the first
983 call it should be a pointer to NULL.) The function will use this
984 variable to store the previous call results and tries to re-use it.
985 This may result in significant savings in I/O for packed files and
986 number of open file handles.
989 create_rep_state(rep_state_t **rep_state,
990 svn_fs_fs__rep_header_t **rep_header,
991 shared_file_t **shared_file,
992 representation_t *rep,
994 apr_pool_t *result_pool,
995 apr_pool_t *scratch_pool)
997 svn_error_t *err = create_rep_state_body(rep_state, rep_header,
998 shared_file, rep, fs,
999 result_pool, scratch_pool);
1000 if (err && err->apr_err == SVN_ERR_FS_CORRUPT)
1002 fs_fs_data_t *ffd = fs->fsap_data;
1003 const char *rep_str;
1005 /* ### This always returns "-1" for transaction reps, because
1006 ### this particular bit of code doesn't know if the rep is
1007 ### stored in the protorev or in the mutable area (for props
1008 ### or dir contents). It is pretty rare for FSFS to *read*
1009 ### from the protorev file, though, so this is probably OK.
1010 ### And anyone going to debug corruption errors is probably
1011 ### going to jump straight to this comment anyway! */
1013 ? svn_fs_fs__unparse_representation
1014 (rep, ffd->format, TRUE, scratch_pool, scratch_pool)->data
/* The original error is kept as the child of the new one. */
1017 return svn_error_createf(SVN_ERR_FS_CORRUPT, err,
1018 "Corrupt representation '%s'",
1021 /* ### Call representation_string() ? */
1022 return svn_error_trace(err);
/* Sanity-check that REP refers to an existing representation in FS.
 *
 * With log addressing, the item's offset is looked up, then its P2L index
 * entry; SVN_ERR_FS_CORRUPT is returned if that entry is not of a
 * representation type (FILE_REP .. DIR_PROPS).  Otherwise the check
 * consists of creating a rep state for REP via create_rep_state().
 *
 * HINT carries an open file between calls: it is interpreted as a
 * svn_fs_fs__revision_file_t ** in the log addressing branch and as a
 * shared_file_t ** in the other one.  Temporaries are allocated in
 * SCRATCH_POOL. */
1026 svn_fs_fs__check_rep(representation_t *rep,
1029 apr_pool_t *scratch_pool)
1031 if (svn_fs_fs__use_log_addressing(fs))
1034 svn_fs_fs__p2l_entry_t *entry;
1035 svn_fs_fs__revision_file_t *rev_file = NULL;
1037 /* Reuse the revision file provided by *HINT, if it is given and
1038 * actually the rev / pack file that we want. */
1039 svn_revnum_t start_rev = svn_fs_fs__packed_base_rev(fs, rep->revision);
1041 rev_file = *(svn_fs_fs__revision_file_t **)hint;
/* A hinted file is usable only if it starts at the same base revision as
 * the rev / pack file that contains REP. */
1043 if (rev_file == NULL || rev_file->start_revision != start_rev)
1044 SVN_ERR(svn_fs_fs__open_pack_or_rev_file(&rev_file, fs, rep->revision,
1045 scratch_pool, scratch_pool));
1050 /* This will auto-retry if there was a background pack. */
1051 SVN_ERR(svn_fs_fs__item_offset(&offset, fs, rev_file, rep->revision,
1052 NULL, rep->item_index, scratch_pool));
1054 /* This may fail if there is a background pack operation (can't auto-
1055 retry because the item offset lookup has to be redone as well). */
1056 SVN_ERR(svn_fs_fs__p2l_entry_lookup(&entry, fs, rev_file,
1057 rep->revision, offset,
1058 scratch_pool, scratch_pool));
1061 || entry->type < SVN_FS_FS__ITEM_TYPE_FILE_REP
1062 || entry->type > SVN_FS_FS__ITEM_TYPE_DIR_PROPS)
1063 return svn_error_createf(SVN_ERR_FS_CORRUPT, NULL,
1064 _("No representation found at offset %s "
1065 "for item %s in revision %ld"),
1066 apr_off_t_toa(scratch_pool, offset),
1067 apr_psprintf(scratch_pool,
1068 "%" APR_UINT64_T_FMT,
1075 svn_fs_fs__rep_header_t *rep_header;
1077 /* ### Should this be using read_rep_line() directly? */
1078 SVN_ERR(create_rep_state(&rs, &rep_header, (shared_file_t**)hint,
1079 rep, fs, scratch_pool, scratch_pool));
1082 return SVN_NO_ERROR;
/* Follow the delta chain that starts at REP in FS and report its length in
 * *CHAIN_LENGTH.  *SHARD_COUNT receives the value of the local SHARDS
 * counter, which presumably counts how often the chain moves into a
 * different shard (the increment is not visible in this excerpt).
 *
 * Each step reads the rep header through create_rep_state_body() and
 * continues with the header's delta base.  The loop ends at the first rep
 * that is not of type svn_fs_fs__rep_delta or whose base revision is 0.
 * Temporaries are allocated in sub-pools of SCRATCH_POOL, which are
 * destroyed before returning. */
1086 svn_fs_fs__rep_chain_length(int *chain_length,
1088 representation_t *rep,
1090 apr_pool_t *scratch_pool)
1092 fs_fs_data_t *ffd = fs->fsap_data;
1093 svn_revnum_t shard_size = ffd->max_files_per_dir
1094 ? ffd->max_files_per_dir
1096 apr_pool_t *subpool = svn_pool_create(scratch_pool);
1097 apr_pool_t *iterpool = svn_pool_create(scratch_pool);
1098 svn_boolean_t is_delta = FALSE;
1101 svn_revnum_t last_shard = rep->revision / shard_size;
1103 /* Check whether the length of the deltification chain is acceptable.
1104 * Otherwise, shared reps may form a non-skipping delta chain in
1106 representation_t base_rep = *rep;
1108 /* re-use open files between iterations */
1109 shared_file_t *file_hint = NULL;
1111 svn_fs_fs__rep_header_t *header;
1113 /* follow the delta chain towards the end but for at most
1114 * MAX_CHAIN_LENGTH steps. */
1117 rep_state_t *rep_state;
1119 svn_pool_clear(iterpool);
/* Detect that the current base lives in a different shard than the
 * previous one and remember the new shard. */
1121 if (base_rep.revision / shard_size != last_shard)
1123 last_shard = base_rep.revision / shard_size;
1127 SVN_ERR(create_rep_state_body(&rep_state,
/* Continue with the delta base named in the header just read.  The base
 * is treated as committed data, hence the txn id is reset. */
1135 base_rep.revision = header->base_revision;
1136 base_rep.item_index = header->base_item_index;
1137 base_rep.size = header->base_length;
1138 svn_fs_fs__id_txn_reset(&base_rep.txn_id);
1139 is_delta = header->type == svn_fs_fs__rep_delta;
1141 /* Clear it the SUBPOOL once in a while. Doing it too frequently
1142 * renders the FILE_HINT ineffective. Doing too infrequently, may
1143 * leave us with too many open file handles.
1145 * Note that this is mostly about efficiency, with larger values
1146 * being more efficient, and any non-zero value is legal here. When
1147 * reading deltified contents, we may keep 10s of rev files open at
1148 * the same time and the system has to cope with that. Thus, the
1149 * limit of 16 chosen below is in the same ballpark.
1152 if (count % 16 == 0)
1155 svn_pool_clear(subpool);
1158 while (is_delta && base_rep.revision);
1160 *chain_length = count;
1161 *shard_count = shards;
1162 svn_pool_destroy(subpool);
1163 svn_pool_destroy(iterpool);
1165 return SVN_NO_ERROR;
/* State kept while reading the contents of one representation: the rep
 * itself, the rep states of its delta chain (RS_LIST / SRC_STATE), the
 * data needed for MD5 verification and the fulltext cache bookkeeping. */
1168 struct rep_read_baton
1170 /* The FS from which we're reading. */
1173 /* Representation to read. */
1174 representation_t rep;
1176 /* If not NULL, this is the base for the first delta window in rs_list */
1177 svn_stringbuf_t *base_window;
1179 /* The state of all prior delta representations. */
1180 apr_array_header_t *rs_list;
1182 /* The plaintext state, if there is a plaintext. */
1183 rep_state_t *src_state;
1185 /* The index of the current delta chunk, if we are reading a delta. */
1188 /* The buffer where we store undeltified data. */
1193 /* A checksum context for summing the data read in order to verify it.
1194 Note: we don't need to use the sha1 checksum because we're only doing
1195 data verification, for which md5 is perfectly safe. */
1196 svn_checksum_ctx_t *md5_checksum_ctx;
1198 svn_boolean_t checksum_finalized;
1200 /* The stored checksum of the representation we are reading, its
1201 length, and the amount we've read so far. Some of this
1202 information is redundant with rs_list and src_state, but it's
1203 convenient for the checksumming code to have it here. */
1204 unsigned char md5_digest[APR_MD5_DIGESTSIZE];
1209 /* The key for the fulltext cache for this rep, if there is a
1211 pair_cache_key_t fulltext_cache_key;
1212 /* The text we've been reading, if we're going to cache it. */
1213 svn_stringbuf_t *current_fulltext;
1215 /* If not NULL, attempt to read the data from this cache.
1216 Once that lookup fails, reset it to NULL. */
1217 svn_cache__t *fulltext_cache;
1219 /* Bytes delivered from the FULLTEXT_CACHE so far. If the next
1220 lookup fails, we need to skip that much data from the reconstructed
1221 window stream before we continue normal operation. */
1222 svn_filesize_t fulltext_delivered;
1224 /* Used for temporary allocations during the read. */
1227 /* Pool used to store file handles and other data that is persistent
1228 for the entire stream read. */
1229 apr_pool_t *filehandle_pool;
1232 /* Fill *KEY with the revision, item index and current chunk index of RS
1233 so that it addresses the window RS is positioned at. RS->REVISION must
   fit into 32 bits (asserted). For convenience, return the KEY. */
1234 static window_cache_key_t *
1235 get_window_key(window_cache_key_t *key, rep_state_t *rs)
/* The key holds the revision as a 32 bit value; see the cast below. */
1237 assert(rs->revision <= APR_UINT32_MAX);
1238 key->revision = (apr_uint32_t)rs->revision;
1239 key->item_index = rs->item_index;
1240 key->chunk_index = rs->chunk_index;
1245 /* Implement svn_cache__partial_getter_func_t for raw txdelta windows.
1246 * Parse the raw data and return a svn_fs_fs__txdelta_cached_window_t.
 * DATA is interpreted as a svn_fs_fs__raw_cached_window_t. The parsed
 * window and the stream it is read from are allocated in RESULT_POOL.
1248 static svn_error_t *
1249 parse_raw_window(void **out,
1251 apr_size_t data_len,
1253 apr_pool_t *result_pool)
1255 svn_string_t raw_window;
1256 svn_stream_t *stream;
1258 /* unparsed and parsed window */
1259 const svn_fs_fs__raw_cached_window_t *window
1260 = (const svn_fs_fs__raw_cached_window_t *)data;
1261 svn_fs_fs__txdelta_cached_window_t *result
1262 = apr_pcalloc(result_pool, sizeof(*result));
1264 /* create a read stream taking the raw window as input */
1265 raw_window.data = svn_temp_deserializer__ptr(window,
1266 (const void * const *)&window->window.data);
1267 raw_window.len = window->window.len;
1268 stream = svn_stream_from_string(&raw_window, result_pool);
1271 SVN_ERR(svn_txdelta_read_svndiff_window(&result->window, stream, window->ver,
1274 /* complete the window (copy the end offset) and return it */
1275 result->end_offset = window->end_offset;
1278 return SVN_NO_ERROR;
1282 /* Read the WINDOW_P number CHUNK_INDEX for the representation given in
1283 * rep state RS from the current FSFS session's cache. This will be a
1284 * no-op and IS_CACHED will be set to FALSE if no cache has been given.
1285 * If a cache is available IS_CACHED will inform the caller about the
1286 * success of the lookup. Allocations of the window will be made
1287 * from RESULT_POOL. Use SCRATCH_POOL for temporary allocations.
1289 * If the information could be found, position RS right behind that
 * window: RS->CURRENT becomes the cached end offset and RS->CHUNK_INDEX
 * becomes CHUNK_INDEX.
1291 static svn_error_t *
1292 get_cached_window(svn_txdelta_window_t **window_p,
1295 svn_boolean_t *is_cached,
1296 apr_pool_t *result_pool,
1297 apr_pool_t *scratch_pool)
1299 if (! rs->window_cache)
1301 /* txdelta window caching has not been enabled */
1306 /* ask the cache for the desired txdelta window */
1307 svn_fs_fs__txdelta_cached_window_t *cached_window;
1308 window_cache_key_t key = { 0 };
1309 get_window_key(&key, rs);
/* RS may be positioned at a different chunk than the one requested,
   so address the requested chunk explicitly. */
1310 key.chunk_index = chunk_index;
1311 SVN_ERR(svn_cache__get((void **) &cached_window,
1317 /* If we did not find a parsed txdelta window, we might have a raw
1318 version of it in our cache. If so, read, parse and re-cache it. */
1319 if (!*is_cached && rs->raw_window_cache)
1321 SVN_ERR(svn_cache__get_partial((void **) &cached_window, is_cached,
1322 rs->raw_window_cache, &key,
1323 parse_raw_window, NULL, result_pool));
1325 SVN_ERR(svn_cache__set(rs->window_cache, &key, cached_window,
1329 /* Return cached information. */
1332 /* found it. Pass it back to the caller. */
1333 *window_p = cached_window->window;
1335 /* manipulate the RS as if we just read the data */
1336 rs->current = cached_window->end_offset;
1337 rs->chunk_index = chunk_index;
1341 return SVN_NO_ERROR;
1344 /* Store the WINDOW read for the rep state RS in the current FSFS
1345 * session's cache. This will be a no-op if no cache has been given.
 * The cache entry also records RS->CURRENT as the first offset behind
 * the window, so that a later cache hit can reposition RS.
1346 * Temporary allocations will be made from SCRATCH_POOL. */
1347 static svn_error_t *
1348 set_cached_window(svn_txdelta_window_t *window,
1350 apr_pool_t *scratch_pool)
1352 if (rs->window_cache)
1354 /* store the window and the first offset _past_ it */
1355 svn_fs_fs__txdelta_cached_window_t cached_window;
1356 window_cache_key_t key = {0};
1358 cached_window.window = window;
1359 cached_window.end_offset = rs->current;
1361 /* but key it with RS's revision, item index and chunk index (see
1362 * get_window_key) because that is the known state when we look it up */
1363 SVN_ERR(svn_cache__set(rs->window_cache,
1364 get_window_key(&key, rs),
1369 return SVN_NO_ERROR;
1372 /* Read the combined window *WINDOW_P for the rep state RS from the
1373 * combined window cache of RS. This will be a no-op and IS_CACHED will
1374 * be set to FALSE if no cache has been given. If a cache is available
1375 * IS_CACHED will inform the caller about the success of the lookup.
1376 * Allocations (of the window in particular) will be made from POOL.
1378 static svn_error_t *
1379 get_cached_combined_window(svn_stringbuf_t **window_p,
1381 svn_boolean_t *is_cached,
1384 if (! rs->combined_cache)
1386 /* combined window caching has not been enabled */
1391 /* ask the cache for the desired combined window */
1392 window_cache_key_t key = { 0 };
1393 return svn_cache__get((void **)window_p,
1396 get_window_key(&key, rs),
1400 return SVN_NO_ERROR;
1403 /* Store the combined WINDOW for the rep state RS in the combined window
1404 * cache of RS. This will be a no-op if no cache has been given.
1405 * Temporary allocations will be made from SCRATCH_POOL. */
1406 static svn_error_t *
1407 set_cached_combined_window(svn_stringbuf_t *window,
1409 apr_pool_t *scratch_pool)
1411 if (rs->combined_cache)
1413 /* key it with RS's revision, item index and chunk index (see
1414 * get_window_key) because that is the known state when we look it up */
1415 window_cache_key_t key = { 0 };
1416 return svn_cache__set(rs->combined_cache,
1417 get_window_key(&key, rs),
1422 return SVN_NO_ERROR;
1425 /* Build an array of rep_state structures in *LIST giving the delta
1426 reps from first_rep to a plain-text or self-compressed rep. Set
1427 *SRC_STATE to the plain-text rep we find at the end of the chain,
1428 or to NULL if the final delta representation is self-compressed.
1429 The representation to start from is designated by filesystem FS
1430 and representation FIRST_REP.
1431 Also, set *WINDOW_P to the base window content for *LIST, if it
1432 could be found in cache. Otherwise, *LIST will contain the base
1433 representation for the whole delta chain. *LIST and the rep states
   are allocated in POOL. */
1434 static svn_error_t *
1435 build_rep_list(apr_array_header_t **list,
1436 svn_stringbuf_t **window_p,
1437 rep_state_t **src_state,
1439 representation_t *first_rep,
1442 representation_t rep;
1443 rep_state_t *rs = NULL;
1444 svn_fs_fs__rep_header_t *rep_header;
1445 svn_boolean_t is_cached = FALSE;
1446 shared_file_t *shared_file = NULL;
1447 apr_pool_t *iterpool = svn_pool_create(pool);
1449 *list = apr_array_make(pool, 1, sizeof(rep_state_t *));
1452 /* for the top-level rep, we need the rep_args */
1453 SVN_ERR(create_rep_state(&rs, &rep_header, &shared_file, &rep, fs, pool,
1457 svn_pool_clear(iterpool);
1459 /* fetch state, if that has not been done already */
1461 SVN_ERR(create_rep_state(&rs, &rep_header, &shared_file,
1462 &rep, fs, pool, iterpool));
1464 /* for txn reps, there won't be a cached combined window; reps whose
   expanded size reaches SVN_DELTA_WINDOW_SIZE are not looked up either */
1465 if ( !svn_fs_fs__id_txn_used(&rep.txn_id)
1466 && rep.expanded_size < SVN_DELTA_WINDOW_SIZE)
1467 SVN_ERR(get_cached_combined_window(window_p, rs, &is_cached, pool));
1471 /* We already have a reconstructed window in our cache.
1472 Write a pseudo rep_state with the full length. */
1475 rs->size = (*window_p)->len;
1480 if (rep_header->type == svn_fs_fs__rep_plain)
1482 /* This is a plaintext, so just return the current rep_state. */
1487 /* Push this rep onto the list. If it's self-compressed, we're done. */
1488 APR_ARRAY_PUSH(*list, rep_state_t *) = rs;
1489 if (rep_header->type == svn_fs_fs__rep_self_delta)
/* Continue with the delta base named in the rep header; it is never
   a txn rep, so the txn id gets reset. */
1495 rep.revision = rep_header->base_revision;
1496 rep.item_index = rep_header->base_item_index;
1497 rep.size = rep_header->base_length;
1498 svn_fs_fs__id_txn_reset(&rep.txn_id);
1502 svn_pool_destroy(iterpool);
1504 return SVN_NO_ERROR;
1508 /* Create a rep_read_baton structure for representation REP in
1509 filesystem FS and store it in *RB_P. FULLTEXT_CACHE_KEY is copied
   into the baton. Perform all allocations in POOL; the baton gets two
   sub-pools of POOL (its scratch pool and its file handle pool).
1510 If rep is mutable, it must be for file contents. */
1511 static svn_error_t *
1512 rep_read_get_baton(struct rep_read_baton **rb_p,
1514 representation_t *rep,
1515 pair_cache_key_t fulltext_cache_key,
1518 struct rep_read_baton *b;
1520 b = apr_pcalloc(pool, sizeof(*b));
1523 b->base_window = NULL;
1526 b->md5_checksum_ctx = svn_checksum_ctx_create(svn_checksum_md5, pool);
1527 b->checksum_finalized = FALSE;
1528 memcpy(b->md5_digest, rep->md5_digest, sizeof(rep->md5_digest));
1529 b->len = rep->expanded_size;
1531 b->fulltext_cache_key = fulltext_cache_key;
1532 b->pool = svn_pool_create(pool);
1533 b->filehandle_pool = svn_pool_create(pool);
/* Fulltext cache usage is off by default; see svn_fs_fs__get_contents()
   for where it gets enabled. */
1534 b->fulltext_cache = NULL;
1535 b->fulltext_delivered = 0;
1536 b->current_fulltext = NULL;
1538 /* Save our output baton. */
1541 return SVN_NO_ERROR;
1544 /* Skip forwards to THIS_CHUNK in REP_STATE and then read the next delta
1545 window into *NWIN. Note that RS->CHUNK_INDEX will be THIS_CHUNK rather
1546 than THIS_CHUNK + 1 when this function returns.
   The window is taken from the window cache if possible; otherwise it is
   read from the rev file and, for revision data, added to the cache.
   Allocate *NWIN in RESULT_POOL and use SCRATCH_POOL for temporaries.
   Return SVN_ERR_FS_CORRUPT if reading moves beyond the end of the rep. */
1547 static svn_error_t *
1548 read_delta_window(svn_txdelta_window_t **nwin, int this_chunk,
1549 rep_state_t *rs, apr_pool_t *result_pool,
1550 apr_pool_t *scratch_pool)
1552 svn_boolean_t is_cached;
1553 apr_off_t start_offset;
1554 apr_off_t end_offset;
1555 apr_pool_t *iterpool;
1557 SVN_ERR_ASSERT(rs->chunk_index <= this_chunk);
1559 SVN_ERR(dbg_log_access(rs->sfile->fs, rs->revision, rs->item_index,
1560 NULL, SVN_FS_FS__ITEM_TYPE_ANY_REP, scratch_pool));
1562 /* Read the next window. But first, try to find it in the cache. */
1563 SVN_ERR(get_cached_window(nwin, rs, this_chunk, &is_cached,
1564 result_pool, scratch_pool));
1566 return SVN_NO_ERROR;
1568 /* someone has to actually read the data from file. Open it */
1569 SVN_ERR(auto_open_shared_file(rs->sfile));
1571 /* invoke the 'block-read' feature for non-txn data.
1572 However, don't do that if we are in the middle of some representation,
1573 because the block is unlikely to contain other data. */
1574 if ( rs->chunk_index == 0
1575 && SVN_IS_VALID_REVNUM(rs->revision)
1576 && use_block_read(rs->sfile->fs)
1577 && rs->raw_window_cache)
1579 SVN_ERR(block_read(NULL, rs->sfile->fs, rs->revision, rs->item_index,
1580 rs->sfile->rfile, result_pool, scratch_pool));
1582 /* reading the whole block probably also provided us with the
1583 desired txdelta window */
1584 SVN_ERR(get_cached_window(nwin, rs, this_chunk, &is_cached,
1585 result_pool, scratch_pool));
1587 return SVN_NO_ERROR;
1590 /* data is still not cached -> we need to read it.
1591 Make sure we have all the necessary info. */
1592 SVN_ERR(auto_set_start_offset(rs, scratch_pool));
1593 SVN_ERR(auto_read_diff_version(rs, scratch_pool));
1595 /* RS->FILE may be shared between RS instances -> make sure we point
1596 * to the right data. */
1597 start_offset = rs->start + rs->current;
1598 SVN_ERR(rs_aligned_seek(rs, NULL, start_offset, scratch_pool));
1600 /* Skip windows to reach the current chunk if we aren't there yet. */
1601 iterpool = svn_pool_create(scratch_pool);
1602 while (rs->chunk_index < this_chunk)
1604 svn_pool_clear(iterpool);
1605 SVN_ERR(svn_txdelta_skip_svndiff_window(rs->sfile->rfile->file,
1606 rs->ver, iterpool));
1608 SVN_ERR(get_file_offset(&start_offset, rs, iterpool));
1609 rs->current = start_offset - rs->start;
1610 if (rs->current >= rs->size)
1611 return svn_error_create(SVN_ERR_FS_CORRUPT, NULL,
1612 _("Reading one svndiff window read "
1613 "beyond the end of the "
1616 svn_pool_destroy(iterpool);
1618 /* Actually read the next window. */
1619 SVN_ERR(svn_txdelta_read_svndiff_window(nwin, rs->sfile->rfile->stream,
1620 rs->ver, result_pool));
1621 SVN_ERR(get_file_offset(&end_offset, rs, scratch_pool));
1622 rs->current = end_offset - rs->start;
1623 if (rs->current > rs->size)
1624 return svn_error_create(SVN_ERR_FS_CORRUPT, NULL,
1625 _("Reading one svndiff window read beyond "
1626 "the end of the representation"));
1628 /* the window has not been cached before, thus cache it now
1629 * (if caching is used for them at all) */
1630 if (SVN_IS_VALID_REVNUM(rs->revision))
1631 SVN_ERR(set_cached_window(*nwin, rs, scratch_pool));
1633 return SVN_NO_ERROR;
1636 /* Read SIZE bytes from the representation RS and return it in *NWIN.
   The buffer is allocated in RESULT_POOL with room for SIZE bytes and
   gets a terminating NUL written behind the data. RS->CURRENT is
   advanced by SIZE. Use SCRATCH_POOL for temporary allocations. */
1637 static svn_error_t *
1638 read_plain_window(svn_stringbuf_t **nwin, rep_state_t *rs,
1639 apr_size_t size, apr_pool_t *result_pool,
1640 apr_pool_t *scratch_pool)
1644 /* RS->FILE may be shared between RS instances -> make sure we point
1645 * to the right data. */
1646 SVN_ERR(auto_open_shared_file(rs->sfile));
1647 SVN_ERR(auto_set_start_offset(rs, scratch_pool));
1649 offset = rs->start + rs->current;
1650 SVN_ERR(rs_aligned_seek(rs, NULL, offset, scratch_pool));
1652 /* Read the plain data. */
1653 *nwin = svn_stringbuf_create_ensure(size, result_pool);
1654 SVN_ERR(svn_io_file_read_full2(rs->sfile->rfile->file, (*nwin)->data, size,
1655 NULL, NULL, result_pool));
1656 (*nwin)->data[size] = 0;
1659 rs->current += (apr_off_t)size;
1661 return SVN_NO_ERROR;
1664 /* Skip SIZE bytes from the PLAIN representation RS by advancing
   RS->CURRENT by SIZE. */
1665 static svn_error_t *
1666 skip_plain_window(rep_state_t *rs,
1670 rs->current += (apr_off_t)size;
1672 return SVN_NO_ERROR;
1675 /* Get the undeltified window that is a result of combining all deltas
1676 from the current desired representation identified in *RB with its
1677 base representation. Store the window in *RESULT.
   The windows for chunk RB->CHUNK_INDEX are first collected from the
   front of RB->RS_LIST and then applied in reverse order, starting from
   RB->BASE_WINDOW or the plain source rep RB->SRC_STATE. */
1678 static svn_error_t *
1679 get_combined_window(svn_stringbuf_t **result,
1680 struct rep_read_baton *rb)
1682 apr_pool_t *pool, *new_pool, *window_pool;
1684 apr_array_header_t *windows;
1685 svn_stringbuf_t *source, *buf = rb->base_window;
1687 apr_pool_t *iterpool;
1689 /* Read all windows that we need to combine. This is fine because
1690 the size of each window is relatively small (100kB) and skip-
1691 delta limits the number of deltas in a chain to well under 100.
1692 Stop early if one of them does not depend on its predecessors. */
1693 window_pool = svn_pool_create(rb->pool);
1694 windows = apr_array_make(window_pool, 0, sizeof(svn_txdelta_window_t *));
1695 iterpool = svn_pool_create(rb->pool);
1696 for (i = 0; i < rb->rs_list->nelts; ++i)
1698 svn_txdelta_window_t *window;
1700 svn_pool_clear(iterpool);
1702 rs = APR_ARRAY_IDX(rb->rs_list, i, rep_state_t *);
1703 SVN_ERR(read_delta_window(&window, rb->chunk_index, rs, window_pool,
1706 APR_ARRAY_PUSH(windows, svn_txdelta_window_t *) = window;
1707 if (window->src_ops == 0)
1714 /* Combine in the windows from the other delta reps. */
1715 pool = svn_pool_create(rb->pool);
1716 for (--i; i >= 0; --i)
1718 svn_txdelta_window_t *window;
1720 svn_pool_clear(iterpool);
1722 rs = APR_ARRAY_IDX(rb->rs_list, i, rep_state_t *);
1723 window = APR_ARRAY_IDX(windows, i, svn_txdelta_window_t *);
1725 /* Maybe, we've got a PLAIN start representation. If we do, read
1726 as much data from it as is needed for the txdelta window's source
1728 Note that BUF / SOURCE may only be NULL in the first iteration.
1729 Also note that we may have short-cut reading the delta chain --
1730 in which case SRC_OPS is 0 and it might not be a PLAIN rep. */
1732 if (source == NULL && rb->src_state != NULL)
1734 /* Even if we don't need the source rep now, we still must keep
1735 * its read offset in sync with what we might need for the next
1737 if (window->src_ops)
1738 SVN_ERR(read_plain_window(&source, rb->src_state,
1742 SVN_ERR(skip_plain_window(rb->src_state, window->sview_len));
1745 /* Combine this window with the current one. */
1746 new_pool = svn_pool_create(rb->pool);
1747 buf = svn_stringbuf_create_ensure(window->tview_len, new_pool);
1748 buf->len = window->tview_len;
1750 svn_txdelta_apply_instructions(window, source ? source->data : NULL,
1751 buf->data, &buf->len);
1752 if (buf->len != window->tview_len)
1753 return svn_error_create(SVN_ERR_FS_CORRUPT, NULL,
1754 _("svndiff window length is "
1757 /* Cache windows only if the whole rep content could be read as a
1758 single chunk. Only then will no other chunk need a deeper RS
1759 list than the cached chunk. */
1760 if ( (rb->chunk_index == 0) && (rs->current == rs->size)
1761 && SVN_IS_VALID_REVNUM(rs->revision))
1762 SVN_ERR(set_cached_combined_window(buf, rs, new_pool));
1766 /* Cycle pools so that we only need to hold three windows at a time. */
1767 svn_pool_destroy(pool);
1770 svn_pool_destroy(iterpool);
1772 svn_pool_destroy(window_pool);
1775 return SVN_NO_ERROR;
/* SIZE is only handed to the cache check if it is smaller than
   APR_SIZE_MAX, because it gets cast to apr_size_t for that call. */
1778 /* Returns whether or not the expanded fulltext of the file is cachable
1779 * based on its size SIZE. The decision depends on the cache used by FFD.
1781 static svn_boolean_t
1782 fulltext_size_is_cachable(fs_fs_data_t *ffd, svn_filesize_t size)
1784 return (size < APR_SIZE_MAX)
1785 && svn_cache__is_cachable(ffd->fulltext_cache, (apr_size_t)size);
/* Stream close handler: BATON is a struct rep_read_baton whose two pools
   (RB->POOL and RB->FILEHANDLE_POOL) get destroyed here. */
1788 /* Close method used on streams returned by read_representation().
1790 static svn_error_t *
1791 rep_read_contents_close(void *baton)
1793 struct rep_read_baton *rb = baton;
1795 svn_pool_destroy(rb->pool);
1796 svn_pool_destroy(rb->filehandle_pool);
1798 return SVN_NO_ERROR;
1801 /* Return the next *LEN bytes of the rep from our plain / delta windows
1802 and store them in *BUF.
   If RB->RS_LIST is empty, the data is either copied from the cached
   RB->BASE_WINDOW or read directly from the rev file. Otherwise, it is
   served from RB->BUF, which gets refilled with one combined window at
   a time. */
1803 static svn_error_t *
1804 get_contents_from_windows(struct rep_read_baton *rb,
1808 apr_size_t copy_len, remaining = *len;
1812 /* Special case for when there are no delta reps, only a plain
1814 if (rb->rs_list->nelts == 0)
1816 copy_len = remaining;
1819 if (rb->base_window != NULL)
1821 /* We got the desired rep directly from the cache.
1822 This is where we need the pseudo rep_state created
1823 by build_rep_list(). */
1824 apr_size_t offset = (apr_size_t)rs->current;
1825 if (offset >= rb->base_window->len)
1827 else if (copy_len > rb->base_window->len - offset)
1828 copy_len = rb->base_window->len - offset;
1830 memcpy (cur, rb->base_window->data + offset, copy_len);
/* Never read beyond the end of the representation. */
1835 if (((apr_off_t) copy_len) > rs->size - rs->current)
1836 copy_len = (apr_size_t) (rs->size - rs->current);
1838 SVN_ERR(auto_open_shared_file(rs->sfile));
1839 SVN_ERR(auto_set_start_offset(rs, rb->pool));
1841 offset = rs->start + rs->current;
1842 SVN_ERR(rs_aligned_seek(rs, NULL, offset, rb->pool));
1843 SVN_ERR(svn_io_file_read_full2(rs->sfile->rfile->file, cur,
1844 copy_len, NULL, NULL, rb->pool));
1847 rs->current += copy_len;
1849 return SVN_NO_ERROR;
1852 while (remaining > 0)
1854 /* If we have buffered data from a previous chunk, use that. */
1857 /* Determine how much to copy from the buffer. */
1858 copy_len = rb->buf_len - rb->buf_pos;
1859 if (copy_len > remaining)
1860 copy_len = remaining;
1862 /* Actually copy the data. */
1863 memcpy(cur, rb->buf + rb->buf_pos, copy_len);
1864 rb->buf_pos += copy_len;
1866 remaining -= copy_len;
1868 /* If the buffer is all used up, clear it and empty the
1870 if (rb->buf_pos == rb->buf_len)
1872 svn_pool_clear(rb->pool);
1878 svn_stringbuf_t *sbuf = NULL;
1880 rs = APR_ARRAY_IDX(rb->rs_list, 0, rep_state_t *);
1881 if (rs->current == rs->size)
1884 /* Get more buffered data by evaluating a chunk. */
1885 SVN_ERR(get_combined_window(&sbuf, rb));
1888 rb->buf_len = sbuf->len;
1889 rb->buf = sbuf->data;
1896 return SVN_NO_ERROR;
1899 /* Baton type for get_fulltext_partial. It describes the byte range to
   copy out of a cached fulltext and receives the amount copied. */
1900 typedef struct fulltext_baton_t
1902 /* Target buffer to write to; of at least LEN bytes. */
1905 /* Offset within the respective fulltext at which we shall start to
1906 copy data into BUFFER. */
1909 /* Number of bytes to copy. The actual amount may be less in case
1910 the fulltext is short(er). */
1913 /* Number of bytes actually copied into BUFFER. */
1917 /* Implement svn_cache__partial_getter_func_t for fulltext caches.
1918 * From the fulltext in DATA, we copy the range specified by the
1919 * fulltext_baton_t* BATON into the buffer provided by that baton.
1920 * OUT and RESULT_POOL are not used.
 * DATA_LEN is expected to include the NUL that was appended to the
 * fulltext when it was cached. The number of bytes copied is stored
 * in the baton.
1922 static svn_error_t *
1923 get_fulltext_partial(void **out,
1925 apr_size_t data_len,
1927 apr_pool_t *result_pool)
1929 fulltext_baton_t *fulltext_baton = baton;
1931 /* We cached the fulltext with a NUL appended to it. */
1932 apr_size_t fulltext_len = data_len - 1;
1934 /* Clip the copy range to what the fulltext size allows. */
1935 apr_size_t start = MIN(fulltext_baton->start, fulltext_len);
1936 fulltext_baton->read = MIN(fulltext_len - start, fulltext_baton->len);
1938 /* Copy the data to the output buffer and be done. */
1939 memcpy(fulltext_baton->buffer, (const char *)data + start,
1940 fulltext_baton->read);
1942 return SVN_NO_ERROR;
/* The lookup starts at offset BATON->FULLTEXT_DELIVERED of the cached
   fulltext; that offset must fit into apr_size_t (asserted) and is
   advanced by the number of bytes reported as read. */
1945 /* Find the fulltext specified in BATON in the fulltext cache given
1946 * as well by BATON. If that succeeds, set *CACHED to TRUE and copy
1947 * up to the next *LEN bytes into BUFFER. Set *LEN to the actual
1948 * number of bytes copied.
1950 static svn_error_t *
1951 get_contents_from_fulltext(svn_boolean_t *cached,
1952 struct rep_read_baton *baton,
1957 fulltext_baton_t fulltext_baton;
1959 SVN_ERR_ASSERT((apr_size_t)baton->fulltext_delivered
1960 == baton->fulltext_delivered);
1961 fulltext_baton.buffer = buffer;
1962 fulltext_baton.start = (apr_size_t)baton->fulltext_delivered;
1963 fulltext_baton.len = *len;
1964 fulltext_baton.read = 0;
1966 SVN_ERR(svn_cache__get_partial(&dummy, cached, baton->fulltext_cache,
1967 &baton->fulltext_cache_key,
1968 get_fulltext_partial, &fulltext_baton,
1973 baton->fulltext_delivered += fulltext_baton.read;
1974 *len = fulltext_baton.read;
1977 return SVN_NO_ERROR;
1980 /* Determine the optimal size of a string buf that shall receive a
1981 * (full-) text of NEEDED bytes.
1983 * The critical point is that those buffers may be very large and
1984 * can cause memory fragmentation. We apply simple heuristics to
1985 * make fragmentation less likely.
1988 optimimal_allocation_size(apr_size_t needed)
1990 /* For all allocations, assume some overhead that is shared between
1991 * OS memory management, APR memory management and svn_stringbuf_t. */
1992 const apr_size_t overhead = 0x400;
1995 /* If an allocation size is safe for other ephemeral buffers, it should
1996 * be safe for ours. */
1997 if (needed <= SVN__STREAM_CHUNK_SIZE)
2000 /* Paranoia edge case:
2001 * Skip our heuristics if they created arithmetical overflow.
2002 * Beware to make this test work for NEEDED = APR_SIZE_MAX as well! */
2003 if (needed >= APR_SIZE_MAX / 2 - overhead)
2006 /* As per definition SVN__STREAM_CHUNK_SIZE is a power of two.
2007 * Since we know NEEDED to be larger than that, use it as the
2010 * Heuristics: Allocate a power-of-two number of bytes that fit
2011 * NEEDED plus some OVERHEAD. The APR allocator
2012 * will round it up to the next full page size.
2014 optimal = SVN__STREAM_CHUNK_SIZE;
2015 while (optimal - overhead < needed)
2018 /* This is above or equal to NEEDED. */
2019 return optimal - overhead;
2022 /* After a fulltext cache lookup failure, we will continue to read from
2023 * combined delta or plain windows. However, we must first make that data
2024 * stream in BATON catch up to the position LEN already delivered from the
2025 * fulltext cache. Also, we need to store the reconstructed fulltext if we
2026 * want to cache it at the end.
2028 static svn_error_t *
2029 skip_contents(struct rep_read_baton *baton,
2032 svn_error_t *err = SVN_NO_ERROR;
2034 /* Do we want to cache the reconstructed fulltext? */
2035 if (SVN_IS_VALID_REVNUM(baton->fulltext_cache_key.revision))
2038 svn_filesize_t to_alloc = MAX(len, baton->len);
2040 /* This should only be happening if BATON->LEN and LEN are
2041 * cacheable, implying they fit into memory. */
2042 SVN_ERR_ASSERT((apr_size_t)to_alloc == to_alloc);
2044 /* Allocate the fulltext buffer in the pool that lives as long as
   the stream does. */
2045 baton->current_fulltext = svn_stringbuf_create_ensure(
2046 optimimal_allocation_size((apr_size_t)to_alloc),
2047 baton->filehandle_pool);
2049 /* Read LEN bytes from the window stream and store the data
2050 * in the fulltext buffer (will be filled by further reads later). */
2051 baton->current_fulltext->len = (apr_size_t)len;
2052 baton->current_fulltext->data[(apr_size_t)len] = 0;
2054 buffer = baton->current_fulltext->data;
2055 while (len > 0 && !err)
2057 apr_size_t to_read = (apr_size_t)len;
2058 err = get_contents_from_windows(baton, buffer, &to_read);
2063 /* Make the MD5 calculation catch up with the data delivered
2064 * (we did not run MD5 on the data that we took from the cache). */
2067 SVN_ERR(svn_checksum_update(baton->md5_checksum_ctx,
2068 baton->current_fulltext->data,
2069 baton->current_fulltext->len));
2070 baton->off += baton->current_fulltext->len;
2075 /* Simply drain LEN bytes from the window stream. */
2076 apr_pool_t *subpool = svn_pool_create(baton->pool);
2077 char *buffer = apr_palloc(subpool, SVN__STREAM_CHUNK_SIZE);
2079 while (len > 0 && !err)
2081 apr_size_t to_read = len > SVN__STREAM_CHUNK_SIZE
2082 ? SVN__STREAM_CHUNK_SIZE
2085 err = get_contents_from_windows(baton, buffer, &to_read);
2088 /* Make the MD5 calculation catch up with the data delivered
2089 * (we did not run MD5 on the data that we took from the cache). */
2092 SVN_ERR(svn_checksum_update(baton->md5_checksum_ctx,
2094 baton->off += to_read;
2098 svn_pool_destroy(subpool);
2101 return svn_error_trace(err);
2104 /* BATON is of type `rep_read_baton'; read the next *LEN bytes of the
2105 representation and store them in *BUF. Sum as we read and verify
2106 the MD5 sum at the end. This is a READ_FULL_FN for svn_stream_t.
   Data is served from the fulltext cache for as long as lookups succeed.
   After the first miss, the cache is no longer consulted and the data is
   reconstructed from the window stream instead. Once the end of the
   rep has been reached, a buffered fulltext gets added to the cache. */
2107 static svn_error_t *
2108 rep_read_contents(void *baton,
2112 struct rep_read_baton *rb = baton;
2113 apr_size_t len_requested = *len;
2115 /* Get data from the fulltext cache for as long as we can. */
2116 if (rb->fulltext_cache)
2118 svn_boolean_t cached;
2119 SVN_ERR(get_contents_from_fulltext(&cached, rb, buf, len));
2121 return SVN_NO_ERROR;
2123 /* Cache miss. From now on, we will never read from the fulltext
2124 * cache for this representation anymore. */
2125 rb->fulltext_cache = NULL;
2128 /* No fulltext cache to help us. We must read from the window stream. */
2131 /* Window stream not initialized, yet. Do it now. */
2132 rb->len = rb->rep.expanded_size;
2133 SVN_ERR(build_rep_list(&rb->rs_list, &rb->base_window,
2134 &rb->src_state, rb->fs, &rb->rep,
2135 rb->filehandle_pool));
2137 /* In case we did read from the fulltext cache before, make the
2138 * window stream catch up. Also, initialize the fulltext buffer
2139 * if we want to cache the fulltext at the end. */
2140 SVN_ERR(skip_contents(rb, rb->fulltext_delivered));
2143 /* Get the next block of data.
2144 * Keep in mind that the representation might be empty and leave us
2145 * already positioned at the end of the rep. */
2146 if (rb->off == rb->len)
2149 SVN_ERR(get_contents_from_windows(rb, buf, len));
2151 if (rb->current_fulltext)
2152 svn_stringbuf_appendbytes(rb->current_fulltext, buf, *len);
2154 /* This is a FULL_READ_FN so a short read implies EOF and we can
2155 verify the length. */
2157 if (*len < len_requested && rb->off != rb->len)
2159 /* A warning rather than an error to allow the data to be
2160 retrieved when the length is wrong but the data is
2161 present, i.e. if repository corruption has stored the wrong
2163 svn_error_t *err = svn_error_createf(SVN_ERR_FS_CORRUPT, NULL,
2164 _("Length mismatch while reading representation:"
2167 apr_psprintf(rb->pool, "%" SVN_FILESIZE_T_FMT,
2169 apr_psprintf(rb->pool, "%" SVN_FILESIZE_T_FMT,
2172 rb->fs->warning(rb->fs->warning_baton, err);
2173 svn_error_clear(err);
2176 /* Perform checksumming. We want to check the checksum as soon as
2177 the last byte of data is read, in case the caller never performs
2178 a short read, but we don't want to finalize the MD5 context
2180 if (!rb->checksum_finalized)
2182 SVN_ERR(svn_checksum_update(rb->md5_checksum_ctx, buf, *len));
2183 if (rb->off == rb->len)
2185 svn_checksum_t *md5_checksum;
2186 svn_checksum_t expected;
2187 expected.kind = svn_checksum_md5;
2188 expected.digest = rb->md5_digest;
2190 rb->checksum_finalized = TRUE;
2191 SVN_ERR(svn_checksum_final(&md5_checksum, rb->md5_checksum_ctx,
2193 if (!svn_checksum_match(md5_checksum, &expected))
2194 return svn_error_create(SVN_ERR_FS_CORRUPT,
2195 svn_checksum_mismatch_err(&expected, md5_checksum,
2197 _("Checksum mismatch while reading representation")),
2202 if (rb->off == rb->len && rb->current_fulltext)
2204 fs_fs_data_t *ffd = rb->fs->fsap_data;
2205 SVN_ERR(svn_cache__set(ffd->fulltext_cache, &rb->fulltext_cache_key,
2206 rb->current_fulltext, rb->pool));
2207 rb->current_fulltext = NULL;
2210 return SVN_NO_ERROR;
/* Set *CONTENTS_P to a read stream over the expanded contents of REP,
   allocated in POOL. Unless the empty stream is returned, reads go
   through rep_read_contents() with a baton from rep_read_get_baton().
   Fulltext cache lookups are only enabled when FS has a fulltext cache,
   CACHE_FULLTEXT is set, REP has a valid revision and its expanded size
   is cacheable; otherwise the fulltext is not added to the cache either. */
2214 svn_fs_fs__get_contents(svn_stream_t **contents_p,
2216 representation_t *rep,
2217 svn_boolean_t cache_fulltext,
2222 *contents_p = svn_stream_empty(pool);
2226 fs_fs_data_t *ffd = fs->fsap_data;
2227 struct rep_read_baton *rb;
2229 pair_cache_key_t fulltext_cache_key = { 0 };
2230 fulltext_cache_key.revision = rep->revision;
2231 fulltext_cache_key.second = rep->item_index;
2233 /* Initialize the reader baton. Some members may added lazily
2234 * while reading from the stream */
2235 SVN_ERR(rep_read_get_baton(&rb, fs, rep, fulltext_cache_key, pool));
2237 /* Make the stream attempt fulltext cache lookups if the fulltext
2238 * is cacheable. If it is not, then also don't try to buffer and
2240 if (ffd->fulltext_cache && cache_fulltext
2241 && SVN_IS_VALID_REVNUM(rep->revision)
2242 && fulltext_size_is_cachable(ffd, rep->expanded_size))
2244 rb->fulltext_cache = ffd->fulltext_cache;
2248 /* This will also prevent the reconstructed fulltext from being
2249 put into the cache. */
2250 rb->fulltext_cache_key.revision = SVN_INVALID_REVNUM;
2253 *contents_p = svn_stream_create(rb, pool);
2254 svn_stream_set_read2(*contents_p, NULL /* only full read support */,
2256 svn_stream_set_close(*contents_p, rep_read_contents_close);
2259 return SVN_NO_ERROR;
/* Set *CONTENTS_P to a read stream over the expanded contents of REP,
   whose rep header is read from FILE at OFFSET. The rep state for REP
   is set up by hand with an invalid revision and without caches, and the
   fulltext cache key is invalid as well. If REP is a delta against
   another representation, that base is resolved through build_rep_list()
   and REP's own state is inserted at the front of the resulting chain.
   Allocations are made in POOL. */
2263 svn_fs_fs__get_contents_from_file(svn_stream_t **contents_p,
2265 representation_t *rep,
2270 struct rep_read_baton *rb;
2271 pair_cache_key_t fulltext_cache_key = { SVN_INVALID_REVNUM, 0 };
2272 rep_state_t *rs = apr_pcalloc(pool, sizeof(*rs));
2273 svn_fs_fs__rep_header_t *rh;
2275 /* Initialize the reader baton. Some members may added lazily
2276 * while reading from the stream. */
2277 SVN_ERR(rep_read_get_baton(&rb, fs, rep, fulltext_cache_key, pool));
2279 /* Continue constructing RS. Leave caches as NULL. */
2280 rs->size = rep->size;
2281 rs->revision = SVN_INVALID_REVNUM;
2286 /* Provide just enough file access info to allow for a basic read from
2287 * FILE but leave all index / footer info with empty values b/c FILE
2288 * probably is not a complete revision file. */
2289 rs->sfile = apr_pcalloc(pool, sizeof(*rs->sfile));
2290 rs->sfile->revision = rep->revision;
2291 rs->sfile->pool = pool;
2293 rs->sfile->rfile = apr_pcalloc(pool, sizeof(*rs->sfile->rfile));
2294 rs->sfile->rfile->start_revision = SVN_INVALID_REVNUM;
2295 rs->sfile->rfile->file = file;
2296 rs->sfile->rfile->stream = svn_stream_from_aprfile2(file, TRUE, pool);
2298 /* Read the rep header. */
2299 SVN_ERR(aligned_seek(fs, file, NULL, offset, pool));
2300 SVN_ERR(svn_fs_fs__read_rep_header(&rh, rs->sfile->rfile->stream,
2302 SVN_ERR(get_file_offset(&rs->start, rs, pool));
2303 rs->header_size = rh->header_size;
2305 /* Log the access. */
2306 SVN_ERR(dbg_log_access(fs, SVN_INVALID_REVNUM, 0, rh,
2307 SVN_FS_FS__ITEM_TYPE_ANY_REP, pool));
2309 /* Build the representation list (delta chain). */
2310 if (rh->type == svn_fs_fs__rep_plain)
2312 rb->rs_list = apr_array_make(pool, 0, sizeof(rep_state_t *));
2315 else if (rh->type == svn_fs_fs__rep_self_delta)
2317 rb->rs_list = apr_array_make(pool, 1, sizeof(rep_state_t *));
2318 APR_ARRAY_PUSH(rb->rs_list, rep_state_t *) = rs;
2319 rb->src_state = NULL;
2323 representation_t next_rep = { 0 };
2325 /* skip "SVNx" diff marker */
2328 /* REP's base rep is inside a proper revision.
2329 * It can be reconstructed in the usual way. */
2330 next_rep.revision = rh->base_revision;
2331 next_rep.item_index = rh->base_item_index;
2332 next_rep.size = rh->base_length;
2333 svn_fs_fs__id_txn_reset(&next_rep.txn_id);
2335 SVN_ERR(build_rep_list(&rb->rs_list, &rb->base_window,
2336 &rb->src_state, rb->fs, &next_rep,
2337 rb->filehandle_pool));
2339 /* Insert the access to REP as the first element of the delta chain. */
2340 SVN_ERR(svn_sort__array_insert2(rb->rs_list, &rs, 0));
2343 /* Now, the baton is complete and we can assemble the stream around it. */
2344 *contents_p = svn_stream_create(rb, pool);
2345 svn_stream_set_read2(*contents_p, NULL /* only full read support */,
2347 svn_stream_set_close(*contents_p, rep_read_contents_close);
2349 return SVN_NO_ERROR;
2352 /* Baton for cache_access_wrapper. Wraps the original parameters of
2353 * svn_fs_fs__try_process_file_content().
2355 typedef struct cache_access_wrapper_baton_t
2357 svn_fs_process_contents_func_t func;
2359 } cache_access_wrapper_baton_t;
2361 /* Wrapper to translate between svn_fs_process_contents_func_t and
2362 * svn_cache__partial_getter_func_t.
2364 static svn_error_t *
2365 cache_access_wrapper(void **out,
2367 apr_size_t data_len,
2371 cache_access_wrapper_baton_t *wrapper_baton = baton;
2373 SVN_ERR(wrapper_baton->func((const unsigned char *)data,
2374 data_len - 1, /* cache adds terminating 0 */
2375 wrapper_baton->baton,
2378 /* non-NULL value to signal the calling cache that all went well */
2381 return SVN_NO_ERROR;
2385 svn_fs_fs__try_process_file_contents(svn_boolean_t *success,
2387 node_revision_t *noderev,
2388 svn_fs_process_contents_func_t processor,
2392 representation_t *rep = noderev->data_rep;
2395 fs_fs_data_t *ffd = fs->fsap_data;
2396 pair_cache_key_t fulltext_cache_key = { 0 };
2398 fulltext_cache_key.revision = rep->revision;
2399 fulltext_cache_key.second = rep->item_index;
2400 if (ffd->fulltext_cache && SVN_IS_VALID_REVNUM(rep->revision)
2401 && fulltext_size_is_cachable(ffd, rep->expanded_size))
2403 cache_access_wrapper_baton_t wrapper_baton;
2406 wrapper_baton.func = processor;
2407 wrapper_baton.baton = baton;
2408 return svn_cache__get_partial(&dummy, success,
2409 ffd->fulltext_cache,
2410 &fulltext_cache_key,
2411 cache_access_wrapper,
2418 return SVN_NO_ERROR;
2422 /* Baton used when reading delta windows. */
2423 struct delta_read_baton
2426 unsigned char md5_digest[APR_MD5_DIGESTSIZE];
2429 /* This implements the svn_txdelta_next_window_fn_t interface. */
2430 static svn_error_t *
2431 delta_read_next_window(svn_txdelta_window_t **window, void *baton,
2434 struct delta_read_baton *drb = baton;
2435 apr_pool_t *scratch_pool = svn_pool_create(pool);
2438 if (drb->rs->current < drb->rs->size)
2440 SVN_ERR(read_delta_window(window, drb->rs->chunk_index, drb->rs, pool,
2442 drb->rs->chunk_index++;
2445 svn_pool_destroy(scratch_pool);
2447 return SVN_NO_ERROR;
2450 /* This implements the svn_txdelta_md5_digest_fn_t interface. */
2451 static const unsigned char *
2452 delta_read_md5_digest(void *baton)
2454 struct delta_read_baton *drb = baton;
2455 return drb->md5_digest;
2458 /* Return a txdelta stream for on-disk representation REP_STATE
2459 * of TARGET. Allocate the result in POOL.
2461 static svn_txdelta_stream_t *
2462 get_storaged_delta_stream(rep_state_t *rep_state,
2463 node_revision_t *target,
2466 /* Create the delta read baton. */
2467 struct delta_read_baton *drb = apr_pcalloc(pool, sizeof(*drb));
2468 drb->rs = rep_state;
2469 memcpy(drb->md5_digest, target->data_rep->md5_digest,
2470 sizeof(drb->md5_digest));
2471 return svn_txdelta_stream_create(drb, delta_read_next_window,
2472 delta_read_md5_digest, pool);
2476 svn_fs_fs__get_file_delta_stream(svn_txdelta_stream_t **stream_p,
2478 node_revision_t *source,
2479 node_revision_t *target,
2482 svn_stream_t *source_stream, *target_stream;
2483 rep_state_t *rep_state;
2484 svn_fs_fs__rep_header_t *rep_header;
2485 fs_fs_data_t *ffd = fs->fsap_data;
2487 /* Try a shortcut: if the target is stored as a delta against the source,
2488 then just use that delta. However, prefer using the fulltext cache
2489 whenever that is available. */
2490 if (target->data_rep && (source || ! ffd->fulltext_cache))
2492 /* Read target's base rep if any. */
2493 SVN_ERR(create_rep_state(&rep_state, &rep_header, NULL,
2494 target->data_rep, fs, pool, pool));
2496 if (source && source->data_rep && target->data_rep)
2498 /* If that matches source, then use this delta as is.
2499 Note that we want an actual delta here. E.g. a self-delta would
2500 not be good enough. */
2501 if (rep_header->type == svn_fs_fs__rep_delta
2502 && rep_header->base_revision == source->data_rep->revision
2503 && rep_header->base_item_index == source->data_rep->item_index)
2505 *stream_p = get_storaged_delta_stream(rep_state, target, pool);
2506 return SVN_NO_ERROR;
2511 /* We want a self-delta. There is a fair chance that TARGET got
2512 added in this revision and is already stored in the requested
2514 if (rep_header->type == svn_fs_fs__rep_self_delta)
2516 *stream_p = get_storaged_delta_stream(rep_state, target, pool);
2517 return SVN_NO_ERROR;
2521 /* Don't keep file handles open for longer than necessary. */
2522 if (rep_state->sfile->rfile)
2524 SVN_ERR(svn_fs_fs__close_revision_file(rep_state->sfile->rfile));
2525 rep_state->sfile->rfile = NULL;
2529 /* Read both fulltexts and construct a delta. */
2531 SVN_ERR(svn_fs_fs__get_contents(&source_stream, fs, source->data_rep,
2534 source_stream = svn_stream_empty(pool);
2535 SVN_ERR(svn_fs_fs__get_contents(&target_stream, fs, target->data_rep,
2538 /* Because source and target stream will already verify their content,
2539 * there is no need to do this once more. In particular if the stream
2540 * content is being fetched from cache. */
2541 svn_txdelta2(stream_p, source_stream, target_stream, FALSE, pool);
2543 return SVN_NO_ERROR;
2546 /* Return TRUE when all svn_fs_dirent_t* in ENTRIES are already sorted
2547 by their respective name. */
2548 static svn_boolean_t
2549 sorted(apr_array_header_t *entries)
2553 const svn_fs_dirent_t * const *dirents = (const void *)entries->elts;
2554 for (i = 0; i < entries->nelts-1; ++i)
2555 if (strcmp(dirents[i]->name, dirents[i+1]->name) > 0)
2561 /* Compare the names of the two dirents given in **A and **B. */
2563 compare_dirents(const void *a, const void *b)
2565 const svn_fs_dirent_t *lhs = *((const svn_fs_dirent_t * const *) a);
2566 const svn_fs_dirent_t *rhs = *((const svn_fs_dirent_t * const *) b);
2568 return strcmp(lhs->name, rhs->name);
2571 /* Compare the name of the dirents given in **A with the C string in *B. */
2573 compare_dirent_name(const void *a, const void *b)
2575 const svn_fs_dirent_t *lhs = *((const svn_fs_dirent_t * const *) a);
2576 const char *rhs = b;
2578 return strcmp(lhs->name, rhs);
2581 /* Into *ENTRIES_P, read all directories entries from the key-value text in
2582 * STREAM. If INCREMENTAL is TRUE, read until the end of the STREAM and
2583 * update the data. ID is provided for nicer error messages.
2585 static svn_error_t *
2586 read_dir_entries(apr_array_header_t **entries_p,
2587 svn_stream_t *stream,
2588 svn_boolean_t incremental,
2589 const svn_fs_id_t *id,
2590 apr_pool_t *result_pool,
2591 apr_pool_t *scratch_pool)
2593 apr_pool_t *iterpool = svn_pool_create(scratch_pool);
2594 apr_hash_t *hash = NULL;
2595 const char *terminator = SVN_HASH_TERMINATOR;
2596 apr_array_header_t *entries = NULL;
2599 hash = svn_hash__make(scratch_pool);
2601 entries = apr_array_make(result_pool, 16, sizeof(svn_fs_dirent_t *));
2603 /* Read until the terminator (non-incremental) or the end of STREAM
2604 (incremental mode). In the latter mode, we use a temporary HASH
2605 to make updating and removing entries cheaper. */
2608 svn_hash__entry_t entry;
2609 svn_fs_dirent_t *dirent;
2612 svn_pool_clear(iterpool);
2613 SVN_ERR_W(svn_hash__read_entry(&entry, stream, terminator,
2614 incremental, iterpool),
2615 apr_psprintf(iterpool,
2616 _("Directory representation corrupt in '%s'"),
2617 svn_fs_fs__id_unparse(id, scratch_pool)->data));
2619 /* End of directory? */
2620 if (entry.key == NULL)
2622 /* In incremental mode, we skip the terminator and read the
2623 increments following it until the end of the stream. */
2624 if (incremental && terminator)
2630 /* Deleted entry? */
2631 if (entry.val == NULL)
2633 /* We must be in incremental mode */
2635 apr_hash_set(hash, entry.key, entry.keylen, NULL);
2639 /* Add a new directory entry. */
2640 dirent = apr_pcalloc(result_pool, sizeof(*dirent));
2641 dirent->name = apr_pstrmemdup(result_pool, entry.key, entry.keylen);
2643 str = svn_cstring_tokenize(" ", &entry.val);
2645 return svn_error_createf(SVN_ERR_FS_CORRUPT, NULL,
2646 _("Directory entry corrupt in '%s'"),
2647 svn_fs_fs__id_unparse(id, scratch_pool)->data);
2649 if (strcmp(str, SVN_FS_FS__KIND_FILE) == 0)
2651 dirent->kind = svn_node_file;
2653 else if (strcmp(str, SVN_FS_FS__KIND_DIR) == 0)
2655 dirent->kind = svn_node_dir;
2659 return svn_error_createf(SVN_ERR_FS_CORRUPT, NULL,
2660 _("Directory entry corrupt in '%s'"),
2661 svn_fs_fs__id_unparse(id, scratch_pool)->data);
2664 str = svn_cstring_tokenize(" ", &entry.val);
2666 return svn_error_createf(SVN_ERR_FS_CORRUPT, NULL,
2667 _("Directory entry corrupt in '%s'"),
2668 svn_fs_fs__id_unparse(id, scratch_pool)->data);
2670 SVN_ERR(svn_fs_fs__id_parse(&dirent->id, str, result_pool));
2672 /* In incremental mode, update the hash; otherwise, write to the
2673 * final array. Be sure to use hash keys that survive this iteration.
2676 apr_hash_set(hash, dirent->name, entry.keylen, dirent);
2678 APR_ARRAY_PUSH(entries, svn_fs_dirent_t *) = dirent;
2681 /* Convert container to a sorted array. */
2684 apr_hash_index_t *hi;
2686 entries = apr_array_make(result_pool, apr_hash_count(hash),
2687 sizeof(svn_fs_dirent_t *));
2688 for (hi = apr_hash_first(iterpool, hash); hi; hi = apr_hash_next(hi))
2689 APR_ARRAY_PUSH(entries, svn_fs_dirent_t *) = apr_hash_this_val(hi);
2692 if (!sorted(entries))
2693 svn_sort__array(entries, compare_dirents);
2695 svn_pool_destroy(iterpool);
2697 *entries_p = entries;
2698 return SVN_NO_ERROR;
2701 /* For directory NODEREV in FS, return the *FILESIZE of its in-txn
2702 * representation. If the directory representation is comitted data,
2703 * set *FILESIZE to SVN_INVALID_FILESIZE. Use SCRATCH_POOL for temporaries.
2705 static svn_error_t *
2706 get_txn_dir_info(svn_filesize_t *filesize,
2708 node_revision_t *noderev,
2709 apr_pool_t *scratch_pool)
2711 if (noderev->data_rep && svn_fs_fs__id_txn_used(&noderev->data_rep->txn_id))
2713 const svn_io_dirent2_t *dirent;
2714 const char *filename;
2716 filename = svn_fs_fs__path_txn_node_children(fs, noderev->id,
2719 SVN_ERR(svn_io_stat_dirent2(&dirent, filename, FALSE, FALSE,
2720 scratch_pool, scratch_pool));
2721 *filesize = dirent->filesize;
2725 *filesize = SVN_INVALID_FILESIZE;
2728 return SVN_NO_ERROR;
2731 /* Fetch the contents of a directory into DIR. Values are stored
2732 as filename to string mappings; further conversion is necessary to
2733 convert them into svn_fs_dirent_t values. */
2734 static svn_error_t *
2735 get_dir_contents(svn_fs_fs__dir_data_t *dir,
2737 node_revision_t *noderev,
2738 apr_pool_t *result_pool,
2739 apr_pool_t *scratch_pool)
2741 svn_stream_t *contents;
2743 /* Initialize the result. */
2744 dir->txn_filesize = SVN_INVALID_FILESIZE;
2746 /* Read dir contents - unless there is none in which case we are done. */
2747 if (noderev->data_rep && svn_fs_fs__id_txn_used(&noderev->data_rep->txn_id))
2749 /* Get location & current size of the directory representation. */
2750 const char *filename;
2753 filename = svn_fs_fs__path_txn_node_children(fs, noderev->id,
2756 /* The representation is mutable. Read the old directory
2757 contents from the mutable children file, followed by the
2758 changes we've made in this transaction. */
2759 SVN_ERR(svn_io_file_open(&file, filename, APR_READ | APR_BUFFERED,
2760 APR_OS_DEFAULT, scratch_pool));
2762 /* Obtain txn children file size. */
2763 SVN_ERR(svn_io_file_size_get(&dir->txn_filesize, file, scratch_pool));
2765 contents = svn_stream_from_aprfile2(file, FALSE, scratch_pool);
2766 SVN_ERR(read_dir_entries(&dir->entries, contents, TRUE, noderev->id,
2767 result_pool, scratch_pool));
2768 SVN_ERR(svn_stream_close(contents));
2770 else if (noderev->data_rep)
2772 /* Undeltify content before parsing it. Otherwise, we could only
2773 * parse it byte-by-byte.
2775 apr_size_t len = noderev->data_rep->expanded_size;
2776 svn_stringbuf_t *text;
2778 /* The representation is immutable. Read it normally. */
2779 SVN_ERR(svn_fs_fs__get_contents(&contents, fs, noderev->data_rep,
2780 FALSE, scratch_pool));
2781 SVN_ERR(svn_stringbuf_from_stream(&text, contents, len, scratch_pool));
2782 SVN_ERR(svn_stream_close(contents));
2784 /* de-serialize hash */
2785 contents = svn_stream_from_stringbuf(text, scratch_pool);
2786 SVN_ERR(read_dir_entries(&dir->entries, contents, FALSE, noderev->id,
2787 result_pool, scratch_pool));
2791 dir->entries = apr_array_make(result_pool, 0, sizeof(svn_fs_dirent_t *));
2794 return SVN_NO_ERROR;
2798 /* Return the cache object in FS responsible to storing the directory the
2799 * NODEREV plus the corresponding *KEY. If no cache exists, return NULL.
2800 * PAIR_KEY must point to some key struct, which does not need to be
2801 * initialized. We use it to avoid dynamic allocation.
2803 static svn_cache__t *
2804 locate_dir_cache(svn_fs_t *fs,
2806 pair_cache_key_t *pair_key,
2807 node_revision_t *noderev,
2810 fs_fs_data_t *ffd = fs->fsap_data;
2811 if (!noderev->data_rep)
2813 /* no data rep -> empty directory.
2814 A NULL key causes a cache miss. */
2816 return ffd->dir_cache;
2819 if (svn_fs_fs__id_txn_used(&noderev->data_rep->txn_id))
2821 /* data in txns requires the expensive fs_id-based addressing mode */
2822 *key = svn_fs_fs__id_unparse(noderev->id, pool)->data;
2824 return ffd->txn_dir_cache;
2828 /* committed data can use simple rev,item pairs */
2829 pair_key->revision = noderev->data_rep->revision;
2830 pair_key->second = noderev->data_rep->item_index;
2833 return ffd->dir_cache;
2838 svn_fs_fs__rep_contents_dir(apr_array_header_t **entries_p,
2840 node_revision_t *noderev,
2841 apr_pool_t *result_pool,
2842 apr_pool_t *scratch_pool)
2844 pair_cache_key_t pair_key = { 0 };
2846 svn_fs_fs__dir_data_t *dir;
2848 /* find the cache we may use */
2849 svn_cache__t *cache = locate_dir_cache(fs, &key, &pair_key, noderev,
2853 svn_boolean_t found;
2855 SVN_ERR(svn_cache__get((void **)&dir, &found, cache, key,
2859 /* Verify that the cached dir info is not stale
2860 * (no-op for committed data). */
2861 svn_filesize_t filesize;
2862 SVN_ERR(get_txn_dir_info(&filesize, fs, noderev, scratch_pool));
2864 if (filesize == dir->txn_filesize)
2866 /* Still valid. Done. */
2867 *entries_p = dir->entries;
2868 return SVN_NO_ERROR;
2873 /* Read in the directory contents. */
2874 dir = apr_pcalloc(scratch_pool, sizeof(*dir));
2875 SVN_ERR(get_dir_contents(dir, fs, noderev, result_pool, scratch_pool));
2876 *entries_p = dir->entries;
2878 /* Update the cache, if we are to use one.
2880 * Don't even attempt to serialize very large directories; it would cause
2881 * an unnecessary memory allocation peak. 150 bytes/entry is about right.
2883 if (cache && svn_cache__is_cachable(cache, 150 * dir->entries->nelts))
2884 SVN_ERR(svn_cache__set(cache, key, dir, scratch_pool));
2886 return SVN_NO_ERROR;
2890 svn_fs_fs__find_dir_entry(apr_array_header_t *entries,
2894 svn_fs_dirent_t **result
2895 = svn_sort__array_lookup(entries, name, hint, compare_dirent_name);
2896 return result ? *result : NULL;
2900 svn_fs_fs__rep_contents_dir_entry(svn_fs_dirent_t **dirent,
2902 node_revision_t *noderev,
2904 apr_pool_t *result_pool,
2905 apr_pool_t *scratch_pool)
2907 extract_dir_entry_baton_t baton;
2908 svn_boolean_t found = FALSE;
2910 /* find the cache we may use */
2911 pair_cache_key_t pair_key = { 0 };
2913 svn_cache__t *cache = locate_dir_cache(fs, &key, &pair_key, noderev,
2917 svn_filesize_t filesize;
2918 SVN_ERR(get_txn_dir_info(&filesize, fs, noderev, scratch_pool));
2921 baton.txn_filesize = filesize;
2923 SVN_ERR(svn_cache__get_partial((void **)dirent,
2927 svn_fs_fs__extract_dir_entry,
2932 /* fetch data from disk if we did not find it in the cache */
2933 if (! found || baton.out_of_date)
2935 svn_fs_dirent_t *entry;
2936 svn_fs_dirent_t *entry_copy = NULL;
2937 svn_fs_fs__dir_data_t dir;
2939 /* Read in the directory contents. */
2940 SVN_ERR(get_dir_contents(&dir, fs, noderev, scratch_pool,
2943 /* Update the cache, if we are to use one.
2945 * Don't even attempt to serialize very large directories; it would
2946 * cause an unnecessary memory allocation peak. 150 bytes / entry is
2948 if (cache && svn_cache__is_cachable(cache, 150 * dir.entries->nelts))
2949 SVN_ERR(svn_cache__set(cache, key, &dir, scratch_pool));
2951 /* find desired entry and return a copy in POOL, if found */
2952 entry = svn_fs_fs__find_dir_entry(dir.entries, name, NULL);
2955 entry_copy = apr_palloc(result_pool, sizeof(*entry_copy));
2956 entry_copy->name = apr_pstrdup(result_pool, entry->name);
2957 entry_copy->id = svn_fs_fs__id_copy(entry->id, result_pool);
2958 entry_copy->kind = entry->kind;
2961 *dirent = entry_copy;
2964 return SVN_NO_ERROR;
2968 svn_fs_fs__get_proplist(apr_hash_t **proplist_p,
2970 node_revision_t *noderev,
2973 apr_hash_t *proplist;
2974 svn_stream_t *stream;
2976 if (noderev->prop_rep && svn_fs_fs__id_txn_used(&noderev->prop_rep->txn_id))
2979 const char *filename
2980 = svn_fs_fs__path_txn_node_props(fs, noderev->id, pool);
2981 proplist = apr_hash_make(pool);
2983 SVN_ERR(svn_stream_open_readonly(&stream, filename, pool, pool));
2984 err = svn_hash_read2(proplist, stream, SVN_HASH_TERMINATOR, pool);
2987 svn_string_t *id_str = svn_fs_fs__id_unparse(noderev->id, pool);
2989 err = svn_error_compose_create(err, svn_stream_close(stream));
2990 return svn_error_quick_wrapf(err,
2991 _("malformed property list for node-revision '%s' in '%s'"),
2992 id_str->data, filename);
2994 SVN_ERR(svn_stream_close(stream));
2996 else if (noderev->prop_rep)
2999 fs_fs_data_t *ffd = fs->fsap_data;
3000 representation_t *rep = noderev->prop_rep;
3001 pair_cache_key_t key = { 0 };
3003 key.revision = rep->revision;
3004 key.second = rep->item_index;
3005 if (ffd->properties_cache && SVN_IS_VALID_REVNUM(rep->revision))
3007 svn_boolean_t is_cached;
3008 SVN_ERR(svn_cache__get((void **) proplist_p, &is_cached,
3009 ffd->properties_cache, &key, pool));
3011 return SVN_NO_ERROR;
3014 proplist = apr_hash_make(pool);
3015 SVN_ERR(svn_fs_fs__get_contents(&stream, fs, noderev->prop_rep, FALSE,
3017 err = svn_hash_read2(proplist, stream, SVN_HASH_TERMINATOR, pool);
3020 svn_string_t *id_str = svn_fs_fs__id_unparse(noderev->id, pool);
3022 err = svn_error_compose_create(err, svn_stream_close(stream));
3023 return svn_error_quick_wrapf(err,
3024 _("malformed property list for node-revision '%s'"),
3027 SVN_ERR(svn_stream_close(stream));
3029 if (ffd->properties_cache && SVN_IS_VALID_REVNUM(rep->revision))
3030 SVN_ERR(svn_cache__set(ffd->properties_cache, &key, proplist, pool));
3034 /* return an empty prop list if the node doesn't have any props */
3035 proplist = apr_hash_make(pool);
3038 *proplist_p = proplist;
3040 return SVN_NO_ERROR;
3044 svn_fs_fs__create_changes_context(svn_fs_fs__changes_context_t **context,
3047 apr_pool_t *result_pool)
3049 svn_fs_fs__changes_context_t *result = apr_pcalloc(result_pool,
3052 result->revision = rev;
3053 result->rev_file_pool = result_pool;
3056 return SVN_NO_ERROR;
3060 svn_fs_fs__get_changes(apr_array_header_t **changes,
3061 svn_fs_fs__changes_context_t *context,
3062 apr_pool_t *result_pool,
3063 apr_pool_t *scratch_pool)
3065 apr_off_t item_index = SVN_FS_FS__ITEM_INDEX_CHANGES;
3066 svn_boolean_t found;
3067 fs_fs_data_t *ffd = context->fs->fsap_data;
3068 svn_fs_fs__changes_list_t *changes_list;
3070 pair_cache_key_t key;
3071 key.revision = context->revision;
3072 key.second = context->next;
3074 /* try cache lookup first */
3076 if (ffd->changes_cache)
3078 SVN_ERR(svn_cache__get((void **)&changes_list, &found,
3079 ffd->changes_cache, &key, result_pool));
3088 /* read changes from revision file */
3090 if (!context->revision_file)
3092 SVN_ERR(svn_fs_fs__ensure_revision_exists(context->revision,
3095 SVN_ERR(svn_fs_fs__open_pack_or_rev_file(&context->revision_file,
3098 context->rev_file_pool,
3102 if (use_block_read(context->fs))
3104 /* 'block-read' will probably populate the cache with the data
3105 * that we want. However, we won't want to force it to process
3106 * very large change lists as part of this prefetching mechanism.
3107 * Those would be better handled by the iterative code below. */
3108 SVN_ERR(block_read(NULL, context->fs,
3109 context->revision, SVN_FS_FS__ITEM_INDEX_CHANGES,
3110 context->revision_file, scratch_pool,
3113 /* This may succeed now ... */
3114 SVN_ERR(svn_cache__get((void **)&changes_list, &found,
3115 ffd->changes_cache, &key, result_pool));
3118 /* If we still have no data, read it here. */
3121 apr_off_t changes_offset;
3123 /* Addressing is very different for old formats
3124 * (needs to read the revision trailer). */
3125 if (svn_fs_fs__use_log_addressing(context->fs))
3127 SVN_ERR(svn_fs_fs__item_offset(&changes_offset, context->fs,
3128 context->revision_file,
3129 context->revision, NULL,
3130 SVN_FS_FS__ITEM_INDEX_CHANGES,
3135 SVN_ERR(get_root_changes_offset(NULL, &changes_offset,
3136 context->revision_file,
3137 context->fs, context->revision,
3140 /* This variable will be used for debug logging only. */
3141 item_index = changes_offset;
3144 /* Actual reading and parsing are the same, though. */
3145 SVN_ERR(aligned_seek(context->fs, context->revision_file->file,
3146 NULL, changes_offset + context->next_offset,
3149 SVN_ERR(svn_fs_fs__read_changes(changes,
3150 context->revision_file->stream,
3151 SVN_FS_FS__CHANGES_BLOCK_SIZE,
3152 result_pool, scratch_pool));
3154 /* Construct the info object for the entries block we just read. */
3155 changes_list = apr_pcalloc(scratch_pool, sizeof(*changes_list));
3156 SVN_ERR(svn_io_file_get_offset(&changes_list->end_offset,
3157 context->revision_file->file,
3159 changes_list->end_offset -= changes_offset;
3160 changes_list->start_offset = context->next_offset;
3161 changes_list->count = (*changes)->nelts;
3162 changes_list->changes = (change_t **)(*changes)->elts;
3163 changes_list->eol = changes_list->count < SVN_FS_FS__CHANGES_BLOCK_SIZE;
3165 /* cache for future reference */
3167 if (ffd->changes_cache)
3168 SVN_ERR(svn_cache__set(ffd->changes_cache, &key, changes_list,
3175 /* Return the block as a "proper" APR array. */
3176 (*changes) = apr_array_make(result_pool, 0, sizeof(void *));
3177 (*changes)->elts = (char *)changes_list->changes;
3178 (*changes)->nelts = changes_list->count;
3179 (*changes)->nalloc = changes_list->count;
3182 /* Where to look next - if there is more data. */
3183 context->next += (*changes)->nelts;
3184 context->next_offset = changes_list->end_offset;
3185 context->eol = changes_list->eol;
3187 /* Close the revision file after we read all data. */
3188 if (context->eol && context->revision_file)
3190 SVN_ERR(svn_fs_fs__close_revision_file(context->revision_file));
3191 context->revision_file = NULL;
3194 SVN_ERR(dbg_log_access(context->fs, context->revision, item_index, *changes,
3195 SVN_FS_FS__ITEM_TYPE_CHANGES, scratch_pool));
3197 return SVN_NO_ERROR;
3200 /* Inialize the representation read state RS for the given REP_HEADER and
3201 * p2l index ENTRY. If not NULL, assign FILE and STREAM to RS.
3202 * Use RESULT_POOL for allocations.
3204 static svn_error_t *
3205 init_rep_state(rep_state_t *rs,
3206 svn_fs_fs__rep_header_t *rep_header,
3208 svn_fs_fs__revision_file_t *file,
3209 svn_fs_fs__p2l_entry_t* entry,
3210 apr_pool_t *result_pool)
3212 fs_fs_data_t *ffd = fs->fsap_data;
3213 shared_file_t *shared_file = apr_pcalloc(result_pool, sizeof(*shared_file));
3215 /* this function does not apply to representation containers */
3216 SVN_ERR_ASSERT(entry->type >= SVN_FS_FS__ITEM_TYPE_FILE_REP
3217 && entry->type <= SVN_FS_FS__ITEM_TYPE_DIR_PROPS);
3219 shared_file->rfile = file;
3220 shared_file->fs = fs;
3221 shared_file->revision = entry->item.revision;
3222 shared_file->pool = result_pool;
3224 rs->sfile = shared_file;
3225 rs->revision = entry->item.revision;
3226 rs->item_index = entry->item.number;
3227 rs->header_size = rep_header->header_size;
3228 rs->start = entry->offset + rs->header_size;
3229 rs->current = rep_header->type == svn_fs_fs__rep_plain ? 0 : 4;
3230 rs->size = entry->size - rep_header->header_size - 7;
3232 rs->chunk_index = 0;
3233 rs->raw_window_cache = ffd->raw_window_cache;
3234 rs->window_cache = ffd->txdelta_window_cache;
3235 rs->combined_cache = ffd->combined_window_cache;
3237 return SVN_NO_ERROR;
3240 /* Implement svn_cache__partial_getter_func_t for txdelta windows.
3241 * Instead of the whole window data, return only END_OFFSET member.
3243 static svn_error_t *
3244 get_txdelta_window_end(void **out,
3246 apr_size_t data_len,
3248 apr_pool_t *result_pool)
3250 const svn_fs_fs__txdelta_cached_window_t *window
3251 = (const svn_fs_fs__txdelta_cached_window_t *)data;
3252 *(apr_off_t*)out = window->end_offset;
3254 return SVN_NO_ERROR;
3257 /* Implement svn_cache__partial_getter_func_t for raw windows.
3258 * Instead of the whole window data, return only END_OFFSET member.
3260 static svn_error_t *
3261 get_raw_window_end(void **out,
3263 apr_size_t data_len,
3265 apr_pool_t *result_pool)
3267 const svn_fs_fs__raw_cached_window_t *window
3268 = (const svn_fs_fs__raw_cached_window_t *)data;
3269 *(apr_off_t*)out = window->end_offset;
3271 return SVN_NO_ERROR;
3274 /* Walk through all windows in the representation addressed by RS in FS
3275 * (excluding the delta bases) and put those not already cached into the
3276 * window caches. If MAX_OFFSET is not -1, don't read windows that start
3277 * at or beyond that offset. Use POOL for temporary allocations.
3279 * This function requires RS->RAW_WINDOW_CACHE and RS->WINDOW_CACHE to
3282 static svn_error_t *
3283 cache_windows(svn_fs_t *fs,
3285 apr_off_t max_offset,
3288 apr_pool_t *iterpool = svn_pool_create(pool);
3290 SVN_ERR(auto_read_diff_version(rs, iterpool));
3292 while (rs->current < rs->size)
3294 apr_off_t end_offset;
3295 svn_boolean_t found = FALSE;
3296 window_cache_key_t key = { 0 };
3298 svn_pool_clear(iterpool);
3300 if (max_offset != -1 && rs->start + rs->current >= max_offset)
3302 svn_pool_destroy(iterpool);
3303 return SVN_NO_ERROR;
3306 /* We don't need to read the data again if it is already in cache.
3307 * It might be cached as either raw or parsed window.
3309 SVN_ERR(svn_cache__get_partial((void **) &end_offset, &found,
3310 rs->raw_window_cache,
3311 get_window_key(&key, rs),
3312 get_raw_window_end, NULL,
3315 SVN_ERR(svn_cache__get_partial((void **) &end_offset, &found,
3316 rs->window_cache, &key,
3317 get_txdelta_window_end, NULL,
3322 rs->current = end_offset;
3326 /* Read, decode and cache the window. */
3327 svn_fs_fs__raw_cached_window_t window;
3328 apr_off_t start_offset = rs->start + rs->current;
3329 apr_size_t window_len;
3332 /* navigate to the current window */
3333 SVN_ERR(rs_aligned_seek(rs, NULL, start_offset, iterpool));
3334 SVN_ERR(svn_txdelta__read_raw_window_len(&window_len,
3335 rs->sfile->rfile->stream,
3338 /* Read the raw window. */
3339 buf = apr_palloc(iterpool, window_len + 1);
3340 SVN_ERR(rs_aligned_seek(rs, NULL, start_offset, iterpool));
3341 SVN_ERR(svn_io_file_read_full2(rs->sfile->rfile->file, buf,
3342 window_len, NULL, NULL, iterpool));
3343 buf[window_len] = 0;
3345 /* update relative offset in representation */
3346 rs->current += window_len;
3348 /* Construct the cachable raw window object. */
3349 window.end_offset = rs->current;
3350 window.window.len = window_len;
3351 window.window.data = buf;
3352 window.ver = rs->ver;
3354 /* cache the window now */
3355 SVN_ERR(svn_cache__set(rs->raw_window_cache, &key, &window,
3359 if (rs->current > rs->size)
3360 return svn_error_create(SVN_ERR_FS_CORRUPT, NULL,
3361 _("Reading one svndiff window read beyond "
3362 "the end of the representation"));
3367 svn_pool_destroy(iterpool);
3368 return SVN_NO_ERROR;
3371 /* Read all txdelta / plain windows following REP_HEADER in FS as described
3372 * by ENTRY. Read the data from the already open FILE and the wrapping
3373 * STREAM object. If MAX_OFFSET is not -1, don't read windows that start
3374 * at or beyond that offset. Use SCRATCH_POOL for temporary allocations.
3375 * If caching is not enabled, this is a no-op.
3377 static svn_error_t *
3378 block_read_windows(svn_fs_fs__rep_header_t *rep_header,
3380 svn_fs_fs__revision_file_t *rev_file,
3381 svn_fs_fs__p2l_entry_t* entry,
3382 apr_off_t max_offset,
3383 apr_pool_t *result_pool,
3384 apr_pool_t *scratch_pool)
3386 fs_fs_data_t *ffd = fs->fsap_data;
3387 rep_state_t rs = { 0 };
3389 window_cache_key_t key = { 0 };
3391 if ( (rep_header->type != svn_fs_fs__rep_plain
3392 && (!ffd->txdelta_window_cache || !ffd->raw_window_cache))
3393 || (rep_header->type == svn_fs_fs__rep_plain
3394 && !ffd->combined_window_cache))
3395 return SVN_NO_ERROR;
3397 SVN_ERR(init_rep_state(&rs, rep_header, fs, rev_file, entry,
3400 /* RS->FILE may be shared between RS instances -> make sure we point
3401 * to the right data. */
3402 offset = rs.start + rs.current;
3403 if (rep_header->type == svn_fs_fs__rep_plain)
3405 svn_stringbuf_t *plaintext;
3406 svn_boolean_t is_cached;
3408 /* already in cache? */
3409 SVN_ERR(svn_cache__has_key(&is_cached, rs.combined_cache,
3410 get_window_key(&key, &rs),
3413 return SVN_NO_ERROR;
3415 /* for larger reps, the header may have crossed a block boundary.
3416 * make sure we still read blocks properly aligned, i.e. don't use
3417 * plain seek here. */
3418 SVN_ERR(aligned_seek(fs, rev_file->file, NULL, offset, scratch_pool));
3420 plaintext = svn_stringbuf_create_ensure(rs.size, result_pool);
3421 SVN_ERR(svn_io_file_read_full2(rev_file->file, plaintext->data,
3422 rs.size, &plaintext->len, NULL,
3424 plaintext->data[plaintext->len] = 0;
3425 rs.current += rs.size;
3427 SVN_ERR(set_cached_combined_window(plaintext, &rs, scratch_pool));
3431 SVN_ERR(cache_windows(fs, &rs, max_offset, scratch_pool));
3434 return SVN_NO_ERROR;
3437 /* Try to get the representation header identified by KEY from FS's cache.
3438 * If it has not been cached, read it from the current position in STREAM
3439 * and put it into the cache (if caching has been enabled for rep headers).
3440 * Return the result in *REP_HEADER. Use POOL for allocations.
3442 static svn_error_t *
3443 read_rep_header(svn_fs_fs__rep_header_t **rep_header,
3445 svn_stream_t *stream,
3446 pair_cache_key_t *key,
3447 apr_pool_t *result_pool,
3448 apr_pool_t *scratch_pool)
3450 fs_fs_data_t *ffd = fs->fsap_data;
3451 svn_boolean_t is_cached = FALSE;
3453 if (ffd->rep_header_cache)
3455 SVN_ERR(svn_cache__get((void**)rep_header, &is_cached,
3456 ffd->rep_header_cache, key,
3459 return SVN_NO_ERROR;
3462 SVN_ERR(svn_fs_fs__read_rep_header(rep_header, stream, result_pool,
3465 if (ffd->rep_header_cache)
3466 SVN_ERR(svn_cache__set(ffd->rep_header_cache, key, *rep_header,
3469 return SVN_NO_ERROR;
3472 /* Fetch the representation data (header, txdelta / plain windows)
3473 * addressed by ENTRY->ITEM in FS and cache it if caches are enabled.
3474 * Read the data from REV_FILE. If MAX_OFFSET is not -1, don't read
3475 * windows that start at or beyond that offset.
3476 * Use SCRATCH_POOL for temporary allocations.
/* Read-ahead helper for representation items: obtain the rep header for
 * ENTRY via read_rep_header() and pass it, together with MAX_OFFSET, to
 * block_read_windows().  Nothing is handed back to the caller except the
 * svn_error_t; every allocation uses SCRATCH_POOL.
 * NOTE(review): the function's braces are not present in this copy. */
3478 static svn_error_t *
3479 block_read_contents(svn_fs_t *fs,
3480 svn_fs_fs__revision_file_t *rev_file,
3481 svn_fs_fs__p2l_entry_t* entry,
3482 apr_off_t max_offset,
3483 apr_pool_t *scratch_pool)
/* The header cache key is the (revision, item number) pair of ENTRY.  The
 * key is zero-initialized first; the revision is narrowed to apr_int32_t by
 * the explicit cast before being stored. */
3485 pair_cache_key_t header_key = { 0 };
3486 svn_fs_fs__rep_header_t *rep_header;
3488 header_key.revision = (apr_int32_t)entry->item.revision;
3489 header_key.second = entry->item.number;
/* SCRATCH_POOL serves as both result and scratch pool in the two calls
 * below because REP_HEADER is only needed for the duration of this call.
 * NOTE(review): presumably the caller has positioned REV_FILE at the start
 * of the item (block_read seeks REVISION_FILE->FILE to ENTRY->OFFSET before
 * dispatching) -- confirm that REV_FILE->STREAM shares that position. */
3491 SVN_ERR(read_rep_header(&rep_header, fs, rev_file->stream, &header_key,
3492 scratch_pool, scratch_pool));
3493 SVN_ERR(block_read_windows(rep_header, fs, rev_file, entry, max_offset,
3494 scratch_pool, scratch_pool));
3496 return SVN_NO_ERROR;
3499 /* For the given REV_FILE in FS, in *STREAM return a stream covering the
3500 * item specified by ENTRY. Also, verify the item's content by low-level
3501 * checksum. Allocate the result in POOL.
/* Load the ENTRY->SIZE bytes of one item from REV_FILE->FILE into a string
 * buffer, expose that buffer through *STREAM and compare its FNV-1a (32x4)
 * digest with ENTRY->FNV1_CHECKSUM.  Returns SVN_NO_ERROR when the digests
 * agree and a checksum mismatch error otherwise.
 * No seek is visible in this function, so the read starts at the file's
 * current position.
 * NOTE(review): the FS and POOL parameter lines, the braces and the tail of
 * the read call are not present in this copy. */
3503 static svn_error_t *
3504 read_item(svn_stream_t **stream,
3506 svn_fs_fs__revision_file_t *rev_file,
3507 svn_fs_fs__p2l_entry_t* entry,
3510 apr_uint32_t digest;
3511 svn_checksum_t *expected, *actual;
3512 apr_uint32_t plain_digest;
3514 /* Read item into string buffer. */
3515 svn_stringbuf_t *text = svn_stringbuf_create_ensure(entry->size, pool);
/* Length and terminating NUL are set up front; the read below then fills
 * TEXT->DATA in place with TEXT->LEN bytes. */
3516 text->len = entry->size;
3517 text->data[text->len] = 0;
3518 SVN_ERR(svn_io_file_read_full2(rev_file->file, text->data, text->len,
3521 /* Return (construct, calculate) stream and checksum. */
/* *STREAM is assigned before the digest comparison, so it is set even when
 * a mismatch error is returned further down. */
3522 *stream = svn_stream_from_stringbuf(text, pool);
3523 digest = svn__fnv1a_32x4(text->data, text->len);
3525 /* Checksums will match most of the time. */
3526 if (entry->fnv1_checksum == digest)
3527 return SVN_NO_ERROR;
3529 /* Construct proper checksum objects from their digests to allow for
3530 * nice error messages. */
/* htonl() puts each 32-bit digest into network byte order before its bytes
 * are handed to the checksum constructor.  PLAIN_DIGEST is reused for both
 * values.  NOTE(review): this presumes the constructor copies the digest
 * bytes instead of keeping the pointer -- confirm. */
3531 plain_digest = htonl(entry->fnv1_checksum);
3532 expected = svn_checksum__from_digest_fnv1a_32x4(
3533 (const unsigned char *)&plain_digest, pool);
3534 plain_digest = htonl(digest);
3535 actual = svn_checksum__from_digest_fnv1a_32x4(
3536 (const unsigned char *)&plain_digest, pool);
3538 /* Construct the full error message with all the info we have. */
/* The message reports, in this order: item size, item offset, item number
 * and revision. */
3539 return svn_checksum_mismatch_err(expected, actual, pool,
3540 _("Low-level checksum mismatch while reading\n"
3541 "%s bytes of meta data at offset %s "
3542 "for item %s in revision %ld"),
3543 apr_off_t_toa(pool, entry->size),
3544 apr_off_t_toa(pool, entry->offset),
3545 apr_psprintf(pool, "%" APR_UINT64_T_FMT, entry->item.number),
3546 entry->item.revision);
3549 /* If not already cached, read the changed paths list addressed by ENTRY in
3550 * FS and cache it if it has no more than SVN_FS_FS__CHANGES_BLOCK_SIZE
3551 * entries and caching is enabled. Read the data from REV_FILE.
3552 * Allocate temporaries in SCRATCH_POOL.
/* Read-ahead helper for changed-path lists: unless the list for ENTRY's
 * revision is already in FFD->CHANGES_CACHE, read and checksum-verify the
 * item via read_item(), parse it, and cache it if it has no more than
 * SVN_FS_FS__CHANGES_BLOCK_SIZE entries.  Does nothing when no changes
 * cache is configured.  Nothing is returned besides the svn_error_t.
 * NOTE(review): this copy lacks some lines (braces, trailing call
 * arguments, the test guarding the "already cached" return).  Only
 * KEY.REVISION is visibly assigned and KEY is not zero-initialized here,
 * unlike the pair_cache_key_t keys in block_read_contents and
 * block_read_noderev -- confirm that the remaining member is set on a line
 * missing from this copy. */
3554 static svn_error_t *
3555 block_read_changes(svn_fs_t *fs,
3556 svn_fs_fs__revision_file_t *rev_file,
3557 svn_fs_fs__p2l_entry_t *entry,
3558 apr_pool_t *scratch_pool)
3560 fs_fs_data_t *ffd = fs->fsap_data;
3561 svn_stream_t *stream;
3562 apr_array_header_t *changes;
3564 pair_cache_key_t key;
3565 key.revision = entry->item.revision;
/* Without a changes cache there is nothing to fill, so skip the read. */
3568 if (!ffd->changes_cache)
3569 return SVN_NO_ERROR;
3571 /* already in cache? */
/* NOTE(review): the function has already returned above when there is no
 * changes cache, so this condition is always true when it is reached. */
3572 if (ffd->changes_cache)
3574 svn_boolean_t is_cached;
3575 SVN_ERR(svn_cache__has_key(&is_cached, ffd->changes_cache, &key,
3578 return SVN_NO_ERROR;
/* Load the raw item and verify its low-level checksum; STREAM then reads
 * from the in-memory copy. */
3581 SVN_ERR(read_item(&stream, fs, rev_file, entry, scratch_pool));
3583 /* Read changes from revision file. But read just past the first block to
3584 enable us to determine whether the first block already hit the EOL.
3586 Note: A 100 entries block is already > 10kB on disk. With a 4kB default
3587 disk block size, this function won't even be called for larger
3588 changed paths lists. */
3589 SVN_ERR(svn_fs_fs__read_changes(&changes, stream,
3590 SVN_FS_FS__CHANGES_BLOCK_SIZE + 1,
3591 scratch_pool, scratch_pool));
3593 /* We can only cache small lists that don't need to be split up.
3594 For longer lists, we miss the file offset info for the respective
   blocks.  (NOTE(review): the original sentence was cut off after
   "respective"; "blocks" is an assumed completion -- confirm.) */
3595 if (changes->nelts <= SVN_FS_FS__CHANGES_BLOCK_SIZE)
3597 svn_fs_fs__changes_list_t changes_list;
3599 /* Construct the info object for the entries block we just read. */
/* The block spans the whole item (offsets 0 .. ENTRY->SIZE) and is flagged
 * as reaching the end of the list.  CHANGES_LIST.CHANGES points directly at
 * the element storage of CHANGES; no copy is made here. */
3600 changes_list.end_offset = entry->size;
3601 changes_list.start_offset = 0;
3602 changes_list.count = changes->nelts;
3603 changes_list.changes = (change_t **)changes->elts;
3604 changes_list.eol = TRUE;
3606 SVN_ERR(svn_cache__set(ffd->changes_cache, &key, &changes_list,
3610 return SVN_NO_ERROR;
3613 /* If not already cached or if MUST_READ is set, read the node revision
3614 * addressed by ENTRY in FS and return it in *NODEREV_P. Cache the
3615 * result if caching is enabled. Read the data from REV_FILE. Allocate
3616 * *NODEREV_P in RESULT_POOL and allocate temporaries in SCRATCH_POOL.
/* Read-ahead helper for node revisions: read and checksum-verify the item
 * for ENTRY via read_item(), parse it into *NODEREV_P (RESULT_POOL), run
 * fixup_node_revision() on it and store it in FFD->NODE_REVISION_CACHE when
 * that cache exists.  With MUST_READ unset the function may return without
 * reading anything (see below).
 * NOTE(review): this copy lacks some lines (braces, the FS parameter
 * declaration, the test guarding the "already cached" return and the tail
 * of the final cache call). */
3618 static svn_error_t *
3619 block_read_noderev(node_revision_t **noderev_p,
3621 svn_fs_fs__revision_file_t *rev_file,
3622 svn_fs_fs__p2l_entry_t *entry,
3623 svn_boolean_t must_read,
3624 apr_pool_t *result_pool,
3625 apr_pool_t *scratch_pool)
3627 fs_fs_data_t *ffd = fs->fsap_data;
3628 svn_stream_t *stream;
/* The cache key is the (revision, item number) pair of ENTRY; the key is
 * zero-initialized before its members are set. */
3630 pair_cache_key_t key = { 0 };
3631 key.revision = entry->item.revision;
3632 key.second = entry->item.number;
/* If the read is optional and there is no cache to fill, do nothing;
 * *NODEREV_P is left untouched in that case. */
3634 if (!must_read && !ffd->node_revision_cache)
3635 return SVN_NO_ERROR;
3637 /* already in cache? */
/* The cache is only consulted for optional reads; with MUST_READ set the
 * item is always read from REV_FILE. */
3638 if (!must_read && ffd->node_revision_cache)
3640 svn_boolean_t is_cached;
3641 SVN_ERR(svn_cache__has_key(&is_cached, ffd->node_revision_cache,
3642 &key, scratch_pool));
3644 return SVN_NO_ERROR;
/* Load the raw item and verify its low-level checksum; STREAM then reads
 * from the in-memory copy allocated in SCRATCH_POOL. */
3647 SVN_ERR(read_item(&stream, fs, rev_file, entry, scratch_pool));
3649 /* read node rev from revision file */
3650 SVN_ERR(svn_fs_fs__read_noderev(noderev_p, stream,
3651 result_pool, scratch_pool));
3652 SVN_ERR(fixup_node_revision(fs, *noderev_p, scratch_pool));
/* Store the parsed node revision under KEY if a cache is configured. */
3654 if (ffd->node_revision_cache)
3655 SVN_ERR(svn_cache__set(ffd->node_revision_cache, &key, *noderev_p,
3658 return SVN_NO_ERROR;
3661 /* Read the whole (e.g. 64kB) block containing ITEM_INDEX of REVISION in FS
3662 * and put all data into cache. If necessary and depending on heuristics,
3663 * neighboring blocks may also get read. The data is being read from
3664 * already open REVISION_FILE, which must be the correct rev / pack file
3667 * For noderevs and changed path lists, the item fetched can be allocated in
3668 * RESULT_POOL and returned in *RESULT. Otherwise, RESULT must be NULL.
/* Read-ahead driver.  Looks up the file offset of (REVISION, ITEM_INDEX),
 * then walks the P2L index entries of the block surrounding that offset and
 * hands each suitable entry to block_read_contents(), block_read_noderev()
 * or block_read_changes().  The final assert expects *RESULT to be non-NULL
 * whenever RESULT is non-NULL.
 * NOTE(review): this copy lacks a number of lines: braces, the FS parameter,
 * the declarations of RUN_COUNT, I, POOL and ITEM, the test guarding the
 * first early return, the "do" keyword, several call arguments, the
 * break / default parts of the switch, and the statements that store ITEM
 * in *RESULT and bump RUN_COUNT.  The notes below describe only what is
 * visible. */
3670 static svn_error_t *
3671 block_read(void **result,
3673 svn_revnum_t revision,
3674 apr_uint64_t item_index,
3675 svn_fs_fs__revision_file_t *revision_file,
3676 apr_pool_t *result_pool,
3677 apr_pool_t *scratch_pool)
3679 fs_fs_data_t *ffd = fs->fsap_data;
3680 apr_off_t offset, wanted_offset = 0;
3681 apr_off_t block_start = 0;
3682 apr_array_header_t *entries;
3685 apr_pool_t *iterpool;
3687 /* Block read is an optional feature. If the caller does not want anything
3688 * specific we may not have to read anything. */
3690 return SVN_NO_ERROR;
/* Sub-pool of SCRATCH_POOL; it is cleared at the top of every entry
 * iteration and destroyed before the regular return at the end. */
3692 iterpool = svn_pool_create(scratch_pool);
3694 /* don't try this on transaction protorev files */
3695 SVN_ERR_ASSERT(SVN_IS_VALID_REVNUM(revision));
3697 /* index lookup: find the OFFSET of the item we *must* read plus (in the
3698 * "do-while" block) the list of items in the same block. */
3699 SVN_ERR(svn_fs_fs__item_offset(&wanted_offset, fs, revision_file,
3700 revision, NULL, item_index, iterpool));
3702 offset = wanted_offset;
/* NOTE(review): the next five lines are the body of a block comment whose
 * opening and closing lines are not present in this copy. */
3706 * Read this block. If the last item crosses the block boundary, read
3707 * the next block but stop there. Because cross-boundary items cause
3708 * blocks to be read twice, this heuristics will limit this effect to
3709 * approx. 50% of blocks, probably less, while providing a sensible
3710 * amount of read-ahead.
/* BLOCK_START is OFFSET rounded down to a multiple of FFD->BLOCK_SIZE.
 * The index lookup is given SCRATCH_POOL, not ITERPOOL (which is cleared
 * inside the loop below). */
3714 /* fetch list of items in the block surrounding OFFSET */
3715 block_start = offset - (offset % ffd->block_size);
3716 SVN_ERR(svn_fs_fs__p2l_index_lookup(&entries, fs, revision_file,
3717 revision, block_start,
3718 ffd->block_size, scratch_pool,
/* aligned_seek() receives &BLOCK_START.  NOTE(review): presumably it
 * updates BLOCK_START to the start of the block actually buffered --
 * confirm against its definition. */
3721 SVN_ERR(aligned_seek(fs, revision_file->file, &block_start, offset,
3724 /* read all items from the block */
3725 for (i = 0; i < entries->nelts; ++i)
3727 svn_boolean_t is_result, is_wanted;
3729 svn_fs_fs__p2l_entry_t* entry;
3731 svn_pool_clear(iterpool);
3733 /* skip empty sections */
3734 entry = &APR_ARRAY_IDX(entries, i, svn_fs_fs__p2l_entry_t);
3735 if (entry->type == SVN_FS_FS__ITEM_TYPE_UNUSED)
3738 /* the item / container we were looking for? */
3739 is_wanted = entry->offset == wanted_offset
3740 && entry->item.revision == revision
3741 && entry->item.number == item_index;
3742 is_result = result && is_wanted;
3744 /* select the pool that we want the item to be allocated in */
3745 pool = is_result ? result_pool : iterpool;
/* An entry qualifies if it is the requested result, or if it starts at or
 * after BLOCK_START and is smaller than one block.  For a qualifying entry
 * the file is positioned at ENTRY->OFFSET and the entry is dispatched on
 * ENTRY->TYPE: file/dir reps and props go to block_read_contents(),
 * noderevs to block_read_noderev() (only if a noderev cache exists or the
 * entry is the result; IS_RESULT is passed as its MUST_READ flag), and
 * changed-path lists to block_read_changes().  One alternative of the
 * (partly missing) MAX_OFFSET argument for block_read_contents() is the end
 * of the current block, BLOCK_START + FFD->BLOCK_SIZE. */
3747 /* handle all items that start within this block and are relatively
3748 * small (i.e. < block size). Always read the item we need to return.
3750 if (is_result || ( entry->offset >= block_start
3751 && entry->size < ffd->block_size))
3754 SVN_ERR(svn_io_file_seek(revision_file->file, APR_SET,
3755 &entry->offset, iterpool));
3756 switch (entry->type)
3758 case SVN_FS_FS__ITEM_TYPE_FILE_REP:
3759 case SVN_FS_FS__ITEM_TYPE_DIR_REP:
3760 case SVN_FS_FS__ITEM_TYPE_FILE_PROPS:
3761 case SVN_FS_FS__ITEM_TYPE_DIR_PROPS:
3762 SVN_ERR(block_read_contents(fs, revision_file, entry,
3765 : block_start + ffd->block_size,
3769 case SVN_FS_FS__ITEM_TYPE_NODEREV:
3770 if (ffd->node_revision_cache || is_result)
3771 SVN_ERR(block_read_noderev((node_revision_t **)&item,
3773 entry, is_result, pool,
3777 case SVN_FS_FS__ITEM_TYPE_CHANGES:
3778 SVN_ERR(block_read_changes(fs, revision_file,
3789 /* if we crossed a block boundary, read the remainder of
3790 * the last block as well */
3791 offset = entry->offset + entry->size;
3792 if (offset - block_start > ffd->block_size)
/* The loop condition post-increments RUN_COUNT and repeats only if it was
 * exactly 1 before the increment. */
3798 while(run_count++ == 1); /* can only be true once and only if a block
3799 * boundary got crossed */
3801 /* if the caller requested a result, we must have provided one by now */
/* NOTE(review): this is a plain assert() and is therefore compiled out when
 * NDEBUG is defined. */
3802 assert(!result || *result);
3803 svn_pool_destroy(iterpool);
3805 return SVN_NO_ERROR;