1 /* cached_data.c --- cached (read) access to FSFS data
3 * ====================================================================
4 * Licensed to the Apache Software Foundation (ASF) under one
5 * or more contributor license agreements. See the NOTICE file
6 * distributed with this work for additional information
7 * regarding copyright ownership. The ASF licenses this file
8 * to you under the Apache License, Version 2.0 (the
9 * "License"); you may not use this file except in compliance
10 * with the License. You may obtain a copy of the License at
12 * http://www.apache.org/licenses/LICENSE-2.0
14 * Unless required by applicable law or agreed to in writing,
15 * software distributed under the License is distributed on an
16 * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
17 * KIND, either express or implied. See the License for the
18 * specific language governing permissions and limitations
20 * ====================================================================
23 #include "cached_data.h"
28 #include "svn_ctype.h"
29 #include "svn_sorts.h"
30 #include "private/svn_delta_private.h"
31 #include "private/svn_io_private.h"
32 #include "private/svn_sorts_private.h"
33 #include "private/svn_subr_private.h"
34 #include "private/svn_temp_serializer.h"
39 #include "low_level.h"
42 #include "temp_serializer.h"
44 #include "../libsvn_fs/fs-loader.h"
45 #include "../libsvn_delta/delta.h" /* for SVN_DELTA_WINDOW_SIZE */
47 #include "svn_private_config.h"
49 /* forward-declare. See implementation for the docstring */
51 block_read(void **result,
53 svn_revnum_t revision,
54 apr_uint64_t item_index,
55 svn_fs_fs__revision_file_t *revision_file,
56 apr_pool_t *result_pool,
57 apr_pool_t *scratch_pool);
60 /* Define this to enable access logging via dbg_log_access
61 #define SVN_FS_FS__LOG_ACCESS
64 /* When SVN_FS_FS__LOG_ACCESS has been defined, write a line to console
65 * showing where REVISION, ITEM_INDEX is located in FS and use ITEM to
66 * show details on its contents if not NULL. To support format 6 and
67 * earlier repos, ITEM_TYPE (SVN_FS_FS__ITEM_TYPE_*) must match ITEM.
68 * Use SCRATCH_POOL for temporary allocations.
70 * For pre-format7 repos, the display will be restricted.
73 dbg_log_access(svn_fs_t *fs,
74 svn_revnum_t revision,
75 apr_uint64_t item_index,
77 apr_uint32_t item_type,
78 apr_pool_t *scratch_pool)
80 /* no-op if this macro is not defined */
81 #ifdef SVN_FS_FS__LOG_ACCESS
82 fs_fs_data_t *ffd = fs->fsap_data;
83 apr_off_t end_offset = 0;
84 svn_fs_fs__p2l_entry_t *entry = NULL;
85 static const char *types[] = {"<n/a>", "frep ", "drep ", "fprop", "dprop",
86 "node ", "chgs ", "rep "};
87 const char *description = "";
88 const char *type = types[item_type];
89 const char *pack = "";
91 svn_fs_fs__revision_file_t *rev_file;
93 SVN_ERR(svn_fs_fs__open_pack_or_rev_file(&rev_file, fs, revision,
94 scratch_pool, scratch_pool));
96 /* determine rev / pack file offset */
97 SVN_ERR(svn_fs_fs__item_offset(&offset, fs, rev_file, revision, NULL,
98 item_index, scratch_pool));
100 /* constructing the pack file description */
101 if (revision < ffd->min_unpacked_rev)
102 pack = apr_psprintf(scratch_pool, "%4ld|",
103 revision / ffd->max_files_per_dir);
105 /* construct description if possible */
106 if (item_type == SVN_FS_FS__ITEM_TYPE_NODEREV && item != NULL)
108 node_revision_t *node = item;
111 ? apr_psprintf(scratch_pool, " d=%ld/%" APR_UINT64_T_FMT,
112 node->data_rep->revision,
113 node->data_rep->item_index)
117 ? apr_psprintf(scratch_pool, " p=%ld/%" APR_UINT64_T_FMT,
118 node->prop_rep->revision,
119 node->prop_rep->item_index)
121 description = apr_psprintf(scratch_pool, "%s (pc=%d%s%s)",
123 node->predecessor_count,
127 else if (item_type == SVN_FS_FS__ITEM_TYPE_ANY_REP)
129 svn_fs_fs__rep_header_t *header = item;
131 description = " (txdelta window)";
132 else if (header->type == svn_fs_fs__rep_plain)
133 description = " PLAIN";
134 else if (header->type == svn_fs_fs__rep_self_delta)
135 description = " DELTA";
137 description = apr_psprintf(scratch_pool,
138 " DELTA against %ld/%" APR_UINT64_T_FMT,
139 header->base_revision,
140 header->base_item_index);
142 else if (item_type == SVN_FS_FS__ITEM_TYPE_CHANGES && item != NULL)
144 apr_array_header_t *changes = item;
145 switch (changes->nelts)
147 case 0: description = " no change";
149 case 1: description = " 1 change";
151 default: description = apr_psprintf(scratch_pool, " %d changes",
156 /* some info is only available in format7 repos */
157 if (svn_fs_fs__use_log_addressing(fs))
159 /* reverse index lookup: get item description in ENTRY */
160 SVN_ERR(svn_fs_fs__p2l_entry_lookup(&entry, fs, rev_file, revision,
161 offset, scratch_pool,
166 end_offset = offset + entry->size;
167 type = types[entry->type];
171 printf("%5s%4lx:%04lx -%4lx:%04lx %s %7ld %5"APR_UINT64_T_FMT" %s\n",
172 pack, (long)(offset / ffd->block_size),
173 (long)(offset % ffd->block_size),
174 (long)(end_offset / ffd->block_size),
175 (long)(end_offset % ffd->block_size),
176 type, revision, item_index, description);
180 /* reduced logging for format 6 and earlier */
181 printf("%5s%10" APR_UINT64_T_HEX_FMT " %s %7ld %7" APR_UINT64_T_FMT \
183 pack, (apr_uint64_t)(offset), type, revision, item_index,
187 /* We don't know when SCRATCH_POOL will be cleared, so close the rev file
189 SVN_ERR(svn_fs_fs__close_revision_file(rev_file));
196 /* Convenience wrapper around svn_io_file_aligned_seek, taking filesystem
197 FS instead of a block size. */
199 aligned_seek(svn_fs_t *fs,
201 apr_off_t *buffer_start,
205 fs_fs_data_t *ffd = fs->fsap_data;
206 return svn_error_trace(svn_io_file_aligned_seek(file, ffd->block_size,
207 buffer_start, offset,
211 /* Open the revision file for revision REV in filesystem FS and store
212 the newly opened file in FILE. Seek to location OFFSET before
213 returning. Perform temporary allocations in POOL. */
215 open_and_seek_revision(svn_fs_fs__revision_file_t **file,
221 svn_fs_fs__revision_file_t *rev_file;
222 apr_off_t offset = -1;
224 SVN_ERR(svn_fs_fs__ensure_revision_exists(rev, fs, pool));
226 SVN_ERR(svn_fs_fs__open_pack_or_rev_file(&rev_file, fs, rev, pool, pool));
227 SVN_ERR(svn_fs_fs__item_offset(&offset, fs, rev_file, rev, NULL, item,
230 SVN_ERR(aligned_seek(fs, rev_file->file, NULL, offset, pool));
237 /* Open the representation REP for a node-revision in filesystem FS, seek
238 to its position and store the newly opened file in FILE. Perform
239 temporary allocations in POOL. */
241 open_and_seek_transaction(svn_fs_fs__revision_file_t **file,
243 representation_t *rep,
248 SVN_ERR(svn_fs_fs__open_proto_rev_file(file, fs, &rep->txn_id, pool, pool));
250 SVN_ERR(svn_fs_fs__item_offset(&offset, fs, NULL, SVN_INVALID_REVNUM,
251 &rep->txn_id, rep->item_index, pool));
252 SVN_ERR(aligned_seek(fs, (*file)->file, NULL, offset, pool));
257 /* Given a node-id ID, and a representation REP in filesystem FS, open
258 the correct file and seek to the correct location. Store this
259 file in *FILE_P. Perform any allocations in POOL. */
261 open_and_seek_representation(svn_fs_fs__revision_file_t **file_p,
263 representation_t *rep,
266 if (! svn_fs_fs__id_txn_used(&rep->txn_id))
267 return open_and_seek_revision(file_p, fs, rep->revision, rep->item_index,
270 return open_and_seek_transaction(file_p, fs, rep, pool);
276 err_dangling_id(svn_fs_t *fs, const svn_fs_id_t *id)
278 svn_string_t *id_str = svn_fs_fs__id_unparse(id, fs->pool);
279 return svn_error_createf
280 (SVN_ERR_FS_ID_NOT_FOUND, 0,
281 _("Reference to non-existent node '%s' in filesystem '%s'"),
282 id_str->data, fs->path);
285 /* Return TRUE, if FS is of a format that supports block-read and the
286 feature has been enabled. */
288 use_block_read(svn_fs_t *fs)
290 fs_fs_data_t *ffd = fs->fsap_data;
291 return svn_fs_fs__use_log_addressing(fs) && ffd->use_block_read;
295 svn_fs_fs__fixup_expanded_size(svn_fs_t *fs,
296 representation_t *rep,
297 apr_pool_t *scratch_pool)
299 svn_checksum_t checksum;
300 svn_checksum_t *empty_md5;
301 svn_fs_fs__revision_file_t *revision_file;
302 svn_fs_fs__rep_header_t *rep_header;
304 /* Anything to do at all?
306 * Note that a 0 SIZE is only possible for PLAIN reps due to the SVN\1
307 * magic prefix in any DELTA rep. */
308 if (!rep || rep->expanded_size != 0 || rep->size == 0)
311 /* This function may only be called for committed data. */
312 assert(!svn_fs_fs__id_txn_used(&rep->txn_id));
314 /* EXPANDED_SIZE is 0. If the MD5 does not match the one for empty
315 * contents, we know that EXPANDED_SIZE == 0 is wrong and needs to
316 * be set to the actual value given by SIZE.
318 * Using svn_checksum_match() will also accept all-zero values for
319 * the MD5 digest and only report a mismatch if the MD5 has actually
321 empty_md5 = svn_checksum_empty_checksum(svn_checksum_md5, scratch_pool);
323 checksum.digest = rep->md5_digest;
324 checksum.kind = svn_checksum_md5;
325 if (!svn_checksum_match(empty_md5, &checksum))
327 rep->expanded_size = rep->size;
331 /* Data in the rep-cache.db does not have MD5 checksums (all zero) on it.
332 * Compare SHA1 instead. */
335 svn_checksum_t *empty_sha1
336 = svn_checksum_empty_checksum(svn_checksum_sha1, scratch_pool);
338 checksum.digest = rep->sha1_digest;
339 checksum.kind = svn_checksum_sha1;
340 if (!svn_checksum_match(empty_sha1, &checksum))
342 rep->expanded_size = rep->size;
347 /* Only two cases are left here.
348 * (1) A non-empty PLAIN rep with a MD5 collision on EMPTY_MD5.
349 * (2) A DELTA rep with zero-length output. */
351 /* SVN always stores a DELTA rep with zero-length output as an empty
352 * sequence of txdelta windows, i.e. as "SVN\1". In that case, SIZE is
353 * 4 bytes. There is no other possible DELTA rep of that size and any
354 * PLAIN rep of 4 bytes would produce a different MD5. Hence, if SIZE is
355 * actually 4 here, we know that this is an empty DELTA rep.
357 * Note that it is technically legal to have DELTA reps with a 0 length
358 * output window. Their on-disk size would be longer. We handle that
359 * case later together with the equally unlikely MD5 collision. */
362 /* EXPANDED_SIZE is already 0. */
366 /* We still have the two options, PLAIN or DELTA rep. At this point, we
367 * are in an extremely unlikely case and can spend some time to figure it
368 * out. So, let's just look at the representation header. */
369 SVN_ERR(open_and_seek_revision(&revision_file, fs, rep->revision,
370 rep->item_index, scratch_pool));
371 SVN_ERR(svn_fs_fs__read_rep_header(&rep_header, revision_file->stream,
372 scratch_pool, scratch_pool));
373 SVN_ERR(svn_fs_fs__close_revision_file(revision_file));
375 /* Only for PLAIN reps do we have to correct EXPANDED_SIZE. */
376 if (rep_header->type == svn_fs_fs__rep_plain)
377 rep->expanded_size = rep->size;
382 /* Correct known issues with committed NODEREV in FS.
383 * Uses SCRATCH_POOL for temporaries.
386 fixup_node_revision(svn_fs_t *fs,
387 node_revision_t *noderev,
388 apr_pool_t *scratch_pool)
390 /* Workaround issue #4031: is-fresh-txn-root in revision files. */
391 noderev->is_fresh_txn_root = FALSE;
393 /* Make sure EXPANDED_SIZE has the correct value for every rep. */
394 SVN_ERR(svn_fs_fs__fixup_expanded_size(fs, noderev->data_rep,
396 SVN_ERR(svn_fs_fs__fixup_expanded_size(fs, noderev->prop_rep,
402 /* Get the node-revision for the node ID in FS.
403 Set *NODEREV_P to the new node-revision structure, allocated in POOL.
404 See svn_fs_fs__get_node_revision, which wraps this and adds another
407 get_node_revision_body(node_revision_t **noderev_p,
409 const svn_fs_id_t *id,
410 apr_pool_t *result_pool,
411 apr_pool_t *scratch_pool)
414 svn_boolean_t is_cached = FALSE;
415 fs_fs_data_t *ffd = fs->fsap_data;
417 if (svn_fs_fs__id_is_txn(id))
421 /* This is a transaction node-rev. Its storage logic is very
422 different from that of rev / pack files. */
423 err = svn_io_file_open(&file,
424 svn_fs_fs__path_txn_node_rev(fs, id,
426 APR_READ | APR_BUFFERED, APR_OS_DEFAULT,
428 if (err && APR_STATUS_IS_ENOENT(err->apr_err))
430 svn_error_clear(err);
431 return svn_error_trace(err_dangling_id(fs, id));
435 return svn_error_trace(err);
438 SVN_ERR(svn_fs_fs__read_noderev(noderev_p,
439 svn_stream_from_aprfile2(file,
442 result_pool, scratch_pool));
446 svn_fs_fs__revision_file_t *revision_file;
448 /* noderevs in rev / pack files can be cached */
449 const svn_fs_fs__id_part_t *rev_item = svn_fs_fs__id_rev_item(id);
450 pair_cache_key_t key = { 0 };
451 key.revision = rev_item->revision;
452 key.second = rev_item->number;
454 /* Not found or not applicable. Try a noderev cache lookup.
455 * If that succeeds, we are done here. */
456 if (ffd->node_revision_cache)
458 SVN_ERR(svn_cache__get((void **) noderev_p,
460 ffd->node_revision_cache,
467 /* read the data from disk */
468 SVN_ERR(open_and_seek_revision(&revision_file, fs,
473 if (use_block_read(fs))
475 /* block-read will parse the whole block and will also return
476 the one noderev that we need right now. */
477 SVN_ERR(block_read((void **)noderev_p, fs,
486 /* physical addressing mode reading, parsing and caching */
487 SVN_ERR(svn_fs_fs__read_noderev(noderev_p,
488 revision_file->stream,
491 SVN_ERR(fixup_node_revision(fs, *noderev_p, scratch_pool));
493 /* The noderev is not in cache, yet. Add it, if caching has been enabled. */
494 if (ffd->node_revision_cache)
495 SVN_ERR(svn_cache__set(ffd->node_revision_cache,
501 SVN_ERR(svn_fs_fs__close_revision_file(revision_file));
508 svn_fs_fs__get_node_revision(node_revision_t **noderev_p,
510 const svn_fs_id_t *id,
511 apr_pool_t *result_pool,
512 apr_pool_t *scratch_pool)
514 const svn_fs_fs__id_part_t *rev_item = svn_fs_fs__id_rev_item(id);
516 svn_error_t *err = get_node_revision_body(noderev_p, fs, id,
517 result_pool, scratch_pool);
518 if (err && err->apr_err == SVN_ERR_FS_CORRUPT)
520 svn_string_t *id_string = svn_fs_fs__id_unparse(id, scratch_pool);
521 return svn_error_createf(SVN_ERR_FS_CORRUPT, err,
522 "Corrupt node-revision '%s'",
526 SVN_ERR(dbg_log_access(fs,
530 SVN_FS_FS__ITEM_TYPE_NODEREV,
533 return svn_error_trace(err);
537 /* Given a revision file REV_FILE, opened to REV in FS, find the Node-ID
538 of the header located at OFFSET and store it in *ID_P. Allocate
539 temporary variables from POOL. */
541 get_fs_id_at_offset(svn_fs_id_t **id_p,
542 svn_fs_fs__revision_file_t *rev_file,
548 node_revision_t *noderev;
550 SVN_ERR(aligned_seek(fs, rev_file->file, NULL, offset, pool));
551 SVN_ERR(svn_fs_fs__read_noderev(&noderev,
555 /* noderev->id is const, get rid of that */
556 *id_p = svn_fs_fs__id_copy(noderev->id, pool);
558 /* assert that the txn_id is REV
559 * (asserting on offset would be harder because the rev_offset is not
561 assert(svn_fs_fs__id_rev(*id_p) == rev);
567 /* Given an open revision file REV_FILE in FS for REV, locate the trailer that
568 specifies the offset to the root node-id and to the changed path
569 information. Store the root node offset in *ROOT_OFFSET and the
570 changed path offset in *CHANGES_OFFSET. If either of these
571 pointers is NULL, do nothing with it.
573 Allocate temporary variables from POOL. */
575 get_root_changes_offset(apr_off_t *root_offset,
576 apr_off_t *changes_offset,
577 svn_fs_fs__revision_file_t *rev_file,
582 fs_fs_data_t *ffd = fs->fsap_data;
583 apr_off_t rev_offset;
584 apr_seek_where_t seek_relative;
585 svn_stringbuf_t *trailer;
591 /* Determine where to seek to in the file.
593 If we've got a pack file, we want to seek to the end of the desired
594 revision. But we don't track that, so we seek to the beginning of the
597 Unless the next revision is in a different file, in which case, we can
598 just seek to the end of the pack file -- just like we do in the
600 if (rev_file->is_packed && ((rev + 1) % ffd->max_files_per_dir != 0))
602 SVN_ERR(svn_fs_fs__get_packed_offset(&end, fs, rev + 1, pool));
603 seek_relative = APR_SET;
607 seek_relative = APR_END;
611 /* Offset of the revision from the start of the pack file, if applicable. */
612 if (rev_file->is_packed)
613 SVN_ERR(svn_fs_fs__get_packed_offset(&rev_offset, fs, rev, pool));
617 /* We will assume that the last line containing the two offsets
618 will never be longer than 64 characters. */
619 SVN_ERR(svn_io_file_seek(rev_file->file, seek_relative, &end, pool));
621 if (end < sizeof(buffer))
623 len = (apr_size_t)end;
628 len = sizeof(buffer);
629 start = end - sizeof(buffer);
632 /* Read in this last block, from which we will identify the last line. */
633 SVN_ERR(aligned_seek(fs, rev_file->file, NULL, start, pool));
634 SVN_ERR(svn_io_file_read_full2(rev_file->file, buffer, len, NULL, NULL,
637 /* Parse the last line. */
638 trailer = svn_stringbuf_ncreate(buffer, len, pool);
639 SVN_ERR(svn_fs_fs__parse_revision_trailer(root_offset,
644 /* return absolute offsets */
646 *root_offset += rev_offset;
648 *changes_offset += rev_offset;
654 svn_fs_fs__rev_get_root(svn_fs_id_t **root_id_p,
657 apr_pool_t *result_pool,
658 apr_pool_t *scratch_pool)
660 fs_fs_data_t *ffd = fs->fsap_data;
661 SVN_ERR(svn_fs_fs__ensure_revision_exists(rev, fs, scratch_pool));
663 if (svn_fs_fs__use_log_addressing(fs))
665 *root_id_p = svn_fs_fs__id_create_root(rev, result_pool);
669 svn_fs_fs__revision_file_t *revision_file;
670 apr_off_t root_offset;
671 svn_fs_id_t *root_id = NULL;
672 svn_boolean_t is_cached;
674 SVN_ERR(svn_cache__get((void **) root_id_p, &is_cached,
675 ffd->rev_root_id_cache, &rev, result_pool));
679 SVN_ERR(svn_fs_fs__open_pack_or_rev_file(&revision_file, fs, rev,
680 scratch_pool, scratch_pool));
681 SVN_ERR(get_root_changes_offset(&root_offset, NULL,
682 revision_file, fs, rev,
685 SVN_ERR(get_fs_id_at_offset(&root_id, revision_file, fs, rev,
686 root_offset, result_pool));
688 SVN_ERR(svn_fs_fs__close_revision_file(revision_file));
690 SVN_ERR(svn_cache__set(ffd->rev_root_id_cache, &rev, root_id,
693 *root_id_p = root_id;
699 /* Describes a lazily opened rev / pack file. Instances will be shared
700 between multiple instances of rep_state_t. */
701 typedef struct shared_file_t
703 /* The opened file. NULL while file is not open, yet. */
704 svn_fs_fs__revision_file_t *rfile;
706 /* file system to open the file in */
709 /* a revision contained in the FILE. Since this file may be shared,
710 that value may be different from REP_STATE_T->REVISION. */
711 svn_revnum_t revision;
713 /* pool to use when creating the FILE. This guarantees that the file
714 remains open / valid beyond the respective local context that required
715 the file to be opened eventually. */
719 /* Represents where in the current svndiff data block each
720 representation is. */
721 typedef struct rep_state_t
723 /* shared lazy-open rev/pack file structure */
724 shared_file_t *sfile;
725 /* Caches raw (unparsed) windows. May be NULL. */
726 svn_cache__t *raw_window_cache;
727 /* The txdelta window cache to use or NULL. */
728 svn_cache__t *window_cache;
729 /* Caches un-deltified windows. May be NULL. */
730 svn_cache__t *combined_cache;
731 /* revision containing the representation */
732 svn_revnum_t revision;
733 /* representation's item index in REVISION */
734 apr_uint64_t item_index;
735 /* length of the header at the start of the rep.
736 0 iff this rep is stored in a container
737 (i.e. does not have a header) */
738 apr_size_t header_size;
739 apr_off_t start; /* The starting offset for the raw
740 svndiff/plaintext data minus header.
741 -1 if the offset is yet unknown. */
742 apr_off_t current;/* The current offset relative to START. */
743 apr_off_t size; /* The on-disk size of the representation. */
744 int ver; /* If a delta, what svndiff version?
745 -1 for unknown delta version. */
746 int chunk_index; /* number of the window to read */
749 /* Simple wrapper around svn_io_file_get_offset to simplify callers. */
751 get_file_offset(apr_off_t *offset,
755 return svn_error_trace(svn_io_file_get_offset(offset,
756 rs->sfile->rfile->file,
760 /* Simple wrapper around svn_io_file_aligned_seek to simplify callers. */
762 rs_aligned_seek(rep_state_t *rs,
763 apr_off_t *buffer_start,
767 fs_fs_data_t *ffd = rs->sfile->fs->fsap_data;
768 return svn_error_trace(svn_io_file_aligned_seek(rs->sfile->rfile->file,
770 buffer_start, offset,
774 /* Open FILE->FILE and FILE->STREAM if they haven't been opened, yet. */
776 auto_open_shared_file(shared_file_t *file)
778 if (file->rfile == NULL)
779 SVN_ERR(svn_fs_fs__open_pack_or_rev_file(&file->rfile, file->fs,
780 file->revision, file->pool,
786 /* Set RS->START to the beginning of the raw representation data in RS->FILE->FILE,
787 if that hasn't been done yet. Use POOL for temporary allocations. */
789 auto_set_start_offset(rep_state_t *rs, apr_pool_t *pool)
793 SVN_ERR(svn_fs_fs__item_offset(&rs->start, rs->sfile->fs,
794 rs->sfile->rfile, rs->revision, NULL,
795 rs->item_index, pool));
796 rs->start += rs->header_size;
802 /* Set RS->VER depending on what is found in the already open RS->FILE->FILE
803 if the diff version is still unknown. Use POOL for temporary allocations.
806 auto_read_diff_version(rep_state_t *rs, apr_pool_t *pool)
811 SVN_ERR(rs_aligned_seek(rs, NULL, rs->start, pool));
812 SVN_ERR(svn_io_file_read_full2(rs->sfile->rfile->file, buf,
813 sizeof(buf), NULL, NULL, pool));
815 /* ### Layering violation */
816 if (! ((buf[0] == 'S') && (buf[1] == 'V') && (buf[2] == 'N')))
817 return svn_error_create
818 (SVN_ERR_FS_CORRUPT, NULL,
819 _("Malformed svndiff data in representation"));
829 /* See create_rep_state, which wraps this and adds another error. */
831 create_rep_state_body(rep_state_t **rep_state,
832 svn_fs_fs__rep_header_t **rep_header,
833 shared_file_t **shared_file,
834 representation_t *rep,
836 apr_pool_t *result_pool,
837 apr_pool_t *scratch_pool)
839 fs_fs_data_t *ffd = fs->fsap_data;
840 rep_state_t *rs = apr_pcalloc(result_pool, sizeof(*rs));
841 svn_fs_fs__rep_header_t *rh;
842 svn_boolean_t is_cached = FALSE;
843 apr_uint64_t estimated_window_storage;
847 * - refers to a valid revision,
848 * - refers to a packed revision,
849 * - as does the rep we want to read, and
850 * - refers to the same pack file as the rep
851 * we can re-use the same, already open file object
853 svn_boolean_t reuse_shared_file
854 = shared_file && *shared_file && (*shared_file)->rfile
855 && SVN_IS_VALID_REVNUM((*shared_file)->revision)
856 && (*shared_file)->revision < ffd->min_unpacked_rev
857 && rep->revision < ffd->min_unpacked_rev
858 && ( ((*shared_file)->revision / ffd->max_files_per_dir)
859 == (rep->revision / ffd->max_files_per_dir));
861 pair_cache_key_t key;
862 key.revision = rep->revision;
863 key.second = rep->item_index;
865 /* continue constructing RS and RA */
866 rs->size = rep->size;
867 rs->revision = rep->revision;
868 rs->item_index = rep->item_index;
869 rs->raw_window_cache = use_block_read(fs) ? ffd->raw_window_cache : NULL;
873 /* Very long files stored as self-delta will produce a huge number of
874 delta windows. Don't cache them lest we thrash the cache.
875 Since we don't know the depth of the delta chain, let's assume, the
876 whole contents get rewritten 3 times.
878 estimated_window_storage = 4 * (rep->expanded_size + SVN_DELTA_WINDOW_SIZE);
879 estimated_window_storage = MIN(estimated_window_storage, APR_SIZE_MAX);
881 rs->window_cache = ffd->txdelta_window_cache
882 && svn_cache__is_cachable(ffd->txdelta_window_cache,
883 (apr_size_t)estimated_window_storage)
884 ? ffd->txdelta_window_cache
886 rs->combined_cache = ffd->combined_window_cache
887 && svn_cache__is_cachable(ffd->combined_window_cache,
888 (apr_size_t)estimated_window_storage)
889 ? ffd->combined_window_cache
892 /* cache lookup, i.e. skip reading the rep header if possible */
893 if (ffd->rep_header_cache && !svn_fs_fs__id_txn_used(&rep->txn_id))
894 SVN_ERR(svn_cache__get((void **) &rh, &is_cached,
895 ffd->rep_header_cache, &key, result_pool));
897 /* initialize the (shared) FILE member in RS */
898 if (reuse_shared_file)
900 rs->sfile = *shared_file;
904 shared_file_t *file = apr_pcalloc(result_pool, sizeof(*file));
905 file->revision = rep->revision;
906 file->pool = result_pool;
910 /* remember the current file, if suggested by the caller */
915 /* read rep header, if necessary */
918 /* ensure file is open and navigate to the start of rep header */
919 if (reuse_shared_file)
923 /* ... we can re-use the same, already open file object.
924 * This implies that we don't read from a txn.
926 rs->sfile = *shared_file;
927 SVN_ERR(auto_open_shared_file(rs->sfile));
928 SVN_ERR(svn_fs_fs__item_offset(&offset, fs, rs->sfile->rfile,
929 rep->revision, NULL, rep->item_index,
931 SVN_ERR(rs_aligned_seek(rs, NULL, offset, scratch_pool));
935 /* otherwise, create a new file object. May or may not be
938 SVN_ERR(open_and_seek_representation(&rs->sfile->rfile, fs, rep,
942 SVN_ERR(svn_fs_fs__read_rep_header(&rh, rs->sfile->rfile->stream,
943 result_pool, scratch_pool));
944 SVN_ERR(get_file_offset(&rs->start, rs, result_pool));
946 /* populate the cache if appropriate */
947 if (! svn_fs_fs__id_txn_used(&rep->txn_id))
949 if (use_block_read(fs))
950 SVN_ERR(block_read(NULL, fs, rep->revision, rep->item_index,
951 rs->sfile->rfile, result_pool, scratch_pool));
953 if (ffd->rep_header_cache)
954 SVN_ERR(svn_cache__set(ffd->rep_header_cache, &key, rh,
960 SVN_ERR(dbg_log_access(fs, rep->revision, rep->item_index, rh,
961 SVN_FS_FS__ITEM_TYPE_ANY_REP, scratch_pool));
963 rs->header_size = rh->header_size;
967 if (rh->type == svn_fs_fs__rep_plain)
968 /* This is a plaintext, so just return the current rep_state. */
971 /* skip "SVNx" diff marker */
977 /* Read the rep args for REP in filesystem FS and create a rep_state
978 for reading the representation. Return the rep_state in *REP_STATE
979 and the rep header in *REP_HEADER, both allocated in RESULT_POOL.
981 When reading multiple reps, i.e. a skip delta chain, you may provide
982 non-NULL SHARED_FILE. (If SHARED_FILE is not NULL, in the first
983 call it should be a pointer to NULL.) The function will use this
984 variable to store the previous call results and tries to re-use it.
985 This may result in significant savings in I/O for packed files and
986 number of open file handles.
989 create_rep_state(rep_state_t **rep_state,
990 svn_fs_fs__rep_header_t **rep_header,
991 shared_file_t **shared_file,
992 representation_t *rep,
994 apr_pool_t *result_pool,
995 apr_pool_t *scratch_pool)
997 svn_error_t *err = create_rep_state_body(rep_state, rep_header,
998 shared_file, rep, fs,
999 result_pool, scratch_pool);
1000 if (err && err->apr_err == SVN_ERR_FS_CORRUPT)
1002 fs_fs_data_t *ffd = fs->fsap_data;
1003 const char *rep_str;
1005 /* ### This always returns "-1" for transaction reps, because
1006 ### this particular bit of code doesn't know if the rep is
1007 ### stored in the protorev or in the mutable area (for props
1008 ### or dir contents). It is pretty rare for FSFS to *read*
1009 ### from the protorev file, though, so this is probably OK.
1010 ### And anyone going to debug corruption errors is probably
1011 ### going to jump straight to this comment anyway! */
1013 ? svn_fs_fs__unparse_representation
1014 (rep, ffd->format, TRUE, scratch_pool, scratch_pool)->data
1017 return svn_error_createf(SVN_ERR_FS_CORRUPT, err,
1018 "Corrupt representation '%s'",
1021 /* ### Call representation_string() ? */
1022 return svn_error_trace(err);
1026 svn_fs_fs__check_rep(representation_t *rep,
1029 apr_pool_t *scratch_pool)
1031 if (svn_fs_fs__use_log_addressing(fs))
1034 svn_fs_fs__p2l_entry_t *entry;
1035 svn_fs_fs__revision_file_t *rev_file = NULL;
1037 /* Reuse the revision file provided by *HINT, if it is given and
1038 * actually the rev / pack file that we want. */
1039 svn_revnum_t start_rev = svn_fs_fs__packed_base_rev(fs, rep->revision);
1041 rev_file = *(svn_fs_fs__revision_file_t **)hint;
1043 if (rev_file == NULL || rev_file->start_revision != start_rev)
1044 SVN_ERR(svn_fs_fs__open_pack_or_rev_file(&rev_file, fs, rep->revision,
1045 scratch_pool, scratch_pool));
1050 /* This will auto-retry if there was a background pack. */
1051 SVN_ERR(svn_fs_fs__item_offset(&offset, fs, rev_file, rep->revision,
1052 NULL, rep->item_index, scratch_pool));
1054 /* This may fail if there is a background pack operation (can't auto-
1055 retry because the item offset lookup has to be redone as well). */
1056 SVN_ERR(svn_fs_fs__p2l_entry_lookup(&entry, fs, rev_file,
1057 rep->revision, offset,
1058 scratch_pool, scratch_pool));
1061 || entry->type < SVN_FS_FS__ITEM_TYPE_FILE_REP
1062 || entry->type > SVN_FS_FS__ITEM_TYPE_DIR_PROPS)
1063 return svn_error_createf(SVN_ERR_FS_CORRUPT, NULL,
1064 _("No representation found at offset %s "
1065 "for item %s in revision %ld"),
1066 apr_off_t_toa(scratch_pool, offset),
1067 apr_psprintf(scratch_pool,
1068 "%" APR_UINT64_T_FMT,
1075 svn_fs_fs__rep_header_t *rep_header;
1077 /* ### Should this be using read_rep_line() directly? */
1078 SVN_ERR(create_rep_state(&rs, &rep_header, (shared_file_t**)hint,
1079 rep, fs, scratch_pool, scratch_pool));
1082 return SVN_NO_ERROR;
1086 svn_fs_fs__rep_chain_length(int *chain_length,
1088 representation_t *rep,
1090 apr_pool_t *scratch_pool)
1092 fs_fs_data_t *ffd = fs->fsap_data;
1093 svn_revnum_t shard_size = ffd->max_files_per_dir
1094 ? ffd->max_files_per_dir
1096 apr_pool_t *subpool = svn_pool_create(scratch_pool);
1097 apr_pool_t *iterpool = svn_pool_create(scratch_pool);
1098 svn_boolean_t is_delta = FALSE;
1101 svn_revnum_t last_shard = rep->revision / shard_size;
1103 /* Check whether the length of the deltification chain is acceptable.
1104 * Otherwise, shared reps may form a non-skipping delta chain in
1106 representation_t base_rep = *rep;
1108 /* re-use open files between iterations */
1109 shared_file_t *file_hint = NULL;
1111 svn_fs_fs__rep_header_t *header;
1113 /* follow the delta chain towards the end but for at most
1114 * MAX_CHAIN_LENGTH steps. */
1117 rep_state_t *rep_state;
1119 svn_pool_clear(iterpool);
1121 if (base_rep.revision / shard_size != last_shard)
1123 last_shard = base_rep.revision / shard_size;
1127 SVN_ERR(create_rep_state_body(&rep_state,
1135 base_rep.revision = header->base_revision;
1136 base_rep.item_index = header->base_item_index;
1137 base_rep.size = header->base_length;
1138 svn_fs_fs__id_txn_reset(&base_rep.txn_id);
1139 is_delta = header->type == svn_fs_fs__rep_delta;
1141 /* Clear the SUBPOOL once in a while. Doing it too frequently
1142 * renders the FILE_HINT ineffective. Doing it too infrequently may
1143 * leave us with too many open file handles.
1145 * Note that this is mostly about efficiency, with larger values
1146 * being more efficient, and any non-zero value is legal here. When
1147 * reading deltified contents, we may keep 10s of rev files open at
1148 * the same time and the system has to cope with that. Thus, the
1149 * limit of 16 chosen below is in the same ballpark.
1152 if (count % 16 == 0)
1155 svn_pool_clear(subpool);
1158 while (is_delta && base_rep.revision);
1160 *chain_length = count;
1161 *shard_count = shards;
1162 svn_pool_destroy(subpool);
1163 svn_pool_destroy(iterpool);
1165 return SVN_NO_ERROR;
1168 struct rep_read_baton
1170 /* The FS from which we're reading. */
1173 /* Representation to read. */
1174 representation_t rep;
1176 /* If not NULL, this is the base for the first delta window in rs_list */
1177 svn_stringbuf_t *base_window;
1179 /* The state of all prior delta representations. */
1180 apr_array_header_t *rs_list;
1182 /* The plaintext state, if there is a plaintext. */
1183 rep_state_t *src_state;
1185 /* The index of the current delta chunk, if we are reading a delta. */
1188 /* The buffer where we store undeltified data. */
1193 /* A checksum context for summing the data read in order to verify it.
1194 Note: we don't need to use the sha1 checksum because we're only doing
1195 data verification, for which md5 is perfectly safe. */
1196 svn_checksum_ctx_t *md5_checksum_ctx;
1198 svn_boolean_t checksum_finalized;
1200 /* The stored checksum of the representation we are reading, its
1201 length, and the amount we've read so far. Some of this
1202 information is redundant with rs_list and src_state, but it's
1203 convenient for the checksumming code to have it here. */
1204 unsigned char md5_digest[APR_MD5_DIGESTSIZE];
1209 /* The key for the fulltext cache for this rep, if there is a
1211 pair_cache_key_t fulltext_cache_key;
1212 /* The text we've been reading, if we're going to cache it. */
1213 svn_stringbuf_t *current_fulltext;
1215 /* If not NULL, attempt to read the data from this cache.
1216 Once that lookup fails, reset it to NULL. */
1217 svn_cache__t *fulltext_cache;
1219 /* Bytes delivered from the FULLTEXT_CACHE so far. If the next
1220 lookup fails, we need to skip that much data from the reconstructed
1221 window stream before we continue normal operation. */
1222 svn_filesize_t fulltext_delivered;
1224 /* Used for temporary allocations during the read. */
1227 /* Pool used to store file handles and other data that is persistent
1228 for the entire stream read. */
1229 apr_pool_t *filehandle_pool;
1232 /* Set window key in *KEY to address the window described by RS.
1233 For convenience, return the KEY. */
1234 static window_cache_key_t *
1235 get_window_key(window_cache_key_t *key, rep_state_t *rs)
1237 assert(rs->revision <= APR_UINT32_MAX);
1238 key->revision = (apr_uint32_t)rs->revision;
1239 key->item_index = rs->item_index;
1240 key->chunk_index = rs->chunk_index;
1245 /* Implement svn_cache__partial_getter_func_t for raw txdelta windows.
1246 * Parse the raw data and return a svn_fs_fs__txdelta_cached_window_t.
1248 static svn_error_t *
1249 parse_raw_window(void **out,
1251 apr_size_t data_len,
1253 apr_pool_t *result_pool)
1255 svn_string_t raw_window;
1256 svn_stream_t *stream;
1258 /* unparsed and parsed window */
1259 const svn_fs_fs__raw_cached_window_t *window
1260 = (const svn_fs_fs__raw_cached_window_t *)data;
1261 svn_fs_fs__txdelta_cached_window_t *result
1262 = apr_pcalloc(result_pool, sizeof(*result));
1264 /* create a read stream taking the raw window as input */
1265 raw_window.data = svn_temp_deserializer__ptr(window,
1266 (const void * const *)&window->window.data);
1267 raw_window.len = window->window.len;
1268 stream = svn_stream_from_string(&raw_window, result_pool);
1271 SVN_ERR(svn_txdelta_read_svndiff_window(&result->window, stream, window->ver,
1274 /* complete the window and return it */
1275 result->end_offset = window->end_offset;
1278 return SVN_NO_ERROR;
1282 /* Read the WINDOW_P number CHUNK_INDEX for the representation given in
1283 * rep state RS from the current FSFS session's cache. This will be a
1284 * no-op and IS_CACHED will be set to FALSE if no cache has been given.
1285 * If a cache is available IS_CACHED will inform the caller about the
1286 * success of the lookup. Allocations of the window will be made
1287 * from RESULT_POOL. Use SCRATCH_POOL for temporary allocations.
1289 * If the information could be found, put RS to CHUNK_INDEX.
1291 static svn_error_t *
1292 get_cached_window(svn_txdelta_window_t **window_p,
1295 svn_boolean_t *is_cached,
1296 apr_pool_t *result_pool,
1297 apr_pool_t *scratch_pool)
1299 if (! rs->window_cache)
1301 /* txdelta window cache has not been enabled */
1306 /* ask the cache for the desired txdelta window */
1307 svn_fs_fs__txdelta_cached_window_t *cached_window;
1308 window_cache_key_t key = { 0 };
1309 get_window_key(&key, rs);
1310 key.chunk_index = chunk_index;
1311 SVN_ERR(svn_cache__get((void **) &cached_window,
1317 /* If we did not find a parsed txdelta window, we might have a raw
1318 version of it in our cache. If so, read, parse and re-cache it. */
1319 if (!*is_cached && rs->raw_window_cache)
1321 SVN_ERR(svn_cache__get_partial((void **) &cached_window, is_cached,
1322 rs->raw_window_cache, &key,
1323 parse_raw_window, NULL, result_pool));
1325 SVN_ERR(svn_cache__set(rs->window_cache, &key, cached_window,
1329 /* Return cached information. */
1332 /* found it. Pass it back to the caller. */
1333 *window_p = cached_window->window;
1335 /* manipulate the RS as if we just read the data */
1336 rs->current = cached_window->end_offset;
1337 rs->chunk_index = chunk_index;
1341 return SVN_NO_ERROR;
1344 /* Store the WINDOW read for the rep state RS in the current FSFS
1345 * session's cache. This will be a no-op if no cache has been given.
1346 * Temporary allocations will be made from SCRATCH_POOL. */
1347 static svn_error_t *
1348 set_cached_window(svn_txdelta_window_t *window,
1350 apr_pool_t *scratch_pool)
1352 if (rs->window_cache)
1354 /* store the window and the first offset _past_ it */
1355 svn_fs_fs__txdelta_cached_window_t cached_window;
1356 window_cache_key_t key = {0};
1358 cached_window.window = window;
1359 cached_window.end_offset = rs->current;
1361 /* but key it with the start offset because that is the known state
1362 * when we will look it up */
1363 SVN_ERR(svn_cache__set(rs->window_cache,
1364 get_window_key(&key, rs),
1369 return SVN_NO_ERROR;
1372 /* Read the WINDOW_P for the rep state RS from the current FSFS session's
1373 * cache. This will be a no-op and IS_CACHED will be set to FALSE if no
1374 * cache has been given. If a cache is available IS_CACHED will inform
1375 * the caller about the success of the lookup. Allocations (of the window
1376 * in particular) will be made from POOL.
1378 static svn_error_t *
1379 get_cached_combined_window(svn_stringbuf_t **window_p,
1381 svn_boolean_t *is_cached,
1384 if (! rs->combined_cache)
1386 /* combined window cache has not been enabled */
1391 /* ask the cache for the desired combined window */
1392 window_cache_key_t key = { 0 };
1393 return svn_cache__get((void **)window_p,
1396 get_window_key(&key, rs),
1400 return SVN_NO_ERROR;
1403 /* Store the WINDOW read for the rep state RS in the current FSFS session's
1404 * cache. This will be a no-op if no cache has been given.
1405 * Temporary allocations will be made from SCRATCH_POOL. */
1406 static svn_error_t *
1407 set_cached_combined_window(svn_stringbuf_t *window,
1409 apr_pool_t *scratch_pool)
1411 if (rs->combined_cache)
1413 /* but key it with the start offset because that is the known state
1414 * when we will look it up */
1415 window_cache_key_t key = { 0 };
1416 return svn_cache__set(rs->combined_cache,
1417 get_window_key(&key, rs),
1422 return SVN_NO_ERROR;
1425 /* Build an array of rep_state structures in *LIST giving the delta
1426 reps from first_rep to a plain-text or self-compressed rep. Set
1427 *SRC_STATE to the plain-text rep we find at the end of the chain,
1428 or to NULL if the final delta representation is self-compressed.
1429 The representation to start from is designated by filesystem FS, id
1430 ID, and representation REP.
1431 Also, set *WINDOW_P to the base window content for *LIST, if it
1432 could be found in cache. Otherwise, *LIST will contain the base
1433 representation for the whole delta chain. */
1434 static svn_error_t *
1435 build_rep_list(apr_array_header_t **list,
1436 svn_stringbuf_t **window_p,
1437 rep_state_t **src_state,
1439 representation_t *first_rep,
1442 representation_t rep;
1443 rep_state_t *rs = NULL;
1444 svn_fs_fs__rep_header_t *rep_header;
1445 svn_boolean_t is_cached = FALSE;
1446 shared_file_t *shared_file = NULL;
1447 apr_pool_t *iterpool = svn_pool_create(pool);
1449 *list = apr_array_make(pool, 1, sizeof(rep_state_t *));
1452 /* for the top-level rep, we need the rep_args */
1453 SVN_ERR(create_rep_state(&rs, &rep_header, &shared_file, &rep, fs, pool,
1457 svn_pool_clear(iterpool);
1459 /* fetch state, if that has not been done already */
1461 SVN_ERR(create_rep_state(&rs, &rep_header, &shared_file,
1462 &rep, fs, pool, iterpool));
1464 /* for txn reps, there won't be a cached combined window */
1465 if ( !svn_fs_fs__id_txn_used(&rep.txn_id)
1466 && rep.expanded_size < SVN_DELTA_WINDOW_SIZE)
1467 SVN_ERR(get_cached_combined_window(window_p, rs, &is_cached, pool));
1471 /* We already have a reconstructed window in our cache.
1472 Write a pseudo rep_state with the full length. */
1475 rs->size = (*window_p)->len;
1480 if (rep_header->type == svn_fs_fs__rep_plain)
1482 /* This is a plaintext, so just return the current rep_state. */
1487 /* Push this rep onto the list. If it's self-compressed, we're done. */
1488 APR_ARRAY_PUSH(*list, rep_state_t *) = rs;
1489 if (rep_header->type == svn_fs_fs__rep_self_delta)
1495 rep.revision = rep_header->base_revision;
1496 rep.item_index = rep_header->base_item_index;
1497 rep.size = rep_header->base_length;
1498 svn_fs_fs__id_txn_reset(&rep.txn_id);
1502 svn_pool_destroy(iterpool);
1504 return SVN_NO_ERROR;
1508 /* Create a rep_read_baton structure for node revision NODEREV in
1509 filesystem FS and store it in *RB_P. Perform all allocations in
1510 POOL. If rep is mutable, it must be for file contents. */
1511 static svn_error_t *
1512 rep_read_get_baton(struct rep_read_baton **rb_p,
1514 representation_t *rep,
1515 pair_cache_key_t fulltext_cache_key,
1518 struct rep_read_baton *b;
1520 b = apr_pcalloc(pool, sizeof(*b));
1523 b->base_window = NULL;
1526 b->md5_checksum_ctx = svn_checksum_ctx_create(svn_checksum_md5, pool);
1527 b->checksum_finalized = FALSE;
1528 memcpy(b->md5_digest, rep->md5_digest, sizeof(rep->md5_digest));
1529 b->len = rep->expanded_size;
1531 b->fulltext_cache_key = fulltext_cache_key;
1532 b->pool = svn_pool_create(pool);
1533 b->filehandle_pool = svn_pool_create(pool);
1534 b->fulltext_cache = NULL;
1535 b->fulltext_delivered = 0;
1536 b->current_fulltext = NULL;
1538 /* Save our output baton. */
1541 return SVN_NO_ERROR;
1544 /* Skip forwards to THIS_CHUNK in REP_STATE and then read the next delta
1545 window into *NWIN. Note that RS->CHUNK_INDEX will be THIS_CHUNK rather
1546 than THIS_CHUNK + 1 when this function returns. */
1547 static svn_error_t *
1548 read_delta_window(svn_txdelta_window_t **nwin, int this_chunk,
1549 rep_state_t *rs, apr_pool_t *result_pool,
1550 apr_pool_t *scratch_pool)
1552 svn_boolean_t is_cached;
1553 apr_off_t start_offset;
1554 apr_off_t end_offset;
1555 apr_pool_t *iterpool;
1557 SVN_ERR_ASSERT(rs->chunk_index <= this_chunk);
1559 SVN_ERR(dbg_log_access(rs->sfile->fs, rs->revision, rs->item_index,
1560 NULL, SVN_FS_FS__ITEM_TYPE_ANY_REP, scratch_pool));
1562 /* Read the next window. But first, try to find it in the cache. */
1563 SVN_ERR(get_cached_window(nwin, rs, this_chunk, &is_cached,
1564 result_pool, scratch_pool));
1566 return SVN_NO_ERROR;
1568 /* someone has to actually read the data from file. Open it */
1569 SVN_ERR(auto_open_shared_file(rs->sfile));
1571 /* invoke the 'block-read' feature for non-txn data.
1572 However, don't do that if we are in the middle of some representation,
1573 because the block is unlikely to contain other data. */
1574 if ( rs->chunk_index == 0
1575 && SVN_IS_VALID_REVNUM(rs->revision)
1576 && use_block_read(rs->sfile->fs)
1577 && rs->raw_window_cache)
1579 SVN_ERR(block_read(NULL, rs->sfile->fs, rs->revision, rs->item_index,
1580 rs->sfile->rfile, result_pool, scratch_pool));
1582 /* reading the whole block probably also provided us with the
1583 desired txdelta window */
1584 SVN_ERR(get_cached_window(nwin, rs, this_chunk, &is_cached,
1585 result_pool, scratch_pool));
1587 return SVN_NO_ERROR;
1590 /* data is still not cached -> we need to read it.
1591 Make sure we have all the necessary info. */
1592 SVN_ERR(auto_set_start_offset(rs, scratch_pool));
1593 SVN_ERR(auto_read_diff_version(rs, scratch_pool));
1595 /* RS->FILE may be shared between RS instances -> make sure we point
1596 * to the right data. */
1597 start_offset = rs->start + rs->current;
1598 SVN_ERR(rs_aligned_seek(rs, NULL, start_offset, scratch_pool));
1600 /* Skip windows to reach the current chunk if we aren't there yet. */
1601 iterpool = svn_pool_create(scratch_pool);
1602 while (rs->chunk_index < this_chunk)
1604 svn_pool_clear(iterpool);
1605 SVN_ERR(svn_txdelta_skip_svndiff_window(rs->sfile->rfile->file,
1606 rs->ver, iterpool));
1608 SVN_ERR(get_file_offset(&start_offset, rs, iterpool));
1609 rs->current = start_offset - rs->start;
1610 if (rs->current >= rs->size)
1611 return svn_error_create(SVN_ERR_FS_CORRUPT, NULL,
1612 _("Reading one svndiff window read "
1613 "beyond the end of the "
1616 svn_pool_destroy(iterpool);
1618 /* Actually read the next window. */
1619 SVN_ERR(svn_txdelta_read_svndiff_window(nwin, rs->sfile->rfile->stream,
1620 rs->ver, result_pool));
1621 SVN_ERR(get_file_offset(&end_offset, rs, scratch_pool));
1622 rs->current = end_offset - rs->start;
1623 if (rs->current > rs->size)
1624 return svn_error_create(SVN_ERR_FS_CORRUPT, NULL,
1625 _("Reading one svndiff window read beyond "
1626 "the end of the representation"));
1628 /* the window has not been cached before, thus cache it now
1629 * (if caching is used for them at all) */
1630 if (SVN_IS_VALID_REVNUM(rs->revision))
1631 SVN_ERR(set_cached_window(*nwin, rs, scratch_pool));
1633 return SVN_NO_ERROR;
1636 /* Read SIZE bytes from the representation RS and return it in *NWIN. */
1637 static svn_error_t *
1638 read_plain_window(svn_stringbuf_t **nwin, rep_state_t *rs,
1639 apr_size_t size, apr_pool_t *result_pool,
1640 apr_pool_t *scratch_pool)
1644 /* RS->FILE may be shared between RS instances -> make sure we point
1645 * to the right data. */
1646 SVN_ERR(auto_open_shared_file(rs->sfile));
1647 SVN_ERR(auto_set_start_offset(rs, scratch_pool));
1649 offset = rs->start + rs->current;
1650 SVN_ERR(rs_aligned_seek(rs, NULL, offset, scratch_pool));
1652 /* Read the plain data. */
1653 *nwin = svn_stringbuf_create_ensure(size, result_pool);
1654 SVN_ERR(svn_io_file_read_full2(rs->sfile->rfile->file, (*nwin)->data, size,
1655 NULL, NULL, result_pool));
1656 (*nwin)->data[size] = 0;
1659 rs->current += (apr_off_t)size;
1661 return SVN_NO_ERROR;
1664 /* Skip SIZE bytes from the PLAIN representation RS. */
1665 static svn_error_t *
1666 skip_plain_window(rep_state_t *rs,
1670 rs->current += (apr_off_t)size;
1672 return SVN_NO_ERROR;
1675 /* Get the undeltified window that is a result of combining all deltas
1676 from the current desired representation identified in *RB with its
1677 base representation. Store the window in *RESULT. */
1678 static svn_error_t *
1679 get_combined_window(svn_stringbuf_t **result,
1680 struct rep_read_baton *rb)
1682 apr_pool_t *pool, *new_pool, *window_pool;
1684 apr_array_header_t *windows;
1685 svn_stringbuf_t *source, *buf = rb->base_window;
1687 apr_pool_t *iterpool;
1689 /* Read all windows that we need to combine. This is fine because
1690 the size of each window is relatively small (100kB) and skip-
1691 delta limits the number of deltas in a chain to well under 100.
1692 Stop early if one of them does not depend on its predecessors. */
1693 window_pool = svn_pool_create(rb->pool);
1694 windows = apr_array_make(window_pool, 0, sizeof(svn_txdelta_window_t *));
1695 iterpool = svn_pool_create(rb->pool);
1696 for (i = 0; i < rb->rs_list->nelts; ++i)
1698 svn_txdelta_window_t *window;
1700 svn_pool_clear(iterpool);
1702 rs = APR_ARRAY_IDX(rb->rs_list, i, rep_state_t *);
1703 SVN_ERR(read_delta_window(&window, rb->chunk_index, rs, window_pool,
1706 APR_ARRAY_PUSH(windows, svn_txdelta_window_t *) = window;
1707 if (window->src_ops == 0)
1714 /* Combine in the windows from the other delta reps. */
1715 pool = svn_pool_create(rb->pool);
1716 for (--i; i >= 0; --i)
1718 svn_txdelta_window_t *window;
1720 svn_pool_clear(iterpool);
1722 rs = APR_ARRAY_IDX(rb->rs_list, i, rep_state_t *);
1723 window = APR_ARRAY_IDX(windows, i, svn_txdelta_window_t *);
1725 /* Maybe, we've got a PLAIN start representation. If we do, read
1726 as much data from it as is needed for the txdelta window's source
1728 Note that BUF / SOURCE may only be NULL in the first iteration.
1729 Also note that we may have short-cut reading the delta chain --
1730 in which case SRC_OPS is 0 and it might not be a PLAIN rep. */
1732 if (source == NULL && rb->src_state != NULL)
1734 /* Even if we don't need the source rep now, we still must keep
1735 * its read offset in sync with what we might need for the next
1737 if (window->src_ops)
1738 SVN_ERR(read_plain_window(&source, rb->src_state,
1742 SVN_ERR(skip_plain_window(rb->src_state, window->sview_len));
1745 /* Combine this window with the current one. */
1746 new_pool = svn_pool_create(rb->pool);
1747 buf = svn_stringbuf_create_ensure(window->tview_len, new_pool);
1748 buf->len = window->tview_len;
1750 svn_txdelta_apply_instructions(window, source ? source->data : NULL,
1751 buf->data, &buf->len);
1752 if (buf->len != window->tview_len)
1753 return svn_error_create(SVN_ERR_FS_CORRUPT, NULL,
1754 _("svndiff window length is "
1757 /* Cache windows only if the whole rep content could be read as a
1758 single chunk. Only then will no other chunk need a deeper RS
1759 list than the cached chunk. */
1760 if ( (rb->chunk_index == 0) && (rs->current == rs->size)
1761 && SVN_IS_VALID_REVNUM(rs->revision))
1762 SVN_ERR(set_cached_combined_window(buf, rs, new_pool));
1766 /* Cycle pools so that we only need to hold three windows at a time. */
1767 svn_pool_destroy(pool);
1770 svn_pool_destroy(iterpool);
1772 svn_pool_destroy(window_pool);
1775 return SVN_NO_ERROR;
1778 /* Returns whether or not the expanded fulltext of the file is cachable
1779 * based on its size SIZE. The decision depends on the cache used by FFD.
1781 static svn_boolean_t
1782 fulltext_size_is_cachable(fs_fs_data_t *ffd, svn_filesize_t size)
1784 return (size < APR_SIZE_MAX)
1785 && svn_cache__is_cachable(ffd->fulltext_cache, (apr_size_t)size);
1788 /* Close method used on streams returned by read_representation().
1790 static svn_error_t *
1791 rep_read_contents_close(void *baton)
1793 struct rep_read_baton *rb = baton;
1795 svn_pool_destroy(rb->pool);
1796 svn_pool_destroy(rb->filehandle_pool);
1798 return SVN_NO_ERROR;
1801 /* Return the next *LEN bytes of the rep from our plain / delta windows
1802 and store them in *BUF. */
1803 static svn_error_t *
1804 get_contents_from_windows(struct rep_read_baton *rb,
1808 apr_size_t copy_len, remaining = *len;
1812 /* Special case for when there are no delta reps, only a plain
1814 if (rb->rs_list->nelts == 0)
1816 copy_len = remaining;
1819 if (rb->base_window != NULL)
1821 /* We got the desired rep directly from the cache.
1822 This is where we need the pseudo rep_state created
1823 by build_rep_list(). */
1824 apr_size_t offset = (apr_size_t)rs->current;
1825 if (offset >= rb->base_window->len)
1827 else if (copy_len > rb->base_window->len - offset)
1828 copy_len = rb->base_window->len - offset;
1830 memcpy (cur, rb->base_window->data + offset, copy_len);
1835 if (((apr_off_t) copy_len) > rs->size - rs->current)
1836 copy_len = (apr_size_t) (rs->size - rs->current);
1838 SVN_ERR(auto_open_shared_file(rs->sfile));
1839 SVN_ERR(auto_set_start_offset(rs, rb->pool));
1841 offset = rs->start + rs->current;
1842 SVN_ERR(rs_aligned_seek(rs, NULL, offset, rb->pool));
1843 SVN_ERR(svn_io_file_read_full2(rs->sfile->rfile->file, cur,
1844 copy_len, NULL, NULL, rb->pool));
1847 rs->current += copy_len;
1849 return SVN_NO_ERROR;
1852 while (remaining > 0)
1854 /* If we have buffered data from a previous chunk, use that. */
1857 /* Determine how much to copy from the buffer. */
1858 copy_len = rb->buf_len - rb->buf_pos;
1859 if (copy_len > remaining)
1860 copy_len = remaining;
1862 /* Actually copy the data. */
1863 memcpy(cur, rb->buf + rb->buf_pos, copy_len);
1864 rb->buf_pos += copy_len;
1866 remaining -= copy_len;
1868 /* If the buffer is all used up, clear it and empty the
1870 if (rb->buf_pos == rb->buf_len)
1872 svn_pool_clear(rb->pool);
1878 svn_stringbuf_t *sbuf = NULL;
1880 rs = APR_ARRAY_IDX(rb->rs_list, 0, rep_state_t *);
1881 if (rs->current == rs->size)
1884 /* Get more buffered data by evaluating a chunk. */
1885 SVN_ERR(get_combined_window(&sbuf, rb));
1888 rb->buf_len = sbuf->len;
1889 rb->buf = sbuf->data;
1896 return SVN_NO_ERROR;
1899 /* Baton type for get_fulltext_partial. */
1900 typedef struct fulltext_baton_t
1902 /* Target buffer to write to; of at least LEN bytes. */
1905 /* Offset within the respective fulltext at which we shall start to
1906 copy data into BUFFER. */
1909 /* Number of bytes to copy. The actual amount may be less in case
1910 the fulltext is short(er). */
1913 /* Number of bytes actually copied into BUFFER. */
1917 /* Implement svn_cache__partial_getter_func_t for fulltext caches.
1918 * From the fulltext in DATA, we copy the range specified by the
1919 * fulltext_baton_t* BATON into the buffer provided by that baton.
1920 * OUT and RESULT_POOL are not used.
1922 static svn_error_t *
1923 get_fulltext_partial(void **out,
1925 apr_size_t data_len,
1927 apr_pool_t *result_pool)
1929 fulltext_baton_t *fulltext_baton = baton;
1931 /* We cached the fulltext with a NUL appended to it. */
1932 apr_size_t fulltext_len = data_len - 1;
1934 /* Clip the copy range to what the fulltext size allows. */
1935 apr_size_t start = MIN(fulltext_baton->start, fulltext_len);
1936 fulltext_baton->read = MIN(fulltext_len - start, fulltext_baton->len);
1938 /* Copy the data to the output buffer and be done. */
1939 memcpy(fulltext_baton->buffer, (const char *)data + start,
1940 fulltext_baton->read);
1942 return SVN_NO_ERROR;
1945 /* Find the fulltext specified in BATON in the fulltext cache given
1946 * as well by BATON. If that succeeds, set *CACHED to TRUE and copy
1947 * up to the next *LEN bytes into BUFFER. Set *LEN to the actual
1948 * number of bytes copied.
1950 static svn_error_t *
1951 get_contents_from_fulltext(svn_boolean_t *cached,
1952 struct rep_read_baton *baton,
1957 fulltext_baton_t fulltext_baton;
1959 SVN_ERR_ASSERT((apr_size_t)baton->fulltext_delivered
1960 == baton->fulltext_delivered);
1961 fulltext_baton.buffer = buffer;
1962 fulltext_baton.start = (apr_size_t)baton->fulltext_delivered;
1963 fulltext_baton.len = *len;
1964 fulltext_baton.read = 0;
1966 SVN_ERR(svn_cache__get_partial(&dummy, cached, baton->fulltext_cache,
1967 &baton->fulltext_cache_key,
1968 get_fulltext_partial, &fulltext_baton,
1973 baton->fulltext_delivered += fulltext_baton.read;
1974 *len = fulltext_baton.read;
1977 return SVN_NO_ERROR;
1980 /* Determine the optimal size of a string buf that shall receive a
1981 * (full-) text of NEEDED bytes.
1983 * The critical point is that those buffers may be very large and
1984 * can cause memory fragmentation. We apply simple heuristics to
1985 * make fragmentation less likely.
1988 optimimal_allocation_size(apr_size_t needed)
1990 /* For all allocations, assume some overhead that is shared between
1991 * OS memory management, APR memory management and svn_stringbuf_t. */
1992 const apr_size_t overhead = 0x400;
1995 /* If an allocation size is safe for other ephemeral buffers, it should
1996 * be safe for ours. */
1997 if (needed <= SVN__STREAM_CHUNK_SIZE)
2000 /* Paranoia edge case:
2001 * Skip our heuristics if they would cause arithmetic overflow.
2002 * Beware to make this test work for NEEDED = APR_SIZE_MAX as well! */
2003 if (needed >= APR_SIZE_MAX / 2 - overhead)
2006 /* As per definition SVN__STREAM_CHUNK_SIZE is a power of two.
2007 * Since we know NEEDED to be larger than that, use it as the
2010 * Heuristics: Allocate a power-of-two number of bytes that fit
2011 * NEEDED plus some OVERHEAD. The APR allocator
2012 * will round it up to the next full page size.
2014 optimal = SVN__STREAM_CHUNK_SIZE;
2015 while (optimal - overhead < needed)
2018 /* This is above or equal to NEEDED. */
2019 return optimal - overhead;
2022 /* After a fulltext cache lookup failure, we will continue to read from
2023 * combined delta or plain windows. However, we must first make that data
2024 * stream in BATON catch up to the position LEN already delivered from the
2025 * fulltext cache. Also, we need to store the reconstructed fulltext if we
2026 * want to cache it at the end.
2028 static svn_error_t *
2029 skip_contents(struct rep_read_baton *baton,
2032 svn_error_t *err = SVN_NO_ERROR;
2034 /* Do we want to cache the reconstructed fulltext? */
2035 if (SVN_IS_VALID_REVNUM(baton->fulltext_cache_key.revision))
2038 svn_filesize_t to_alloc = MAX(len, baton->len);
2040 /* This should only be happening if BATON->LEN and LEN are
2041 * cacheable, implying they fit into memory. */
2042 SVN_ERR_ASSERT((apr_size_t)to_alloc == to_alloc);
2044 /* Allocate the fulltext buffer. */
2045 baton->current_fulltext = svn_stringbuf_create_ensure(
2046 optimimal_allocation_size((apr_size_t)to_alloc),
2047 baton->filehandle_pool);
2049 /* Read LEN bytes from the window stream and store the data
2050 * in the fulltext buffer (will be filled by further reads later). */
2051 baton->current_fulltext->len = (apr_size_t)len;
2052 baton->current_fulltext->data[(apr_size_t)len] = 0;
2054 buffer = baton->current_fulltext->data;
2055 while (len > 0 && !err)
2057 apr_size_t to_read = (apr_size_t)len;
2058 err = get_contents_from_windows(baton, buffer, &to_read);
2063 /* Make the MD5 calculation catch up with the data delivered
2064 * (we did not run MD5 on the data that we took from the cache). */
2067 SVN_ERR(svn_checksum_update(baton->md5_checksum_ctx,
2068 baton->current_fulltext->data,
2069 baton->current_fulltext->len));
2070 baton->off += baton->current_fulltext->len;
2075 /* Simply drain LEN bytes from the window stream. */
2076 apr_pool_t *subpool = svn_pool_create(baton->pool);
2077 char *buffer = apr_palloc(subpool, SVN__STREAM_CHUNK_SIZE);
2079 while (len > 0 && !err)
2081 apr_size_t to_read = len > SVN__STREAM_CHUNK_SIZE
2082 ? SVN__STREAM_CHUNK_SIZE
2085 err = get_contents_from_windows(baton, buffer, &to_read);
2088 /* Make the MD5 calculation catch up with the data delivered
2089 * (we did not run MD5 on the data that we took from the cache). */
2092 SVN_ERR(svn_checksum_update(baton->md5_checksum_ctx,
2094 baton->off += to_read;
2098 svn_pool_destroy(subpool);
2101 return svn_error_trace(err);
2104 /* BATON is of type `rep_read_baton'; read the next *LEN bytes of the
2105 representation and store them in *BUF. Sum as we read and verify
2106 the MD5 sum at the end. */
2107 static svn_error_t *
2108 rep_read_contents(void *baton,
2112 struct rep_read_baton *rb = baton;
2114 /* Get data from the fulltext cache for as long as we can. */
2115 if (rb->fulltext_cache)
2117 svn_boolean_t cached;
2118 SVN_ERR(get_contents_from_fulltext(&cached, rb, buf, len));
2120 return SVN_NO_ERROR;
2122 /* Cache miss. From now on, we will never read from the fulltext
2123 * cache for this representation anymore. */
2124 rb->fulltext_cache = NULL;
2127 /* No fulltext cache to help us. We must read from the window stream. */
2130 /* Window stream not initialized, yet. Do it now. */
2131 rb->len = rb->rep.expanded_size;
2132 SVN_ERR(build_rep_list(&rb->rs_list, &rb->base_window,
2133 &rb->src_state, rb->fs, &rb->rep,
2134 rb->filehandle_pool));
2136 /* In case we did read from the fulltext cache before, make the
2137 * window stream catch up. Also, initialize the fulltext buffer
2138 * if we want to cache the fulltext at the end. */
2139 SVN_ERR(skip_contents(rb, rb->fulltext_delivered));
2142 /* Get the next block of data.
2143 * Keep in mind that the representation might be empty and leave us
2144 * already positioned at the end of the rep. */
2145 if (rb->off == rb->len)
2148 SVN_ERR(get_contents_from_windows(rb, buf, len));
2150 if (rb->current_fulltext)
2151 svn_stringbuf_appendbytes(rb->current_fulltext, buf, *len);
2153 /* Perform checksumming. We want to check the checksum as soon as
2154 the last byte of data is read, in case the caller never performs
2155 a short read, but we don't want to finalize the MD5 context
2157 if (!rb->checksum_finalized)
2159 SVN_ERR(svn_checksum_update(rb->md5_checksum_ctx, buf, *len));
2161 if (rb->off == rb->len)
2163 svn_checksum_t *md5_checksum;
2164 svn_checksum_t expected;
2165 expected.kind = svn_checksum_md5;
2166 expected.digest = rb->md5_digest;
2168 rb->checksum_finalized = TRUE;
2169 SVN_ERR(svn_checksum_final(&md5_checksum, rb->md5_checksum_ctx,
2171 if (!svn_checksum_match(md5_checksum, &expected))
2172 return svn_error_create(SVN_ERR_FS_CORRUPT,
2173 svn_checksum_mismatch_err(&expected, md5_checksum,
2175 _("Checksum mismatch while reading representation")),
2180 if (rb->off == rb->len && rb->current_fulltext)
2182 fs_fs_data_t *ffd = rb->fs->fsap_data;
2183 SVN_ERR(svn_cache__set(ffd->fulltext_cache, &rb->fulltext_cache_key,
2184 rb->current_fulltext, rb->pool));
2185 rb->current_fulltext = NULL;
2188 return SVN_NO_ERROR;
2192 svn_fs_fs__get_contents(svn_stream_t **contents_p,
2194 representation_t *rep,
2195 svn_boolean_t cache_fulltext,
2200 *contents_p = svn_stream_empty(pool);
2204 fs_fs_data_t *ffd = fs->fsap_data;
2205 struct rep_read_baton *rb;
2207 pair_cache_key_t fulltext_cache_key = { 0 };
2208 fulltext_cache_key.revision = rep->revision;
2209 fulltext_cache_key.second = rep->item_index;
2211 /* Initialize the reader baton. Some members may be added lazily
2212 * while reading from the stream */
2213 SVN_ERR(rep_read_get_baton(&rb, fs, rep, fulltext_cache_key, pool));
2215 /* Make the stream attempt fulltext cache lookups if the fulltext
2216 * is cacheable. If it is not, then also don't try to buffer and
2218 if (ffd->fulltext_cache && cache_fulltext
2219 && SVN_IS_VALID_REVNUM(rep->revision)
2220 && fulltext_size_is_cachable(ffd, rep->expanded_size))
2222 rb->fulltext_cache = ffd->fulltext_cache;
2226 /* This will also prevent the reconstructed fulltext from being
2227 put into the cache. */
2228 rb->fulltext_cache_key.revision = SVN_INVALID_REVNUM;
2231 *contents_p = svn_stream_create(rb, pool);
2232 svn_stream_set_read2(*contents_p, NULL /* only full read support */,
2234 svn_stream_set_close(*contents_p, rep_read_contents_close);
2237 return SVN_NO_ERROR;
2241 svn_fs_fs__get_contents_from_file(svn_stream_t **contents_p,
2243 representation_t *rep,
2248 struct rep_read_baton *rb;
2249 pair_cache_key_t fulltext_cache_key = { SVN_INVALID_REVNUM, 0 };
2250 rep_state_t *rs = apr_pcalloc(pool, sizeof(*rs));
2251 svn_fs_fs__rep_header_t *rh;
2253 /* Initialize the reader baton. Some members may be added lazily
2254 * while reading from the stream. */
2255 SVN_ERR(rep_read_get_baton(&rb, fs, rep, fulltext_cache_key, pool));
2257 /* Continue constructing RS. Leave caches as NULL. */
2258 rs->size = rep->size;
2259 rs->revision = SVN_INVALID_REVNUM;
2264 /* Provide just enough file access info to allow for a basic read from
2265 * FILE but leave all index / footer info with empty values b/c FILE
2266 * probably is not a complete revision file. */
2267 rs->sfile = apr_pcalloc(pool, sizeof(*rs->sfile));
2268 rs->sfile->revision = rep->revision;
2269 rs->sfile->pool = pool;
2271 rs->sfile->rfile = apr_pcalloc(pool, sizeof(*rs->sfile->rfile));
2272 rs->sfile->rfile->start_revision = SVN_INVALID_REVNUM;
2273 rs->sfile->rfile->file = file;
2274 rs->sfile->rfile->stream = svn_stream_from_aprfile2(file, TRUE, pool);
2276 /* Read the rep header. */
2277 SVN_ERR(aligned_seek(fs, file, NULL, offset, pool));
2278 SVN_ERR(svn_fs_fs__read_rep_header(&rh, rs->sfile->rfile->stream,
2280 SVN_ERR(get_file_offset(&rs->start, rs, pool));
2281 rs->header_size = rh->header_size;
2283 /* Log the access. */
2284 SVN_ERR(dbg_log_access(fs, SVN_INVALID_REVNUM, 0, rh,
2285 SVN_FS_FS__ITEM_TYPE_ANY_REP, pool));
2287 /* Build the representation list (delta chain). */
2288 if (rh->type == svn_fs_fs__rep_plain)
2290 rb->rs_list = apr_array_make(pool, 0, sizeof(rep_state_t *));
2293 else if (rh->type == svn_fs_fs__rep_self_delta)
2295 rb->rs_list = apr_array_make(pool, 1, sizeof(rep_state_t *));
2296 APR_ARRAY_PUSH(rb->rs_list, rep_state_t *) = rs;
2297 rb->src_state = NULL;
2301 representation_t next_rep = { 0 };
2303 /* skip "SVNx" diff marker */
2306 /* REP's base rep is inside a proper revision.
2307 * It can be reconstructed in the usual way. */
2308 next_rep.revision = rh->base_revision;
2309 next_rep.item_index = rh->base_item_index;
2310 next_rep.size = rh->base_length;
2311 svn_fs_fs__id_txn_reset(&next_rep.txn_id);
2313 SVN_ERR(build_rep_list(&rb->rs_list, &rb->base_window,
2314 &rb->src_state, rb->fs, &next_rep,
2315 rb->filehandle_pool));
2317 /* Insert the access to REP as the first element of the delta chain. */
2318 svn_sort__array_insert(rb->rs_list, &rs, 0);
2321 /* Now, the baton is complete and we can assemble the stream around it. */
2322 *contents_p = svn_stream_create(rb, pool);
2323 svn_stream_set_read2(*contents_p, NULL /* only full read support */,
2325 svn_stream_set_close(*contents_p, rep_read_contents_close);
2327 return SVN_NO_ERROR;
2330 /* Baton for cache_access_wrapper. Wraps the original parameters of
2331 * svn_fs_fs__try_process_file_contents().
 * FUNC is the content processor handed to that function;
 * cache_access_wrapper() calls it together with the BATON member.
2333 typedef struct cache_access_wrapper_baton_t
2335 svn_fs_process_contents_func_t func;
2337 } cache_access_wrapper_baton_t;
2339 /* Wrapper to translate between svn_fs_process_contents_func_t and
2340 * svn_cache__partial_getter_func_t.
 * BATON is a cache_access_wrapper_baton_t. Its FUNC is invoked on the
 * cached DATA with a length of DATA_LEN - 1, i.e. without the terminating
 * 0 that the cache adds, and with the wrapped user baton.
 * An error returned by FUNC is passed on to the caller.
2342 static svn_error_t *
2343 cache_access_wrapper(void **out,
2345 apr_size_t data_len,
2349 cache_access_wrapper_baton_t *wrapper_baton = baton;
2351 SVN_ERR(wrapper_baton->func((const unsigned char *)data,
2352 data_len - 1, /* cache adds terminating 0 */
2353 wrapper_baton->baton,
2356 /* non-NULL value to signal the calling cache that all went well */
2359 return SVN_NO_ERROR;
/* Look up the fulltext of the data representation of NODEREV in the
 * fulltext cache of FS, using the (revision, item index) pair as key, and
 * hand the cached data to PROCESSOR together with BATON by means of
 * cache_access_wrapper().
 * The cache is only queried when it exists, the representation has a
 * valid revision and fulltext_size_is_cachable() accepts its expanded
 * size. In that case svn_cache__get_partial() reports through *SUCCESS
 * whether the lookup hit, and its error (if any) is returned. */
2363 svn_fs_fs__try_process_file_contents(svn_boolean_t *success,
2365 node_revision_t *noderev,
2366 svn_fs_process_contents_func_t processor,
2370 representation_t *rep = noderev->data_rep;
2373 fs_fs_data_t *ffd = fs->fsap_data;
2374 pair_cache_key_t fulltext_cache_key = { 0 };
2376 fulltext_cache_key.revision = rep->revision;
2377 fulltext_cache_key.second = rep->item_index;
2378 if (ffd->fulltext_cache && SVN_IS_VALID_REVNUM(rep->revision)
2379 && fulltext_size_is_cachable(ffd, rep->expanded_size))
2381 cache_access_wrapper_baton_t wrapper_baton;
2384 wrapper_baton.func = processor;
2385 wrapper_baton.baton = baton;
2386 return svn_cache__get_partial(&dummy, success,
2387 ffd->fulltext_cache,
2388 &fulltext_cache_key,
2389 cache_access_wrapper,
2396 return SVN_NO_ERROR;
2400 /* Baton used when reading delta windows.
 * MD5_DIGEST is the digest returned by delta_read_md5_digest();
 * get_storaged_delta_stream() copies it from the data rep of the target
 * node revision when it sets up the baton. */
2401 struct delta_read_baton
2404 unsigned char md5_digest[APR_MD5_DIGESTSIZE];
2407 /* This implements the svn_txdelta_next_window_fn_t interface.
 * BATON is a struct delta_read_baton. As long as the read position of
 * its rep state lies before the end of the representation, the window
 * with the current chunk index is read into *WINDOW (allocated in POOL)
 * and the chunk index is advanced by one.
 * Temporaries live in a subpool of POOL that is destroyed before the
 * successful return.
 * NOTE(review): SVN_ERR presumably returns early when
 * read_delta_window() fails, in which case the subpool is not destroyed
 * here and stays around until POOL is cleaned up -- confirm that this
 * is acceptable. */
2408 static svn_error_t *
2409 delta_read_next_window(svn_txdelta_window_t **window, void *baton,
2412 struct delta_read_baton *drb = baton;
2413 apr_pool_t *scratch_pool = svn_pool_create(pool);
2416 if (drb->rs->current < drb->rs->size)
2418 SVN_ERR(read_delta_window(window, drb->rs->chunk_index, drb->rs, pool,
2420 drb->rs->chunk_index++;
2423 svn_pool_destroy(scratch_pool);
2425 return SVN_NO_ERROR;
2428 /* This implements the svn_txdelta_md5_digest_fn_t interface.
 * BATON is a struct delta_read_baton; the returned pointer refers to the
 * MD5_DIGEST array stored inside that baton. */
2429 static const unsigned char *
2430 delta_read_md5_digest(void *baton)
2432 struct delta_read_baton *drb = baton;
2433 return drb->md5_digest;
2436 /* Return a txdelta stream for on-disk representation REP_STATE
2437 * of TARGET. Allocate the result in POOL.
 * The stream fetches its windows through delta_read_next_window() and
 * reports the MD5 digest of the data rep of TARGET through
 * delta_read_md5_digest(). The digest is copied into the baton, REP_STATE
 * is referenced by it.
2439 static svn_txdelta_stream_t *
2440 get_storaged_delta_stream(rep_state_t *rep_state,
2441 node_revision_t *target,
2444 /* Create the delta read baton. */
2445 struct delta_read_baton *drb = apr_pcalloc(pool, sizeof(*drb));
2446 drb->rs = rep_state;
2447 memcpy(drb->md5_digest, target->data_rep->md5_digest,
2448 sizeof(drb->md5_digest));
2449 return svn_txdelta_stream_create(drb, delta_read_next_window,
2450 delta_read_md5_digest, pool);
/* Set *STREAM_P to a txdelta stream that produces the contents of TARGET
 * from those of SOURCE, allocated in POOL.
 * If TARGET has a data rep and either SOURCE is given or FS has no
 * fulltext cache, the rep header of TARGET is inspected first: a delta
 * stored against exactly the data rep of SOURCE (same revision and item
 * index), or a stored self-delta in the branch that wants one, is
 * returned as is through get_storaged_delta_stream().
 * In all other cases the revision file opened for that check is closed
 * again, both fulltexts are read with svn_fs_fs__get_contents() -- an
 * empty stream may stand in for the source -- and svn_txdelta2() computes
 * the delta without verifying checksums a second time. */
2454 svn_fs_fs__get_file_delta_stream(svn_txdelta_stream_t **stream_p,
2456 node_revision_t *source,
2457 node_revision_t *target,
2460 svn_stream_t *source_stream, *target_stream;
2461 rep_state_t *rep_state;
2462 svn_fs_fs__rep_header_t *rep_header;
2463 fs_fs_data_t *ffd = fs->fsap_data;
2465 /* Try a shortcut: if the target is stored as a delta against the source,
2466 then just use that delta. However, prefer using the fulltext cache
2467 whenever that is available. */
2468 if (target->data_rep && (source || ! ffd->fulltext_cache))
2470 /* Read target's base rep if any. */
2471 SVN_ERR(create_rep_state(&rep_state, &rep_header, NULL,
2472 target->data_rep, fs, pool, pool));
2474 if (source && source->data_rep && target->data_rep)
2476 /* If that matches source, then use this delta as is.
2477 Note that we want an actual delta here. E.g. a self-delta would
2478 not be good enough. */
2479 if (rep_header->type == svn_fs_fs__rep_delta
2480 && rep_header->base_revision == source->data_rep->revision
2481 && rep_header->base_item_index == source->data_rep->item_index)
2483 *stream_p = get_storaged_delta_stream(rep_state, target, pool);
2484 return SVN_NO_ERROR;
2489 /* We want a self-delta. There is a fair chance that TARGET got
2490 added in this revision and is already stored in the requested
2492 if (rep_header->type == svn_fs_fs__rep_self_delta)
2494 *stream_p = get_storaged_delta_stream(rep_state, target, pool);
2495 return SVN_NO_ERROR;
2499 /* Don't keep file handles open for longer than necessary. */
2500 if (rep_state->sfile->rfile)
2502 SVN_ERR(svn_fs_fs__close_revision_file(rep_state->sfile->rfile));
2503 rep_state->sfile->rfile = NULL;
2507 /* Read both fulltexts and construct a delta. */
2509 SVN_ERR(svn_fs_fs__get_contents(&source_stream, fs, source->data_rep,
2512 source_stream = svn_stream_empty(pool);
2513 SVN_ERR(svn_fs_fs__get_contents(&target_stream, fs, target->data_rep,
2516 /* Because source and target stream will already verify their content,
2517 * there is no need to do this once more. In particular if the stream
2518 * content is being fetched from cache. */
2519 svn_txdelta2(stream_p, source_stream, target_stream, FALSE, pool);
2521 return SVN_NO_ERROR;
2524 /* Return TRUE when all svn_fs_dirent_t* in ENTRIES are already sorted
2525 by their respective name. The loop tests each pair of neighbouring
 entries for strcmp() > 0, so equal names do not count as a violation
 of the order. */
2526 static svn_boolean_t
2527 sorted(apr_array_header_t *entries)
2531 const svn_fs_dirent_t * const *dirents = (const void *)entries->elts;
2532 for (i = 0; i < entries->nelts-1; ++i)
2533 if (strcmp(dirents[i]->name, dirents[i+1]->name) > 0)
2539 /* Compare the names of the two dirents given in **A and **B.
 * A and B each point to a svn_fs_dirent_t pointer. The result is that
 * of strcmp() on the two names. read_dir_entries() passes this function
 * to svn_sort__array(). */
2541 compare_dirents(const void *a, const void *b)
2543 const svn_fs_dirent_t *lhs = *((const svn_fs_dirent_t * const *) a);
2544 const svn_fs_dirent_t *rhs = *((const svn_fs_dirent_t * const *) b);
2546 return strcmp(lhs->name, rhs->name);
2549 /* Compare the name of the dirents given in **A with the C string in *B.
 * A points to a svn_fs_dirent_t pointer, B is the plain name to look for.
 * The result is that of strcmp(). svn_fs_fs__find_dir_entry() passes this
 * function to svn_sort__array_lookup(). */
2551 compare_dirent_name(const void *a, const void *b)
2553 const svn_fs_dirent_t *lhs = *((const svn_fs_dirent_t * const *) a);
2554 const char *rhs = b;
2556 return strcmp(lhs->name, rhs);
2559 /* Into *ENTRIES_P, read all directory entries from the key-value text in
2560 * STREAM. If INCREMENTAL is TRUE, read until the end of the STREAM and
2561 * update the data. ID is provided for nicer error messages.
 * The key of each record is the entry name. Its value is split at spaces
 * into a node kind token (file or dir) followed by the node ID; records
 * that do not parse that way yield SVN_ERR_FS_CORRUPT.
 * The array returned in *ENTRIES_P and the dirents it points to are
 * allocated in RESULT_POOL, and the array is sorted by entry name.
 * Use SCRATCH_POOL for temporaries.
2563 static svn_error_t *
2564 read_dir_entries(apr_array_header_t **entries_p,
2565 svn_stream_t *stream,
2566 svn_boolean_t incremental,
2567 const svn_fs_id_t *id,
2568 apr_pool_t *result_pool,
2569 apr_pool_t *scratch_pool)
2571 apr_pool_t *iterpool = svn_pool_create(scratch_pool);
2572 apr_hash_t *hash = NULL;
2573 const char *terminator = SVN_HASH_TERMINATOR;
2574 apr_array_header_t *entries = NULL;
2577 hash = svn_hash__make(scratch_pool);
2579 entries = apr_array_make(result_pool, 16, sizeof(svn_fs_dirent_t *));
2581 /* Read until the terminator (non-incremental) or the end of STREAM
2582 (incremental mode). In the latter mode, we use a temporary HASH
2583 to make updating and removing entries cheaper. */
2586 svn_hash__entry_t entry;
2587 svn_fs_dirent_t *dirent;
2590 svn_pool_clear(iterpool);
2591 SVN_ERR_W(svn_hash__read_entry(&entry, stream, terminator,
2592 incremental, iterpool),
2593 apr_psprintf(iterpool,
2594 _("Directory representation corrupt in '%s'"),
2595 svn_fs_fs__id_unparse(id, scratch_pool)->data));
2597 /* End of directory? */
2598 if (entry.key == NULL)
2600 /* In incremental mode, we skip the terminator and read the
2601 increments following it until the end of the stream. */
2602 if (incremental && terminator)
2608 /* Deleted entry? */
2609 if (entry.val == NULL)
2611 /* We must be in incremental mode */
2613 apr_hash_set(hash, entry.key, entry.keylen, NULL);
2617 /* Add a new directory entry. */
2618 dirent = apr_pcalloc(result_pool, sizeof(*dirent));
2619 dirent->name = apr_pstrmemdup(result_pool, entry.key, entry.keylen);
2621 str = svn_cstring_tokenize(" ", &entry.val);
2623 return svn_error_createf(SVN_ERR_FS_CORRUPT, NULL,
2624 _("Directory entry corrupt in '%s'"),
2625 svn_fs_fs__id_unparse(id, scratch_pool)->data);
2627 if (strcmp(str, SVN_FS_FS__KIND_FILE) == 0)
2629 dirent->kind = svn_node_file;
2631 else if (strcmp(str, SVN_FS_FS__KIND_DIR) == 0)
2633 dirent->kind = svn_node_dir;
2637 return svn_error_createf(SVN_ERR_FS_CORRUPT, NULL,
2638 _("Directory entry corrupt in '%s'"),
2639 svn_fs_fs__id_unparse(id, scratch_pool)->data);
2642 str = svn_cstring_tokenize(" ", &entry.val);
2644 return svn_error_createf(SVN_ERR_FS_CORRUPT, NULL,
2645 _("Directory entry corrupt in '%s'"),
2646 svn_fs_fs__id_unparse(id, scratch_pool)->data);
2648 SVN_ERR(svn_fs_fs__id_parse(&dirent->id, str, result_pool));
2650 /* In incremental mode, update the hash; otherwise, write to the
2651 * final array. Be sure to use hash keys that survive this iteration.
2654 apr_hash_set(hash, dirent->name, entry.keylen, dirent);
2656 APR_ARRAY_PUSH(entries, svn_fs_dirent_t *) = dirent;
2659 /* Convert container to a sorted array. */
2662 apr_hash_index_t *hi;
2664 entries = apr_array_make(result_pool, apr_hash_count(hash),
2665 sizeof(svn_fs_dirent_t *));
2666 for (hi = apr_hash_first(iterpool, hash); hi; hi = apr_hash_next(hi))
2667 APR_ARRAY_PUSH(entries, svn_fs_dirent_t *) = apr_hash_this_val(hi);
2670 if (!sorted(entries))
2671 svn_sort__array(entries, compare_dirents);
2673 svn_pool_destroy(iterpool);
2675 *entries_p = entries;
2676 return SVN_NO_ERROR;
2679 /* For directory NODEREV in FS, return the *FILESIZE of its in-txn
2680 * representation. If the directory representation is committed data,
2681 * set *FILESIZE to SVN_INVALID_FILESIZE. Use SCRATCH_POOL for temporaries.
 * A representation counts as in-txn when NODEREV has a data rep whose
 * txn ID is in use. Its size is then taken from svn_io_stat_dirent2()
 * on the txn node children file of NODEREV.
2683 static svn_error_t *
2684 get_txn_dir_info(svn_filesize_t *filesize,
2686 node_revision_t *noderev,
2687 apr_pool_t *scratch_pool)
2689 if (noderev->data_rep && svn_fs_fs__id_txn_used(&noderev->data_rep->txn_id))
2691 const svn_io_dirent2_t *dirent;
2692 const char *filename;
2694 filename = svn_fs_fs__path_txn_node_children(fs, noderev->id,
2697 SVN_ERR(svn_io_stat_dirent2(&dirent, filename, FALSE, FALSE,
2698 scratch_pool, scratch_pool));
2699 *filesize = dirent->filesize;
2703 *filesize = SVN_INVALID_FILESIZE;
2706 return SVN_NO_ERROR;
2709 /* Fetch the contents of the directory NODEREV in FS into DIR.
2710 DIR->ENTRIES is set to an array of svn_fs_dirent_t pointers that is
2711 allocated in RESULT_POOL: for a mutable (in-txn) representation it is
 parsed incrementally from the txn node children file, for an immutable
 one from the fulltext of the data rep, and without a data rep it is an
 empty array.
 DIR->TXN_FILESIZE is the current size of the txn children file, or
 SVN_INVALID_FILESIZE if that file is not used.
 Use SCRATCH_POOL for temporaries. */
2712 static svn_error_t *
2713 get_dir_contents(svn_fs_fs__dir_data_t *dir,
2715 node_revision_t *noderev,
2716 apr_pool_t *result_pool,
2717 apr_pool_t *scratch_pool)
2719 svn_stream_t *contents;
2721 /* Initialize the result. */
2722 dir->txn_filesize = SVN_INVALID_FILESIZE;
2724 /* Read dir contents - unless there is none in which case we are done. */
2725 if (noderev->data_rep && svn_fs_fs__id_txn_used(&noderev->data_rep->txn_id))
2727 /* Get location & current size of the directory representation. */
2728 const char *filename;
2731 filename = svn_fs_fs__path_txn_node_children(fs, noderev->id,
2734 /* The representation is mutable. Read the old directory
2735 contents from the mutable children file, followed by the
2736 changes we've made in this transaction. */
2737 SVN_ERR(svn_io_file_open(&file, filename, APR_READ | APR_BUFFERED,
2738 APR_OS_DEFAULT, scratch_pool));
2740 /* Obtain txn children file size. */
2741 SVN_ERR(svn_io_file_size_get(&dir->txn_filesize, file, scratch_pool));
2743 contents = svn_stream_from_aprfile2(file, FALSE, scratch_pool);
2744 SVN_ERR(read_dir_entries(&dir->entries, contents, TRUE, noderev->id,
2745 result_pool, scratch_pool));
2746 SVN_ERR(svn_stream_close(contents));
2748 else if (noderev->data_rep)
2750 /* Undeltify content before parsing it. Otherwise, we could only
2751 * parse it byte-by-byte.
2753 apr_size_t len = noderev->data_rep->expanded_size;
2754 svn_stringbuf_t *text;
2756 /* The representation is immutable. Read it normally. */
2757 SVN_ERR(svn_fs_fs__get_contents(&contents, fs, noderev->data_rep,
2758 FALSE, scratch_pool));
2759 SVN_ERR(svn_stringbuf_from_stream(&text, contents, len, scratch_pool));
2760 SVN_ERR(svn_stream_close(contents));
2762 /* de-serialize hash */
2763 contents = svn_stream_from_stringbuf(text, scratch_pool);
2764 SVN_ERR(read_dir_entries(&dir->entries, contents, FALSE, noderev->id,
2765 result_pool, scratch_pool));
2769 dir->entries = apr_array_make(result_pool, 0, sizeof(svn_fs_dirent_t *));
2772 return SVN_NO_ERROR;
2776 /* Return the cache object in FS responsible for storing the directory
2777 * NODEREV plus the corresponding *KEY. If no cache exists, return NULL.
2778 * PAIR_KEY must point to some key struct, which does not need to be
2779 * initialized. We use it to avoid dynamic allocation.
 * Directories without data rep map to the dir_cache of FS. Directories
 * with an in-txn data rep map to the txn_dir_cache, with the unparsed
 * node ID (allocated in POOL) as key. Committed directories map to the
 * dir_cache, with the revision and item index of the data rep stored
 * in PAIR_KEY.
2781 static svn_cache__t *
2782 locate_dir_cache(svn_fs_t *fs,
2784 pair_cache_key_t *pair_key,
2785 node_revision_t *noderev,
2788 fs_fs_data_t *ffd = fs->fsap_data;
2789 if (!noderev->data_rep)
2791 /* no data rep -> empty directory.
2792 A NULL key causes a cache miss. */
2794 return ffd->dir_cache;
2797 if (svn_fs_fs__id_txn_used(&noderev->data_rep->txn_id))
2799 /* data in txns requires the expensive fs_id-based addressing mode */
2800 *key = svn_fs_fs__id_unparse(noderev->id, pool)->data;
2802 return ffd->txn_dir_cache;
2806 /* committed data can use simple rev,item pairs */
2807 pair_key->revision = noderev->data_rep->revision;
2808 pair_key->second = noderev->data_rep->item_index;
2811 return ffd->dir_cache;
/* Set *ENTRIES_P to the array of directory entries of NODEREV in FS.
 * The cache chosen by locate_dir_cache() is consulted first. A cached
 * directory is returned if the txn file size reported by
 * get_txn_dir_info() still equals the size recorded with the cached data.
 * Otherwise, the directory is read by get_dir_contents() with the entries
 * allocated in RESULT_POOL and, if a cache is in use and the estimate of
 * 150 bytes per entry passes svn_cache__is_cachable(), stored in that
 * cache. Use SCRATCH_POOL for temporaries. */
2816 svn_fs_fs__rep_contents_dir(apr_array_header_t **entries_p,
2818 node_revision_t *noderev,
2819 apr_pool_t *result_pool,
2820 apr_pool_t *scratch_pool)
2822 pair_cache_key_t pair_key = { 0 };
2824 svn_fs_fs__dir_data_t *dir;
2826 /* find the cache we may use */
2827 svn_cache__t *cache = locate_dir_cache(fs, &key, &pair_key, noderev,
2831 svn_boolean_t found;
2833 SVN_ERR(svn_cache__get((void **)&dir, &found, cache, key,
2837 /* Verify that the cached dir info is not stale
2838 * (no-op for committed data). */
2839 svn_filesize_t filesize;
2840 SVN_ERR(get_txn_dir_info(&filesize, fs, noderev, scratch_pool));
2842 if (filesize == dir->txn_filesize)
2844 /* Still valid. Done. */
2845 *entries_p = dir->entries;
2846 return SVN_NO_ERROR;
2851 /* Read in the directory contents. */
2852 dir = apr_pcalloc(scratch_pool, sizeof(*dir));
2853 SVN_ERR(get_dir_contents(dir, fs, noderev, result_pool, scratch_pool));
2854 *entries_p = dir->entries;
2856 /* Update the cache, if we are to use one.
2858 * Don't even attempt to serialize very large directories; it would cause
2859 * an unnecessary memory allocation peak. 150 bytes/entry is about right.
2861 if (cache && svn_cache__is_cachable(cache, 150 * dir->entries->nelts))
2862 SVN_ERR(svn_cache__set(cache, key, dir, scratch_pool));
2864 return SVN_NO_ERROR;
// Return the entry called NAME from ENTRIES, or NULL if the lookup finds
// no such entry. ENTRIES is searched by svn_sort__array_lookup() with
// compare_dirent_name() as comparator, which presumably requires the
// array to be sorted by name (as read_dir_entries() produces it).
// HINT is passed through to that lookup unchanged.
2868 svn_fs_fs__find_dir_entry(apr_array_header_t *entries,
2872 svn_fs_dirent_t **result
2873 = svn_sort__array_lookup(entries, name, hint, compare_dirent_name);
2874 return result ? *result : NULL;
// Set *DIRENT to the entry called NAME in the directory NODEREV of FS.
// The entry is first requested from the directory cache through
// svn_fs_fs__extract_dir_entry(), with the current txn file size passed
// along in the baton. If it was not found there, or the baton reports
// the cached data as out of date, the whole directory is read by
// get_dir_contents(), cached if the size estimate allows it, and the
// entry found in it is copied into RESULT_POOL. On this path *DIRENT
// becomes NULL when the directory has no entry called NAME.
// Use SCRATCH_POOL for temporaries.
2878 svn_fs_fs__rep_contents_dir_entry(svn_fs_dirent_t **dirent,
2880 node_revision_t *noderev,
2882 apr_pool_t *result_pool,
2883 apr_pool_t *scratch_pool)
2885 extract_dir_entry_baton_t baton;
2886 svn_boolean_t found = FALSE;
2888 /* find the cache we may use */
2889 pair_cache_key_t pair_key = { 0 };
2891 svn_cache__t *cache = locate_dir_cache(fs, &key, &pair_key, noderev,
2895 svn_filesize_t filesize;
2896 SVN_ERR(get_txn_dir_info(&filesize, fs, noderev, scratch_pool));
2899 baton.txn_filesize = filesize;
2901 SVN_ERR(svn_cache__get_partial((void **)dirent,
2905 svn_fs_fs__extract_dir_entry,
2910 /* fetch data from disk if we did not find it in the cache */
2911 if (! found || baton.out_of_date)
2913 svn_fs_dirent_t *entry;
2914 svn_fs_dirent_t *entry_copy = NULL;
2915 svn_fs_fs__dir_data_t dir;
2917 /* Read in the directory contents. */
2918 SVN_ERR(get_dir_contents(&dir, fs, noderev, scratch_pool,
2921 /* Update the cache, if we are to use one.
2923 * Don't even attempt to serialize very large directories; it would
2924 * cause an unnecessary memory allocation peak. 150 bytes / entry is
2926 if (cache && svn_cache__is_cachable(cache, 150 * dir.entries->nelts))
2927 SVN_ERR(svn_cache__set(cache, key, &dir, scratch_pool));
2929 /* find desired entry and return a copy in RESULT_POOL, if found */
2930 entry = svn_fs_fs__find_dir_entry(dir.entries, name, NULL);
2933 entry_copy = apr_palloc(result_pool, sizeof(*entry_copy));
2934 entry_copy->name = apr_pstrdup(result_pool, entry->name);
2935 entry_copy->id = svn_fs_fs__id_copy(entry->id, result_pool);
2936 entry_copy->kind = entry->kind;
2939 *dirent = entry_copy;
2942 return SVN_NO_ERROR;
/* Set *PROPLIST_P to the property list of NODEREV in FS, using POOL for
 * all allocations.
 * If the prop rep belongs to a transaction, the properties are parsed
 * from the txn node props file. For any other prop rep, the properties
 * cache is queried first (if it exists and the rep has a valid revision)
 * under the key (revision, item index). Without a usable cached value
 * the hash is parsed from the contents of the prop rep and then stored
 * in that cache.
 * A node without prop rep gets an empty hash.
 * A failure to parse the hash is wrapped into a malformed property list
 * error that names the node-revision (and, for txn data, the file). */
2946 svn_fs_fs__get_proplist(apr_hash_t **proplist_p,
2948 node_revision_t *noderev,
2951 apr_hash_t *proplist;
2952 svn_stream_t *stream;
2954 if (noderev->prop_rep && svn_fs_fs__id_txn_used(&noderev->prop_rep->txn_id))
2957 const char *filename
2958 = svn_fs_fs__path_txn_node_props(fs, noderev->id, pool);
2959 proplist = apr_hash_make(pool);
2961 SVN_ERR(svn_stream_open_readonly(&stream, filename, pool, pool));
2962 err = svn_hash_read2(proplist, stream, SVN_HASH_TERMINATOR, pool);
2965 svn_string_t *id_str = svn_fs_fs__id_unparse(noderev->id, pool);
2967 err = svn_error_compose_create(err, svn_stream_close(stream));
2968 return svn_error_quick_wrapf(err,
2969 _("malformed property list for node-revision '%s' in '%s'"),
2970 id_str->data, filename);
2972 SVN_ERR(svn_stream_close(stream));
2974 else if (noderev->prop_rep)
2977 fs_fs_data_t *ffd = fs->fsap_data;
2978 representation_t *rep = noderev->prop_rep;
2979 pair_cache_key_t key = { 0 };
2981 key.revision = rep->revision;
2982 key.second = rep->item_index;
2983 if (ffd->properties_cache && SVN_IS_VALID_REVNUM(rep->revision))
2985 svn_boolean_t is_cached;
2986 SVN_ERR(svn_cache__get((void **) proplist_p, &is_cached,
2987 ffd->properties_cache, &key, pool));
2989 return SVN_NO_ERROR;
2992 proplist = apr_hash_make(pool);
2993 SVN_ERR(svn_fs_fs__get_contents(&stream, fs, noderev->prop_rep, FALSE,
2995 err = svn_hash_read2(proplist, stream, SVN_HASH_TERMINATOR, pool);
2998 svn_string_t *id_str = svn_fs_fs__id_unparse(noderev->id, pool);
3000 err = svn_error_compose_create(err, svn_stream_close(stream));
3001 return svn_error_quick_wrapf(err,
3002 _("malformed property list for node-revision '%s'"),
3005 SVN_ERR(svn_stream_close(stream));
3007 if (ffd->properties_cache && SVN_IS_VALID_REVNUM(rep->revision))
3008 SVN_ERR(svn_cache__set(ffd->properties_cache, &key, proplist, pool));
3012 /* return an empty prop list if the node doesn't have any props */
3013 proplist = apr_hash_make(pool);
3016 *proplist_p = proplist;
3018 return SVN_NO_ERROR;
/* Create a changes context for revision REV, to be returned through
 * *CONTEXT. The object is allocated zero-initialized in RESULT_POOL,
 * which is also recorded as its REV_FILE_POOL; svn_fs_fs__get_changes()
 * later opens the revision file of the context in that pool. */
3022 svn_fs_fs__create_changes_context(svn_fs_fs__changes_context_t **context,
3025 apr_pool_t *result_pool)
3027 svn_fs_fs__changes_context_t *result = apr_pcalloc(result_pool,
3030 result->revision = rev;
3031 result->rev_file_pool = result_pool;
3034 return SVN_NO_ERROR;
/* Set *CHANGES to the next block of changed path entries for CONTEXT,
 * i.e. the block that starts at entry CONTEXT->NEXT of the changes list
 * of CONTEXT->REVISION.
 * The changes cache is tried first, keyed by (revision, next entry).
 * On a miss the revision file is opened if necessary, block_read() may
 * be used to prefetch into the cache, and as a last resort up to
 * SVN_FS_FS__CHANGES_BLOCK_SIZE entries are parsed from the file at the
 * offset of the changes list plus CONTEXT->NEXT_OFFSET and then cached.
 * Afterwards CONTEXT->NEXT, CONTEXT->NEXT_OFFSET and CONTEXT->EOL are
 * updated, and the revision file is closed once EOL has been reached.
 * Allocate the result in RESULT_POOL and temporaries in SCRATCH_POOL. */
3038 svn_fs_fs__get_changes(apr_array_header_t **changes,
3039 svn_fs_fs__changes_context_t *context,
3040 apr_pool_t *result_pool,
3041 apr_pool_t *scratch_pool)
3043 apr_off_t item_index = SVN_FS_FS__ITEM_INDEX_CHANGES;
3044 svn_boolean_t found;
3045 fs_fs_data_t *ffd = context->fs->fsap_data;
3046 svn_fs_fs__changes_list_t *changes_list;
3048 pair_cache_key_t key;
3049 key.revision = context->revision;
3050 key.second = context->next;
3052 /* try cache lookup first */
3054 if (ffd->changes_cache)
3056 SVN_ERR(svn_cache__get((void **)&changes_list, &found,
3057 ffd->changes_cache, &key, result_pool));
3066 /* read changes from revision file */
3068 if (!context->revision_file)
3070 SVN_ERR(svn_fs_fs__ensure_revision_exists(context->revision,
3073 SVN_ERR(svn_fs_fs__open_pack_or_rev_file(&context->revision_file,
3076 context->rev_file_pool,
3080 if (use_block_read(context->fs))
3082 /* 'block-read' will probably populate the cache with the data
3083 * that we want. However, we won't want to force it to process
3084 * very large change lists as part of this prefetching mechanism.
3085 * Those would be better handled by the iterative code below. */
3086 SVN_ERR(block_read(NULL, context->fs,
3087 context->revision, SVN_FS_FS__ITEM_INDEX_CHANGES,
3088 context->revision_file, scratch_pool,
3091 /* This may succeed now ... */
3092 SVN_ERR(svn_cache__get((void **)&changes_list, &found,
3093 ffd->changes_cache, &key, result_pool));
3096 /* If we still have no data, read it here. */
3099 apr_off_t changes_offset;
3101 /* Addressing is very different for old formats
3102 * (needs to read the revision trailer). */
3103 if (svn_fs_fs__use_log_addressing(context->fs))
3105 SVN_ERR(svn_fs_fs__item_offset(&changes_offset, context->fs,
3106 context->revision_file,
3107 context->revision, NULL,
3108 SVN_FS_FS__ITEM_INDEX_CHANGES,
3113 SVN_ERR(get_root_changes_offset(NULL, &changes_offset,
3114 context->revision_file,
3115 context->fs, context->revision,
3118 /* This variable will be used for debug logging only. */
3119 item_index = changes_offset;
3122 /* Actual reading and parsing are the same, though. */
3123 SVN_ERR(aligned_seek(context->fs, context->revision_file->file,
3124 NULL, changes_offset + context->next_offset,
3127 SVN_ERR(svn_fs_fs__read_changes(changes,
3128 context->revision_file->stream,
3129 SVN_FS_FS__CHANGES_BLOCK_SIZE,
3130 result_pool, scratch_pool));
3132 /* Construct the info object for the entries block we just read. */
3133 changes_list = apr_pcalloc(scratch_pool, sizeof(*changes_list));
3134 SVN_ERR(svn_io_file_get_offset(&changes_list->end_offset,
3135 context->revision_file->file,
3137 changes_list->end_offset -= changes_offset;
3138 changes_list->start_offset = context->next_offset;
3139 changes_list->count = (*changes)->nelts;
3140 changes_list->changes = (change_t **)(*changes)->elts;
3141 changes_list->eol = changes_list->count < SVN_FS_FS__CHANGES_BLOCK_SIZE;
3143 /* cache for future reference */
3145 if (ffd->changes_cache)
3146 SVN_ERR(svn_cache__set(ffd->changes_cache, &key, changes_list,
3153 /* Return the block as a "proper" APR array. */
3154 (*changes) = apr_array_make(result_pool, 0, sizeof(void *));
3155 (*changes)->elts = (char *)changes_list->changes;
3156 (*changes)->nelts = changes_list->count;
3157 (*changes)->nalloc = changes_list->count;
3160 /* Where to look next - if there is more data. */
3161 context->next += (*changes)->nelts;
3162 context->next_offset = changes_list->end_offset;
3163 context->eol = changes_list->eol;
3165 /* Close the revision file after we read all data. */
3166 if (context->eol && context->revision_file)
3168 SVN_ERR(svn_fs_fs__close_revision_file(context->revision_file));
3169 context->revision_file = NULL;
3172 SVN_ERR(dbg_log_access(context->fs, context->revision, item_index, *changes,
3173 SVN_FS_FS__ITEM_TYPE_CHANGES, scratch_pool));
3175 return SVN_NO_ERROR;
3178 /* Initialize the representation read state RS for the given REP_HEADER and
3179 * p2l index ENTRY. If not NULL, assign FILE and STREAM to RS.
3180 * Use RESULT_POOL for allocations.
 * ENTRY must describe a file / dir representation or property item;
 * this is asserted. RS->START is the offset of ENTRY plus the header
 * size. RS->CURRENT starts at 0 for plain reps and at 4 otherwise
 * (presumably to skip the svndiff marker -- confirm). RS->SIZE is the
 * size of ENTRY minus the header size and 7 more bytes.
 * NOTE(review): the 7 bytes look like the length of the rep trailer --
 * confirm against the rep format.
 * The window caches are taken over from the FS data as they are.
3182 static svn_error_t *
3183 init_rep_state(rep_state_t *rs,
3184 svn_fs_fs__rep_header_t *rep_header,
3186 svn_fs_fs__revision_file_t *file,
3187 svn_fs_fs__p2l_entry_t* entry,
3188 apr_pool_t *result_pool)
3190 fs_fs_data_t *ffd = fs->fsap_data;
3191 shared_file_t *shared_file = apr_pcalloc(result_pool, sizeof(*shared_file));
3193 /* this function does not apply to representation containers */
3194 SVN_ERR_ASSERT(entry->type >= SVN_FS_FS__ITEM_TYPE_FILE_REP
3195 && entry->type <= SVN_FS_FS__ITEM_TYPE_DIR_PROPS);
3197 shared_file->rfile = file;
3198 shared_file->fs = fs;
3199 shared_file->revision = entry->item.revision;
3200 shared_file->pool = result_pool;
3202 rs->sfile = shared_file;
3203 rs->revision = entry->item.revision;
3204 rs->item_index = entry->item.number;
3205 rs->header_size = rep_header->header_size;
3206 rs->start = entry->offset + rs->header_size;
3207 rs->current = rep_header->type == svn_fs_fs__rep_plain ? 0 : 4;
3208 rs->size = entry->size - rep_header->header_size - 7;
3210 rs->chunk_index = 0;
3211 rs->raw_window_cache = ffd->raw_window_cache;
3212 rs->window_cache = ffd->txdelta_window_cache;
3213 rs->combined_cache = ffd->combined_window_cache;
3215 return SVN_NO_ERROR;
3218 /* Implement svn_cache__partial_getter_func_t for txdelta windows.
3219 * Instead of the whole window data, return only END_OFFSET member.
 * DATA is interpreted as a svn_fs_fs__txdelta_cached_window_t. The
 * offset is written into the apr_off_t that OUT points to, so callers
 * pass the address of an apr_off_t variable (see cache_windows()).
 * DATA_LEN and RESULT_POOL are not used.
3221 static svn_error_t *
3222 get_txdelta_window_end(void **out,
3224 apr_size_t data_len,
3226 apr_pool_t *result_pool)
3228 const svn_fs_fs__txdelta_cached_window_t *window
3229 = (const svn_fs_fs__txdelta_cached_window_t *)data;
3230 *(apr_off_t*)out = window->end_offset;
3232 return SVN_NO_ERROR;
3235 /* Implement svn_cache__partial_getter_func_t for raw windows.
3236 * Instead of the whole window data, return only END_OFFSET member.
 * DATA is interpreted as a svn_fs_fs__raw_cached_window_t. The offset
 * is written into the apr_off_t that OUT points to, so callers pass the
 * address of an apr_off_t variable (see cache_windows()).
 * DATA_LEN and RESULT_POOL are not used.
3238 static svn_error_t *
3239 get_raw_window_end(void **out,
3241 apr_size_t data_len,
3243 apr_pool_t *result_pool)
3245 const svn_fs_fs__raw_cached_window_t *window
3246 = (const svn_fs_fs__raw_cached_window_t *)data;
3247 *(apr_off_t*)out = window->end_offset;
3249 return SVN_NO_ERROR;
3252 /* Walk through all windows in the representation addressed by RS in FS
3253 * (excluding the delta bases) and put those not already cached into the
3254 * window caches. If MAX_OFFSET is not -1, don't read windows that start
3255 * at or beyond that offset. Use POOL for temporary allocations.
3257 * This function requires RS->RAW_WINDOW_CACHE and RS->WINDOW_CACHE to
 * be non-NULL (block_read_windows() checks the FS caches before calling).
 *
 * For windows found in one of the two caches, only their end offset is
 * fetched to advance RS->CURRENT. All other windows are read from the
 * revision file and stored as raw windows in RS->RAW_WINDOW_CACHE.
 * Return SVN_ERR_FS_CORRUPT if RS->CURRENT ends up beyond RS->SIZE.
3260 static svn_error_t *
3261 cache_windows(svn_fs_t *fs,
3263 apr_off_t max_offset,
3266 apr_pool_t *iterpool = svn_pool_create(pool);
3268 SVN_ERR(auto_read_diff_version(rs, iterpool));
3270 while (rs->current < rs->size)
3272 apr_off_t end_offset;
3273 svn_boolean_t found = FALSE;
3274 window_cache_key_t key = { 0 };
3276 svn_pool_clear(iterpool);
3278 if (max_offset != -1 && rs->start + rs->current >= max_offset)
3280 svn_pool_destroy(iterpool);
3281 return SVN_NO_ERROR;
3284 /* We don't need to read the data again if it is already in cache.
3285 * It might be cached as either raw or parsed window.
3287 SVN_ERR(svn_cache__get_partial((void **) &end_offset, &found,
3288 rs->raw_window_cache,
3289 get_window_key(&key, rs),
3290 get_raw_window_end, NULL,
3293 SVN_ERR(svn_cache__get_partial((void **) &end_offset, &found,
3294 rs->window_cache, &key,
3295 get_txdelta_window_end, NULL,
3300 rs->current = end_offset;
3304 /* Read, decode and cache the window. */
3305 svn_fs_fs__raw_cached_window_t window;
3306 apr_off_t start_offset = rs->start + rs->current;
3307 apr_size_t window_len;
3310 /* navigate to the current window */
3311 SVN_ERR(rs_aligned_seek(rs, NULL, start_offset, iterpool));
3312 SVN_ERR(svn_txdelta__read_raw_window_len(&window_len,
3313 rs->sfile->rfile->stream,
3316 /* Read the raw window. */
3317 buf = apr_palloc(iterpool, window_len + 1);
3318 SVN_ERR(rs_aligned_seek(rs, NULL, start_offset, iterpool));
3319 SVN_ERR(svn_io_file_read_full2(rs->sfile->rfile->file, buf,
3320 window_len, NULL, NULL, iterpool));
3321 buf[window_len] = 0;
3323 /* update relative offset in representation */
3324 rs->current += window_len;
3326 /* Construct the cachable raw window object. */
3327 window.end_offset = rs->current;
3328 window.window.len = window_len;
3329 window.window.data = buf;
3330 window.ver = rs->ver;
3332 /* cache the window now */
3333 SVN_ERR(svn_cache__set(rs->raw_window_cache, &key, &window,
3337 if (rs->current > rs->size)
3338 return svn_error_create(SVN_ERR_FS_CORRUPT, NULL,
3339 _("Reading one svndiff window read beyond "
3340 "the end of the representation"));
3345 svn_pool_destroy(iterpool);
3346 return SVN_NO_ERROR;
3349 /* Read all txdelta / plain windows following REP_HEADER in FS as described
3350 * by ENTRY. Read the data from the already open REV_FILE.
3351 * If MAX_OFFSET is not -1, don't read windows that start
3352 * at or beyond that offset. Use SCRATCH_POOL for temporary allocations.
3353 * If caching is not enabled, this is a no-op.
 * Caching counts as enabled when FS has a combined window cache (for
 * plain reps) or both a txdelta and a raw window cache (for all others).
 * A plain rep that is not cached yet is read in one piece into a buffer
 * allocated in RESULT_POOL and stored by set_cached_combined_window().
 * All other reps are handed to cache_windows().
3355 static svn_error_t *
3356 block_read_windows(svn_fs_fs__rep_header_t *rep_header,
3358 svn_fs_fs__revision_file_t *rev_file,
3359 svn_fs_fs__p2l_entry_t* entry,
3360 apr_off_t max_offset,
3361 apr_pool_t *result_pool,
3362 apr_pool_t *scratch_pool)
3364 fs_fs_data_t *ffd = fs->fsap_data;
3365 rep_state_t rs = { 0 };
3367 window_cache_key_t key = { 0 };
3369 if ( (rep_header->type != svn_fs_fs__rep_plain
3370 && (!ffd->txdelta_window_cache || !ffd->raw_window_cache))
3371 || (rep_header->type == svn_fs_fs__rep_plain
3372 && !ffd->combined_window_cache))
3373 return SVN_NO_ERROR;
3375 SVN_ERR(init_rep_state(&rs, rep_header, fs, rev_file, entry,
3378 /* RS->FILE may be shared between RS instances -> make sure we point
3379 * to the right data. */
3380 offset = rs.start + rs.current;
3381 if (rep_header->type == svn_fs_fs__rep_plain)
3383 svn_stringbuf_t *plaintext;
3384 svn_boolean_t is_cached;
3386 /* already in cache? */
3387 SVN_ERR(svn_cache__has_key(&is_cached, rs.combined_cache,
3388 get_window_key(&key, &rs),
3391 return SVN_NO_ERROR;
3393 /* for larger reps, the header may have crossed a block boundary.
3394 * make sure we still read blocks properly aligned, i.e. don't use
3395 * plain seek here. */
3396 SVN_ERR(aligned_seek(fs, rev_file->file, NULL, offset, scratch_pool));
3398 plaintext = svn_stringbuf_create_ensure(rs.size, result_pool);
3399 SVN_ERR(svn_io_file_read_full2(rev_file->file, plaintext->data,
3400 rs.size, &plaintext->len, NULL,
3402 plaintext->data[plaintext->len] = 0;
3403 rs.current += rs.size;
3405 SVN_ERR(set_cached_combined_window(plaintext, &rs, scratch_pool));
3409 SVN_ERR(cache_windows(fs, &rs, max_offset, scratch_pool));
3412 return SVN_NO_ERROR;
3415 /* Try to get the representation header identified by KEY from FS's cache.
3416 * If it has not been cached, read it from the current position in STREAM
3417 * and put it into the cache (if caching has been enabled for rep headers).
3418 * Return the result in *REP_HEADER. Use RESULT_POOL and SCRATCH_POOL for
 * allocations; a header parsed from STREAM is allocated in RESULT_POOL.
 * STREAM is only read on a cache miss or when there is no rep header
 * cache at all.
3420 static svn_error_t *
3421 read_rep_header(svn_fs_fs__rep_header_t **rep_header,
3423 svn_stream_t *stream,
3424 pair_cache_key_t *key,
3425 apr_pool_t *result_pool,
3426 apr_pool_t *scratch_pool)
3428 fs_fs_data_t *ffd = fs->fsap_data;
3429 svn_boolean_t is_cached = FALSE;
3431 if (ffd->rep_header_cache)
3433 SVN_ERR(svn_cache__get((void**)rep_header, &is_cached,
3434 ffd->rep_header_cache, key,
3437 return SVN_NO_ERROR;
3440 SVN_ERR(svn_fs_fs__read_rep_header(rep_header, stream, result_pool,
3443 if (ffd->rep_header_cache)
3444 SVN_ERR(svn_cache__set(ffd->rep_header_cache, key, *rep_header,
3447 return SVN_NO_ERROR;
3450 /* Fetch the representation data (header, txdelta / plain windows)
3451 * addressed by ENTRY->ITEM in FS and cache it if caches are enabled.
3452 * Read the data from REV_FILE. If MAX_OFFSET is not -1, don't read
3453 * windows that start at or beyond that offset.
3454 * Use SCRATCH_POOL for temporary allocations.
 * The header is obtained by read_rep_header() under the key made of the
 * revision and number of ENTRY->ITEM; the windows are then processed by
 * block_read_windows(). Nothing is returned to the caller but errors.
 * NOTE(review): the revision is cast to apr_int32_t before it is stored
 * in the key -- confirm that this matches the key type used elsewhere
 * for the rep header cache.
3456 static svn_error_t *
3457 block_read_contents(svn_fs_t *fs,
3458 svn_fs_fs__revision_file_t *rev_file,
3459 svn_fs_fs__p2l_entry_t* entry,
3460 apr_off_t max_offset,
3461 apr_pool_t *scratch_pool)
3463 pair_cache_key_t header_key = { 0 };
3464 svn_fs_fs__rep_header_t *rep_header;
3466 header_key.revision = (apr_int32_t)entry->item.revision;
3467 header_key.second = entry->item.number;
3469 SVN_ERR(read_rep_header(&rep_header, fs, rev_file->stream, &header_key,
3470 scratch_pool, scratch_pool));
3471 SVN_ERR(block_read_windows(rep_header, fs, rev_file, entry, max_offset,
3472 scratch_pool, scratch_pool));
3474 return SVN_NO_ERROR;
3477 /* For the given REV_FILE in FS, in *STREAM return a stream covering the
3478 * item specified by ENTRY. Also, verify the item's content by low-level
3479 * checksum. Allocate the result in POOL.
3481 static svn_error_t *
3482 read_item(svn_stream_t **stream,
3484 svn_fs_fs__revision_file_t *rev_file,
3485 svn_fs_fs__p2l_entry_t* entry,
3488 apr_uint32_t digest;
3489 svn_checksum_t *expected, *actual;
3490 apr_uint32_t plain_digest;
3492 /* Read item into string buffer. */
3493 svn_stringbuf_t *text = svn_stringbuf_create_ensure(entry->size, pool);
3494 text->len = entry->size;
3495 text->data[text->len] = 0;
3496 SVN_ERR(svn_io_file_read_full2(rev_file->file, text->data, text->len,
3499 /* Return (construct, calculate) stream and checksum. */
3500 *stream = svn_stream_from_stringbuf(text, pool);
3501 digest = svn__fnv1a_32x4(text->data, text->len);
3503 /* Checksums will match most of the time. */
3504 if (entry->fnv1_checksum == digest)
3505 return SVN_NO_ERROR;
3507 /* Construct proper checksum objects from their digests to allow for
3508 * nice error messages. */
3509 plain_digest = htonl(entry->fnv1_checksum);
3510 expected = svn_checksum__from_digest_fnv1a_32x4(
3511 (const unsigned char *)&plain_digest, pool);
3512 plain_digest = htonl(digest);
3513 actual = svn_checksum__from_digest_fnv1a_32x4(
3514 (const unsigned char *)&plain_digest, pool);
3516 /* Construct the full error message with all the info we have. */
3517 return svn_checksum_mismatch_err(expected, actual, pool,
3518 _("Low-level checksum mismatch while reading\n"
3519 "%s bytes of meta data at offset %s "
3520 "for item %s in revision %ld"),
3521 apr_off_t_toa(pool, entry->size),
3522 apr_off_t_toa(pool, entry->offset),
3523 apr_psprintf(pool, "%" APR_UINT64_T_FMT, entry->item.number),
3524 entry->item.revision);
3527 /* If not already cached, read the changed paths list addressed by ENTRY in
3528 * FS and cache it if it has no more than SVN_FS_FS__CHANGES_BLOCK_SIZE
3529 * entries and caching is enabled. Read the data from REV_FILE.
3530 * Allocate temporaries in SCRATCH_POOL.
3532 static svn_error_t *
3533 block_read_changes(svn_fs_t *fs,
3534 svn_fs_fs__revision_file_t *rev_file,
3535 svn_fs_fs__p2l_entry_t *entry,
3536 apr_pool_t *scratch_pool)
3538 fs_fs_data_t *ffd = fs->fsap_data;
3539 svn_stream_t *stream;
3540 apr_array_header_t *changes;
3542 pair_cache_key_t key;
3543 key.revision = entry->item.revision;
3546 if (!ffd->changes_cache)
3547 return SVN_NO_ERROR;
3549 /* already in cache? */
3550 if (ffd->changes_cache)
3552 svn_boolean_t is_cached;
3553 SVN_ERR(svn_cache__has_key(&is_cached, ffd->changes_cache, &key,
3556 return SVN_NO_ERROR;
3559 SVN_ERR(read_item(&stream, fs, rev_file, entry, scratch_pool));
3561 /* Read changes from revision file. But read just past the first block to
3562 enable us to determine whether the first block already hit the EOL.
3564 Note: A 100 entries block is already > 10kB on disk. With a 4kB default
3565 disk block size, this function won't even be called for larger
3566 changed paths lists. */
3567 SVN_ERR(svn_fs_fs__read_changes(&changes, stream,
3568 SVN_FS_FS__CHANGES_BLOCK_SIZE + 1,
3569 scratch_pool, scratch_pool));
3571 /* We can only cache small lists that don't need to be split up.
3572 For longer lists, we miss the file offset info for the respective */
3573 if (changes->nelts <= SVN_FS_FS__CHANGES_BLOCK_SIZE)
3575 svn_fs_fs__changes_list_t changes_list;
3577 /* Construct the info object for the entries block we just read. */
3578 changes_list.end_offset = entry->size;
3579 changes_list.start_offset = 0;
3580 changes_list.count = changes->nelts;
3581 changes_list.changes = (change_t **)changes->elts;
3582 changes_list.eol = TRUE;
3584 SVN_ERR(svn_cache__set(ffd->changes_cache, &key, &changes_list,
3588 return SVN_NO_ERROR;
3591 /* If not already cached or if MUST_READ is set, read the node revision
3592 * addressed by ENTRY in FS and retúrn it in *NODEREV_P. Cache the
3593 * result if caching is enabled. Read the data from REV_FILE. Allocate
3594 * *NODEREV_P in RESUSLT_POOL and allocate temporaries in SCRATCH_POOL.
3596 static svn_error_t *
3597 block_read_noderev(node_revision_t **noderev_p,
3599 svn_fs_fs__revision_file_t *rev_file,
3600 svn_fs_fs__p2l_entry_t *entry,
3601 svn_boolean_t must_read,
3602 apr_pool_t *result_pool,
3603 apr_pool_t *scratch_pool)
3605 fs_fs_data_t *ffd = fs->fsap_data;
3606 svn_stream_t *stream;
3608 pair_cache_key_t key = { 0 };
3609 key.revision = entry->item.revision;
3610 key.second = entry->item.number;
3612 if (!must_read && !ffd->node_revision_cache)
3613 return SVN_NO_ERROR;
3615 /* already in cache? */
3616 if (!must_read && ffd->node_revision_cache)
3618 svn_boolean_t is_cached;
3619 SVN_ERR(svn_cache__has_key(&is_cached, ffd->node_revision_cache,
3620 &key, scratch_pool));
3622 return SVN_NO_ERROR;
3625 SVN_ERR(read_item(&stream, fs, rev_file, entry, scratch_pool));
3627 /* read node rev from revision file */
3628 SVN_ERR(svn_fs_fs__read_noderev(noderev_p, stream,
3629 result_pool, scratch_pool));
3630 SVN_ERR(fixup_node_revision(fs, *noderev_p, scratch_pool));
3632 if (ffd->node_revision_cache)
3633 SVN_ERR(svn_cache__set(ffd->node_revision_cache, &key, *noderev_p,
3636 return SVN_NO_ERROR;
3639 /* Read the whole (e.g. 64kB) block containing ITEM_INDEX of REVISION in FS
3640 * and put all data into cache. If necessary and depending on heuristics,
3641 * neighboring blocks may also get read. The data is being read from
3642 * already open REVISION_FILE, which must be the correct rev / pack file
3645 * For noderevs and changed path lists, the item fetched can be allocated
3646 * in RESULT_POOL and returned in *RESULT. Otherwise, RESULT must be NULL.
 *
 * NOTE(review): of the item types handled below, only the noderev branch
 * passes the address of the result item to its reader; the reader for
 * changed path lists takes no output argument.  The sentence above may
 * therefore be outdated with respect to changed path lists - confirm.
 *
 * REVISION must be a valid revision number (this is asserted below);
 * transaction proto-rev files are not supported.
 *
 * The item that counts as the wanted one is the index entry whose
 * offset, revision and item number all match the requested item.
 *
 * Temporary data goes into SCRATCH_POOL and into a sub-pool of it that
 * is cleared for every index entry visited and destroyed right before
 * the regular return at the end of the function.
3648 static svn_error_t *
3649 block_read(void **result,
3651 svn_revnum_t revision,
3652 apr_uint64_t item_index,
3653 svn_fs_fs__revision_file_t *revision_file,
3654 apr_pool_t *result_pool,
3655 apr_pool_t *scratch_pool)
3657 fs_fs_data_t *ffd = fs->fsap_data;
3658 apr_off_t offset, wanted_offset = 0;
3659 apr_off_t block_start = 0;
3660 apr_array_header_t *entries;
3663 apr_pool_t *iterpool;
3665 /* Block read is an optional feature. If the caller does not want anything
3666 * specific we may not have to read anything. */
3668 return SVN_NO_ERROR;
3670 iterpool = svn_pool_create(scratch_pool);
3672 /* don't try this on transaction protorev files */
3673 SVN_ERR_ASSERT(SVN_IS_VALID_REVNUM(revision));
3675 /* index lookup: find the OFFSET of the item we *must* read plus (in the
3676 * "do-while" block) the list of items in the same block. */
3677 SVN_ERR(svn_fs_fs__item_offset(&wanted_offset, fs, revision_file,
3678 revision, NULL, item_index, iterpool));
3680 offset = wanted_offset;
3684 * Read this block. If the last item crosses the block boundary, read
3685 * the next block but stop there. Because cross-boundary items cause
3686 * blocks to be read twice, this heuristics will limit this effect to
3687 * approx. 50% of blocks, probably less, while providing a sensible
3688 * amount of read-ahead.
3692 /* fetch list of items in the block surrounding OFFSET */
3693 block_start = offset - (offset % ffd->block_size);
3694 SVN_ERR(svn_fs_fs__p2l_index_lookup(&entries, fs, revision_file,
3695 revision, block_start,
3696 ffd->block_size, scratch_pool,
3699 SVN_ERR(aligned_seek(fs, revision_file->file, &block_start, offset,
3702 /* read all items from the block */
3703 for (i = 0; i < entries->nelts; ++i)
3705 svn_boolean_t is_result, is_wanted;
3707 svn_fs_fs__p2l_entry_t* entry;
3709 svn_pool_clear(iterpool);
3711 /* skip empty sections */
3712 entry = &APR_ARRAY_IDX(entries, i, svn_fs_fs__p2l_entry_t);
3713 if (entry->type == SVN_FS_FS__ITEM_TYPE_UNUSED)
3716 /* the item / container we were looking for? */
3717 is_wanted = entry->offset == wanted_offset
3718 && entry->item.revision == revision
3719 && entry->item.number == item_index;
3720 is_result = result && is_wanted;
3722 /* select the pool that we want the item to be allocated in */
3723 pool = is_result ? result_pool : iterpool;
3725 /* handle all items that start within this block and are relatively
3726 * small (i.e. < block size). Always read the item we need to return.
3728 if (is_result || ( entry->offset >= block_start
3729 && entry->size < ffd->block_size))
3732 SVN_ERR(svn_io_file_seek(revision_file->file, APR_SET,
3733 &entry->offset, iterpool));
3734 switch (entry->type)
3736 case SVN_FS_FS__ITEM_TYPE_FILE_REP:
3737 case SVN_FS_FS__ITEM_TYPE_DIR_REP:
3738 case SVN_FS_FS__ITEM_TYPE_FILE_PROPS:
3739 case SVN_FS_FS__ITEM_TYPE_DIR_PROPS:
3740 SVN_ERR(block_read_contents(fs, revision_file, entry,
3743 : block_start + ffd->block_size,
3747 case SVN_FS_FS__ITEM_TYPE_NODEREV:
3748 if (ffd->node_revision_cache || is_result)
3749 SVN_ERR(block_read_noderev((node_revision_t **)&item,
3751 entry, is_result, pool,
3755 case SVN_FS_FS__ITEM_TYPE_CHANGES:
3756 SVN_ERR(block_read_changes(fs, revision_file,
3767 /* if we crossed a block boundary, read the remainder of
3768 * the last block as well */
3769 offset = entry->offset + entry->size;
3770 if (offset - block_start > ffd->block_size)
3776 while(run_count++ == 1); /* can only be true once and only if a block
3777 * boundary got crossed */
3779 /* if the caller requested a result, we must have provided one by now */
3780 assert(!result || *result);
3781 svn_pool_destroy(iterpool);
3783 return SVN_NO_ERROR;