1 /* stats.c -- implements the svn_fs_fs__get_stats private API.
3 * ====================================================================
4 * Licensed to the Apache Software Foundation (ASF) under one
5 * or more contributor license agreements. See the NOTICE file
6 * distributed with this work for additional information
7 * regarding copyright ownership. The ASF licenses this file
8 * to you under the Apache License, Version 2.0 (the
9 * "License"); you may not use this file except in compliance
10 * with the License. You may obtain a copy of the License at
12 * http://www.apache.org/licenses/LICENSE-2.0
14 * Unless required by applicable law or agreed to in writing,
15 * software distributed under the License is distributed on an
16 * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
17 * KIND, either express or implied. See the License for the
18 * specific language governing permissions and limitations
20 * ====================================================================
23 #include "svn_dirent_uri.h"
25 #include "svn_pools.h"
26 #include "svn_sorts.h"
28 #include "private/svn_cache.h"
29 #include "private/svn_sorts_private.h"
30 #include "private/svn_string_private.h"
31 #include "private/svn_fs_fs_private.h"
38 #include "cached_data.h"
39 #include "low_level.h"
41 #include "../libsvn_fs/fs-loader.h"
43 #include "svn_private_config.h"
45 /* We group representations into 2x2 different kinds plus one default:
46 * [dir / file] x [text / prop]. The assignment is done by the first node
47 * that references the respective representation.
49 typedef enum rep_kind_t
51 /* The representation is not used _directly_, i.e. not referenced by any
52 * noderev. However, some other representation may use it as delta base.
53 * Null value. Should not occur in real-word repositories. */
56 /* a properties on directory rep */
59 /* a properties on file rep */
69 /* A representation fragment.
71 typedef struct rep_stats_t
73 /* absolute offset in the file */
76 /* item length in bytes */
79 /* item length after de-deltification */
80 apr_uint64_t expanded_size;
82 /* revision that contains this representation
83 * (may be referenced by other revisions, though) */
84 svn_revnum_t revision;
86 /* number of nodes that reference this representation */
87 apr_uint32_t ref_count;
89 /* length of the PLAIN / DELTA line in the source file in bytes */
90 apr_uint16_t header_size;
92 /* classification of the representation. values of rep_kind_t */
97 /* Represents a single revision.
98 * There will be only one instance per revision. */
99 typedef struct revision_info_t
101 /* number of this revision */
102 svn_revnum_t revision;
104 /* pack file offset (manifest value), 0 for non-packed files */
107 /* length of the changes list on bytes */
108 apr_uint64_t changes_len;
110 /* offset of the changes list relative to OFFSET */
111 apr_uint64_t change_count;
113 /* first offset behind the revision data in the pack file (file length
114 * for non-packed revs) */
117 /* number of directory noderevs in this revision */
118 apr_uint64_t dir_noderev_count;
120 /* number of file noderevs in this revision */
121 apr_uint64_t file_noderev_count;
123 /* total size of directory noderevs (i.e. the structs - not the rep) */
124 apr_uint64_t dir_noderev_size;
126 /* total size of file noderevs (i.e. the structs - not the rep) */
127 apr_uint64_t file_noderev_size;
129 /* all rep_stats_t of this revision (in no particular order),
130 * i.e. those that point back to this struct */
131 apr_array_header_t *representations;
133 /* Temporary rev / pack file access object, used in phys. addressing
134 * mode only. NULL when done reading this revision. */
135 svn_fs_fs__revision_file_t *rev_file;
138 /* Root data structure containing all information about a given repository.
139 * We use it as a wrapper around svn_fs_t and pass it around where we would
140 * otherwise just use a svn_fs_t.
142 typedef struct query_t
147 /* The HEAD revision. */
150 /* Number of revs per shard; 0 for non-sharded repos. */
153 /* First non-packed revision. */
154 svn_revnum_t min_unpacked_rev;
157 apr_array_header_t *revisions;
159 /* empty representation.
160 * Used as a dummy base for DELTA reps without base. */
161 rep_stats_t *null_base;
163 /* collected statistics */
164 svn_fs_fs__stats_t *stats;
166 /* Progress notification callback to call after each shard. May be NULL. */
167 svn_fs_progress_notify_func_t progress_func;
169 /* Baton for PROGRESS_FUNC. */
170 void *progress_baton;
172 /* Cancellation support callback to call once in a while. May be NULL. */
173 svn_cancel_func_t cancel_func;
175 /* Baton for CANCEL_FUNC. */
179 /* Return the length of REV_FILE in *FILE_SIZE.
180 * Use SCRATCH_POOL for temporary allocations.
183 get_file_size(apr_off_t *file_size,
184 svn_fs_fs__revision_file_t *rev_file,
185 apr_pool_t *scratch_pool)
189 SVN_ERR(svn_io_file_info_get(&finfo, APR_FINFO_SIZE, rev_file->file,
192 *file_size = finfo.size;
196 /* Initialize the LARGEST_CHANGES member in STATS with a capacity of COUNT
197 * entries. Allocate the result in RESULT_POOL.
200 initialize_largest_changes(svn_fs_fs__stats_t *stats,
202 apr_pool_t *result_pool)
206 stats->largest_changes = apr_pcalloc(result_pool,
207 sizeof(*stats->largest_changes));
208 stats->largest_changes->count = count;
209 stats->largest_changes->min_size = 1;
210 stats->largest_changes->changes
211 = apr_palloc(result_pool, count * sizeof(*stats->largest_changes->changes));
213 /* allocate *all* entries before the path stringbufs. This increases
214 * cache locality and enhances performance significantly. */
215 for (i = 0; i < count; ++i)
216 stats->largest_changes->changes[i]
217 = apr_palloc(result_pool, sizeof(**stats->largest_changes->changes));
219 /* now initialize them and allocate the stringbufs */
220 for (i = 0; i < count; ++i)
222 stats->largest_changes->changes[i]->size = 0;
223 stats->largest_changes->changes[i]->revision = SVN_INVALID_REVNUM;
224 stats->largest_changes->changes[i]->path
225 = svn_stringbuf_create_ensure(1024, result_pool);
229 /* Add entry for SIZE to HISTOGRAM.
232 add_to_histogram(svn_fs_fs__histogram_t *histogram,
235 apr_int64_t shift = 0;
237 while (((apr_int64_t)(1) << shift) <= size)
240 histogram->total.count++;
241 histogram->total.sum += size;
242 histogram->lines[(apr_size_t)shift].count++;
243 histogram->lines[(apr_size_t)shift].sum += size;
246 /* Update data aggregators in STATS with this representation of type KIND,
247 * on-disk REP_SIZE and expanded node size EXPANDED_SIZE for PATH in REVSION.
248 * PLAIN_ADDED indicates whether the node has a deltification predecessor.
251 add_change(svn_fs_fs__stats_t *stats,
252 apr_uint64_t rep_size,
253 apr_uint64_t expanded_size,
254 svn_revnum_t revision,
257 svn_boolean_t plain_added)
259 /* identify largest reps */
260 if (rep_size >= stats->largest_changes->min_size)
263 svn_fs_fs__largest_changes_t *largest_changes = stats->largest_changes;
264 svn_fs_fs__large_change_info_t *info
265 = largest_changes->changes[largest_changes->count - 1];
266 info->size = rep_size;
267 info->revision = revision;
268 svn_stringbuf_set(info->path, path);
270 /* linear insertion but not too bad since count is low and insertions
271 * near the end are more likely than close to front */
272 for (i = largest_changes->count - 1; i > 0; --i)
273 if (largest_changes->changes[i-1]->size >= rep_size)
276 largest_changes->changes[i] = largest_changes->changes[i-1];
278 largest_changes->changes[i] = info;
279 largest_changes->min_size
280 = largest_changes->changes[largest_changes->count-1]->size;
283 /* global histograms */
284 add_to_histogram(&stats->rep_size_histogram, rep_size);
285 add_to_histogram(&stats->node_size_histogram, expanded_size);
289 add_to_histogram(&stats->added_rep_size_histogram, rep_size);
290 add_to_histogram(&stats->added_node_size_histogram, expanded_size);
293 /* specific histograms by type */
297 add_to_histogram(&stats->unused_rep_histogram, rep_size);
299 case dir_property_rep:
300 add_to_histogram(&stats->dir_prop_rep_histogram, rep_size);
301 add_to_histogram(&stats->dir_prop_histogram, expanded_size);
303 case file_property_rep:
304 add_to_histogram(&stats->file_prop_rep_histogram, rep_size);
305 add_to_histogram(&stats->file_prop_histogram, expanded_size);
308 add_to_histogram(&stats->dir_rep_histogram, rep_size);
309 add_to_histogram(&stats->dir_histogram, expanded_size);
312 add_to_histogram(&stats->file_rep_histogram, rep_size);
313 add_to_histogram(&stats->file_histogram, expanded_size);
318 if (kind == file_rep)
320 /* determine extension */
321 svn_fs_fs__extension_info_t *info;
322 const char * file_name = strrchr(path, '/');
323 const char * extension = file_name ? strrchr(file_name, '.') : NULL;
325 if (extension == NULL || extension == file_name + 1)
326 extension = "(none)";
328 /* get / auto-insert entry for this extension */
329 info = apr_hash_get(stats->by_extension, extension, APR_HASH_KEY_STRING);
332 apr_pool_t *pool = apr_hash_pool_get(stats->by_extension);
333 info = apr_pcalloc(pool, sizeof(*info));
334 info->extension = apr_pstrdup(pool, extension);
336 apr_hash_set(stats->by_extension, info->extension,
337 APR_HASH_KEY_STRING, info);
340 /* update per-extension histogram */
341 add_to_histogram(&info->node_histogram, expanded_size);
342 add_to_histogram(&info->rep_histogram, rep_size);
346 /* Comparator used for binary search comparing the absolute file offset
347 * of a representation to some other offset. DATA is a *rep_stats_t,
348 * KEY is a pointer to an apr_off_t.
351 compare_representation_offsets(const void *data, const void *key)
353 apr_off_t lhs = (*(const rep_stats_t *const *)data)->offset;
354 apr_off_t rhs = *(const apr_off_t *)key;
358 return (lhs > rhs ? 1 : 0);
361 /* Find the revision_info_t object to the given REVISION in QUERY and
362 * return it in *REVISION_INFO. For performance reasons, we skip the
363 * lookup if the info is already provided.
365 * In that revision, look for the rep_stats_t object for offset OFFSET.
366 * If it already exists, set *IDX to its index in *REVISION_INFO's
367 * representations list and return the representation object. Otherwise,
368 * set the index to where it must be inserted and return NULL.
371 find_representation(int *idx,
373 revision_info_t **revision_info,
374 svn_revnum_t revision,
377 revision_info_t *info;
380 /* first let's find the revision */
381 info = revision_info ? *revision_info : NULL;
382 if (info == NULL || info->revision != revision)
384 info = APR_ARRAY_IDX(query->revisions, revision, revision_info_t*);
386 *revision_info = info;
389 /* not found -> no result */
393 /* look for the representation */
394 *idx = svn_sort__bsearch_lower_bound(info->representations,
396 compare_representation_offsets);
397 if (*idx < info->representations->nelts)
399 /* return the representation, if this is the one we were looking for */
401 = APR_ARRAY_IDX(info->representations, *idx, rep_stats_t *);
402 if (result->offset == offset)
406 /* not parsed, yet */
410 /* Find / auto-construct the representation stats for REP in QUERY and
411 * return it in *REPRESENTATION.
413 * If necessary, allocate the result in RESULT_POOL; use SCRATCH_POOL for
414 * temporary allocations.
417 parse_representation(rep_stats_t **representation,
419 representation_t *rep,
420 revision_info_t *revision_info,
421 apr_pool_t *result_pool,
422 apr_pool_t *scratch_pool)
427 /* read location (revision, offset) and size */
430 result = find_representation(&idx, query, &revision_info, rep->revision,
431 (apr_off_t)rep->item_index);
434 /* not parsed, yet (probably a rep in the same revision).
435 * Create a new rep object and determine its base rep as well.
437 result = apr_pcalloc(result_pool, sizeof(*result));
438 result->revision = rep->revision;
439 result->expanded_size = (rep->expanded_size ? rep->expanded_size
441 result->offset = (apr_off_t)rep->item_index;
442 result->size = rep->size;
444 /* In phys. addressing mode, follow link to the actual representation.
445 * In log. addressing mode, we will find it already as part of our
446 * linear walk through the whole file. */
447 if (!svn_fs_fs__use_log_addressing(query->fs))
449 svn_fs_fs__rep_header_t *header;
450 apr_off_t offset = revision_info->offset + result->offset;
452 SVN_ERR_ASSERT(revision_info->rev_file);
453 SVN_ERR(svn_io_file_seek(revision_info->rev_file->file, APR_SET,
454 &offset, scratch_pool));
455 SVN_ERR(svn_fs_fs__read_rep_header(&header,
456 revision_info->rev_file->stream,
457 scratch_pool, scratch_pool));
459 result->header_size = header->header_size;
462 svn_sort__array_insert(revision_info->representations, &result, idx);
465 *representation = result;
471 /* forward declaration */
473 read_noderev(query_t *query,
474 svn_stringbuf_t *noderev_str,
475 revision_info_t *revision_info,
476 apr_pool_t *result_pool,
477 apr_pool_t *scratch_pool);
479 /* Read the noderev item at OFFSET in REVISION_INFO from the filesystem
480 * provided by QUERY. Return it in *NODEREV, allocated in RESULT_POOL.
481 * Use SCRATCH_POOL for temporary allocations.
483 * The textual representation of the noderev will be used to determine
484 * the on-disk size of the noderev. Only called in phys. addressing mode.
487 read_phsy_noderev(svn_stringbuf_t **noderev,
490 revision_info_t *revision_info,
491 apr_pool_t *result_pool,
492 apr_pool_t *scratch_pool)
494 svn_stringbuf_t *noderev_str = svn_stringbuf_create_empty(result_pool);
495 svn_stringbuf_t *line;
498 apr_pool_t *iterpool = svn_pool_create(scratch_pool);
500 /* Navigate the file stream to the start of noderev. */
501 SVN_ERR_ASSERT(revision_info->rev_file);
503 offset += revision_info->offset;
504 SVN_ERR(svn_io_file_seek(revision_info->rev_file->file, APR_SET,
505 &offset, scratch_pool));
507 /* Read it (terminated by an empty line) */
510 svn_pool_clear(iterpool);
512 SVN_ERR(svn_stream_readline(revision_info->rev_file->stream, &line,
513 "\n", &eof, iterpool));
514 svn_stringbuf_appendstr(noderev_str, line);
515 svn_stringbuf_appendbyte(noderev_str, '\n');
517 while (line->len > 0 && !eof);
519 /* Return the result. */
520 *noderev = noderev_str;
522 svn_pool_destroy(iterpool);
527 /* Starting at the directory in NODEREV's text, read all DAG nodes,
528 * directories and representations linked in that tree structure.
529 * Store them in QUERY and REVISION_INFO. Also, read them only once.
531 * Use RESULT_POOL for persistent allocations and SCRATCH_POOL for
535 parse_dir(query_t *query,
536 node_revision_t *noderev,
537 revision_info_t *revision_info,
538 apr_pool_t *result_pool,
539 apr_pool_t *scratch_pool)
541 apr_pool_t *iterpool = svn_pool_create(scratch_pool);
544 apr_array_header_t *entries;
545 SVN_ERR(svn_fs_fs__rep_contents_dir(&entries, query->fs, noderev,
546 scratch_pool, scratch_pool));
548 for (i = 0; i < entries->nelts; ++i)
550 svn_fs_dirent_t *dirent = APR_ARRAY_IDX(entries, i, svn_fs_dirent_t *);
552 if (svn_fs_fs__id_rev(dirent->id) == revision_info->revision)
554 svn_stringbuf_t *noderev_str;
555 svn_pool_clear(iterpool);
557 SVN_ERR(read_phsy_noderev(&noderev_str, query,
558 svn_fs_fs__id_item(dirent->id),
559 revision_info, iterpool, iterpool));
560 SVN_ERR(read_noderev(query, noderev_str, revision_info,
561 result_pool, iterpool));
565 svn_pool_destroy(iterpool);
570 /* Parse the noderev given as NODEREV_STR and store the info in QUERY and
571 * REVISION_INFO. In phys. addressing mode, continue reading all DAG nodes,
572 * directories and representations linked in that tree structure.
574 * Use RESULT_POOL for persistent allocations and SCRATCH_POOL for
578 read_noderev(query_t *query,
579 svn_stringbuf_t *noderev_str,
580 revision_info_t *revision_info,
581 apr_pool_t *result_pool,
582 apr_pool_t *scratch_pool)
584 rep_stats_t *text = NULL;
585 rep_stats_t *props = NULL;
586 node_revision_t *noderev;
588 svn_stream_t *stream = svn_stream_from_stringbuf(noderev_str, scratch_pool);
589 SVN_ERR(svn_fs_fs__read_noderev(&noderev, stream, scratch_pool,
592 if (noderev->data_rep)
594 SVN_ERR(parse_representation(&text, query,
595 noderev->data_rep, revision_info,
596 result_pool, scratch_pool));
598 /* if we are the first to use this rep, mark it as "text rep" */
599 if (++text->ref_count == 1)
600 text->kind = noderev->kind == svn_node_dir ? dir_rep : file_rep;
603 if (noderev->prop_rep)
605 SVN_ERR(parse_representation(&props, query,
606 noderev->prop_rep, revision_info,
607 result_pool, scratch_pool));
609 /* if we are the first to use this rep, mark it as "prop rep" */
610 if (++props->ref_count == 1)
611 props->kind = noderev->kind == svn_node_dir ? dir_property_rep
615 /* record largest changes */
616 if (text && text->ref_count == 1)
617 add_change(query->stats, text->size, text->expanded_size, text->revision,
618 noderev->created_path, text->kind, !noderev->predecessor_id);
619 if (props && props->ref_count == 1)
620 add_change(query->stats, props->size, props->expanded_size,
621 props->revision, noderev->created_path, props->kind,
622 !noderev->predecessor_id);
624 /* if this is a directory and has not been processed, yet, read and
625 * process it recursively */
626 if ( noderev->kind == svn_node_dir && text && text->ref_count == 1
627 && !svn_fs_fs__use_log_addressing(query->fs))
628 SVN_ERR(parse_dir(query, noderev, revision_info, result_pool,
632 if (noderev->kind == svn_node_dir)
634 revision_info->dir_noderev_size += noderev_str->len;
635 revision_info->dir_noderev_count++;
639 revision_info->file_noderev_size += noderev_str->len;
640 revision_info->file_noderev_count++;
646 /* For the revision given as REVISION_INFO within QUERY, determine the number
647 * of entries in its changed paths list and store that info in REVISION_INFO.
648 * Use SCRATCH_POOL for temporary allocations.
651 get_phys_change_count(query_t *query,
652 revision_info_t *revision_info,
653 apr_pool_t *scratch_pool)
655 /* We are going to use our own sub-pool here because the changes object
656 * may well be >100MB and SCRATCH_POOL may not get cleared until all other
657 * info has been read by read_phys_revision(). Therefore, tidy up early.
659 apr_pool_t *subpool = svn_pool_create(scratch_pool);
660 apr_array_header_t *changes;
662 SVN_ERR(svn_fs_fs__get_changes(&changes, query->fs,
663 revision_info->revision, subpool));
664 revision_info->change_count = changes->nelts;
666 /* Release potentially tons of memory. */
667 svn_pool_destroy(subpool);
672 /* Read header information for the revision stored in FILE_CONTENT (one
673 * whole revision). Return the offsets within FILE_CONTENT for the
674 * *ROOT_NODEREV, the list of *CHANGES and its len in *CHANGES_LEN.
675 * Use POOL for temporary allocations. */
677 read_phys_revision(query_t *query,
678 revision_info_t *info,
679 apr_pool_t *result_pool,
680 apr_pool_t *scratch_pool)
683 apr_off_t root_node_offset;
684 apr_off_t changes_offset;
685 svn_stringbuf_t *trailer;
686 svn_stringbuf_t *noderev_str;
688 /* Read the last 64 bytes of the revision (if long enough). */
689 apr_off_t start = MAX(info->offset, info->end - sizeof(buf));
690 apr_size_t len = (apr_size_t)(info->end - start);
691 SVN_ERR(svn_io_file_seek(info->rev_file->file, APR_SET, &start,
693 SVN_ERR(svn_io_file_read_full2(info->rev_file->file, buf, len, NULL, NULL,
695 trailer = svn_stringbuf_ncreate(buf, len, scratch_pool);
697 /* Parse that trailer. */
698 SVN_ERR(svn_fs_fs__parse_revision_trailer(&root_node_offset,
699 &changes_offset, trailer,
701 SVN_ERR(get_phys_change_count(query, info, scratch_pool));
703 /* Calculate the length of the changes list. */
704 trailer = svn_fs_fs__unparse_revision_trailer(root_node_offset,
707 info->changes_len = info->end - info->offset - changes_offset
710 /* Recursively read nodes added in this rev. */
711 SVN_ERR(read_phsy_noderev(&noderev_str, query, root_node_offset, info,
712 scratch_pool, scratch_pool));
713 SVN_ERR(read_noderev(query, noderev_str, info, result_pool, scratch_pool));
718 /* Read the content of the pack file staring at revision BASE physical
719 * addressing mode and store it in QUERY.
721 * Use RESULT_POOL for persistent allocations and SCRATCH_POOL for
725 read_phys_pack_file(query_t *query,
727 apr_pool_t *result_pool,
728 apr_pool_t *scratch_pool)
730 apr_pool_t *iterpool = svn_pool_create(scratch_pool);
732 apr_off_t file_size = 0;
733 svn_fs_fs__revision_file_t *rev_file;
735 SVN_ERR(svn_fs_fs__open_pack_or_rev_file(&rev_file, query->fs, base,
736 scratch_pool, scratch_pool));
737 SVN_ERR(get_file_size(&file_size, rev_file, scratch_pool));
739 /* process each revision in the pack file */
740 for (i = 0; i < query->shard_size; ++i)
742 revision_info_t *info;
744 /* cancellation support */
745 if (query->cancel_func)
746 SVN_ERR(query->cancel_func(query->cancel_baton));
748 /* create the revision info for the current rev */
749 info = apr_pcalloc(result_pool, sizeof(*info));
750 info->representations = apr_array_make(result_pool, 4,
751 sizeof(rep_stats_t*));
752 info->rev_file = rev_file;
754 info->revision = base + i;
755 SVN_ERR(svn_fs_fs__get_packed_offset(&info->offset, query->fs, base + i,
757 if (i + 1 == query->shard_size)
758 info->end = file_size;
760 SVN_ERR(svn_fs_fs__get_packed_offset(&info->end, query->fs,
761 base + i + 1, iterpool));
763 SVN_ERR(read_phys_revision(query, info, result_pool, iterpool));
765 info->representations = apr_array_copy(result_pool,
766 info->representations);
768 /* Done with this revision. */
769 info->rev_file = NULL;
771 /* put it into our container */
772 APR_ARRAY_PUSH(query->revisions, revision_info_t*) = info;
775 svn_pool_clear(iterpool);
778 /* Done with this pack file. */
779 SVN_ERR(svn_fs_fs__close_revision_file(rev_file));
781 /* one more pack file processed */
782 if (query->progress_func)
783 query->progress_func(base, query->progress_baton, scratch_pool);
788 /* Read the content of the file for REVISION in physical addressing mode
789 * and store its contents in QUERY.
791 * Use RESULT_POOL for persistent allocations and SCRATCH_POOL for
795 read_phys_revision_file(query_t *query,
796 svn_revnum_t revision,
797 apr_pool_t *result_pool,
798 apr_pool_t *scratch_pool)
800 revision_info_t *info = apr_pcalloc(result_pool, sizeof(*info));
801 apr_off_t file_size = 0;
802 svn_fs_fs__revision_file_t *rev_file;
804 /* cancellation support */
805 if (query->cancel_func)
806 SVN_ERR(query->cancel_func(query->cancel_baton));
808 /* read the whole pack file into memory */
809 SVN_ERR(svn_fs_fs__open_pack_or_rev_file(&rev_file, query->fs, revision,
810 scratch_pool, scratch_pool));
811 SVN_ERR(get_file_size(&file_size, rev_file, scratch_pool));
813 /* create the revision info for the current rev */
814 info->representations = apr_array_make(result_pool, 4, sizeof(rep_stats_t*));
816 info->rev_file = rev_file;
817 info->revision = revision;
819 info->end = file_size;
821 SVN_ERR(read_phys_revision(query, info, result_pool, scratch_pool));
823 /* Done with this revision. */
824 SVN_ERR(svn_fs_fs__close_revision_file(rev_file));
825 info->rev_file = NULL;
827 /* put it into our container */
828 APR_ARRAY_PUSH(query->revisions, revision_info_t*) = info;
830 /* show progress every 1000 revs or so */
831 if (query->progress_func)
833 if (query->shard_size && (revision % query->shard_size == 0))
834 query->progress_func(revision, query->progress_baton, scratch_pool);
835 if (!query->shard_size && (revision % 1000 == 0))
836 query->progress_func(revision, query->progress_baton, scratch_pool);
842 /* Given the unparsed changes list in CHANGES with LEN chars, return the
843 * number of changed paths encoded in it. Only used in log. addressing
847 get_log_change_count(const char *changes,
850 apr_size_t lines = 0;
851 const char *end = changes + len;
854 for (; changes < end; ++changes)
855 if (*changes == '\n')
858 /* two lines per change */
862 /* Read the item described by ENTRY from the REV_FILE and return the
863 * respective byte sequence in *CONTENTS, allocated in RESULT_POOL.
864 * Use SCRATCH_POOL for temporary allocations
867 read_item(svn_stringbuf_t **contents,
868 svn_fs_fs__revision_file_t *rev_file,
869 svn_fs_fs__p2l_entry_t *entry,
870 apr_pool_t *result_pool,
871 apr_pool_t *scratch_pool)
873 svn_stringbuf_t *item = svn_stringbuf_create_ensure(entry->size,
875 item->len = entry->size;
876 item->data[item->len] = 0;
878 SVN_ERR(svn_io_file_aligned_seek(rev_file->file, rev_file->block_size,
879 NULL, entry->offset, scratch_pool));
880 SVN_ERR(svn_io_file_read_full2(rev_file->file, item->data, item->len,
881 NULL, NULL, scratch_pool));
888 /* Process the logically addressed revision contents of revisions BASE to
889 * BASE + COUNT - 1 in QUERY.
891 * Use RESULT_POOL for persistent allocations and SCRATCH_POOL for
895 read_log_rev_or_packfile(query_t *query,
898 apr_pool_t *result_pool,
899 apr_pool_t *scratch_pool)
901 fs_fs_data_t *ffd = query->fs->fsap_data;
902 apr_pool_t *iterpool = svn_pool_create(scratch_pool);
903 apr_off_t max_offset;
904 apr_off_t offset = 0;
906 svn_fs_fs__revision_file_t *rev_file;
908 /* we will process every revision in the rev / pack file */
909 for (i = 0; i < count; ++i)
911 /* create the revision info for the current rev */
912 revision_info_t *info = apr_pcalloc(result_pool, sizeof(*info));
913 info->representations = apr_array_make(result_pool, 4,
914 sizeof(rep_stats_t*));
915 info->revision = base + i;
917 APR_ARRAY_PUSH(query->revisions, revision_info_t*) = info;
920 /* open the pack / rev file that is covered by the p2l index */
921 SVN_ERR(svn_fs_fs__open_pack_or_rev_file(&rev_file, query->fs, base,
922 scratch_pool, iterpool));
923 SVN_ERR(svn_fs_fs__p2l_get_max_offset(&max_offset, query->fs, rev_file,
924 base, scratch_pool));
926 /* record the whole pack size in the first rev so the total sum will
928 APR_ARRAY_IDX(query->revisions, base, revision_info_t*)->end = max_offset;
930 /* for all offsets in the file, get the P2L index entries and process
931 the interesting items (change lists, noderevs) */
932 for (offset = 0; offset < max_offset; )
934 apr_array_header_t *entries;
936 svn_pool_clear(iterpool);
938 /* cancellation support */
939 if (query->cancel_func)
940 SVN_ERR(query->cancel_func(query->cancel_baton));
942 /* get all entries for the current block */
943 SVN_ERR(svn_fs_fs__p2l_index_lookup(&entries, query->fs, rev_file, base,
944 offset, ffd->p2l_page_size,
945 iterpool, iterpool));
947 /* process all entries (and later continue with the next block) */
948 for (i = 0; i < entries->nelts; ++i)
950 svn_fs_fs__p2l_entry_t *entry
951 = &APR_ARRAY_IDX(entries, i, svn_fs_fs__p2l_entry_t);
953 /* skip bits we previously processed */
954 if (i == 0 && entry->offset < offset)
957 /* skip zero-sized entries */
958 if (entry->size == 0)
961 /* read and process interesting items */
962 if (entry->type == SVN_FS_FS__ITEM_TYPE_NODEREV)
964 svn_stringbuf_t *item;
965 revision_info_t *info = APR_ARRAY_IDX(query->revisions,
966 entry->item.revision,
968 SVN_ERR(read_item(&item, rev_file, entry, iterpool, iterpool));
969 SVN_ERR(read_noderev(query, item, info, result_pool, iterpool));
971 else if (entry->type == SVN_FS_FS__ITEM_TYPE_CHANGES)
973 svn_stringbuf_t *item;
974 revision_info_t *info = APR_ARRAY_IDX(query->revisions,
975 entry->item.revision,
977 SVN_ERR(read_item(&item, rev_file, entry, iterpool, iterpool));
979 = get_log_change_count(item->data + 0, item->len);
980 info->changes_len += entry->size;
984 offset += entry->size;
988 /* clean up and close file handles */
989 svn_pool_destroy(iterpool);
994 /* Read the content of the pack file staring at revision BASE logical
995 * addressing mode and store it in QUERY.
997 * Use RESULT_POOL for persistent allocations and SCRATCH_POOL for
1000 static svn_error_t *
1001 read_log_pack_file(query_t *query,
1003 apr_pool_t *result_pool,
1004 apr_pool_t *scratch_pool)
1006 SVN_ERR(read_log_rev_or_packfile(query, base, query->shard_size,
1007 result_pool, scratch_pool));
1009 /* one more pack file processed */
1010 if (query->progress_func)
1011 query->progress_func(base, query->progress_baton, scratch_pool);
1013 return SVN_NO_ERROR;
1016 /* Read the content of the file for REVISION in logical addressing mode
1017 * and store its contents in QUERY.
1019 * Use RESULT_POOL for persistent allocations and SCRATCH_POOL for
1022 static svn_error_t *
1023 read_log_revision_file(query_t *query,
1024 svn_revnum_t revision,
1025 apr_pool_t *result_pool,
1026 apr_pool_t *scratch_pool)
1028 SVN_ERR(read_log_rev_or_packfile(query, revision, 1,
1029 result_pool, scratch_pool));
1031 /* show progress every 1000 revs or so */
1032 if (query->progress_func)
1034 if (query->shard_size && (revision % query->shard_size == 0))
1035 query->progress_func(revision, query->progress_baton, scratch_pool);
1036 if (!query->shard_size && (revision % 1000 == 0))
1037 query->progress_func(revision, query->progress_baton, scratch_pool);
1040 return SVN_NO_ERROR;
1043 /* Read the repository and collect the stats info in QUERY.
1045 * Use RESULT_POOL for persistent allocations and SCRATCH_POOL for
1048 static svn_error_t *
1049 read_revisions(query_t *query,
1050 apr_pool_t *result_pool,
1051 apr_pool_t *scratch_pool)
1053 apr_pool_t *iterpool = svn_pool_create(scratch_pool);
1054 svn_revnum_t revision;
1056 /* read all packed revs */
1058 ; revision < query->min_unpacked_rev
1059 ; revision += query->shard_size)
1061 svn_pool_clear(iterpool);
1063 if (svn_fs_fs__use_log_addressing(query->fs))
1064 SVN_ERR(read_log_pack_file(query, revision, result_pool, iterpool));
1066 SVN_ERR(read_phys_pack_file(query, revision, result_pool, iterpool));
1069 /* read non-packed revs */
1070 for ( ; revision <= query->head; ++revision)
1072 svn_pool_clear(iterpool);
1074 if (svn_fs_fs__use_log_addressing(query->fs))
1075 SVN_ERR(read_log_revision_file(query, revision, result_pool,
1078 SVN_ERR(read_phys_revision_file(query, revision, result_pool,
1082 svn_pool_destroy(iterpool);
1084 return SVN_NO_ERROR;
1087 /* Accumulate stats of REP in STATS.
1090 add_rep_pack_stats(svn_fs_fs__rep_pack_stats_t *stats,
1095 stats->packed_size += rep->size;
1096 stats->expanded_size += rep->expanded_size;
1097 stats->overhead_size += rep->header_size + 7 /* ENDREP\n */;
1100 /* Accumulate stats of REP in STATS.
1103 add_rep_stats(svn_fs_fs__representation_stats_t *stats,
1106 add_rep_pack_stats(&stats->total, rep);
1107 if (rep->ref_count == 1)
1108 add_rep_pack_stats(&stats->uniques, rep);
1110 add_rep_pack_stats(&stats->shared, rep);
1112 stats->references += rep->ref_count;
1113 stats->expanded_size += rep->ref_count * rep->expanded_size;
1116 /* Aggregate the info the in revision_info_t * array REVISIONS into the
1117 * respectve fields of STATS.
1120 aggregate_stats(const apr_array_header_t *revisions,
1121 svn_fs_fs__stats_t *stats)
1125 /* aggregate info from all revisions */
1126 stats->revision_count = revisions->nelts;
1127 for (i = 0; i < revisions->nelts; ++i)
1129 revision_info_t *revision = APR_ARRAY_IDX(revisions, i,
1132 /* data gathered on a revision level */
1133 stats->change_count += revision->change_count;
1134 stats->change_len += revision->changes_len;
1135 stats->total_size += revision->end - revision->offset;
1137 stats->dir_node_stats.count += revision->dir_noderev_count;
1138 stats->dir_node_stats.size += revision->dir_noderev_size;
1139 stats->file_node_stats.count += revision->file_noderev_count;
1140 stats->file_node_stats.size += revision->file_noderev_size;
1141 stats->total_node_stats.count += revision->dir_noderev_count
1142 + revision->file_noderev_count;
1143 stats->total_node_stats.size += revision->dir_noderev_size
1144 + revision->file_noderev_size;
1146 /* process representations */
1147 for (k = 0; k < revision->representations->nelts; ++k)
1149 rep_stats_t *rep = APR_ARRAY_IDX(revision->representations, k,
1152 /* accumulate in the right bucket */
1156 add_rep_stats(&stats->file_rep_stats, rep);
1159 add_rep_stats(&stats->dir_rep_stats, rep);
1161 case file_property_rep:
1162 add_rep_stats(&stats->file_prop_rep_stats, rep);
1164 case dir_property_rep:
1165 add_rep_stats(&stats->dir_prop_rep_stats, rep);
1171 add_rep_stats(&stats->total_rep_stats, rep);
1176 /* Return a new svn_fs_fs__stats_t instance, allocated in RESULT_POOL.
1178 static svn_fs_fs__stats_t *
1179 create_stats(apr_pool_t *result_pool)
1181 svn_fs_fs__stats_t *stats = apr_pcalloc(result_pool, sizeof(*stats));
1183 initialize_largest_changes(stats, 64, result_pool);
1184 stats->by_extension = apr_hash_make(result_pool);
1189 /* Create a *QUERY, allocated in RESULT_POOL, reading filesystem FS and
1190 * collecting results in STATS. Store the optional PROCESS_FUNC and
1191 * PROGRESS_BATON as well as CANCEL_FUNC and CANCEL_BATON in *QUERY, too.
1192 * Use SCRATCH_POOL for temporary allocations.
1194 static svn_error_t *
1195 create_query(query_t **query,
1197 svn_fs_fs__stats_t *stats,
1198 svn_fs_progress_notify_func_t progress_func,
1199 void *progress_baton,
1200 svn_cancel_func_t cancel_func,
1202 apr_pool_t *result_pool,
1203 apr_pool_t *scratch_pool)
1205 *query = apr_pcalloc(result_pool, sizeof(**query));
1207 /* Read repository dimensions. */
1208 (*query)->shard_size = svn_fs_fs__shard_size(fs);
1209 SVN_ERR(svn_fs_fs__youngest_rev(&(*query)->head, fs, scratch_pool));
1210 SVN_ERR(svn_fs_fs__min_unpacked_rev(&(*query)->min_unpacked_rev, fs,
1213 /* create data containers and caches
1214 * Note: this assumes that int is at least 32-bits and that we only support
1215 * 32-bit wide revision numbers (actually 31-bits due to the signedness
1216 * of both the nelts field of the array and our revision numbers). This
1217 * means this code will fail on platforms where int is less than 32-bits
1218 * and the repository has more revisions than int can hold. */
1219 (*query)->revisions = apr_array_make(result_pool, (int) (*query)->head + 1,
1220 sizeof(revision_info_t *));
1221 (*query)->null_base = apr_pcalloc(result_pool,
1222 sizeof(*(*query)->null_base));
1224 /* Store other parameters */
1226 (*query)->stats = stats;
1227 (*query)->progress_func = progress_func;
1228 (*query)->progress_baton = progress_baton;
1229 (*query)->cancel_func = cancel_func;
1230 (*query)->cancel_baton = cancel_baton;
1232 return SVN_NO_ERROR;
1236 svn_fs_fs__get_stats(svn_fs_fs__stats_t **stats,
1238 svn_fs_progress_notify_func_t progress_func,
1239 void *progress_baton,
1240 svn_cancel_func_t cancel_func,
1242 apr_pool_t *result_pool,
1243 apr_pool_t *scratch_pool)
1247 *stats = create_stats(result_pool);
1248 SVN_ERR(create_query(&query, fs, *stats, progress_func, progress_baton,
1249 cancel_func, cancel_baton, scratch_pool,
1251 SVN_ERR(read_revisions(query, scratch_pool, scratch_pool));
1252 aggregate_stats(query->revisions, *stats);
1254 return SVN_NO_ERROR;