1 /* stats-cmd.c -- implements the size stats sub-command.
3 * ====================================================================
4 * Licensed to the Apache Software Foundation (ASF) under one
5 * or more contributor license agreements. See the NOTICE file
6 * distributed with this work for additional information
7 * regarding copyright ownership. The ASF licenses this file
8 * to you under the Apache License, Version 2.0 (the
9 * "License"); you may not use this file except in compliance
10 * with the License. You may obtain a copy of the License at
12 * http://www.apache.org/licenses/LICENSE-2.0
14 * Unless required by applicable law or agreed to in writing,
15 * software distributed under the License is distributed on an
16 * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
17 * KIND, either express or implied. See the License for the
18 * specific language governing permissions and limitations
20 * ====================================================================
26 #include "svn_pools.h"
27 #include "svn_sorts.h"
29 #include "private/svn_sorts_private.h"
30 #include "private/svn_string_private.h"
31 #include "private/svn_fs_fs_private.h"
33 #include "svn_private_config.h"
36 /* Return the string, allocated in RESULT_POOL, describing the value 2**I.
39 print_two_power(int i,
40 apr_pool_t *result_pool)
42 /* These are the SI prefixes for base-1000, the binary ones with base-1024
43 are too clumsy and require appending B for "byte" to be intelligible,
46 Therefore, we ignore the official standard and revert to the traditional
47 contextual use were the base-1000 prefixes are understood as base-1024
48 when it came to data sizes.
50 const char *si_prefixes = " kMGTPEZY";
52 int number = (i >= 0) ? (1 << (i % 10)) : 0;
53 int thousands = (i >= 0) ? (i / 10) : 0;
55 char si_prefix = (thousands < strlen(si_prefixes))
56 ? si_prefixes[thousands]
60 return apr_psprintf(result_pool, "%d", number);
62 return apr_psprintf(result_pool, "%d%c", number, si_prefix);
65 /* Print statistics for the given group of representations to console.
66 * Use POOL for allocations.
69 print_rep_stats(svn_fs_fs__representation_stats_t *stats,
72 printf(_("%20s bytes in %12s reps\n"
73 "%20s bytes in %12s shared reps\n"
74 "%20s bytes expanded size\n"
75 "%20s bytes expanded shared size\n"
76 "%20s bytes with rep-sharing off\n"
77 "%20s shared references\n"
78 "%20.3f average delta chain length\n"),
79 svn__ui64toa_sep(stats->total.packed_size, ',', pool),
80 svn__ui64toa_sep(stats->total.count, ',', pool),
81 svn__ui64toa_sep(stats->shared.packed_size, ',', pool),
82 svn__ui64toa_sep(stats->shared.count, ',', pool),
83 svn__ui64toa_sep(stats->total.expanded_size, ',', pool),
84 svn__ui64toa_sep(stats->shared.expanded_size, ',', pool),
85 svn__ui64toa_sep(stats->expanded_size, ',', pool),
86 svn__ui64toa_sep(stats->references - stats->total.count, ',', pool),
87 stats->chain_len / MAX(1.0, (double)stats->total.count));
90 /* Print the (used) contents of CHANGES. Use POOL for allocations.
93 print_largest_reps(svn_fs_fs__largest_changes_t *changes,
97 for (i = 0; i < changes->count && changes->changes[i]->size; ++i)
98 printf(_("%12s r%-8ld %s\n"),
99 svn__ui64toa_sep(changes->changes[i]->size, ',', pool),
100 changes->changes[i]->revision,
101 changes->changes[i]->path->data);
104 /* Print the non-zero section of HISTOGRAM to console.
105 * Use POOL for allocations.
108 print_histogram(svn_fs_fs__histogram_t *histogram,
115 /* identify non-zero range */
116 while (last > 0 && histogram->lines[last].count == 0)
119 while (first <= last && histogram->lines[first].count == 0)
122 /* display histogram lines */
123 for (i = last; i >= first; --i)
124 printf(_(" %4s .. < %-4s %19s (%2d%%) bytes in %12s (%2d%%) items\n"),
125 print_two_power(i-1, pool), print_two_power(i, pool),
126 svn__ui64toa_sep(histogram->lines[i].sum, ',', pool),
127 (int)(histogram->lines[i].sum * 100 / histogram->total.sum),
128 svn__ui64toa_sep(histogram->lines[i].count, ',', pool),
129 (int)(histogram->lines[i].count * 100 / histogram->total.count));
132 /* COMPARISON_FUNC for svn_sort__hash.
133 * Sort extension_info_t values by total count in descending order.
136 compare_count(const svn_sort__item_t *a,
137 const svn_sort__item_t *b)
139 const svn_fs_fs__extension_info_t *lhs = a->value;
140 const svn_fs_fs__extension_info_t *rhs = b->value;
141 apr_int64_t diff = lhs->node_histogram.total.count
142 - rhs->node_histogram.total.count;
144 return diff > 0 ? -1 : (diff < 0 ? 1 : 0);
147 /* COMPARISON_FUNC for svn_sort__hash.
148 * Sort extension_info_t values by total uncompressed size in descending order.
151 compare_node_size(const svn_sort__item_t *a,
152 const svn_sort__item_t *b)
154 const svn_fs_fs__extension_info_t *lhs = a->value;
155 const svn_fs_fs__extension_info_t *rhs = b->value;
156 apr_int64_t diff = lhs->node_histogram.total.sum
157 - rhs->node_histogram.total.sum;
159 return diff > 0 ? -1 : (diff < 0 ? 1 : 0);
162 /* COMPARISON_FUNC for svn_sort__hash.
163 * Sort extension_info_t values by total prep count in descending order.
166 compare_rep_size(const svn_sort__item_t *a,
167 const svn_sort__item_t *b)
169 const svn_fs_fs__extension_info_t *lhs = a->value;
170 const svn_fs_fs__extension_info_t *rhs = b->value;
171 apr_int64_t diff = lhs->rep_histogram.total.sum
172 - rhs->rep_histogram.total.sum;
174 return diff > 0 ? -1 : (diff < 0 ? 1 : 0);
177 /* Return an array of extension_info_t* for the (up to) 16 most prominent
178 * extensions in STATS according to the sort criterion COMPARISON_FUNC.
179 * Allocate results in POOL.
181 static apr_array_header_t *
182 get_by_extensions(svn_fs_fs__stats_t *stats,
183 int (*comparison_func)(const svn_sort__item_t *,
184 const svn_sort__item_t *),
187 /* sort all data by extension */
188 apr_array_header_t *sorted
189 = svn_sort__hash(stats->by_extension, comparison_func, pool);
191 /* select the top (first) 16 entries */
192 int count = MIN(sorted->nelts, 16);
193 apr_array_header_t *result
194 = apr_array_make(pool, count, sizeof(svn_fs_fs__extension_info_t*));
197 for (i = 0; i < count; ++i)
198 APR_ARRAY_PUSH(result, svn_fs_fs__extension_info_t*)
199 = APR_ARRAY_IDX(sorted, i, svn_sort__item_t).value;
204 /* Add all extension_info_t* entries of TO_ADD not already in TARGET to
208 merge_by_extension(apr_array_header_t *target,
209 apr_array_header_t *to_add)
213 count = target->nelts;
214 for (i = 0; i < to_add->nelts; ++i)
216 svn_fs_fs__extension_info_t *info
217 = APR_ARRAY_IDX(to_add, i, svn_fs_fs__extension_info_t *);
218 for (k = 0; k < count; ++k)
219 if (info == APR_ARRAY_IDX(target, k, svn_fs_fs__extension_info_t *))
223 APR_ARRAY_PUSH(target, svn_fs_fs__extension_info_t*) = info;
227 /* Print the (up to) 16 extensions in STATS with the most changes.
228 * Use POOL for allocations.
231 print_extensions_by_changes(svn_fs_fs__stats_t *stats,
234 apr_array_header_t *data = get_by_extensions(stats, compare_count, pool);
238 for (i = 0; i < data->nelts; ++i)
240 svn_fs_fs__extension_info_t *info
241 = APR_ARRAY_IDX(data, i, svn_fs_fs__extension_info_t *);
243 /* If there are elements, then their count cannot be 0. */
244 assert(stats->file_histogram.total.count);
246 sum += info->node_histogram.total.count;
247 printf(_("%11s %20s (%2d%%) representations\n"),
249 svn__ui64toa_sep(info->node_histogram.total.count, ',', pool),
250 (int)(info->node_histogram.total.count * 100 /
251 stats->file_histogram.total.count));
254 if (stats->file_histogram.total.count)
256 printf(_("%11s %20s (%2d%%) representations\n"),
258 svn__ui64toa_sep(stats->file_histogram.total.count - sum, ',',
260 (int)((stats->file_histogram.total.count - sum) * 100 /
261 stats->file_histogram.total.count));
265 /* Calculate a percentage, handling edge cases. */
267 get_percentage(apr_uint64_t part,
270 /* This include total == 0. */
275 return (int)(part * 100.0 / total);
278 /* Print the (up to) 16 extensions in STATS with the largest total size of
279 * changed file content. Use POOL for allocations.
282 print_extensions_by_nodes(svn_fs_fs__stats_t *stats,
285 apr_array_header_t *data = get_by_extensions(stats, compare_node_size, pool);
289 for (i = 0; i < data->nelts; ++i)
291 svn_fs_fs__extension_info_t *info
292 = APR_ARRAY_IDX(data, i, svn_fs_fs__extension_info_t *);
293 sum += info->node_histogram.total.sum;
294 printf(_("%11s %20s (%2d%%) bytes\n"),
296 svn__ui64toa_sep(info->node_histogram.total.sum, ',', pool),
297 get_percentage(info->node_histogram.total.sum,
298 stats->file_histogram.total.sum));
301 if (stats->file_histogram.total.sum > sum)
303 /* Total sum can't be zero here. */
304 printf(_("%11s %20s (%2d%%) bytes\n"),
306 svn__ui64toa_sep(stats->file_histogram.total.sum - sum, ',',
308 get_percentage(stats->file_histogram.total.sum - sum,
309 stats->file_histogram.total.sum));
313 /* Print the (up to) 16 extensions in STATS with the largest total size of
314 * changed file content. Use POOL for allocations.
317 print_extensions_by_reps(svn_fs_fs__stats_t *stats,
320 apr_array_header_t *data = get_by_extensions(stats, compare_rep_size, pool);
324 for (i = 0; i < data->nelts; ++i)
326 svn_fs_fs__extension_info_t *info
327 = APR_ARRAY_IDX(data, i, svn_fs_fs__extension_info_t *);
328 sum += info->rep_histogram.total.sum;
329 printf(_("%11s %20s (%2d%%) bytes\n"),
331 svn__ui64toa_sep(info->rep_histogram.total.sum, ',', pool),
332 get_percentage(info->rep_histogram.total.sum,
333 stats->rep_size_histogram.total.sum));
336 if (stats->rep_size_histogram.total.sum > sum)
338 /* Total sum can't be zero here. */
339 printf(_("%11s %20s (%2d%%) bytes\n"),
341 svn__ui64toa_sep(stats->rep_size_histogram.total.sum - sum, ',',
343 get_percentage(stats->rep_size_histogram.total.sum - sum,
344 stats->rep_size_histogram.total.sum));
348 /* Print per-extension histograms for the most frequent extensions in STATS.
349 * Use POOL for allocations. */
351 print_histograms_by_extension(svn_fs_fs__stats_t *stats,
354 apr_array_header_t *data = get_by_extensions(stats, compare_count, pool);
357 merge_by_extension(data, get_by_extensions(stats, compare_node_size, pool));
358 merge_by_extension(data, get_by_extensions(stats, compare_rep_size, pool));
360 for (i = 0; i < data->nelts; ++i)
362 svn_fs_fs__extension_info_t *info
363 = APR_ARRAY_IDX(data, i, svn_fs_fs__extension_info_t *);
364 printf("\nHistogram of '%s' file sizes:\n", info->extension);
365 print_histogram(&info->node_histogram, pool);
366 printf("\nHistogram of '%s' file representation sizes:\n",
368 print_histogram(&info->rep_histogram, pool);
372 /* Print the contents of STATS to the console.
373 * Use POOL for allocations.
376 print_stats(svn_fs_fs__stats_t *stats,
380 printf("\n\nGlobal statistics:\n");
381 printf(_("%20s bytes in %12s revisions\n"
382 "%20s bytes in %12s changes\n"
383 "%20s bytes in %12s node revision records\n"
384 "%20s bytes in %12s representations\n"
385 "%20s bytes expanded representation size\n"
386 "%20s bytes with rep-sharing off\n"),
387 svn__ui64toa_sep(stats->total_size, ',', pool),
388 svn__ui64toa_sep(stats->revision_count, ',', pool),
389 svn__ui64toa_sep(stats->change_len, ',', pool),
390 svn__ui64toa_sep(stats->change_count, ',', pool),
391 svn__ui64toa_sep(stats->total_node_stats.size, ',', pool),
392 svn__ui64toa_sep(stats->total_node_stats.count, ',', pool),
393 svn__ui64toa_sep(stats->total_rep_stats.total.packed_size, ',',
395 svn__ui64toa_sep(stats->total_rep_stats.total.count, ',', pool),
396 svn__ui64toa_sep(stats->total_rep_stats.total.expanded_size, ',',
398 svn__ui64toa_sep(stats->total_rep_stats.expanded_size, ',', pool));
400 printf("\nNoderev statistics:\n");
401 printf(_("%20s bytes in %12s nodes total\n"
402 "%20s bytes in %12s directory noderevs\n"
403 "%20s bytes in %12s file noderevs\n"),
404 svn__ui64toa_sep(stats->total_node_stats.size, ',', pool),
405 svn__ui64toa_sep(stats->total_node_stats.count, ',', pool),
406 svn__ui64toa_sep(stats->dir_node_stats.size, ',', pool),
407 svn__ui64toa_sep(stats->dir_node_stats.count, ',', pool),
408 svn__ui64toa_sep(stats->file_node_stats.size, ',', pool),
409 svn__ui64toa_sep(stats->file_node_stats.count, ',', pool));
411 printf("\nRepresentation statistics:\n");
412 printf(_("%20s bytes in %12s representations total\n"
413 "%20s bytes in %12s directory representations\n"
414 "%20s bytes in %12s file representations\n"
415 "%20s bytes in %12s representations of added file nodes\n"
416 "%20s bytes in %12s directory property representations\n"
417 "%20s bytes in %12s file property representations\n"
418 " with %12.3f average delta chain length\n"
419 "%20s bytes in header & footer overhead\n"),
420 svn__ui64toa_sep(stats->total_rep_stats.total.packed_size, ',',
422 svn__ui64toa_sep(stats->total_rep_stats.total.count, ',', pool),
423 svn__ui64toa_sep(stats->dir_rep_stats.total.packed_size, ',',
425 svn__ui64toa_sep(stats->dir_rep_stats.total.count, ',', pool),
426 svn__ui64toa_sep(stats->file_rep_stats.total.packed_size, ',',
428 svn__ui64toa_sep(stats->file_rep_stats.total.count, ',', pool),
429 svn__ui64toa_sep(stats->added_rep_size_histogram.total.sum, ',',
431 svn__ui64toa_sep(stats->added_rep_size_histogram.total.count, ',',
433 svn__ui64toa_sep(stats->dir_prop_rep_stats.total.packed_size, ',',
435 svn__ui64toa_sep(stats->dir_prop_rep_stats.total.count, ',', pool),
436 svn__ui64toa_sep(stats->file_prop_rep_stats.total.packed_size, ',',
438 svn__ui64toa_sep(stats->file_prop_rep_stats.total.count, ',', pool),
439 stats->total_rep_stats.chain_len
440 / (double)stats->total_rep_stats.total.count,
441 svn__ui64toa_sep(stats->total_rep_stats.total.overhead_size, ',',
444 printf("\nDirectory representation statistics:\n");
445 print_rep_stats(&stats->dir_rep_stats, pool);
446 printf("\nFile representation statistics:\n");
447 print_rep_stats(&stats->file_rep_stats, pool);
448 printf("\nDirectory property representation statistics:\n");
449 print_rep_stats(&stats->dir_prop_rep_stats, pool);
450 printf("\nFile property representation statistics:\n");
451 print_rep_stats(&stats->file_prop_rep_stats, pool);
453 printf("\nLargest representations:\n");
454 print_largest_reps(stats->largest_changes, pool);
455 printf("\nExtensions by number of representations:\n");
456 print_extensions_by_changes(stats, pool);
457 printf("\nExtensions by size of changed files:\n");
458 print_extensions_by_nodes(stats, pool);
459 printf("\nExtensions by size of representations:\n");
460 print_extensions_by_reps(stats, pool);
462 printf("\nHistogram of expanded node sizes:\n");
463 print_histogram(&stats->node_size_histogram, pool);
464 printf("\nHistogram of representation sizes:\n");
465 print_histogram(&stats->rep_size_histogram, pool);
466 printf("\nHistogram of file sizes:\n");
467 print_histogram(&stats->file_histogram, pool);
468 printf("\nHistogram of file representation sizes:\n");
469 print_histogram(&stats->file_rep_histogram, pool);
470 printf("\nHistogram of file property sizes:\n");
471 print_histogram(&stats->file_prop_histogram, pool);
472 printf("\nHistogram of file property representation sizes:\n");
473 print_histogram(&stats->file_prop_rep_histogram, pool);
474 printf("\nHistogram of directory sizes:\n");
475 print_histogram(&stats->dir_histogram, pool);
476 printf("\nHistogram of directory representation sizes:\n");
477 print_histogram(&stats->dir_rep_histogram, pool);
478 printf("\nHistogram of directory property sizes:\n");
479 print_histogram(&stats->dir_prop_histogram, pool);
480 printf("\nHistogram of directory property representation sizes:\n");
481 print_histogram(&stats->dir_prop_rep_histogram, pool);
483 print_histograms_by_extension(stats, pool);
486 /* Our progress function simply prints the REVISION number and makes it
487 * appear immediately.
490 print_progress(svn_revnum_t revision,
494 printf("%8ld", revision);
498 /* This implements `svn_opt_subcommand_t'. */
500 subcommand__stats(apr_getopt_t *os, void *baton, apr_pool_t *pool)
502 svnfsfs__opt_state *opt_state = baton;
504 svn_fs_fs__ioctl_get_stats_input_t input = {0};
505 svn_fs_fs__ioctl_get_stats_output_t *output;
507 printf("Reading revisions\n");
508 SVN_ERR(open_fs(&fs, opt_state->repository_path, pool));
510 input.progress_func = print_progress;
511 SVN_ERR(svn_fs_ioctl(fs, SVN_FS_FS__IOCTL_GET_STATS, &input, (void **)&output,
512 check_cancel, NULL, pool, pool));
513 print_stats(output->stats, pool);