1 #include "llvm/ADT/DenseMap.h"
2 #include "llvm/ADT/StringExtras.h"
3 #include "llvm/ADT/StringSet.h"
4 #include "llvm/DebugInfo/DIContext.h"
5 #include "llvm/DebugInfo/DWARF/DWARFContext.h"
6 #include "llvm/DebugInfo/DWARF/DWARFDebugLoc.h"
7 #include "llvm/Object/ObjectFile.h"
9 #define DEBUG_TYPE "dwarfdump"
11 using namespace object;
13 /// Holds statistics for one function (or other entity that has a PC range and
14 /// contains variables, such as a compile unit).
15 struct PerFunctionStats {
16 /// Number of inlined instances of this function.
17 unsigned NumFnInlined = 0;
18 /// Number of inlined instances that have abstract origins.
19 unsigned NumAbstractOrigins = 0;
20 /// Number of variables and parameters with location across all inlined
22 unsigned TotalVarWithLoc = 0;
23 /// Number of constants with location across all inlined instances.
24 unsigned ConstantMembers = 0;
25 /// List of all Variables and parameters in this function.
26 StringSet<> VarsInFunction;
27 /// Compile units also cover a PC range, but have this flag set to false.
28 bool IsFunction = false;
29 /// Verify function definition has PC addresses (for detecting when
30 /// a function has been inlined everywhere).
31 bool HasPCAddresses = false;
32 /// Function has source location information.
33 bool HasSourceLocation = false;
34 /// Number of function parameters.
35 unsigned NumParams = 0;
36 /// Number of function parameters with source location.
37 unsigned NumParamSourceLocations = 0;
38 /// Number of function parameters with type.
39 unsigned NumParamTypes = 0;
40 /// Number of function parameters with a DW_AT_location.
41 unsigned NumParamLocations = 0;
42 /// Number of variables.
44 /// Number of variables with source location.
45 unsigned NumVarSourceLocations = 0;
46 /// Number of variables wtih type.
47 unsigned NumVarTypes = 0;
48 /// Number of variables wtih DW_AT_location.
49 unsigned NumVarLocations = 0;
/// Holds accumulated global statistics about DIEs.
///
/// Byte totals are kept in 64-bit counters: they are summed from 64-bit PC
/// range sizes, and a 32-bit accumulator could wrap on large inputs.
struct GlobalStats {
  /// Total number of PC range bytes covered by DW_AT_locations.
  uint64_t ScopeBytesCovered = 0;
  /// Total number of PC range bytes in each variable's enclosing scope,
  /// starting from the first definition of the variable.
  uint64_t ScopeBytesFromFirstDefinition = 0;
  /// Total number of call site entries (DW_TAG_call_site) or
  /// (DW_AT_call_file & DW_AT_call_line).
  unsigned CallSiteEntries = 0;
  /// Total byte size of concrete functions. This byte size includes
  /// inline functions contained in the concrete functions.
  uint64_t FunctionSize = 0;
  /// Total byte size of inlined functions. This is the total number of bytes
  /// for the top inline functions within concrete functions. This can help
  /// tune the inline settings when compiling to match user expectations.
  uint64_t InlineFunctionSize = 0;
};
71 /// Extract the low pc from a Die.
72 static uint64_t getLowPC(DWARFDie Die) {
73 auto RangesOrError = Die.getAddressRanges();
74 DWARFAddressRangesVector Ranges;
76 Ranges = RangesOrError.get();
78 llvm::consumeError(RangesOrError.takeError());
80 return Ranges[0].LowPC;
81 return dwarf::toAddress(Die.find(dwarf::DW_AT_low_pc), 0);
84 /// Collect debug info quality metrics for one DIE.
85 static void collectStatsForDie(DWARFDie Die, std::string FnPrefix,
86 std::string VarPrefix, uint64_t ScopeLowPC,
87 uint64_t BytesInScope, uint32_t InlineDepth,
88 StringMap<PerFunctionStats> &FnStatMap,
89 GlobalStats &GlobalStats) {
91 bool HasSrcLoc = false;
93 bool IsArtificial = false;
94 uint64_t BytesCovered = 0;
95 uint64_t OffsetToFirstDefinition = 0;
97 if (Die.getTag() == dwarf::DW_TAG_call_site) {
98 GlobalStats.CallSiteEntries++;
102 if (Die.getTag() != dwarf::DW_TAG_formal_parameter &&
103 Die.getTag() != dwarf::DW_TAG_variable &&
104 Die.getTag() != dwarf::DW_TAG_member) {
105 // Not a variable or constant member.
109 if (Die.findRecursively(dwarf::DW_AT_decl_file) &&
110 Die.findRecursively(dwarf::DW_AT_decl_line))
113 if (Die.findRecursively(dwarf::DW_AT_type))
116 if (Die.find(dwarf::DW_AT_artificial))
119 if (Die.find(dwarf::DW_AT_const_value)) {
120 // This catches constant members *and* variables.
122 BytesCovered = BytesInScope;
124 if (Die.getTag() == dwarf::DW_TAG_member) {
128 // Handle variables and function arguments.
129 auto FormValue = Die.find(dwarf::DW_AT_location);
130 HasLoc = FormValue.hasValue();
133 if (auto DebugLocOffset = FormValue->getAsSectionOffset()) {
134 auto *DebugLoc = Die.getDwarfUnit()->getContext().getDebugLoc();
135 if (auto List = DebugLoc->getLocationListAtOffset(*DebugLocOffset)) {
136 for (auto Entry : List->Entries)
137 BytesCovered += Entry.End - Entry.Begin;
138 if (List->Entries.size()) {
139 uint64_t FirstDef = List->Entries[0].Begin;
140 uint64_t UnitOfs = getLowPC(Die.getDwarfUnit()->getUnitDIE());
141 // Ranges sometimes start before the lexical scope.
142 if (UnitOfs + FirstDef >= ScopeLowPC)
143 OffsetToFirstDefinition = UnitOfs + FirstDef - ScopeLowPC;
144 // Or even after it. Count that as a failure.
145 if (OffsetToFirstDefinition > BytesInScope)
146 OffsetToFirstDefinition = 0;
149 assert(BytesInScope);
151 // Assume the entire range is covered by a single location.
152 BytesCovered = BytesInScope;
157 // Collect PC range coverage data.
158 auto &FnStats = FnStatMap[FnPrefix];
160 Die.getAttributeValueAsReferencedDie(dwarf::DW_AT_abstract_origin))
162 // By using the variable name + the path through the lexical block tree, the
163 // keys are consistent across duplicate abstract origins in different CUs.
164 std::string VarName = StringRef(Die.getName(DINameKind::ShortName));
165 FnStats.VarsInFunction.insert(VarPrefix + VarName);
167 FnStats.TotalVarWithLoc += (unsigned)HasLoc;
168 // Adjust for the fact the variables often start their lifetime in the
169 // middle of the scope.
170 BytesInScope -= OffsetToFirstDefinition;
171 // Turns out we have a lot of ranges that extend past the lexical scope.
172 GlobalStats.ScopeBytesCovered += std::min(BytesInScope, BytesCovered);
173 GlobalStats.ScopeBytesFromFirstDefinition += BytesInScope;
174 assert(GlobalStats.ScopeBytesCovered <=
175 GlobalStats.ScopeBytesFromFirstDefinition);
176 } else if (Die.getTag() == dwarf::DW_TAG_member) {
177 FnStats.ConstantMembers++;
179 FnStats.TotalVarWithLoc += (unsigned)HasLoc;
182 if (Die.getTag() == dwarf::DW_TAG_formal_parameter) {
185 FnStats.NumParamTypes++;
187 FnStats.NumParamSourceLocations++;
189 FnStats.NumParamLocations++;
190 } else if (Die.getTag() == dwarf::DW_TAG_variable) {
193 FnStats.NumVarTypes++;
195 FnStats.NumVarSourceLocations++;
197 FnStats.NumVarLocations++;
202 /// Recursively collect debug info quality metrics.
203 static void collectStatsRecursive(DWARFDie Die, std::string FnPrefix,
204 std::string VarPrefix, uint64_t ScopeLowPC,
205 uint64_t BytesInScope, uint32_t InlineDepth,
206 StringMap<PerFunctionStats> &FnStatMap,
207 GlobalStats &GlobalStats) {
208 // Handle any kind of lexical scope.
209 const dwarf::Tag Tag = Die.getTag();
210 const bool IsFunction = Tag == dwarf::DW_TAG_subprogram;
211 const bool IsBlock = Tag == dwarf::DW_TAG_lexical_block;
212 const bool IsInlinedFunction = Tag == dwarf::DW_TAG_inlined_subroutine;
213 if (IsFunction || IsInlinedFunction || IsBlock) {
215 // Reset VarPrefix when entering a new function.
216 if (Die.getTag() == dwarf::DW_TAG_subprogram ||
217 Die.getTag() == dwarf::DW_TAG_inlined_subroutine)
220 // Ignore forward declarations.
221 if (Die.find(dwarf::DW_AT_declaration))
224 // Check for call sites.
225 if (Die.find(dwarf::DW_AT_call_file) && Die.find(dwarf::DW_AT_call_line))
226 GlobalStats.CallSiteEntries++;
229 auto RangesOrError = Die.getAddressRanges();
230 if (!RangesOrError) {
231 llvm::consumeError(RangesOrError.takeError());
235 auto Ranges = RangesOrError.get();
236 uint64_t BytesInThisScope = 0;
237 for (auto Range : Ranges)
238 BytesInThisScope += Range.HighPC - Range.LowPC;
239 ScopeLowPC = getLowPC(Die);
241 // Count the function.
243 StringRef Name = Die.getName(DINameKind::LinkageName);
245 Name = Die.getName(DINameKind::ShortName);
247 // Skip over abstract origins.
248 if (Die.find(dwarf::DW_AT_inline))
250 // We've seen an (inlined) instance of this function.
251 auto &FnStats = FnStatMap[Name];
252 if (IsInlinedFunction) {
253 FnStats.NumFnInlined++;
254 if (Die.findRecursively(dwarf::DW_AT_abstract_origin))
255 FnStats.NumAbstractOrigins++;
257 FnStats.IsFunction = true;
258 if (BytesInThisScope && !IsInlinedFunction)
259 FnStats.HasPCAddresses = true;
260 std::string FnName = StringRef(Die.getName(DINameKind::ShortName));
261 if (Die.findRecursively(dwarf::DW_AT_decl_file) &&
262 Die.findRecursively(dwarf::DW_AT_decl_line))
263 FnStats.HasSourceLocation = true;
266 if (BytesInThisScope) {
267 BytesInScope = BytesInThisScope;
269 GlobalStats.FunctionSize += BytesInThisScope;
270 else if (IsInlinedFunction && InlineDepth == 0)
271 GlobalStats.InlineFunctionSize += BytesInThisScope;
274 // Not a scope, visit the Die itself. It could be a variable.
275 collectStatsForDie(Die, FnPrefix, VarPrefix, ScopeLowPC, BytesInScope,
276 InlineDepth, FnStatMap, GlobalStats);
279 // Set InlineDepth correctly for child recursion
282 else if (IsInlinedFunction)
285 // Traverse children.
286 unsigned LexicalBlockIndex = 0;
287 DWARFDie Child = Die.getFirstChild();
289 std::string ChildVarPrefix = VarPrefix;
290 if (Child.getTag() == dwarf::DW_TAG_lexical_block)
291 ChildVarPrefix += toHex(LexicalBlockIndex++) + '.';
293 collectStatsRecursive(Child, FnPrefix, ChildVarPrefix, ScopeLowPC,
294 BytesInScope, InlineDepth, FnStatMap, GlobalStats);
295 Child = Child.getSibling();
299 /// Print machine-readable output.
300 /// The machine-readable format is single-line JSON output.
302 static void printDatum(raw_ostream &OS, const char *Key, StringRef Value) {
303 OS << ",\"" << Key << "\":\"" << Value << '"';
304 LLVM_DEBUG(llvm::dbgs() << Key << ": " << Value << '\n');
306 static void printDatum(raw_ostream &OS, const char *Key, uint64_t Value) {
307 OS << ",\"" << Key << "\":" << Value;
308 LLVM_DEBUG(llvm::dbgs() << Key << ": " << Value << '\n');
312 /// Collect debug info quality metrics for an entire DIContext.
314 /// Do the impossible and reduce the quality of the debug info down to a few
315 /// numbers. The idea is to condense the data into numbers that can be tracked
316 /// over time to identify trends in newer compiler versions and gauge the effect
317 /// of particular optimizations. The raw numbers themselves are not particularly
318 /// useful, only the delta between compiling the same program with different
320 bool collectStatsForObjectFile(ObjectFile &Obj, DWARFContext &DICtx,
321 Twine Filename, raw_ostream &OS) {
322 StringRef FormatName = Obj.getFileFormatName();
323 GlobalStats GlobalStats;
324 StringMap<PerFunctionStats> Statistics;
325 for (const auto &CU : static_cast<DWARFContext *>(&DICtx)->compile_units())
326 if (DWARFDie CUDie = CU->getNonSkeletonUnitDIE(false))
327 collectStatsRecursive(CUDie, "/", "g", 0, 0, 0, Statistics, GlobalStats);
329 /// The version number should be increased every time the algorithm is changed
330 /// (including bug fixes). New metrics may be added without increasing the
332 unsigned Version = 3;
333 unsigned VarParamTotal = 0;
334 unsigned VarParamUnique = 0;
335 unsigned VarParamWithLoc = 0;
336 unsigned NumFunctions = 0;
337 unsigned NumInlinedFunctions = 0;
338 unsigned NumFuncsWithSrcLoc = 0;
339 unsigned NumAbstractOrigins = 0;
340 unsigned ParamTotal = 0;
341 unsigned ParamWithType = 0;
342 unsigned ParamWithLoc = 0;
343 unsigned ParamWithSrcLoc = 0;
344 unsigned VarTotal = 0;
345 unsigned VarWithType = 0;
346 unsigned VarWithSrcLoc = 0;
347 unsigned VarWithLoc = 0;
348 for (auto &Entry : Statistics) {
349 PerFunctionStats &Stats = Entry.getValue();
350 unsigned TotalVars = Stats.VarsInFunction.size() * Stats.NumFnInlined;
351 // Count variables in concrete out-of-line functions and in global scope.
352 if (Stats.HasPCAddresses || !Stats.IsFunction)
353 TotalVars += Stats.VarsInFunction.size();
354 unsigned Constants = Stats.ConstantMembers;
355 VarParamWithLoc += Stats.TotalVarWithLoc + Constants;
356 VarParamTotal += TotalVars;
357 VarParamUnique += Stats.VarsInFunction.size();
358 LLVM_DEBUG(for (auto &V
359 : Stats.VarsInFunction) llvm::dbgs()
360 << Entry.getKey() << ": " << V.getKey() << "\n");
361 NumFunctions += Stats.IsFunction;
362 NumFuncsWithSrcLoc += Stats.HasSourceLocation;
363 NumInlinedFunctions += Stats.IsFunction * Stats.NumFnInlined;
364 NumAbstractOrigins += Stats.IsFunction * Stats.NumAbstractOrigins;
365 ParamTotal += Stats.NumParams;
366 ParamWithType += Stats.NumParamTypes;
367 ParamWithLoc += Stats.NumParamLocations;
368 ParamWithSrcLoc += Stats.NumParamSourceLocations;
369 VarTotal += Stats.NumVars;
370 VarWithType += Stats.NumVarTypes;
371 VarWithLoc += Stats.NumVarLocations;
372 VarWithSrcLoc += Stats.NumVarSourceLocations;
376 OS.SetBufferSize(1024);
377 OS << "{\"version\":" << Version;
378 LLVM_DEBUG(llvm::dbgs() << "Variable location quality metrics\n";
379 llvm::dbgs() << "---------------------------------\n");
380 printDatum(OS, "file", Filename.str());
381 printDatum(OS, "format", FormatName);
382 printDatum(OS, "source functions", NumFunctions);
383 printDatum(OS, "source functions with location", NumFuncsWithSrcLoc);
384 printDatum(OS, "inlined functions", NumInlinedFunctions);
385 printDatum(OS, "inlined funcs with abstract origins", NumAbstractOrigins);
386 printDatum(OS, "unique source variables", VarParamUnique);
387 printDatum(OS, "source variables", VarParamTotal);
388 printDatum(OS, "variables with location", VarParamWithLoc);
389 printDatum(OS, "call site entries", GlobalStats.CallSiteEntries);
390 printDatum(OS, "scope bytes total",
391 GlobalStats.ScopeBytesFromFirstDefinition);
392 printDatum(OS, "scope bytes covered", GlobalStats.ScopeBytesCovered);
393 printDatum(OS, "total function size", GlobalStats.FunctionSize);
394 printDatum(OS, "total inlined function size", GlobalStats.InlineFunctionSize);
395 printDatum(OS, "total formal params", ParamTotal);
396 printDatum(OS, "formal params with source location", ParamWithSrcLoc);
397 printDatum(OS, "formal params with type", ParamWithType);
398 printDatum(OS, "formal params with binary location", ParamWithLoc);
399 printDatum(OS, "total vars", VarTotal);
400 printDatum(OS, "vars with source location", VarWithSrcLoc);
401 printDatum(OS, "vars with type", VarWithType);
402 printDatum(OS, "vars with binary location", VarWithLoc);
405 llvm::dbgs() << "Total Availability: "
406 << (int)std::round((VarParamWithLoc * 100.0) / VarParamTotal)
408 llvm::dbgs() << "PC Ranges covered: "
409 << (int)std::round((GlobalStats.ScopeBytesCovered * 100.0) /
410 GlobalStats.ScopeBytesFromFirstDefinition)