1 //===- GsymCreator.cpp ----------------------------------------------------===//
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6 //===----------------------------------------------------------------------===//
8 #include "llvm/DebugInfo/GSYM/GsymCreator.h"
9 #include "llvm/DebugInfo/GSYM/FileWriter.h"
10 #include "llvm/DebugInfo/GSYM/Header.h"
11 #include "llvm/DebugInfo/GSYM/LineTable.h"
12 #include "llvm/MC/StringTableBuilder.h"
13 #include "llvm/Support/raw_ostream.h"
// Constructs the creator. The string table is built in
// StringTableBuilder::ELF mode, and an empty path is registered through
// insertFile() right away; encode() later asserts that Files[0] has
// Dir == 0 and Base == 0.
// NOTE(review): the closing brace of this definition is not visible here -
// confirm against the upstream file.
24 GsymCreator::GsymCreator() : StrTab(StringTableBuilder::ELF) {
25 insertFile(StringRef());
// Registers the file named by Path and returns its index.
//
// Path is split with llvm::sys::path::parent_path() and
// llvm::sys::path::filename() according to Style. Both pieces are added to the
// string table through insertString(), and the two resulting values form the
// FileEntry used as the key in FileEntryToIndex.
//
// Returns the index stored in FileEntryToIndex for that FileEntry: the
// existing one if the entry was already present, otherwise NextIndex.
28 uint32_t GsymCreator::insertFile(StringRef Path,
29 llvm::sys::path::Style Style) {
30 llvm::StringRef directory = llvm::sys::path::parent_path(Path, Style);
31 llvm::StringRef filename = llvm::sys::path::filename(Path, Style);
32 // We must insert the strings first, then call the FileEntry constructor.
33 // If we inlined the insertString() calls into the constructor call, their
34 // relative order would be unspecified, because C++ does not define the
// order in which function arguments are evaluated.
36 const uint32_t Dir = insertString(directory);
37 const uint32_t Base = insertString(filename);
38 FileEntry FE(Dir, Base);
// insertString() takes Mutex on its own; the lock below only covers the
// update of FileEntryToIndex and Files.
40 std::lock_guard<std::recursive_mutex> Guard(Mutex);
// The index a new entry would get if FE is not known yet.
41 const auto NextIndex = Files.size();
42 // Find FE in hash map and insert if not present.
43 auto R = FileEntryToIndex.insert(std::make_pair(FE, NextIndex));
// NOTE(review): a line is missing between the insert above and the
// emplace_back below. Presumably it is `if (R.second)`, so that FE is only
// appended when it was newly inserted - confirm against upstream. As shown,
// the append would be unconditional and Files would drift out of sync with
// the indices stored in FileEntryToIndex.
45 Files.emplace_back(FE);
46 return R.first->second;
// Writes the GSYM data to the file at Path using byte order ByteOrder.
//
// The file is opened through raw_fd_ostream, which reports failure in EC; that
// code is converted with llvm::errorCodeToError() and returned. Otherwise the
// stream is wrapped in a FileWriter.
// NOTE(review): the declaration of EC, the check that guards the early return
// and everything after the FileWriter construction are not visible here.
// Presumably the function ends by returning encode(O) - confirm against
// upstream.
49 llvm::Error GsymCreator::save(StringRef Path,
50 llvm::support::endianness ByteOrder) const {
52 raw_fd_ostream OutStrm(Path, EC);
54 return llvm::errorCodeToError(EC);
55 FileWriter O(OutStrm, ByteOrder);
// Serializes the collected data through the FileWriter O while holding Mutex.
//
// Sections are written in this order:
//   1. the Header (Hdr.encode),
//   2. one address offset per function, relative to Hdr.BaseAddress and
//      Hdr.AddrOffSize bytes wide,
//   3. placeholders for the per-function address info offsets,
//   4. the file table: a 32-bit count followed by a 32-bit Dir and Base per
//      file,
//   5. the string table,
//   6. the encoded FunctionInfo objects.
// Afterwards the string table offset/size in the header and the address info
// offsets are patched in place with fixup32().
//
// Returns an invalid_argument string error with one of the messages
// "no functions to encode", "GsymCreator wasn't finalized prior to encoding",
// "too many FunctionInfos" or "invalid UUID size %u", or the error produced by
// FunctionInfo::encode(); ErrorSuccess() otherwise.
//
// NOTE(review): several lines of this definition are not visible here (the
// conditions guarding the first two errors, the declaration of Hdr, the
// AddrOffSize assignments, the handling of Err, the placeholder writes, the
// declaration/increment of Offset and all closing braces). Confirm against
// upstream before relying on the details below.
59 llvm::Error GsymCreator::encode(FileWriter &O) const {
60 std::lock_guard<std::recursive_mutex> Guard(Mutex);
62 return createStringError(std::errc::invalid_argument,
63 "no functions to encode");
65 return createStringError(std::errc::invalid_argument,
66 "GsymCreator wasn't finalized prior to encoding");
// Hdr.NumAddresses is 32 bits wide (see the static_cast below).
68 if (Funcs.size() > UINT32_MAX)
69 return createStringError(std::errc::invalid_argument,
70 "too many FunctionInfos");
// The base address is the explicitly set BaseAddress if there is one,
// otherwise the start address of the first function.
72 const uint64_t MinAddr = BaseAddress ? *BaseAddress : Funcs.front().startAddress();
73 const uint64_t MaxAddr = Funcs.back().startAddress();
// NOTE(review): unsigned subtraction - if BaseAddress is larger than the
// last function's start address this wraps around instead of failing.
74 const uint64_t AddrDelta = MaxAddr - MinAddr;
76 Hdr.Magic = GSYM_MAGIC;
77 Hdr.Version = GSYM_VERSION;
79 Hdr.UUIDSize = static_cast<uint8_t>(UUID.size());
80 Hdr.BaseAddress = MinAddr;
81 Hdr.NumAddresses = static_cast<uint32_t>(Funcs.size());
82 Hdr.StrtabOffset = 0; // We will fix this up later.
83 Hdr.StrtabSize = 0; // We will fix this up later.
84 memset(Hdr.UUID, 0, sizeof(Hdr.UUID));
// Reject a UUID that does not fit the header field before it is copied.
85 if (UUID.size() > sizeof(Hdr.UUID))
86 return createStringError(std::errc::invalid_argument,
87 "invalid UUID size %u", (uint32_t)UUID.size());
88 // Set the address offset size correctly in the GSYM header.
// The smallest width able to hold AddrDelta is chosen; the switch further
// down handles widths of 1, 2, 4 and 8 bytes.
89 if (AddrDelta <= UINT8_MAX)
91 else if (AddrDelta <= UINT16_MAX)
93 else if (AddrDelta <= UINT32_MAX)
97 // Copy the UUID value if we have one.
99 memcpy(Hdr.UUID, UUID.data(), UUID.size());
100 // Write out the header.
101 llvm::Error Err = Hdr.encode(O);
105 // Write out the address offsets.
106 O.alignTo(Hdr.AddrOffSize);
107 for (const auto &FuncInfo : Funcs) {
// NOTE(review): same unsigned wrap-around caveat as for AddrDelta above.
108 uint64_t AddrOffset = FuncInfo.startAddress() - Hdr.BaseAddress;
109 switch(Hdr.AddrOffSize) {
110 case 1: O.writeU8(static_cast<uint8_t>(AddrOffset)); break;
111 case 2: O.writeU16(static_cast<uint16_t>(AddrOffset)); break;
112 case 4: O.writeU32(static_cast<uint32_t>(AddrOffset)); break;
113 case 8: O.writeU64(AddrOffset); break;
117 // Write out all zeros for the AddrInfoOffsets.
// The position is remembered so the real offsets can be patched in at the
// end, once every FunctionInfo has been encoded.
119 const off_t AddrInfoOffsetsOffset = O.tell();
120 for (size_t i = 0, n = Funcs.size(); i < n; ++i)
123 // Write out the file table
// Index 0 is expected to be the empty file entry.
125 assert(!Files.empty());
126 assert(Files[0].Dir == 0);
127 assert(Files[0].Base == 0);
128 size_t NumFiles = Files.size();
129 if (NumFiles > UINT32_MAX)
130 return createStringError(std::errc::invalid_argument,
132 O.writeU32(static_cast<uint32_t>(NumFiles));
// NOTE(review): `auto File` copies each entry; `const auto &` would avoid it.
133 for (auto File: Files) {
134 O.writeU32(File.Dir);
135 O.writeU32(File.Base);
138 // Write out the string table.
139 const off_t StrtabOffset = O.tell();
140 StrTab.write(O.get_stream());
141 const off_t StrtabSize = O.tell() - StrtabOffset;
// Collects, per function, the value returned by FunctionInfo::encode().
142 std::vector<uint32_t> AddrInfoOffsets;
144 // Write out the address infos for each function info.
145 for (const auto &FuncInfo : Funcs) {
// NOTE(review): encode() yields a uint64_t that is stored in a
// std::vector<uint32_t>, so values above UINT32_MAX are silently narrowed.
146 if (Expected<uint64_t> OffsetOrErr = FuncInfo.encode(O))
147 AddrInfoOffsets.push_back(OffsetOrErr.get());
149 return OffsetOrErr.takeError();
151 // Fixup the string table offset and size in the header
// NOTE(review): the C-style casts narrow off_t to 32 bits without a range
// check.
152 O.fixup32((uint32_t)StrtabOffset, offsetof(Header, StrtabOffset));
153 O.fixup32((uint32_t)StrtabSize, offsetof(Header, StrtabSize));
155 // Fixup all address info offsets
// NOTE(review): Offset is declared and advanced on lines that are not
// visible here; presumably it starts at 0 and grows by 4 per entry.
157 for (auto AddrInfoOffset: AddrInfoOffsets) {
158 O.fixup32(AddrInfoOffset, AddrInfoOffsetsOffset + Offset);
161 return ErrorSuccess();
// Prepares the collected data for encode(), holding Mutex throughout.
//
// Visible steps: the string table is frozen with finalizeInOrder(); Funcs is
// walked with a Prev/Curr iterator pair and duplicate or overlapping entries
// are erased, with a warning written to OS for each case; if the last entry
// has a zero-sized range and ValidTextRanges contains its start address, its
// end is set to the end of that text range; finally a "Pruned N functions"
// summary is written to OS.
//
// Returns an invalid_argument string error "already finalized", otherwise
// Error::success().
//
// NOTE(review): many lines of this definition are not visible here (the
// condition guarding the "already finalized" error, the sort itself, several
// else branches, the Prev/Curr advancing logic and all closing braces).
// Confirm the exact control flow against upstream.
164 llvm::Error GsymCreator::finalize(llvm::raw_ostream &OS) {
165 std::lock_guard<std::recursive_mutex> Guard(Mutex);
167 return createStringError(std::errc::invalid_argument,
168 "already finalized");
171 // Sort function infos so we can emit sorted functions.
174 // Don't let the string table indexes change by finalizing in order.
175 StrTab.finalizeInOrder();
177 // Remove duplicate function infos that have both entries from debug info
178 // (DWARF or Breakpad) and entries from the SymbolTable.
180 // Also handle overlapping functions. Usually there shouldn't be any, but they
181 // can and do happen in some rare cases.
// NOTE(review): the description of cases (a), (b) and (c), and of the
// functions X and Y referred to below, is not visible here.
190 // In (a) and (b), Y is ignored and X will be reported for the full range.
191 // In (c), both functions will be included in the result and lookups for an
192 // address in the intersection will return Y because of binary search.
194 // Note that in case of (b), we cannot include Y in the result because then
195 // we wouldn't find any function for range (end of Y, end of X)
196 // with binary search.
// Remembered only for the summary printed at the end.
197 auto NumBefore = Funcs.size();
198 auto Curr = Funcs.begin();
// Funcs.end() serves as the "no previous entry yet" marker.
199 auto Prev = Funcs.end();
200 while (Curr != Funcs.end()) {
201 // Can't check for overlaps or same address ranges if we don't have a
// previous entry.
203 if (Prev != Funcs.end()) {
204 if (Prev->Range.intersects(Curr->Range)) {
205 // Overlapping address ranges.
206 if (Prev->Range == Curr->Range) {
207 // Same address range. Check if one is from debug info and the other
208 // is from a symbol table. If so, then keep the one with debug info.
209 // Our sorting guarantees that entries with matching address ranges
210 // that have debug info are last in the sort.
211 if (*Prev == *Curr) {
212 // FunctionInfo entries match exactly (range, lines, inlines)
213 OS << "warning: duplicate function info entries for range: "
214 << Curr->Range << '\n';
// Drop the earlier of the two; Curr takes the iterator that erase() returns.
215 Curr = Funcs.erase(Prev);
217 if (!Prev->hasRichInfo() && Curr->hasRichInfo()) {
218 // Same address range, one with no debug info (symbol) and the
219 // next with debug info. Keep the latter.
220 Curr = Funcs.erase(Prev);
222 OS << "warning: same address range contains different debug "
223 << "info. Removing:\n"
224 << *Prev << "\nIn favor of this one:\n"
226 Curr = Funcs.erase(Prev);
230 // print warnings about overlaps
231 OS << "warning: function ranges overlap:\n"
// A zero-sized previous entry whose start lies inside the current range is
// removed in favor of the current entry.
235 } else if (Prev->Range.size() == 0 &&
236 Curr->Range.contains(Prev->Range.Start)) {
237 OS << "warning: removing symbol:\n"
238 << *Prev << "\nKeeping:\n"
240 Curr = Funcs.erase(Prev);
243 if (Curr == Funcs.end())
248 // If our last function info entry doesn't have a size and if we have valid
249 // text ranges, we should set the size of the last entry since any search for
250 // a high address might match our last entry. By fixing up this size, we can
251 // help ensure we don't cause lookups to always return the last symbol that
252 // has no size when doing lookups.
253 if (!Funcs.empty() && Funcs.back().Range.size() == 0 && ValidTextRanges) {
254 if (auto Range = ValidTextRanges->getRangeThatContains(
255 Funcs.back().Range.Start)) {
256 Funcs.back().Range.End = Range->End;
259 OS << "Pruned " << NumBefore - Funcs.size() << " functions, ended with "
260 << Funcs.size() << " total\n";
261 return Error::success();
// Adds S to the string table and returns the value of StrTab.add(S).
//
// Runs under Mutex. If S is not yet contained in StrTab, a copy is stored in
// StringStorage and S is re-pointed at that stored key, so the string table
// references memory owned by this object.
//
// NOTE(review): the Copy parameter is not read in any visible line, and a few
// lines at the top of the body are missing. Presumably the copy step is
// wrapped in `if (Copy) {` and empty strings return early - confirm against
// upstream.
264 uint32_t GsymCreator::insertString(StringRef S, bool Copy) {
267 std::lock_guard<std::recursive_mutex> Guard(Mutex);
269 // We need to provide backing storage for the string if requested
270 // since StringTableBuilder stores references to strings. Any string
271 // that comes from a section in an object file doesn't need to be
272 // copied, but any string created by code will need to be copied.
273 // This allows GsymCreator to be really fast when parsing DWARF and
274 // other object files as most strings don't need to be copied.
275 CachedHashStringRef CHStr(S);
// Only strings the table does not already contain are copied.
276 if (!StrTab.contains(CHStr))
277 S = StringStorage.insert(S).first->getKey();
279 return StrTab.add(S);
282 void GsymCreator::addFunctionInfo(FunctionInfo &&FI) {
283 std::lock_guard<std::recursive_mutex> Guard(Mutex);
284 Ranges.insert(FI.Range);
285 Funcs.emplace_back(FI);
// Visits the entries of Funcs by mutable reference while holding Mutex.
//
// Callback receives each FunctionInfo and returns a bool.
// NOTE(review): the loop body is not visible here, so how the return value is
// used cannot be confirmed. Presumably returning false stops the iteration -
// verify against upstream.
288 void GsymCreator::forEachFunctionInfo(
289 std::function<bool(FunctionInfo &)> const &Callback) {
290 std::lock_guard<std::recursive_mutex> Guard(Mutex);
291 for (auto &FI : Funcs) {
// Read-only overload: visits the entries of Funcs by const reference while
// holding Mutex.
//
// Callback receives each FunctionInfo and returns a bool.
// NOTE(review): the loop body is not visible here, so how the return value is
// used cannot be confirmed. Presumably returning false stops the iteration -
// verify against upstream.
297 void GsymCreator::forEachFunctionInfo(
298 std::function<bool(const FunctionInfo &)> const &Callback) const {
299 std::lock_guard<std::recursive_mutex> Guard(Mutex);
300 for (const auto &FI : Funcs) {
// Returns a function-info count as size_t; Mutex is held for the duration of
// the call.
// NOTE(review): the return statement is not visible here. Presumably it is
// `return Funcs.size();` - confirm against upstream.
306 size_t GsymCreator::getNumFunctionInfos() const{
307 std::lock_guard<std::recursive_mutex> Guard(Mutex);
// Tells whether Addr is acceptable as a text address.
//
// When valid text ranges are available the answer is
// ValidTextRanges->contains(Addr); otherwise every address is accepted.
// NOTE(review): the condition selecting between the two returns is not visible
// here. Presumably it is `if (ValidTextRanges)` - confirm against upstream.
// No lock on Mutex appears in the visible lines, unlike the neighbouring
// accessors.
311 bool GsymCreator::IsValidTextAddress(uint64_t Addr) const {
313 return ValidTextRanges->contains(Addr);
314 return true; // No valid text ranges have been set, so accept every address.
// Returns whether Ranges contains Addr. Ranges receives the range of every
// FunctionInfo passed to addFunctionInfo(). The lookup runs under Mutex.
// NOTE(review): the closing brace of this definition is not visible here.
317 bool GsymCreator::hasFunctionInfoForAddress(uint64_t Addr) const {
318 std::lock_guard<std::recursive_mutex> Guard(Mutex);
319 return Ranges.contains(Addr);