1 //===- SymbolTable.cpp ----------------------------------------------------===//
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
7 //===----------------------------------------------------------------------===//
9 #include "SymbolTable.h"
15 #include "lld/Common/ErrorHandler.h"
16 #include "lld/Common/Memory.h"
17 #include "lld/Common/Timer.h"
18 #include "llvm/IR/LLVMContext.h"
19 #include "llvm/Object/WindowsMachineFlag.h"
20 #include "llvm/Support/Debug.h"
21 #include "llvm/Support/raw_ostream.h"
// Timer labelled "LTO", created under Timer::root(). It is the argument of
// the ScopedTimer constructed in SymbolTable::addCombinedLTOObjects().
29 static Timer ltoTimer("LTO", Timer::root());
// Registers an input file: logs it, compares its machine type with
// config->machine, appends it to the `instances` list of its concrete kind
// (ObjFile, BitcodeFile or ImportFile) and passes it to
// driver->parseDirectives().
33 void SymbolTable::addFile(InputFile *file) {
34 log("Reading " + toString(file));
37 MachineTypes mt = file->getMachineType();
// First branch: no target machine has been configured yet.
38 if (config->machine == IMAGE_FILE_MACHINE_UNKNOWN) {
// Second branch: the file has a known machine type that differs from the
// configured one, which is reported as an error naming both types. Files
// whose machine type is unknown are not compared.
40 } else if (mt != IMAGE_FILE_MACHINE_UNKNOWN && config->machine != mt) {
41 error(toString(file) + ": machine type " + machineToStr(mt) +
42 " conflicts with " + machineToStr(config->machine));
// Record the file in the static instance list that matches its dynamic type.
46 if (auto *f = dyn_cast<ObjFile>(file)) {
47 ObjFile::instances.push_back(f);
48 } else if (auto *f = dyn_cast<BitcodeFile>(file)) {
49 BitcodeFile::instances.push_back(f);
50 } else if (auto *f = dyn_cast<ImportFile>(file)) {
51 ImportFile::instances.push_back(f);
54 driver->parseDirectives(file);
// Reports the message `s`, choosing the reporting path from
// config->forceUnresolved.
// NOTE(review): presumably a warning when the flag is set and an error
// otherwise -- confirm against the full function body.
57 static void errorOrWarn(const Twine &s) {
58 if (config->forceUnresolved)
64 // Returns the symbol in SC whose value is <= Addr that is closest to Addr.
65 // This is generally the global variable or function whose definition contains
// Addr.
67 static Symbol *getSymbol(SectionChunk *sc, uint32_t addr) {
// Best match found so far; stays null until a symbol qualifies.
68 DefinedRegular *candidate = nullptr;
// Candidates come from the symbol list of the file that owns the chunk.
70 for (Symbol *s : sc->file->getSymbols()) {
// dyn_cast_or_null: entries may be null or of another symbol kind.
71 auto *d = dyn_cast_or_null<DefinedRegular>(s);
// The condition is true for an entry that is not a DefinedRegular, has no
// data, belongs to a different chunk, lies above addr, or lies below the
// current candidate.
72 if (!d || !d->data || d->getChunk() != sc || d->getValue() > addr ||
73 (candidate && d->getValue() < candidate->getValue()))
82 // Given a file and the index of a symbol in that file, returns a description
83 // of all references to that symbol from that file. If no debug information is
84 // available, returns just the name of the file, else one string per actual
85 // reference as described in the debug info.
// Every returned string begins with "\n>>> referenced by ".
86 std::vector<std::string> getSymbolLocations(ObjFile *file, uint32_t symIndex) {
// (file name, line number) pair. Values of this shape come from getFileLine()
// and are read back through loc.fileLine further down.
89 std::pair<StringRef, uint32_t> fileLine;
// A Location is built from {sym, fileLine} and exposes .sym and .fileLine.
91 std::vector<Location> locations;
// Walk the file's chunks and, for section chunks, their relocations; each
// relocation's SymbolTableIndex is compared with symIndex.
93 for (Chunk *c : file->getChunks()) {
94 auto *sc = dyn_cast<SectionChunk>(c);
97 for (const coff_relocation &r : sc->getRelocs()) {
98 if (r.SymbolTableIndex != symIndex)
// Look up the source position and the enclosing symbol for the relocated
// address.
100 std::pair<StringRef, uint32_t> fileLine =
101 getFileLine(sc, r.VirtualAddress);
102 Symbol *sym = getSymbol(sc, r.VirtualAddress);
// Keep the reference only when at least one of the two is available.
103 if (!fileLine.first.empty() || sym)
104 locations.push_back({sym, fileLine});
// Nothing recorded: return a single entry that names only the object file.
108 if (locations.empty())
109 return std::vector<std::string>({"\n>>> referenced by " + toString(file)});
// One pre-sized output string per location, each filled through a
// raw_string_ostream bound to that string.
111 std::vector<std::string> symbolLocations(locations.size());
113 for (Location loc : locations) {
114 llvm::raw_string_ostream os(symbolLocations[i++]);
115 os << "\n>>> referenced by ";
// The "file:line" form is used when a source file name is known.
116 if (!loc.fileLine.first.empty())
117 os << loc.fileLine.first << ":" << loc.fileLine.second
119 os << toString(file);
// The enclosing symbol is appended in the form ":(<symbol>)".
121 os << ":(" << toString(*loc.sym) << ')';
123 return symbolLocations;
126 // For an undefined symbol, stores all files referencing it and the index of
127 // the undefined symbol in each file.
// In this file the entries of `files` are created from {file, symIndex} pairs
// and read back as .oFile / .symIndex; the symbol itself is read as .sym.
128 struct UndefinedDiag {
134 std::vector<File> files;
// Builds one diagnostic for an undefined symbol: the "undefined symbol: <name>"
// headline followed by reference locations gathered per referencing file. The
// text is passed to errorOrWarn().
137 static void reportUndefinedSymbol(const UndefinedDiag &undefDiag) {
139 llvm::raw_string_ostream os(out);
140 os << "undefined symbol: " << toString(*undefDiag.sym);
// Limit that the counter `i` is checked against in the inner loop.
142 const size_t maxUndefReferences = 10;
// numRefs is the total number of locations over all files; i is the counter
// compared with the limit and subtracted from numRefs in the summary line.
143 size_t i = 0, numRefs = 0;
144 for (const UndefinedDiag::File &ref : undefDiag.files) {
145 std::vector<std::string> symbolLocations =
146 getSymbolLocations(ref.oFile, ref.symIndex);
147 numRefs += symbolLocations.size();
148 for (const std::string &s : symbolLocations) {
149 if (i >= maxUndefReferences)
// Summary line giving the count numRefs - i.
156 os << "\n>>> referenced " << numRefs - i << " more times";
157 errorOrWarn(os.str());
// Walks the symbol table and, for undefined symbols, looks for a Lazy symbol
// named "__imp_<name>"; when one exists and is not already being loaded, its
// archive member is requested.
160 void SymbolTable::loadMinGWAutomaticImports() {
161 for (auto &i : symMap) {
162 Symbol *sym = i.second;
// Null when the entry is not an Undefined symbol.
163 auto *undef = dyn_cast<Undefined>(sym);
// Test whether the symbol is referenced from a regular object.
166 if (!sym->isUsedInRegularObj)
169 StringRef name = undef->getName();
// Test whether the name already carries the "__imp_" prefix.
171 if (name.startswith("__imp_"))
173 // If we have an undefined symbol, but we have a Lazy representing a
174 // symbol we could load from file, make sure to load that.
175 Lazy *l = dyn_cast_or_null<Lazy>(find(("__imp_" + name).str()));
// True when there is no such Lazy symbol or its member load is already
// pending.
176 if (!l || l->pendingArchiveLoad)
179 log("Loading lazy " + l->getName() + " from " + l->file->getName() +
180 " for automatic import");
// Mark the load as pending before asking the archive for the member.
181 l->pendingArchiveLoad = true;
182 l->file->addMember(l->sym);
// Tries to satisfy the undefined symbol `sym`, named `name`, through the
// symbol "__imp_<name>" (MinGW automatic import).
//
// sym:  the symbol to redirect; on success it is overwritten with a copy of
//       the import symbol (keeping its own name) and flagged
//       isRuntimePseudoReloc.
// name: the name of `sym`; names that already start with "__imp_" are tested
//       for first.
//
// The bool result is consumed by reportRemainingUndefines(), which calls this
// function only when config->mingw is set.
186 bool SymbolTable::handleMinGWAutomaticImport(Symbol *sym, StringRef name) {
187 if (name.startswith("__imp_"))
// Null when "__imp_<name>" is absent from the table or is not a Defined
// symbol.
189 Defined *imp = dyn_cast_or_null<Defined>(find(("__imp_" + name).str()));
193 // Replace the reference directly to a variable with a reference
194 // to the import address table instead. This obviously isn't right,
195 // but we mark the symbol as isRuntimePseudoReloc, and a later pass
196 // will add runtime pseudo relocations for every relocation against
197 // this Symbol. The runtime pseudo relocation framework expects the
198 // reference itself to point at the IAT entry.
// impSize is the object size of imp's concrete class; it is the byte count
// handed to replaceKeepingName() below.
200 if (isa<DefinedImportData>(imp)) {
201 log("Automatically importing " + name + " from " +
202 cast<DefinedImportData>(imp)->getDLLName());
203 impSize = sizeof(DefinedImportData);
204 } else if (isa<DefinedRegular>(imp)) {
205 log("Automatically importing " + name + " from " +
206 toString(cast<DefinedRegular>(imp)->file));
207 impSize = sizeof(DefinedRegular);
// This warning is for an imp that matched neither isa<> test above, so it
// must not be cast<DefinedRegular>: that cast is invalid for any other symbol
// kind. Symbol::getFile() is used instead, as in the other diagnostics of
// this file.
// NOTE(review): assumes toString() accepts the result of getFile() for every
// symbol kind, as reportDuplicate() already relies on -- confirm.
209 warn("unable to automatically import " + name + " from " + imp->getName() +
210 " from " + toString(imp->getFile()) +
211 "; unexpected symbol type");
214 sym->replaceKeepingName(imp, impSize);
215 sym->isRuntimePseudoReloc = true;
217 // There may exist symbols named .refptr.<name> which only consist
218 // of a single pointer to <name>. If it turns out <name> is
219 // automatically imported, we don't need to keep the .refptr.<name>
220 // pointer at all, but redirect all accesses to it to the IAT entry
221 // for __imp_<name> instead, and drop the whole .refptr.<name> chunk.
222 DefinedRegular *refptr =
223 dyn_cast_or_null<DefinedRegular>(find((".refptr." + name).str()));
// The chunk must be exactly one pointer (config->wordsize bytes) in size.
224 if (refptr && refptr->getChunk()->getSize() == config->wordsize) {
225 SectionChunk *sc = dyn_cast_or_null<SectionChunk>(refptr->getChunk());
// It must also carry a single relocation whose first referenced symbol is
// `sym`; only then is the chunk marked dead and .refptr.<name> redirected to
// the import symbol.
226 if (sc && sc->getRelocs().size() == 1 && *sc->symbols().begin() == sym) {
227 log("Replacing .refptr." + name + " with " + imp->getName());
228 refptr->getChunk()->live = false;
229 refptr->replaceKeepingName(imp, impSize);
// Handles every symbol that is still Undefined. For each one it tries, in
// order: a resolved weak alias, the "__imp_" prefix rule, MinGW automatic
// import (only when config->mingw is set) and the config->forceUnresolved
// substitution. Diagnostics are then produced from the `undefs` and
// `localImports` collections.
235 void SymbolTable::reportRemainingUndefines() {
// Symbols to be reported as undefined; queried with count() further down.
236 SmallPtrSet<Symbol *, 8> undefs;
// Maps a symbol rewritten into a DefinedLocalImport to the definition it
// refers to.
237 DenseMap<Symbol *, Symbol *> localImports;
239 for (auto &i : symMap) {
240 Symbol *sym = i.second;
// Null when the entry is not an Undefined symbol.
241 auto *undef = dyn_cast<Undefined>(sym);
// Test whether the symbol is referenced from a regular object.
244 if (!sym->isUsedInRegularObj)
247 StringRef name = undef->getName();
249 // A weak alias may have been resolved, so check for that.
250 if (Defined *d = undef->getWeakAlias()) {
251 // We want to replace Sym with D. However, we can't just blindly
252 // copy sizeof(SymbolUnion) bytes from D to Sym because D may be an
253 // internal symbol, and internal symbols are stored as "unparented"
254 // Symbols. For that reason we need to check which type of symbol we
255 // are dealing with and copy the correct number of bytes.
256 if (isa<DefinedRegular>(d))
257 memcpy(sym, d, sizeof(DefinedRegular));
258 else if (isa<DefinedAbsolute>(d))
259 memcpy(sym, d, sizeof(DefinedAbsolute));
261 memcpy(sym, d, sizeof(SymbolUnion));
265 // If we can resolve a symbol by removing __imp_ prefix, do that.
266 // This odd rule is for compatibility with MSVC linker.
267 if (name.startswith("__imp_")) {
// Look up the name with the "__imp_" prefix stripped.
268 Symbol *imp = find(name.substr(strlen("__imp_")));
269 if (imp && isa<Defined>(imp)) {
270 auto *d = cast<Defined>(imp);
// Rewrite sym in place, queue the new symbol's chunk in localImportChunks
// and remember the definition for the LNK4217 warnings below.
271 replaceSymbol<DefinedLocalImport>(sym, name, d);
272 localImportChunks.push_back(cast<DefinedLocalImport>(sym)->getChunk());
273 localImports[sym] = d;
278 // We don't want to report missing Microsoft precompiled headers symbols.
279 // A proper message will be emitted instead in PDBLinker::aquirePrecompObj
280 if (name.contains("_PchSym_"))
283 if (config->mingw && handleMinGWAutomaticImport(sym, name))
286 // Remaining undefined symbols are not fatal if /force is specified.
287 // They are replaced with dummy defined symbols.
288 if (config->forceUnresolved)
289 replaceSymbol<DefinedAbsolute>(sym, name, 0);
// Test whether both collections are empty, i.e. nothing is left to report.
293 if (undefs.empty() && localImports.empty())
// Diagnostics for symbols listed in config->gcroot are attributed to
// "<root>".
296 for (Symbol *b : config->gcroot) {
298 errorOrWarn("<root>: undefined symbol: " + toString(*b));
299 if (config->warnLocallyDefinedImported)
300 if (Symbol *imp = localImports.lookup(b))
301 warn("<root>: locally defined symbol imported: " + toString(*imp) +
302 " (defined in " + toString(imp->getFile()) + ") [LNK4217]");
// One UndefinedDiag per undefined symbol; firstDiag maps a symbol to the
// position of its entry in undefDiags.
305 std::vector<UndefinedDiag> undefDiags;
306 DenseMap<Symbol *, int> firstDiag;
308 for (ObjFile *file : ObjFile::instances) {
// NOTE(review): symIndex starts at (size_t)-1, presumably so that an
// increment at the top of each iteration gives the first symbol index 0 --
// confirm.
309 size_t symIndex = (size_t)-1;
310 for (Symbol *sym : file->getSymbols()) {
314 if (undefs.count(sym)) {
315 auto it = firstDiag.find(sym);
// First sighting of this symbol: create its diagnostic entry and record the
// entry's position.
316 if (it == firstDiag.end()) {
317 firstDiag[sym] = undefDiags.size();
318 undefDiags.push_back({sym, {{file, symIndex}}});
// Later sighting: append this file to the existing entry.
320 undefDiags[it->second].files.push_back({file, symIndex});
323 if (config->warnLocallyDefinedImported)
324 if (Symbol *imp = localImports.lookup(sym))
325 warn(toString(file) +
326 ": locally defined symbol imported: " + toString(*imp) +
327 " (defined in " + toString(imp->getFile()) + ") [LNK4217]");
// Emit the collected diagnostics, one per undefined symbol.
331 for (const UndefinedDiag& undefDiag : undefDiags)
332 reportUndefinedSymbol(undefDiag);
// Returns the symbol stored under `name` together with a flag that says
// whether a new entry was created by this call.
335 std::pair<Symbol *, bool> SymbolTable::insert(StringRef name) {
336 bool inserted = false;
// `sym` is a reference to the map slot, so assigning to it stores the symbol
// in the table.
337 Symbol *&sym = symMap[CachedHashStringRef(name)];
// A SymbolUnion obtained from make<>() provides the storage and is viewed as
// a Symbol; both flags are cleared explicitly.
339 sym = reinterpret_cast<Symbol *>(make<SymbolUnion>());
340 sym->isUsedInRegularObj = false;
341 sym->pendingArchiveLoad = false;
344 return {sym, inserted};
// Calls insert(name) and additionally marks the symbol as used in a regular
// object unless `file` is a BitcodeFile. A null `file` counts as a regular
// object.
347 std::pair<Symbol *, bool> SymbolTable::insert(StringRef name, InputFile *file) {
348 std::pair<Symbol *, bool> result = insert(name);
349 if (!file || !isa<BitcodeFile>(file))
350 result.first->isUsedInRegularObj = true;
// Records an undefined reference to `name` coming from file `f`.
354 Symbol *SymbolTable::addUndefined(StringRef name, InputFile *f,
358 std::tie(s, wasInserted) = insert(name, f);
// A new entry becomes Undefined, and so does an existing Lazy symbol when the
// reference is a weak alias.
359 if (wasInserted || (isa<Lazy>(s) && isWeakAlias)) {
360 replaceSymbol<Undefined>(s, name);
// An existing Lazy symbol gets its archive member requested, at most once:
// pendingArchiveLoad is set before addMember() is called.
363 if (auto *l = dyn_cast<Lazy>(s)) {
364 if (!s->pendingArchiveLoad) {
365 s->pendingArchiveLoad = true;
366 l->file->addMember(l->sym);
// Records that archive `f` can provide the symbol `sym`.
372 void SymbolTable::addLazy(ArchiveFile *f, const Archive::Symbol &sym) {
373 StringRef name = sym.getName();
376 std::tie(s, wasInserted) = insert(name);
// Turn the table entry into a Lazy symbol that refers to the archive member.
378 replaceSymbol<Lazy>(s, f, sym);
// Null when the existing entry is not an Undefined symbol.
381 auto *u = dyn_cast<Undefined>(s);
// True when the entry is not Undefined, has a weak alias, or already has an
// archive load pending.
382 if (!u || u->weakAlias || s->pendingArchiveLoad)
384 s->pendingArchiveLoad = true;
// Builds a "duplicate symbol" message that names the existing symbol and the
// file it was defined in. Callers in this file pass either the new defining
// file or nullptr as `newFile`.
388 void SymbolTable::reportDuplicate(Symbol *existing, InputFile *newFile) {
389 std::string msg = "duplicate symbol: " + toString(*existing) + " in " +
390 toString(existing->getFile()) + " and in " +
// NOTE(review): presumably selects a warning over an error when
// config->forceMultiple is set -- confirm.
393 if (config->forceMultiple)
// Defines `n` as an absolute symbol described by the COFF symbol `sym`.
399 Symbol *SymbolTable::addAbsolute(StringRef n, COFFSymbolRef sym) {
402 std::tie(s, wasInserted) = insert(n, nullptr);
403 s->isUsedInRegularObj = true;
// A new, Undefined or Lazy entry is overwritten with the absolute
// definition.
404 if (wasInserted || isa<Undefined>(s) || isa<Lazy>(s))
405 replaceSymbol<DefinedAbsolute>(s, n, sym);
// Otherwise a duplicate is reported unless the existing symbol is a
// DefinedCOFF.
406 else if (!isa<DefinedCOFF>(s))
407 reportDuplicate(s, nullptr);
// Defines `n` as an absolute symbol with the value `va`.
411 Symbol *SymbolTable::addAbsolute(StringRef n, uint64_t va) {
414 std::tie(s, wasInserted) = insert(n, nullptr);
415 s->isUsedInRegularObj = true;
// A new, Undefined or Lazy entry is overwritten with the absolute
// definition.
416 if (wasInserted || isa<Undefined>(s) || isa<Lazy>(s))
417 replaceSymbol<DefinedAbsolute>(s, n, va);
// Otherwise a duplicate is reported unless the existing symbol is a
// DefinedCOFF.
418 else if (!isa<DefinedCOFF>(s))
419 reportDuplicate(s, nullptr);
// Defines `n` as a synthetic symbol attached to chunk `c`.
423 Symbol *SymbolTable::addSynthetic(StringRef n, Chunk *c) {
426 std::tie(s, wasInserted) = insert(n, nullptr);
427 s->isUsedInRegularObj = true;
// A new, Undefined or Lazy entry is overwritten with the synthetic
// definition.
428 if (wasInserted || isa<Undefined>(s) || isa<Lazy>(s))
429 replaceSymbol<DefinedSynthetic>(s, n, c);
// Otherwise a duplicate is reported unless the existing symbol is a
// DefinedCOFF.
430 else if (!isa<DefinedCOFF>(s))
431 reportDuplicate(s, nullptr);
// Defines `n` as a regular, external, non-COMDAT symbol from file `f`.
435 Symbol *SymbolTable::addRegular(InputFile *f, StringRef n,
436 const coff_symbol_generic *sym,
440 std::tie(s, wasInserted) = insert(n, f);
// Any entry that is new or not already a DefinedRegular is overwritten.
441 if (wasInserted || !isa<DefinedRegular>(s))
442 replaceSymbol<DefinedRegular>(s, f, n, /*IsCOMDAT*/ false,
443 /*IsExternal*/ true, sym, c);
// Duplicate report naming the existing symbol and `f`.
445 reportDuplicate(s, f);
// Defines `n` as a COMDAT symbol from file `f`. Returns the resulting
// DefinedRegular and a flag: true when this call created the definition,
// false when an existing DefinedRegular was kept.
449 std::pair<DefinedRegular *, bool>
450 SymbolTable::addComdat(InputFile *f, StringRef n,
451 const coff_symbol_generic *sym) {
454 std::tie(s, wasInserted) = insert(n, f);
// New entry, or existing entry that is not a DefinedRegular: define it as
// COMDAT with no chunk attached yet (nullptr).
455 if (wasInserted || !isa<DefinedRegular>(s)) {
456 replaceSymbol<DefinedRegular>(s, f, n, /*IsCOMDAT*/ true,
457 /*IsExternal*/ true, sym, nullptr);
458 return {cast<DefinedRegular>(s), true};
// An existing DefinedRegular is kept; a duplicate is reported only when that
// existing definition is not itself COMDAT.
460 auto *existingSymbol = cast<DefinedRegular>(s);
461 if (!existingSymbol->isCOMDAT)
462 reportDuplicate(s, f);
463 return {existingSymbol, false};
// Defines `n` as a common symbol of `size` bytes from file `f`.
466 Symbol *SymbolTable::addCommon(InputFile *f, StringRef n, uint64_t size,
467 const coff_symbol_generic *sym, CommonChunk *c) {
470 std::tie(s, wasInserted) = insert(n, f);
// A new entry, or one that is not a DefinedCOFF, takes the common
// definition.
471 if (wasInserted || !isa<DefinedCOFF>(s))
472 replaceSymbol<DefinedCommon>(s, f, n, size, sym, c);
// An existing common symbol is replaced only by a strictly larger one.
473 else if (auto *dc = dyn_cast<DefinedCommon>(s))
474 if (size > dc->getSize())
475 replaceSymbol<DefinedCommon>(s, f, n, size, sym, c);
// Defines `n` as import data provided by import file `f`.
479 Symbol *SymbolTable::addImportData(StringRef n, ImportFile *f) {
482 std::tie(s, wasInserted) = insert(n, nullptr);
483 s->isUsedInRegularObj = true;
// A new, Undefined or Lazy entry is overwritten with the import data symbol.
484 if (wasInserted || isa<Undefined>(s) || isa<Lazy>(s)) {
485 replaceSymbol<DefinedImportData>(s, n, f);
// Duplicate report naming the existing symbol and `f`.
489 reportDuplicate(s, f);
// Defines `name` as an import thunk for the import data symbol `id`.
493 Symbol *SymbolTable::addImportThunk(StringRef name, DefinedImportData *id,
497 std::tie(s, wasInserted) = insert(name, nullptr);
498 s->isUsedInRegularObj = true;
// A new, Undefined or Lazy entry is overwritten with the thunk symbol.
499 if (wasInserted || isa<Undefined>(s) || isa<Lazy>(s)) {
500 replaceSymbol<DefinedImportThunk>(s, name, id, machine);
// Duplicate report naming the existing symbol and the file of `id`.
504 reportDuplicate(s, id->file);
// Looks up `name` via findUnderscore() and, when the result is a Lazy symbol
// whose archive member is a bitcode file, adds an undefined reference to it.
508 void SymbolTable::addLibcall(StringRef name) {
509 Symbol *sym = findUnderscore(name);
513 if (Lazy *l = dyn_cast<Lazy>(sym)) {
// Inspect the member's buffer to detect bitcode by its magic number.
514 MemoryBufferRef mb = l->getMemberBuffer();
515 if (identify_magic(mb.getBuffer()) == llvm::file_magic::bitcode)
516 addUndefined(sym->getName());
// Collects the chunks of every ObjFile in ObjFile::instances into one vector,
// in instance order.
520 std::vector<Chunk *> SymbolTable::getChunks() {
521 std::vector<Chunk *> res;
522 for (ObjFile *file : ObjFile::instances) {
523 ArrayRef<Chunk *> v = file->getChunks();
// Append this file's chunks at the end of the result.
524 res.insert(res.end(), v.begin(), v.end());
// Looks `name` up in symMap. Callers in this file treat the result as
// possibly null (for example via dyn_cast_or_null).
529 Symbol *SymbolTable::find(StringRef name) {
530 return symMap.lookup(CachedHashStringRef(name));
// Looks up `name`; when config->machine is I386 the lookup uses the name with
// a leading underscore prepended.
533 Symbol *SymbolTable::findUnderscore(StringRef name) {
534 if (config->machine == I386)
535 return find(("_" + name).str());
539 // Return all symbols that start with Prefix, possibly ignoring the first
540 // character of Prefix or the first character of the symbol name.
541 std::vector<Symbol *> SymbolTable::getSymsWithPrefix(StringRef prefix) {
542 std::vector<Symbol *> syms;
543 for (auto pair : symMap) {
544 StringRef name = pair.first.val();
// Four combinations are accepted: the name or the name without its first
// character, matched against the prefix or the prefix without its first
// character.
545 if (name.startswith(prefix) || name.startswith(prefix.drop_front()) ||
546 name.drop_front().startswith(prefix) ||
547 name.drop_front().startswith(prefix.drop_front())) {
548 syms.push_back(pair.second);
// Finds a symbol for `name`, trying the exact name first and then several
// decorated spellings of it.
554 Symbol *SymbolTable::findMangle(StringRef name) {
// Exact lookup; a hit is considered only when it is not an Undefined symbol.
555 if (Symbol *sym = find(name))
556 if (!isa<Undefined>(sym))
559 // Efficient fuzzy string lookup is impossible with a hash table, so iterate
560 // the symbol table once and collect all possibly matching symbols into this
561 // vector. Then compare each possibly matching symbol with each possible
// mangling.
563 std::vector<Symbol *> syms = getSymsWithPrefix(name);
// Helper that tests the collected symbols' names against a prefix.
564 auto findByPrefix = [&syms](const Twine &t) -> Symbol * {
565 std::string prefix = t.str();
567 if (s->getName().startswith(prefix))
572 // For non-x86, just look for C++ functions.
573 if (config->machine != I386)
574 return findByPrefix("?" + name + "@@Y");
// The x86 spellings below drop the first character of `name` via substr(1);
// this check tests for the leading underscore first.
576 if (!name.startswith("_"))
578 // Search for x86 stdcall function.
579 if (Symbol *s = findByPrefix(name + "@"))
581 // Search for x86 fastcall function.
582 if (Symbol *s = findByPrefix("@" + name.substr(1) + "@"))
584 // Search for x86 vectorcall function.
585 if (Symbol *s = findByPrefix(name.substr(1) + "@@"))
587 // Search for x86 C++ non-member function.
588 return findByPrefix("?" + name.substr(1) + "@@Y");
// Convenience overload: adds an undefined reference to `name` with no
// originating file and without the weak-alias flag.
591 Symbol *SymbolTable::addUndefined(StringRef name) {
592 return addUndefined(name, nullptr, false);
// Creates a fresh BitcodeCompiler in `lto`, iterates over the files in
// BitcodeFile::instances and returns the result of lto->compile().
595 std::vector<StringRef> SymbolTable::compileBitcodeFiles() {
// reset() replaces whatever compiler `lto` previously held.
596 lto.reset(new BitcodeCompiler);
597 for (BitcodeFile *f : BitcodeFile::instances)
599 return lto->compile();
602 void SymbolTable::addCombinedLTOObjects() {
603 if (BitcodeFile::instances.empty())
606 ScopedTimer t(ltoTimer);
607 for (StringRef object : compileBitcodeFiles()) {
608 auto *obj = make<ObjFile>(MemoryBufferRef(object, "lto.tmp"));
610 ObjFile::instances.push_back(obj);