//===- SymbolTable.cpp ----------------------------------------------------===// // // The LLVM Linker // // This file is distributed under the University of Illinois Open Source // License. See LICENSE.TXT for details. // //===----------------------------------------------------------------------===// // // Symbol table is a bag of all known symbols. We put all symbols of // all input files to the symbol table. The symbol table is basically // a hash table with the logic to resolve symbol name conflicts using // the symbol types. // //===----------------------------------------------------------------------===// #include "SymbolTable.h" #include "Config.h" #include "Error.h" #include "LinkerScript.h" #include "Strings.h" #include "SymbolListFile.h" #include "Symbols.h" #include "llvm/Bitcode/ReaderWriter.h" #include "llvm/Support/StringSaver.h" using namespace llvm; using namespace llvm::object; using namespace llvm::ELF; using namespace lld; using namespace lld::elf; // All input object files must be for the same architecture // (e.g. it does not make sense to link x86 object files with // MIPS object files.) This function checks for that error. template static bool isCompatible(InputFile *F) { if (!isa>(F) && !isa(F)) return true; if (F->EKind == Config->EKind && F->EMachine == Config->EMachine) return true; StringRef A = F->getName(); StringRef B = Config->Emulation; if (B.empty()) B = Config->FirstElf->getName(); error(A + " is incompatible with " + B); return false; } // Add symbols in File to the symbol table. 
// Takes ownership of File, stores it in the container that matches its kind
// and parses it. A file rejected by isCompatible() is dropped unparsed.
//
// NOTE(review): the template parameter/argument lists ("<...>") appear to
// have been stripped from this text (e.g. "dyn_cast(FileP)", "cast>(...)",
// "std::unique_ptr File"); restore them from the canonical file before
// compiling.
template void SymbolTable::addFile(std::unique_ptr File) {
  // Keep a raw pointer: File is released into one of the containers below
  // before the file is parsed.
  InputFile *FileP = File.get();
  if (!isCompatible(FileP))
    return;

  // .a file
  if (auto *F = dyn_cast(FileP)) {
    ArchiveFiles.emplace_back(cast(File.release()));
    F->parse();
    return;
  }

  // Lazy object file
  if (auto *F = dyn_cast(FileP)) {
    LazyObjectFiles.emplace_back(cast(File.release()));
    F->parse();
    return;
  }

  // Reached only for files not handled by the two cases above.
  if (Config->Trace)
    outs() << getFilename(FileP) << "\n";

  // .so file
  if (auto *F = dyn_cast>(FileP)) {
    // DSOs are uniquified not by filename but by soname.
    F->parseSoName();
    // A soname that was already seen: return without releasing File, so the
    // duplicate is destroyed when File goes out of scope.
    if (!SoNames.insert(F->getSoName()).second)
      return;

    SharedFiles.emplace_back(cast>(File.release()));
    F->parseRest();
    return;
  }

  // LLVM bitcode file
  if (auto *F = dyn_cast(FileP)) {
    BitcodeFiles.emplace_back(cast(File.release()));
    F->parse(ComdatGroups);
    return;
  }

  // Regular object file
  auto *F = cast>(FileP);
  ObjectFiles.emplace_back(cast>(File.release()));
  F->parse(ComdatGroups);
}

// This function is where all the optimizations of link-time
// optimization happens. When LTO is in use, some input files are
// not in native object file format but in the LLVM bitcode format.
// This function compiles bitcode files into a few big native files
// using LLVM functions and replaces bitcode symbols with the results.
// Because all bitcode files that consist of a program are passed
// to the compiler at once, it can do whole-program optimization.
template void SymbolTable::addCombinedLtoObject() {
  if (BitcodeFiles.empty())
    return;

  // Compile bitcode files.
  Lto.reset(new BitcodeCompiler);
  for (const std::unique_ptr &F : BitcodeFiles)
    Lto->add(*F);
  std::vector> IFs = Lto->compile();

  // Replace bitcode symbols.
  for (auto &IF : IFs) {
    ObjectFile *Obj = cast>(IF.release());

    // The compiled objects are parsed against a fresh, empty group set
    // rather than the table's ComdatGroups.
    DenseSet DummyGroups;
    Obj->parse(DummyGroups);
    ObjectFiles.emplace_back(Obj);
  }
}

// Defines Name through addRegular() with STB_GLOBAL binding and the given
// visibility, and returns the resulting symbol body after a cast.
template DefinedRegular *SymbolTable::addAbsolute(StringRef Name,
                                                  uint8_t Visibility) {
  return cast>(
      addRegular(Name, STB_GLOBAL, Visibility)->body());
}

// Add Name as an "ignored" symbol.
// An ignored symbol is a regular
// linker-synthesized defined symbol, but is only defined if needed.
//
// Returns nullptr without defining anything when find(Name) finds no symbol;
// otherwise returns the result of addAbsolute(Name, Visibility).
//
// NOTE(review): the template parameter/argument lists ("<...>") appear to
// have been stripped from this text (e.g. "template DefinedRegular
// *SymbolTable::addIgnored"); restore them from the canonical file before
// compiling.
template DefinedRegular *SymbolTable::addIgnored(StringRef Name,
                                                 uint8_t Visibility) {
  if (!find(Name))
    return nullptr;
  return addAbsolute(Name, Visibility);
}

// Set a flag for --trace-symbol so that we can print out a log message
// if a new symbol with the same name is inserted into the symbol table.
template void SymbolTable::trace(StringRef Name) {
  // Index -1 marks an entry that has no Symbol yet; insert() and find()
  // both test for it.
  Symtab.insert({Name, {-1, true}});
}

// Rename SYM as __wrap_SYM. The original symbol is preserved as __real_SYM.
// Used to implement --wrap.
// Does nothing if find(Name) returns no symbol.
template void SymbolTable::wrap(StringRef Name) {
  SymbolBody *B = find(Name);
  if (!B)
    return;
  // The concatenated names are saved through a StringSaver built on Alloc.
  StringSaver Saver(Alloc);
  Symbol *Sym = B->symbol();
  Symbol *Real = addUndefined(Saver.save("__real_" + Name));
  Symbol *Wrap = addUndefined(Saver.save("__wrap_" + Name));
  // We rename symbols by replacing the old symbol's SymbolBody with the new
  // symbol's SymbolBody. This causes all SymbolBody pointers referring to the
  // old symbol to instead refer to the new symbol.
  memcpy(Real->Body.buffer, Sym->Body.buffer, sizeof(Sym->Body));
  memcpy(Sym->Body.buffer, Wrap->Body.buffer, sizeof(Wrap->Body));
}

// Combines two visibility values. If either one is STV_DEFAULT the other is
// returned unchanged; otherwise the numerically smaller value is returned.
static uint8_t getMinVisibility(uint8_t VA, uint8_t VB) {
  if (VA == STV_DEFAULT)
    return VB;
  if (VB == STV_DEFAULT)
    return VA;
  return std::min(VA, VB);
}

// Find an existing symbol or create and insert a new one.
// Returns the Symbol registered under Name together with a flag that is true
// when the Symbol was created by this call.
//
// NOTE(review): the template parameter/argument lists ("<...>") appear to
// have been stripped from this text (e.g. "template std::pair
// SymbolTable::insert"); restore them from the canonical file before
// compiling.
template std::pair SymbolTable::insert(StringRef Name) {
  // Tentatively map Name to the index a new Symbol would receive.
  auto P = Symtab.insert({Name, {(int)SymVector.size(), false}});
  SymIndex &V = P.first->second;
  bool IsNew = P.second;

  // Index -1 is the entry left by trace(): the name is known but no Symbol
  // exists yet, so treat it as new and remember that it is traced.
  if (V.Idx == -1) {
    IsNew = true;
    V = {(int)SymVector.size(), true};
  }

  Symbol *Sym;
  if (IsNew) {
    // Initial attributes of a fresh Symbol; callers adjust them afterwards.
    Sym = new (Alloc) Symbol;
    Sym->Binding = STB_WEAK;
    Sym->Visibility = STV_DEFAULT;
    Sym->IsUsedInRegularObj = false;
    Sym->ExportDynamic = false;
    Sym->VersionId = Config->DefaultSymbolVersion;
    Sym->Traced = V.Traced;
    SymVector.push_back(Sym);
  } else {
    Sym = SymVector[V.Idx];
  }
  return {Sym, IsNew};
}

// Find an existing symbol or create and insert a new one, then apply the given
// attributes.
//
// Visibility is merged with getMinVisibility(). ExportDynamic and
// IsUsedInRegularObj are only ever switched on here, never off. File is used
// only for the TLS mismatch diagnostic. The returned flag is true when the
// Symbol was created by this call.
template std::pair
SymbolTable::insert(StringRef Name, uint8_t Type, uint8_t Visibility,
                    bool CanOmitFromDynSym, bool IsUsedInRegularObj,
                    InputFile *File) {
  Symbol *S;
  bool WasInserted;
  std::tie(S, WasInserted) = insert(Name);

  // Merge in the new symbol's visibility.
  S->Visibility = getMinVisibility(S->Visibility, Visibility);
  if (!CanOmitFromDynSym && (Config->Shared || Config->ExportDynamic))
    S->ExportDynamic = true;
  if (IsUsedInRegularObj)
    S->IsUsedInRegularObj = true;
  // Only an existing body whose type is known is checked. The mismatch is
  // reported through error() and the symbol is still returned.
  if (!WasInserted && S->body()->Type != SymbolBody::UnknownType &&
      ((Type == STT_TLS) != S->body()->isTls()))
    error("TLS attribute mismatch for symbol: " +
          conflictMsg(S->body(), File));

  return {S, WasInserted};
}

// Construct a string in the form of "Sym in File1 and File2".
// Used to construct an error message.
// Existing supplies the symbol name and the first file name; NewFile supplies
// the second file name. The symbol name is demangled when Config->Demangle is
// set.
//
// NOTE(review): the template parameter/argument lists ("<...>") appear to
// have been stripped from this text (e.g. "template std::string
// SymbolTable::conflictMsg", "dyn_cast>(S->body())"); restore them from the
// canonical file before compiling.
template std::string SymbolTable::conflictMsg(SymbolBody *Existing,
                                              InputFile *NewFile) {
  std::string Sym = Existing->getName();
  if (Config->Demangle)
    Sym = demangle(Sym);
  return Sym + " in " + getFilename(Existing->File) + " and " +
         getFilename(NewFile);
}

// Convenience overload: adds an undefined reference to Name with STB_GLOBAL
// binding, STV_DEFAULT, type 0, CanOmitFromDynSym false and no file.
template Symbol *SymbolTable::addUndefined(StringRef Name) {
  return addUndefined(Name, STB_GLOBAL, STV_DEFAULT, /*Type*/ 0,
                      /*CanOmitFromDynSym*/ false, /*File*/ nullptr);
}

// Records an undefined reference to Name and returns its Symbol.
// A newly created Symbol receives Binding and a fresh body. For an existing
// Symbol, a non-weak reference may update the binding and mark a shared file
// as used, and a lazy body is either fetched or has its type updated.
template Symbol *SymbolTable::addUndefined(StringRef Name, uint8_t Binding,
                                           uint8_t StOther, uint8_t Type,
                                           bool CanOmitFromDynSym,
                                           InputFile *File) {
  Symbol *S;
  bool WasInserted;
  // The low two bits of StOther are passed as the visibility.
  std::tie(S, WasInserted) =
      insert(Name, Type, StOther & 3, CanOmitFromDynSym,
             /*IsUsedInRegularObj*/ !File || !isa(File), File);
  if (WasInserted) {
    S->Binding = Binding;
    replaceBody(S, Name, StOther, Type, File);
    return S;
  }
  if (Binding != STB_WEAK) {
    // A non-weak reference overrides the binding recorded for a shared or
    // lazy body.
    if (S->body()->isShared() || S->body()->isLazy())
      S->Binding = Binding;
    if (auto *SS = dyn_cast>(S->body()))
      SS->file()->IsUsed = true;
  }
  if (auto *L = dyn_cast(S->body())) {
    // An undefined weak will not fetch archive members, but we have to remember
    // its type. See also comment in addLazyArchive.
    if (S->isWeak())
      L->Type = Type;
    else if (auto F = L->fetch())
      addFile(std::move(F));
  }
  return S;
}

// We have a new defined symbol with the specified binding. Return 1 if the new
// symbol should win, -1 if the new symbol should lose, or 0 if both symbols are
// strong defined symbols.
static int compareDefined(Symbol *S, bool WasInserted, uint8_t Binding) {
  // Nothing to compete with.
  if (WasInserted)
    return 1;
  SymbolBody *Body = S->body();
  // A definition always replaces a lazy, undefined or shared body.
  if (Body->isLazy() || Body->isUndefined() || Body->isShared())
    return 1;
  // The existing body is a definition: a weak newcomer loses to it.
  if (Binding == STB_WEAK)
    return -1;
  // A non-weak newcomer replaces a weak existing symbol.
  if (S->isWeak())
    return 1;
  return 0;
}

// We have a new non-common defined symbol with the specified binding. Return 1
// if the new symbol should win, -1 if the new symbol should lose, or 0 if there
// is a conflict. If the new symbol wins, also update the binding.
static int compareDefinedNonCommon(Symbol *S, bool WasInserted, uint8_t Binding) { if (int Cmp = compareDefined(S, WasInserted, Binding)) { if (Cmp > 0) S->Binding = Binding; return Cmp; } if (isa(S->body())) { // Non-common symbols take precedence over common symbols. if (Config->WarnCommon) warning("common " + S->body()->getName() + " is overridden"); return 1; } return 0; } template Symbol *SymbolTable::addCommon(StringRef N, uint64_t Size, uint64_t Alignment, uint8_t Binding, uint8_t StOther, uint8_t Type, InputFile *File) { Symbol *S; bool WasInserted; std::tie(S, WasInserted) = insert(N, Type, StOther & 3, /*CanOmitFromDynSym*/ false, /*IsUsedInRegularObj*/ true, File); int Cmp = compareDefined(S, WasInserted, Binding); if (Cmp > 0) { S->Binding = Binding; replaceBody(S, N, Size, Alignment, StOther, Type, File); } else if (Cmp == 0) { auto *C = dyn_cast(S->body()); if (!C) { // Non-common symbols take precedence over common symbols. if (Config->WarnCommon) warning("common " + S->body()->getName() + " is overridden"); return S; } if (Config->WarnCommon) warning("multiple common of " + S->body()->getName()); C->Size = std::max(C->Size, Size); C->Alignment = std::max(C->Alignment, Alignment); } return S; } template void SymbolTable::reportDuplicate(SymbolBody *Existing, InputFile *NewFile) { std::string Msg = "duplicate symbol: " + conflictMsg(Existing, NewFile); if (Config->AllowMultipleDefinition) warning(Msg); else error(Msg); } template Symbol *SymbolTable::addRegular(StringRef Name, const Elf_Sym &Sym, InputSectionBase *Section) { Symbol *S; bool WasInserted; std::tie(S, WasInserted) = insert(Name, Sym.getType(), Sym.getVisibility(), /*CanOmitFromDynSym*/ false, /*IsUsedInRegularObj*/ true, Section ? 
Section->getFile() : nullptr); int Cmp = compareDefinedNonCommon(S, WasInserted, Sym.getBinding()); if (Cmp > 0) replaceBody>(S, Name, Sym, Section); else if (Cmp == 0) reportDuplicate(S->body(), Section->getFile()); return S; } template Symbol *SymbolTable::addRegular(StringRef Name, uint8_t Binding, uint8_t StOther) { Symbol *S; bool WasInserted; std::tie(S, WasInserted) = insert(Name, STT_NOTYPE, StOther & 3, /*CanOmitFromDynSym*/ false, /*IsUsedInRegularObj*/ true, nullptr); int Cmp = compareDefinedNonCommon(S, WasInserted, Binding); if (Cmp > 0) replaceBody>(S, Name, StOther); else if (Cmp == 0) reportDuplicate(S->body(), nullptr); return S; } template Symbol *SymbolTable::addSynthetic(StringRef N, OutputSectionBase *Section, uintX_t Value) { Symbol *S; bool WasInserted; std::tie(S, WasInserted) = insert(N, STT_NOTYPE, STV_HIDDEN, /*CanOmitFromDynSym*/ false, /*IsUsedInRegularObj*/ true, nullptr); int Cmp = compareDefinedNonCommon(S, WasInserted, STB_GLOBAL); if (Cmp > 0) replaceBody>(S, N, Value, Section); else if (Cmp == 0) reportDuplicate(S->body(), nullptr); return S; } template void SymbolTable::addShared(SharedFile *F, StringRef Name, const Elf_Sym &Sym, const typename ELFT::Verdef *Verdef) { // DSO symbols do not affect visibility in the output, so we pass STV_DEFAULT // as the visibility, which will leave the visibility in the symbol table // unchanged. Symbol *S; bool WasInserted; std::tie(S, WasInserted) = insert(Name, Sym.getType(), STV_DEFAULT, /*CanOmitFromDynSym*/ true, /*IsUsedInRegularObj*/ false, F); // Make sure we preempt DSO symbols with default visibility. 
if (Sym.getVisibility() == STV_DEFAULT) S->ExportDynamic = true; if (WasInserted || isa(S->body())) { replaceBody>(S, F, Name, Sym, Verdef); if (!S->isWeak()) F->IsUsed = true; } } template Symbol *SymbolTable::addBitcode(StringRef Name, bool IsWeak, uint8_t StOther, uint8_t Type, bool CanOmitFromDynSym, BitcodeFile *F) { Symbol *S; bool WasInserted; std::tie(S, WasInserted) = insert(Name, Type, StOther & 3, CanOmitFromDynSym, /*IsUsedInRegularObj*/ false, F); int Cmp = compareDefinedNonCommon(S, WasInserted, IsWeak ? STB_WEAK : STB_GLOBAL); if (Cmp > 0) replaceBody(S, Name, StOther, Type, F); else if (Cmp == 0) reportDuplicate(S->body(), F); return S; } template SymbolBody *SymbolTable::find(StringRef Name) { auto It = Symtab.find(Name); if (It == Symtab.end()) return nullptr; SymIndex V = It->second; if (V.Idx == -1) return nullptr; return SymVector[V.Idx]->body(); } // Returns a list of defined symbols that match with a given glob pattern. template std::vector SymbolTable::findAll(StringRef Pattern) { std::vector Res; for (Symbol *Sym : SymVector) { SymbolBody *B = Sym->body(); if (!B->isUndefined() && globMatch(Pattern, B->getName())) Res.push_back(B); } return Res; } template void SymbolTable::addLazyArchive(ArchiveFile *F, const object::Archive::Symbol Sym) { Symbol *S; bool WasInserted; std::tie(S, WasInserted) = insert(Sym.getName()); if (WasInserted) { replaceBody(S, *F, Sym, SymbolBody::UnknownType); return; } if (!S->body()->isUndefined()) return; // Weak undefined symbols should not fetch members from archives. If we were // to keep old symbol we would not know that an archive member was available // if a strong undefined symbol shows up afterwards in the link. If a strong // undefined symbol never shows up, this lazy symbol will get to the end of // the link and must be treated as the weak undefined one. We already marked // this symbol as used when we added it to the symbol table, but we also need // to preserve its type. 
FIXME: Move the Type field to Symbol. if (S->isWeak()) { replaceBody(S, *F, Sym, S->body()->Type); return; } MemoryBufferRef MBRef = F->getMember(&Sym); if (!MBRef.getBuffer().empty()) addFile(createObjectFile(MBRef, F->getName())); } template void SymbolTable::addLazyObject(StringRef Name, LazyObjectFile &Obj) { Symbol *S; bool WasInserted; std::tie(S, WasInserted) = insert(Name); if (WasInserted) { replaceBody(S, Name, Obj, SymbolBody::UnknownType); return; } if (!S->body()->isUndefined()) return; // See comment for addLazyArchive above. if (S->isWeak()) { replaceBody(S, Name, Obj, S->body()->Type); } else { MemoryBufferRef MBRef = Obj.getBuffer(); if (!MBRef.getBuffer().empty()) addFile(createObjectFile(MBRef)); } } // Process undefined (-u) flags by loading lazy symbols named by those flags. template void SymbolTable::scanUndefinedFlags() { for (StringRef S : Config->Undefined) if (auto *L = dyn_cast_or_null(find(S))) if (std::unique_ptr File = L->fetch()) addFile(std::move(File)); } // This function takes care of the case in which shared libraries depend on // the user program (not the other way, which is usual). Shared libraries // may have undefined symbols, expecting that the user program provides // the definitions for them. An example is BSD's __progname symbol. // We need to put such symbols to the main program's .dynsym so that // shared libraries can find them. // Except this, we ignore undefined symbols in DSOs. template void SymbolTable::scanShlibUndefined() { for (std::unique_ptr> &File : SharedFiles) for (StringRef U : File->getUndefinedSymbols()) if (SymbolBody *Sym = find(U)) if (Sym->isDefined()) Sym->symbol()->ExportDynamic = true; } // This function process the dynamic list option by marking all the symbols // to be exported in the dynamic table. 
// Names in Config->DynamicList that are not in the symbol table are ignored.
//
// NOTE(review): the template parameter/argument lists ("<...>") appear to
// have been stripped from this text (e.g. "template void
// SymbolTable::scanDynamicList", "std::map Demangled", "template class
// elf::SymbolTable;"); restore them from the canonical file before compiling.
template void SymbolTable::scanDynamicList() {
  for (StringRef S : Config->DynamicList)
    if (SymbolBody *B = find(S))
      B->symbol()->ExportDynamic = true;
}

// Returns true if S contains a '?' or '*' character.
static bool hasWildcard(StringRef S) {
  return S.find_first_of("?*") != StringRef::npos;
}

// Assigns version id Version to the symbol owning Body.
// If Body is null or undefined, nothing is assigned; an error is reported
// only when Config->NoUndefinedVersion is set. VersionName and Name are used
// in diagnostics only.
static void setVersionId(SymbolBody *Body, StringRef VersionName,
                         StringRef Name, uint16_t Version) {
  if (!Body || Body->isUndefined()) {
    if (Config->NoUndefinedVersion)
      error("version script assignment of " + VersionName + " to symbol " +
            Name + " failed: symbol not defined");
    return;
  }

  Symbol *Sym = Body->symbol();
  // A version id other than the default means the symbol was already given a
  // version; warn, then overwrite it.
  if (Sym->VersionId != Config->DefaultSymbolVersion)
    warning("duplicate symbol " + Name + " in version script");
  Sym->VersionId = Version;
}

// Builds a map from the demangled name of every symbol in SymVector to its
// body. When several symbols demangle to the same string, the one visited
// last is kept.
template std::map SymbolTable::getDemangledSyms() {
  std::map Result;
  for (Symbol *Sym : SymVector) {
    SymbolBody *B = Sym->body();
    Result[demangle(B->getName())] = B;
  }
  return Result;
}

// Returns true if any entry in the Globals list of any version definition has
// IsExternCpp set.
static bool hasExternCpp() {
  for (VersionDefinition &V : Config->VersionDefinitions)
    for (SymbolVersion Sym : V.Globals)
      if (Sym.IsExternCpp)
        return true;
  return false;
}

// This function processes the --version-script option by marking all global
// symbols with the VersionScriptGlobal flag, which acts as a filter on the
// dynamic symbol table.
// NOTE(review): the code below sets Symbol::VersionId; no flag named
// VersionScriptGlobal is visible here - the sentence above may be stale.
template void SymbolTable::scanVersionScript() {
  // If version script does not contain versions declarations,
  // we just should mark global symbols.
  if (!Config->VersionScriptGlobals.empty()) {
    for (SymbolVersion &Sym : Config->VersionScriptGlobals)
      if (SymbolBody *B = find(Sym.Name))
        B->symbol()->VersionId = VER_NDX_GLOBAL;
    return;
  }

  if (Config->VersionDefinitions.empty())
    return;

  // If we have symbols version declarations, we should
  // assign version references for each symbol.
  // Current rules are:
  // * If there is an exact match for the mangled name or we have extern C++
  //   exact match, then we use it.
  // * Otherwise, we look through the wildcard patterns. We look through the
  //   version tags in reverse order. We use the first match we find (the last
  //   matching version tag in the file).

  // Handle exact matches and build a map of demangled externs for
  // quick search during next step.
  std::map Demangled;
  if (hasExternCpp())
    Demangled = getDemangledSyms();

  for (VersionDefinition &V : Config->VersionDefinitions) {
    for (SymbolVersion Sym : V.Globals) {
      if (hasWildcard(Sym.Name))
        continue;
      // Extern C++ names are looked up by demangled name, all others by their
      // name as written. B may be null; setVersionId() handles that case.
      SymbolBody *B = Sym.IsExternCpp ? Demangled[Sym.Name] : find(Sym.Name);
      setVersionId(B, V.Name, Sym.Name, V.Id);
    }
  }

  // Handle wildcards.
  // The index counts down from the last definition; the loop stops when the
  // unsigned index wraps around past zero.
  for (size_t I = Config->VersionDefinitions.size() - 1; I != (size_t)-1; --I) {
    VersionDefinition &V = Config->VersionDefinitions[I];
    for (SymbolVersion &Sym : V.Globals)
      if (hasWildcard(Sym.Name))
        for (SymbolBody *B : findAll(Sym.Name))
          // Symbols that already have a non-default version are left alone.
          if (B->symbol()->VersionId == Config->DefaultSymbolVersion)
            B->symbol()->VersionId = V.Id;
  }
}

// Returns the size of the longest version name.
static int getMaxVersionLen() {
  size_t Len = 0;
  for (VersionDefinition &V : Config->VersionDefinitions)
    Len = std::max(Len, V.Name.size());
  return Len;
}

// Parses a symbol name of the form NAME@VERSION or NAME@@VERSION.
// Returns the name part and the version id. For the single-'@' form the id
// has VERSYM_HIDDEN or'ed in. Returns {"", 0} when the name has no usable
// version suffix, and also (after reporting an error) when the version is not
// among Config->VersionDefinitions.
static std::pair getSymbolVersion(SymbolBody *B, int MaxVersionLen) {
  StringRef S = B->getName();

  // MaxVersionLen was passed so that we don't need to scan
  // all characters in a symbol name. It is effective because
  // versions are usually short and symbol names can be very long.
  size_t Pos = S.find('@', std::max(0, int(S.size()) - MaxVersionLen - 2));
  // No '@' found, or the '@' is the first character (empty name part).
  if (Pos == 0 || Pos == StringRef::npos)
    return {"", 0};

  StringRef Name = S.substr(0, Pos);
  StringRef Verstr = S.substr(Pos + 1);
  if (Verstr.empty())
    return {"", 0};

  // '@@' in a symbol name means the default version.
  // It is usually the most recent one.
  bool IsDefault = (Verstr[0] == '@');
  if (IsDefault)
    Verstr = Verstr.substr(1);

  for (VersionDefinition &V : Config->VersionDefinitions) {
    if (V.Name == Verstr)
      return {Name, IsDefault ? V.Id : (V.Id | VERSYM_HIDDEN)};
  }

  // It is an error if the specified version was not defined.
  error("symbol " + S + " has undefined version " + Verstr);
  return {"", 0};
}

// Versions are usually assigned to symbols using version scripts,
// but there's another way to assign versions to symbols.
// If a symbol name contains '@', the string after it is not
// actually a part of the symbol name but specifies a version.
// This function takes care of it.
template void SymbolTable::scanSymbolVersions() {
  if (Config->VersionDefinitions.empty())
    return;

  int MaxVersionLen = getMaxVersionLen();

  // Unfortunately there's no way other than iterating over all
  // symbols to look for '@' characters in symbol names.
  // So this is inherently slow. A good news is that we do this
  // only when versions have been defined.
  for (Symbol *Sym : SymVector) {
    // Symbol versions for exported symbols are by nature
    // only for defined global symbols.
    SymbolBody *B = Sym->body();
    if (!B->isDefined())
      continue;
    uint8_t Visibility = B->getVisibility();
    if (Visibility != STV_DEFAULT && Visibility != STV_PROTECTED)
      continue;

    // Look for '@' in the symbol name.
    StringRef Name;
    uint16_t Version;
    std::tie(Name, Version) = getSymbolVersion(B, MaxVersionLen);
    // An empty name means no version was parsed; leave the symbol untouched.
    if (Name.empty())
      continue;

    // Strip the version suffix from the symbol name and record the version.
    B->setName(Name);
    Sym->VersionId = Version;
  }
}

// Explicit instantiations of SymbolTable.
// NOTE(review): the four lines below are identical only because their
// template arguments were stripped; restore the distinct arguments from the
// canonical file.
template class elf::SymbolTable;
template class elf::SymbolTable;
template class elf::SymbolTable;
template class elf::SymbolTable;