1 //===- InputFiles.cpp -----------------------------------------------------===//
5 // This file is distributed under the University of Illinois Open Source
6 // License. See LICENSE.TXT for details.
8 //===----------------------------------------------------------------------===//
10 #include "InputFiles.h"
14 #include "SymbolTable.h"
16 #include "lld/Common/ErrorHandler.h"
17 #include "lld/Common/Memory.h"
18 #include "llvm-c/lto.h"
19 #include "llvm/ADT/SmallVector.h"
20 #include "llvm/ADT/Triple.h"
21 #include "llvm/ADT/Twine.h"
22 #include "llvm/BinaryFormat/COFF.h"
23 #include "llvm/Object/Binary.h"
24 #include "llvm/Object/COFF.h"
25 #include "llvm/Support/Casting.h"
26 #include "llvm/Support/Endian.h"
27 #include "llvm/Support/Error.h"
28 #include "llvm/Support/ErrorOr.h"
29 #include "llvm/Support/FileSystem.h"
30 #include "llvm/Support/Path.h"
31 #include "llvm/Target/TargetOptions.h"
33 #include <system_error>
37 using namespace llvm::COFF;
38 using namespace llvm::object;
39 using namespace llvm::support::endian;
42 using llvm::support::ulittle32_t;
// Out-of-class definitions of the static Instances vectors of ObjFile,
// ImportFile and BitcodeFile; each stores pointers to objects of its own class.
47 std::vector<ObjFile *> ObjFile::Instances;
48 std::vector<ImportFile *> ImportFile::Instances;
49 std::vector<BitcodeFile *> BitcodeFile::Instances;
51 /// Checks that Source is compatible with being a weak alias to Target.
52 /// If Source is Undefined and has no weak alias set, makes it a weak
54 static void checkAndSetWeakAlias(SymbolTable *Symtab, InputFile *F,
55 Symbol *Source, Symbol *Target) {
// Proceed only when Source is an Undefined symbol; WeakAlias is accessed
// through that view.
56 if (auto *U = dyn_cast<Undefined>(Source)) {
// Source already has a weak alias and it is not Target.
57 if (U->WeakAlias && U->WeakAlias != Target) {
58 // Weak aliases as produced by GCC are named in the form
59 // .weak.<weaksymbol>.<othersymbol>, where <othersymbol> is the name
60 // of another symbol emitted near the weak symbol.
61 // Just use the definition from the first object file that defined
// The conflict is reported against Source, attributed to file F.
65 Symtab->reportDuplicate(Source, F);
// Record Target as Source's weak alias.
67 U->WeakAlias = Target;
// Constructs an ArchiveFile over buffer M by forwarding ArchiveKind and M to
// the InputFile base constructor; the constructor body itself is empty.
71 ArchiveFile::ArchiveFile(MemoryBufferRef M) : InputFile(ArchiveKind, M) {}
// Opens MB as an archive and passes every entry of the archive's symbol table,
// together with this file, to Symtab->addLazy().
73 void ArchiveFile::parse() {
74 // Parse a MemoryBufferRef as an archive file.
75 File = CHECK(Archive::create(MB), this);
77 // Read the symbol table to construct Lazy objects.
78 for (const Archive::Symbol &Sym : File->symbols())
79 Symtab->addLazy(this, Sym);
82 // Enqueues the archive member file containing a given symbol with the driver.
83 void ArchiveFile::addMember(const Archive::Symbol *Sym) {
// Look up the archive child that provides Sym; the string is the error context
// handed to CHECK.
84 const Archive::Child &C =
85 CHECK(Sym->getMember(),
86 "could not get the member for symbol " + Sym->getName());
88 // Seen records the child offsets handled so far; insert() reporting false in
// .second means this member has been requested before.
89 if (!Seen.insert(C.getChildOffset()).second)
// Hand the member to the driver together with the symbol name and this
// archive's name.
92 Driver->enqueueArchiveMember(C, Sym->getName(), getName());
// Iterates over the children of File and obtains a MemoryBufferRef for each
// one; the declared result is a vector of such references.
95 std::vector<MemoryBufferRef> getArchiveMembers(Archive *File) {
96 std::vector<MemoryBufferRef> V;
// Err is passed to children() by the range expression below and is the value
// formatted into the "Archive::children failed" diagnostic further down.
97 Error Err = Error::success();
98 for (const ErrorOr<Archive::Child> &COrErr : File->children(Err)) {
101 File->getFileName() + ": could not get the child of the archive");
// Fetch the buffer backing this child; the string is the error context handed
// to CHECK.
102 MemoryBufferRef MBRef =
103 CHECK(C.getMemoryBufferRef(),
104 File->getFileName() +
105 ": could not get the buffer for a child of the archive");
109 fatal(File->getFileName() +
110 ": Archive::children failed: " + toString(std::move(Err)));
// Creates a Binary from MB and tests whether it is a COFFObjectFile; the
// fatal() diagnostic below is the one issued for buffers that are not COFF.
114 void ObjFile::parse() {
115 // Parse a memory buffer as a COFF file.
116 std::unique_ptr<Binary> Bin = CHECK(createBinary(MB), this);
// dyn_cast yields null when the binary is of any other kind.
118 if (auto *Obj = dyn_cast<COFFObjectFile>(Bin.get())) {
122 fatal(toString(this) + " is not a COFF file");
125 // Read section and symbol tables.
130 // We set SectionChunk pointers in the SparseChunks vector to this value
131 // temporarily to mark comdat sections as having an unknown resolution. As we
132 // walk the object file's symbol table, once we visit either a leader symbol or
133 // an associative section definition together with the parent comdat's leader,
134 // we set the pointer to either nullptr (to mark the section as discarded) or a
135 // valid SectionChunk for that section.
// The value is used purely as a marker: code in this file assigns it to
// SparseChunks slots and compares slots against it.
136 static SectionChunk *const PendingComdat = reinterpret_cast<SectionChunk *>(1);
// Sizes Chunks and SparseChunks from the object's section count, then fills
// SparseChunks: a section with IMAGE_SCN_LNK_COMDAT set gets the PendingComdat
// marker, and readSection(I, nullptr, "") supplies the entry otherwise.
138 void ObjFile::initializeChunks() {
139 uint32_t NumSections = COFFObj->getNumberOfSections();
140 Chunks.reserve(NumSections);
// The loop below indexes SparseChunks with I in [1, NumSections], hence the
// extra slot.
141 SparseChunks.resize(NumSections + 1);
142 for (uint32_t I = 1; I < NumSections + 1; ++I) {
143 const coff_section *Sec;
// A failed section lookup is fatal; the message carries the section number.
144 if (auto EC = COFFObj->getSection(I, Sec))
145 fatal("getSection failed: #" + Twine(I) + ": " + EC.message());
147 if (Sec->Characteristics & IMAGE_SCN_LNK_COMDAT)
148 SparseChunks[I] = PendingComdat;
150 SparseChunks[I] = readSection(I, nullptr, "");
// Builds the SectionChunk for section SectionNumber and routes it by section
// name (guard tables, .sxdata, string-literal tail-merge candidates, ...).
//   Def        - aux section definition whose CheckSum is copied into the
//                chunk; initializeChunks() passes nullptr for it.
//   LeaderName - consulted only by the tail-merge test below, which looks for
//                the "??_C@" prefix.
154 SectionChunk *ObjFile::readSection(uint32_t SectionNumber,
155 const coff_aux_section_definition *Def,
156 StringRef LeaderName) {
157 const coff_section *Sec;
// Failing to look up the section or its name is fatal.
158 if (auto EC = COFFObj->getSection(SectionNumber, Sec))
159 fatal("getSection failed: #" + Twine(SectionNumber) + ": " + EC.message());
162 if (auto EC = COFFObj->getSectionName(Sec, Name))
163 fatal("getSectionName failed: #" + Twine(SectionNumber) + ": " +
// The bytes of the .drectve section are copied into Directives.
166 if (Name == ".drectve") {
167 ArrayRef<uint8_t> Data;
168 COFFObj->getSectionContents(Sec, Data);
169 Directives = std::string((const char *)Data.data(), Data.size());
173 if (Name == ".llvm_addrsig") {
178 // Object files may have DWARF debug info or MS CodeView debug info
181 // DWARF sections don't need any special handling from the perspective
182 // of the linker; they are just a data section containing relocations.
183 // We can just link them to complete debug info.
185 // CodeView needs linker support. We need to interpret debug info,
186 // and then write it to a separate .pdb file.
188 // Ignore DWARF debug info unless /debug is given.
189 if (!Config->Debug && Name.startswith(".debug_"))
192 if (Sec->Characteristics & llvm::COFF::IMAGE_SCN_LNK_REMOVE)
// The chunk is created with make<>() from this file and the section header.
194 auto *C = make<SectionChunk>(this, Sec);
196 C->Checksum = Def->CheckSum;
198 // CodeView sections are stored to a different vector because they are not
199 // linked in the regular manner.
201 DebugChunks.push_back(C);
// The two guard tables are collected only when Config->GuardCF is not Off.
202 else if (Config->GuardCF != GuardCFLevel::Off && Name == ".gfids$y")
203 GuardFidChunks.push_back(C);
204 else if (Config->GuardCF != GuardCFLevel::Off && Name == ".gljmp$y")
205 GuardLJmpChunks.push_back(C);
206 else if (Name == ".sxdata")
207 SXDataChunks.push_back(C);
208 else if (Config->TailMerge && Sec->NumberOfRelocations == 0 &&
209 Name == ".rdata" && LeaderName.startswith("??_C@"))
210 // COFF sections that look like string literal sections (i.e. no
211 // relocations, in .rdata, leader symbol name matches the MSVC name mangling
212 // for string literals) are subject to string tail merging.
213 MergeChunk::addSection(C);
// Convenience overload: takes the parent section number from
// Def->getNumber(Sym.isBigObj()) and forwards to the three-argument overload.
220 void ObjFile::readAssociativeDefinition(
221 COFFSymbolRef Sym, const coff_aux_section_definition *Def) {
222 readAssociativeDefinition(Sym, Def, Def->getNumber(Sym.isBigObj()));
// Handles the section of Sym as associative to section ParentSection: a chunk
// returned by readSection() is attached to the parent chunk through
// addAssociative(), and the nullptr assignment at the end marks the section as
// discarded.
225 void ObjFile::readAssociativeDefinition(COFFSymbolRef Sym,
226 const coff_aux_section_definition *Def,
227 uint32_t ParentSection) {
228 SectionChunk *Parent = SparseChunks[ParentSection];
230 // If the parent is pending, it probably means that its section definition
231 // appears after us in the symbol table. Leave the associated section as
232 // pending; we will handle it during the second pass in initializeSymbols().
233 if (Parent == PendingComdat)
236 // Check whether the parent is prevailing. If it is, so are we, and we read
237 // the section; otherwise mark it as discarded.
238 int32_t SectionNumber = Sym.getSectionNumber();
// No leader name is passed for an associative section (empty string).
240 SparseChunks[SectionNumber] = readSection(SectionNumber, Def, "");
// readSection() may have produced no chunk; attach to the parent only when one
// exists.
241 if (SparseChunks[SectionNumber])
242 Parent->addAssociative(SparseChunks[SectionNumber]);
244 SparseChunks[SectionNumber] = nullptr;
// Records "symbol name -> section number" in PrevailingSectionMap for a symbol
// whose section chunk exists and is executable.
248 void ObjFile::recordPrevailingSymbolForMingw(
249 COFFSymbolRef Sym, DenseMap<StringRef, uint32_t> &PrevailingSectionMap) {
250 // For comdat symbols in executable sections, where this is the copy
251 // of the section chunk we actually include instead of discarding it,
252 // add the symbol to a map to allow using it for implicitly
253 // associating .[px]data$<func> sections to it.
254 int32_t SectionNumber = Sym.getSectionNumber();
255 SectionChunk *SC = SparseChunks[SectionNumber];
// Requires a non-null chunk whose output characteristics include
// IMAGE_SCN_MEM_EXECUTE.
256 if (SC && SC->getOutputCharacteristics() & IMAGE_SCN_MEM_EXECUTE) {
258 COFFObj->getSymbolName(Sym, Name);
259 PrevailingSectionMap[Name] = SectionNumber;
// If the symbol is named .pdata$<func> or .xdata$<func>, looks <func> up in
// PrevailingSectionMap and, on a hit, reads the symbol's section as
// associative to the section recorded for <func>.
263 void ObjFile::maybeAssociateSEHForMingw(
264 COFFSymbolRef Sym, const coff_aux_section_definition *Def,
265 const DenseMap<StringRef, uint32_t> &PrevailingSectionMap) {
267 COFFObj->getSymbolName(Sym, Name);
// consume_front() drops the matched prefix, so Name holds only <func> inside
// the branch.
268 if (Name.consume_front(".pdata$") || Name.consume_front(".xdata$")) {
269 // For MinGW, treat .[px]data$<func> as implicitly associative to
270 // the symbol <func>.
271 auto ParentSym = PrevailingSectionMap.find(Name);
272 if (ParentSym != PrevailingSectionMap.end())
273 readAssociativeDefinition(Sym, Def, ParentSym->second);
// Creates the Symbol for a COFF symbol that lives in a section. External
// symbols go through the symbol table by name (addRegular / addUndefined);
// non-external ones become an unnamed DefinedRegular tied to the section chunk.
277 Symbol *ObjFile::createRegular(COFFSymbolRef Sym) {
// SC is the SparseChunks entry for the symbol's section.
278 SectionChunk *SC = SparseChunks[Sym.getSectionNumber()];
279 if (Sym.isExternal()) {
281 COFFObj->getSymbolName(Sym, Name);
283 return Symtab->addRegular(this, Name, Sym.getGeneric(), SC);
284 // For MinGW symbols named .weak.* that point to a discarded section,
285 // don't create an Undefined symbol. If nothing ever refers to the symbol,
286 // everything should be fine. If something actually refers to the symbol
287 // (e.g. the undefined weak alias), linking will fail due to undefined
288 // references at the end.
289 if (Config->MinGW && Name.startswith(".weak."))
291 return Symtab->addUndefined(Name, this, false);
// Non-external symbol: no name, not COMDAT, not external.
294 return make<DefinedRegular>(this, /*Name*/ "", /*IsCOMDAT*/ false,
295 /*IsExternal*/ false, Sym.getGeneric(), SC);
// Fills Symbols, which has one slot per COFF symbol-table index, in three
// steps: a walk over the whole symbol table, a second pass over the symbols
// that had to be postponed, and finally the binding of weak externals to
// their targets.
299 void ObjFile::initializeSymbols() {
300 uint32_t NumSymbols = COFFObj->getNumberOfSymbols();
301 Symbols.resize(NumSymbols);
// (symbol, TagIndex) pairs collected for weak externals; resolved at the end.
303 SmallVector<std::pair<Symbol *, uint32_t>, 8> WeakAliases;
// Symbol-table indexes whose handling is postponed to the second pass.
304 std::vector<uint32_t> PendingIndexes;
305 PendingIndexes.reserve(NumSymbols);
307 DenseMap<StringRef, uint32_t> PrevailingSectionMap;
// One entry per section number (sized NumberOfSections + 1), passed to
// createDefined().
308 std::vector<const coff_aux_section_definition *> ComdatDefs(
309 COFFObj->getNumberOfSections() + 1);
311 for (uint32_t I = 0; I < NumSymbols; ++I) {
312 COFFSymbolRef COFFSym = check(COFFObj->getSymbol(I));
313 bool PrevailingComdat;
314 if (COFFSym.isUndefined()) {
315 Symbols[I] = createUndefined(COFFSym);
316 } else if (COFFSym.isWeakExternal()) {
// A weak external is created as undefined now; its aux record's TagIndex names
// the symbol-table index of the alias target.
317 Symbols[I] = createUndefined(COFFSym);
318 uint32_t TagIndex = COFFSym.getAux<coff_aux_weak_external>()->TagIndex;
319 WeakAliases.emplace_back(Symbols[I], TagIndex);
320 } else if (Optional<Symbol *> OptSym =
321 createDefined(COFFSym, ComdatDefs, PrevailingComdat)) {
322 Symbols[I] = *OptSym;
323 if (Config->MinGW && PrevailingComdat)
324 recordPrevailingSymbolForMingw(COFFSym, PrevailingSectionMap);
326 // createDefined() returns None if a symbol belongs to a section that
327 // was pending at the point when the symbol was read. This can happen in
329 // 1) section definition symbol for a comdat leader;
330 // 2) symbol belongs to a comdat section associated with a section whose
331 // section definition symbol appears later in the symbol table.
332 // In both of these cases, we can expect the section to be resolved by
333 // the time we finish visiting the remaining symbols in the symbol
334 // table. So we postpone the handling of this symbol until that time.
335 PendingIndexes.push_back(I);
// Step over the auxiliary records that follow this symbol in the table.
337 I += COFFSym.getNumberOfAuxSymbols();
// Second pass: revisit the postponed symbols.
340 for (uint32_t I : PendingIndexes) {
341 COFFSymbolRef Sym = check(COFFObj->getSymbol(I));
342 if (const coff_aux_section_definition *Def = Sym.getSectionDefinition()) {
343 if (Def->Selection == IMAGE_COMDAT_SELECT_ASSOCIATIVE)
344 readAssociativeDefinition(Sym, Def);
345 else if (Config->MinGW)
346 maybeAssociateSEHForMingw(Sym, Def, PrevailingSectionMap);
// A section that is still pending at this point is logged as having no leader
// and no association.
348 if (SparseChunks[Sym.getSectionNumber()] == PendingComdat) {
350 COFFObj->getSymbolName(Sym, Name);
351 log("comdat section " + Name +
352 " without leader and unassociated, discarding");
355 Symbols[I] = createRegular(Sym);
// Bind each weak external to the symbol stored at its TagIndex; this runs
// after the passes above have filled Symbols.
358 for (auto &KV : WeakAliases) {
359 Symbol *Sym = KV.first;
360 uint32_t Idx = KV.second;
361 checkAndSetWeakAlias(Symtab, this, Sym, Symbols[Idx]);
// Adds an undefined symbol under the COFF symbol's name, attributed to this
// file, passing Sym.isWeakExternal() as the final argument.
365 Symbol *ObjFile::createUndefined(COFFSymbolRef Sym) {
367 COFFObj->getSymbolName(Sym, Name);
368 return Symtab->addUndefined(Name, this, Sym.isWeakExternal());
// Creates the Symbol for a defined COFF symbol. The result is an Optional:
// initializeSymbols() treats an empty result as "section still pending" and
// handles the symbol in its second pass. ComdatDefs is indexed by section
// number and carries a comdat's aux section definition from its section
// definition symbol to its leader symbol.
371 Optional<Symbol *> ObjFile::createDefined(
373 std::vector<const coff_aux_section_definition *> &ComdatDefs,
// Local helper that fetches the symbol's name on demand.
376 auto GetName = [&]() {
378 COFFObj->getSymbolName(Sym, S);
// Common symbols get a CommonChunk and are added through Symtab->addCommon().
382 if (Sym.isCommon()) {
383 auto *C = make<CommonChunk>(Sym);
385 return Symtab->addCommon(this, GetName(), Sym.getValue(), Sym.getGeneric(),
389 if (Sym.isAbsolute()) {
390 StringRef Name = GetName();
392 // Skip special symbols.
393 if (Name == "@comp.id")
// The value of @feat.00 is kept in Feat00Flags.
395 if (Name == "@feat.00") {
396 Feat00Flags = Sym.getValue();
// External absolutes go through the symbol table; others become a private
// DefinedAbsolute.
400 if (Sym.isExternal())
401 return Symtab->addAbsolute(Name, Sym);
402 return make<DefinedAbsolute>(Name, Sym);
405 int32_t SectionNumber = Sym.getSectionNumber();
406 if (SectionNumber == llvm::COFF::IMAGE_SYM_DEBUG)
409 if (llvm::COFF::isReservedSectionNumber(SectionNumber))
410 fatal(toString(this) + ": " + GetName() +
411 " should not refer to special section " + Twine(SectionNumber));
// Bounds check before SectionNumber is used to index ComdatDefs and
// SparseChunks below.
413 if ((uint32_t)SectionNumber >= SparseChunks.size())
414 fatal(toString(this) + ": " + GetName() +
415 " should not refer to non-existent section " + Twine(SectionNumber));
417 // Handle comdat leader symbols.
// A non-null ComdatDefs entry was stored by the section definition handling
// near the end of this function; it is consumed (cleared) here.
418 if (const coff_aux_section_definition *Def = ComdatDefs[SectionNumber]) {
419 ComdatDefs[SectionNumber] = nullptr;
// External leaders are resolved by name through Symtab->addComdat(), which
// yields the leader and a prevailing flag; non-external ones get an unnamed
// DefinedRegular.
421 if (Sym.isExternal()) {
422 std::tie(Leader, Prevailing) =
423 Symtab->addComdat(this, GetName(), Sym.getGeneric());
425 Leader = make<DefinedRegular>(this, /*Name*/ "", /*IsCOMDAT*/ false,
426 /*IsExternal*/ false, Sym.getGeneric());
// Read the section with the leader's name, then link chunk and leader to each
// other (C->Sym and Leader->Data).
431 SectionChunk *C = readSection(SectionNumber, Def, GetName());
432 SparseChunks[SectionNumber] = C;
433 C->Sym = cast<DefinedRegular>(Leader);
434 cast<DefinedRegular>(Leader)->Data = &C->Repl;
// nullptr marks the section as discarded.
436 SparseChunks[SectionNumber] = nullptr;
441 // Read associative section definitions and prepare to handle the comdat
442 // leader symbol by setting the section's ComdatDefs pointer if we encounter a
443 // non-associative comdat.
444 if (SparseChunks[SectionNumber] == PendingComdat) {
445 if (const coff_aux_section_definition *Def = Sym.getSectionDefinition()) {
446 if (Def->Selection == IMAGE_COMDAT_SELECT_ASSOCIATIVE)
447 readAssociativeDefinition(Sym, Def);
449 ComdatDefs[SectionNumber] = Def;
453 // readAssociativeDefinition() writes to SparseChunks, so need to check again.
454 if (SparseChunks[SectionNumber] == PendingComdat)
457 return createRegular(Sym);
// Reports the machine type of the underlying COFF object, taken from
// COFFObj->getMachine(); IMAGE_FILE_MACHINE_UNKNOWN is the fallback result.
460 MachineTypes ObjFile::getMachineType() {
462 return static_cast<MachineTypes>(COFFObj->getMachine());
463 return IMAGE_FILE_MACHINE_UNKNOWN;
// Helper used by ImportFile::parse() as ltrim1(Name, "?@_"). The condition
// below holds when S is non-empty and its first character occurs in Chars.
// NOTE(review): strchr() also matches the terminating NUL of Chars, so a
// leading '\0' in S satisfies the test as well - confirm that is acceptable.
466 StringRef ltrim1(StringRef S, const char *Chars) {
467 if (!S.empty() && strchr(Chars, S[0]))
// Parses an import library member held in MB: a coff_import_header followed by
// the symbol name and then the DLL name, both read as NUL-terminated strings.
// Creates the __imp_ symbol and, depending on the header's type, a data symbol
// or an import thunk.
472 void ImportFile::parse() {
473 const char *Buf = MB.getBufferStart();
474 const char *End = MB.getBufferEnd();
475 const auto *Hdr = reinterpret_cast<const coff_import_header *>(Buf);
477 // Check if the total size is valid.
// NOTE(review): Hdr->SizeOfData is read before the buffer is known to be at
// least sizeof(*Hdr) bytes long - confirm callers guarantee that.
478 if ((size_t)(End - Buf) != (sizeof(*Hdr) + Hdr->SizeOfData))
479 fatal("broken import library");
481 // Read names and create an __imp_ symbol.
482 StringRef Name = Saver.save(StringRef(Buf + sizeof(*Hdr)));
483 StringRef ImpName = Saver.save("__imp_" + Name);
// The DLL name starts right after the symbol name and its terminating NUL.
484 const char *NameStart = Buf + sizeof(coff_import_header) + Name.size() + 1;
485 DLLName = StringRef(NameStart);
// The header's name type selects how the external name is derived from Name.
487 switch (Hdr->getNameType()) {
494 case IMPORT_NAME_NOPREFIX:
495 ExtName = ltrim1(Name, "?@_");
497 case IMPORT_NAME_UNDECORATE:
// Same prefix handling as NOPREFIX, then cut the name at the first '@'.
498 ExtName = ltrim1(Name, "?@_");
499 ExtName = ExtName.substr(0, ExtName.find('@'));
504 ExternalName = ExtName;
506 ImpSym = Symtab->addImportData(ImpName, this);
507 // If this was a duplicate, we logged an error but may continue;
508 // in this case, ImpSym is nullptr.
// IMPORT_CONST: also register the undecorated Name as import data; the result
// is deliberately discarded.
512 if (Hdr->getType() == llvm::COFF::IMPORT_CONST)
513 static_cast<void>(Symtab->addImportData(Name, this));
515 // If type is function, we need to create a thunk which jumps to an
516 // address pointed to by the __imp_ symbol. (This allows you to call
517 // DLL functions just like regular non-DLL functions.)
518 if (Hdr->getType() == llvm::COFF::IMPORT_CODE)
519 ThunkSym = Symtab->addImportThunk(
520 Name, cast_or_null<DefinedImportData>(ImpSym), Hdr->Machine);
// Creates the lto::InputFile for this buffer, registers its comdats, and adds
// one Symbol per bitcode symbol to Symbols, choosing the symbol-table call by
// the kind of symbol. Finally copies the COFF linker options into Directives.
523 void BitcodeFile::parse() {
// The buffer identifier handed to LTO is ParentName followed by the buffer's
// own identifier.
524 Obj = check(lto::InputFile::create(MemoryBufferRef(
525 MB.getBuffer(), Saver.save(ParentName + MB.getBufferIdentifier()))));
// Comdat[I] holds the pair returned by Symtab->addComdat() for comdat-table
// entry I: .first is used as the Symbol below, .second as a flag (presumably
// "prevailing" - confirm against SymbolTable).
526 std::vector<std::pair<Symbol *, bool>> Comdat(Obj->getComdatTable().size());
527 for (size_t I = 0; I != Obj->getComdatTable().size(); ++I)
528 Comdat[I] = Symtab->addComdat(this, Saver.save(Obj->getComdatTable()[I]));
529 for (const lto::InputFile::Symbol &ObjSym : Obj->symbols()) {
530 StringRef SymName = Saver.save(ObjSym.getName());
531 int ComdatIndex = ObjSym.getComdatIndex();
533 if (ObjSym.isUndefined()) {
534 Sym = Symtab->addUndefined(SymName, this, false);
535 } else if (ObjSym.isCommon()) {
536 Sym = Symtab->addCommon(this, SymName, ObjSym.getCommonSize());
537 } else if (ObjSym.isWeak() && ObjSym.isIndirect()) {
// Weak external: add the symbol as undefined, add its fallback name as
// undefined too, and record the fallback as the weak alias.
539 Sym = Symtab->addUndefined(SymName, this, true);
540 std::string Fallback = ObjSym.getCOFFWeakExternalFallback();
541 Symbol *Alias = Symtab->addUndefined(Saver.save(Fallback));
542 checkAndSetWeakAlias(Symtab, this, Sym, Alias);
// A ComdatIndex other than -1 selects an entry of the comdat table.
543 } else if (ComdatIndex != -1) {
// The symbol named like the comdat itself reuses the comdat's Symbol; other
// members are added as regular or undefined depending on the comdat's flag.
544 if (SymName == Obj->getComdatTable()[ComdatIndex])
545 Sym = Comdat[ComdatIndex].first;
546 else if (Comdat[ComdatIndex].second)
547 Sym = Symtab->addRegular(this, SymName);
549 Sym = Symtab->addUndefined(SymName, this, false);
551 Sym = Symtab->addRegular(this, SymName);
553 Symbols.push_back(Sym);
555 Directives = Obj->getCOFFLinkerOpts();
// Derives the machine type from the architecture of the bitcode module's
// target triple; Triple::aarch64 is among the handled cases and
// IMAGE_FILE_MACHINE_UNKNOWN is one of the possible results.
558 MachineTypes BitcodeFile::getMachineType() {
559 switch (Triple(Obj->getTargetTriple()).getArch()) {
566 case Triple::aarch64:
569 return IMAGE_FILE_MACHINE_UNKNOWN;
575 // Returns the last element of a path, which is supposed to be a filename.
// The path is interpreted with sys::path::Style::windows.
576 static StringRef getBasename(StringRef Path) {
577 return sys::path::filename(Path, sys::path::Style::windows);
580 // Returns a string in the format of "foo.obj" or "bar.lib(foo.obj)".
581 std::string lld::toString(const coff::InputFile *File) {
584 if (File->ParentName.empty())
585 return File->getName();
587 return (getBasename(File->ParentName) + "(" + getBasename(File->getName()) +