1 //===- InputFiles.cpp -----------------------------------------------------===//
5 // This file is distributed under the University of Illinois Open Source
6 // License. See LICENSE.TXT for details.
8 //===----------------------------------------------------------------------===//
10 #include "InputFiles.h"
14 #include "SymbolTable.h"
16 #include "lld/Common/ErrorHandler.h"
17 #include "lld/Common/Memory.h"
18 #include "llvm-c/lto.h"
19 #include "llvm/ADT/SmallVector.h"
20 #include "llvm/ADT/Triple.h"
21 #include "llvm/ADT/Twine.h"
22 #include "llvm/BinaryFormat/COFF.h"
23 #include "llvm/Object/Binary.h"
24 #include "llvm/Object/COFF.h"
25 #include "llvm/Support/Casting.h"
26 #include "llvm/Support/Endian.h"
27 #include "llvm/Support/Error.h"
28 #include "llvm/Support/ErrorOr.h"
29 #include "llvm/Support/FileSystem.h"
30 #include "llvm/Target/TargetOptions.h"
32 #include <system_error>
36 using namespace llvm::COFF;
37 using namespace llvm::object;
38 using namespace llvm::support::endian;
41 using llvm::support::ulittle32_t;
// Out-of-class definitions of the static Instances vectors of ObjFile,
// ImportFile and BitcodeFile. Each vector holds pointers to files of that kind.
46 std::vector<ObjFile *> ObjFile::Instances;
47 std::vector<ImportFile *> ImportFile::Instances;
48 std::vector<BitcodeFile *> BitcodeFile::Instances;
50 /// Checks that Source is compatible with being a weak alias to Target.
51 /// If Source is Undefined, makes it a weak alias to Target. If it already has
/// a weak alias that differs from Target, the clash is reported through
/// Symtab->reportDuplicate(Source, F).
53 static void checkAndSetWeakAlias(SymbolTable *Symtab, InputFile *F,
54 Symbol *Source, Symbol *Target) {
55 if (auto *U = dyn_cast<Undefined>(Source)) {
// An existing alias that points at a different symbol is a conflict.
56 if (U->WeakAlias && U->WeakAlias != Target)
57 Symtab->reportDuplicate(Source, F);
// This assignment is not guarded by the conflict check above.
58 U->WeakAlias = Target;
// Constructs an archive input file by forwarding ArchiveKind and the buffer M
// to the InputFile base constructor.
62 ArchiveFile::ArchiveFile(MemoryBufferRef M) : InputFile(ArchiveKind, M) {}
// Opens the buffer MB as an archive and passes every entry of the archive's
// symbol table to Symtab->addLazy, together with this file.
64 void ArchiveFile::parse() {
65 // Parse a MemoryBufferRef as an archive file.
66 File = CHECK(Archive::create(MB), this);
68 // Read the symbol table to construct Lazy objects.
69 for (const Archive::Symbol &Sym : File->symbols())
70 Symtab->addLazy(this, Sym);
73 // Looks up the archive member that contains the given symbol and hands it to
// the driver through enqueueArchiveMember, along with the symbol name and this
// archive's name. The function itself returns nothing.
74 void ArchiveFile::addMember(const Archive::Symbol *Sym) {
75 const Archive::Child &C =
76 CHECK(Sym->getMember(),
77 "could not get the member for symbol " + Sym->getName());
79 // Seen records the child offsets handled so far; a failed insert means this
// member has already been seen.
80 if (!Seen.insert(C.getChildOffset()).second)
83 Driver->enqueueArchiveMember(C, Sym->getName(), getName());
// Walks the children of the given archive and obtains a MemoryBufferRef for
// each of them; the declared result is a vector of MemoryBufferRef. The
// diagnostics for a missing child and for an Archive::children failure are
// prefixed with the archive's file name.
86 std::vector<MemoryBufferRef> getArchiveMembers(Archive *File) {
87 std::vector<MemoryBufferRef> V;
// Err is handed to Archive::children() and rendered with toString() in the
// fatal() message below.
88 Error Err = Error::success();
89 for (const ErrorOr<Archive::Child> &COrErr : File->children(Err)) {
92 File->getFileName() + ": could not get the child of the archive");
93 MemoryBufferRef MBRef =
94 CHECK(C.getMemoryBufferRef(),
96 ": could not get the buffer for a child of the archive");
100 fatal(File->getFileName() +
101 ": Archive::children failed: " + toString(std::move(Err)));
// Parses the buffer MB as a binary and checks that it is a COFF object file.
// A buffer that is not one is reported with a fatal "is not a COFF file" error.
105 void ObjFile::parse() {
106 // Parse a memory buffer as a COFF file.
107 std::unique_ptr<Binary> Bin = CHECK(createBinary(MB), this);
// dyn_cast yields a non-null pointer only when the binary is a COFFObjectFile.
109 if (auto *Obj = dyn_cast<COFFObjectFile>(Bin.get())) {
113 fatal(toString(this) + " is not a COFF file");
116 // Read section and symbol tables.
121 // We set SectionChunk pointers in the SparseChunks vector to this value
122 // temporarily to mark comdat sections as having an unknown resolution. As we
123 // walk the object file's symbol table, once we visit either a leader symbol or
124 // an associative section definition together with the parent comdat's leader,
125 // we set the pointer to either nullptr (to mark the section as discarded) or a
126 // valid SectionChunk for that section.
// The marker is the integer 1 cast to a pointer, so it is distinct from the
// nullptr used for discarded sections; it is a value to compare against, not a
// pointer to a real chunk.
127 static SectionChunk *const PendingComdat = reinterpret_cast<SectionChunk *>(1);
// Fills SparseChunks with one entry per section of the object file. Sections
// flagged IMAGE_SCN_LNK_COMDAT are marked PendingComdat; the other assignment
// stores the result of readSection(I, nullptr).
129 void ObjFile::initializeChunks() {
130 uint32_t NumSections = COFFObj->getNumberOfSections();
131 Chunks.reserve(NumSections);
// The loop below indexes SparseChunks by section number starting at 1, hence
// the extra slot; index 0 is not filled in here.
132 SparseChunks.resize(NumSections + 1);
133 for (uint32_t I = 1; I < NumSections + 1; ++I) {
134 const coff_section *Sec;
135 if (auto EC = COFFObj->getSection(I, Sec))
136 fatal("getSection failed: #" + Twine(I) + ": " + EC.message());
138 if (Sec->Characteristics & IMAGE_SCN_LNK_COMDAT)
139 SparseChunks[I] = PendingComdat;
141 SparseChunks[I] = readSection(I, nullptr);
// Reads the section with the given section number from the object file and
// returns a SectionChunk pointer for it.
//
// SectionNumber: index passed to COFFObj->getSection().
// Def: auxiliary section definition for the section; initializeChunks() passes
//      nullptr for sections that are not comdats.
//
// The contents of ".sxdata" are recorded in SXData and the contents of
// ".drectve" in Directives.
145 SectionChunk *ObjFile::readSection(uint32_t SectionNumber,
146 const coff_aux_section_definition *Def) {
147 const coff_section *Sec;
149 if (auto EC = COFFObj->getSection(SectionNumber, Sec))
150 fatal("getSection failed: #" + Twine(SectionNumber) + ": " + EC.message());
151 if (auto EC = COFFObj->getSectionName(Sec, Name))
152 fatal("getSectionName failed: #" + Twine(SectionNumber) + ": " +
// .sxdata is viewed as an array of 32-bit little-endian values (ulittle32_t),
// so its size has to be a multiple of 4.
// NOTE(review): the result of getSectionContents is not inspected here or in
// the .drectve case below; presumably it reports errors like getSection and
// getSectionName above - confirm whether it should be checked.
154 if (Name == ".sxdata") {
155 ArrayRef<uint8_t> Data;
156 COFFObj->getSectionContents(Sec, Data);
157 if (Data.size() % 4 != 0)
158 fatal(".sxdata must be an array of symbol table indices");
159 SXData = {reinterpret_cast<const ulittle32_t *>(Data.data()),
// The raw bytes of .drectve are copied into Directives as a string.
163 if (Name == ".drectve") {
164 ArrayRef<uint8_t> Data;
165 COFFObj->getSectionContents(Sec, Data);
166 Directives = std::string((const char *)Data.data(), Data.size());
170 // Object files may have DWARF debug info or MS CodeView debug info
173 // DWARF sections don't need any special handling from the perspective
174 // of the linker; they are just a data section containing relocations.
175 // We can just link them to complete debug info.
177 // CodeView needs linker support. We need to interpret the debug
178 // info, and then write it to a separate .pdb file.
180 // Ignore debug info unless /debug is given.
181 if (!Config->Debug && Name.startswith(".debug"))
184 if (Sec->Characteristics & llvm::COFF::IMAGE_SCN_LNK_REMOVE)
186 auto *C = make<SectionChunk>(this, Sec);
// The comdat checksum is taken from the auxiliary section definition.
188 C->Checksum = Def->CheckSum;
190 // CodeView sections are stored to a different vector because they are not
191 // linked in the regular manner.
193 DebugChunks.push_back(C);
// Resolves the associative comdat section described by the section definition
// symbol Sym and its auxiliary record Def. The parent section is looked up in
// SparseChunks using the section number stored in Def.
200 void ObjFile::readAssociativeDefinition(
201 COFFSymbolRef Sym, const coff_aux_section_definition *Def) {
// Def->getNumber() is given Sym.isBigObj() to pick the right record layout.
202 SectionChunk *Parent = SparseChunks[Def->getNumber(Sym.isBigObj())];
204 // If the parent is pending, it probably means that its section definition
205 // appears after us in the symbol table. Leave the associated section as
206 // pending; we will handle it during the second pass in initializeSymbols().
207 if (Parent == PendingComdat)
210 // Check whether the parent is prevailing. If it is, so are we, and we read
211 // the section; otherwise mark it as discarded.
212 int32_t SectionNumber = Sym.getSectionNumber();
214 SparseChunks[SectionNumber] = readSection(SectionNumber, Def);
// Only a section that readSection() actually produced is attached to the
// parent.
215 if (SparseChunks[SectionNumber])
216 Parent->addAssociative(SparseChunks[SectionNumber]);
218 SparseChunks[SectionNumber] = nullptr;
// Creates the Symbol for a COFF symbol that lives in a section. The section's
// chunk is taken from SparseChunks. External symbols are entered into the
// symbol table by name (addRegular / addUndefined); the remaining path builds
// a DefinedRegular with an empty name directly via make<>.
222 Symbol *ObjFile::createRegular(COFFSymbolRef Sym) {
223 SectionChunk *SC = SparseChunks[Sym.getSectionNumber()];
224 if (Sym.isExternal()) {
226 COFFObj->getSymbolName(Sym, Name);
228 return Symtab->addRegular(this, Name, Sym.getGeneric(), SC);
229 return Symtab->addUndefined(Name, this, false);
232 return make<DefinedRegular>(this, /*Name*/ "", false,
233 /*IsExternal*/ false, Sym.getGeneric(), SC);
// Builds the Symbols vector, which is indexed by symbol table index.
// The work happens in three steps: a pass over the whole symbol table, a
// second pass over the symbols whose section was still pending, and finally
// the resolution of weak aliases.
237 void ObjFile::initializeSymbols() {
238 uint32_t NumSymbols = COFFObj->getNumberOfSymbols();
239 Symbols.resize(NumSymbols);
// Each entry pairs a weak external symbol with the symbol table index
// (TagIndex) of its alias target.
241 SmallVector<std::pair<Symbol *, uint32_t>, 8> WeakAliases;
242 std::vector<uint32_t> PendingIndexes;
243 PendingIndexes.reserve(NumSymbols);
// Indexed by section number, hence one slot more than the section count.
245 std::vector<const coff_aux_section_definition *> ComdatDefs(
246 COFFObj->getNumberOfSections() + 1);
248 for (uint32_t I = 0; I < NumSymbols; ++I) {
249 COFFSymbolRef COFFSym = check(COFFObj->getSymbol(I));
250 if (COFFSym.isUndefined()) {
251 Symbols[I] = createUndefined(COFFSym);
252 } else if (COFFSym.isWeakExternal()) {
253 Symbols[I] = createUndefined(COFFSym);
254 uint32_t TagIndex = COFFSym.getAux<coff_aux_weak_external>()->TagIndex;
255 WeakAliases.emplace_back(Symbols[I], TagIndex);
256 } else if (Optional<Symbol *> OptSym = createDefined(COFFSym, ComdatDefs)) {
257 Symbols[I] = *OptSym;
259 // createDefined() returns None if a symbol belongs to a section that
260 // was pending at the point when the symbol was read. This can happen in two cases:
262 // 1) section definition symbol for a comdat leader;
263 // 2) symbol belongs to a comdat section associated with a section whose
264 // section definition symbol appears later in the symbol table.
265 // In both of these cases, we can expect the section to be resolved by
266 // the time we finish visiting the remaining symbols in the symbol
267 // table. So we postpone the handling of this symbol until that time.
268 PendingIndexes.push_back(I);
// Advance the index past the auxiliary records of this symbol.
270 I += COFFSym.getNumberOfAuxSymbols();
// Second pass over the postponed symbols: associative section definitions are
// handled first, then the symbol itself is created.
273 for (uint32_t I : PendingIndexes) {
274 COFFSymbolRef Sym = check(COFFObj->getSymbol(I));
275 if (auto *Def = Sym.getSectionDefinition())
276 if (Def->Selection == IMAGE_COMDAT_SELECT_ASSOCIATIVE)
277 readAssociativeDefinition(Sym, Def);
278 Symbols[I] = createRegular(Sym);
// Weak aliases are resolved last, after both passes above have filled Symbols.
// NOTE(review): Idx is the TagIndex read from the object file; no range check
// against Symbols.size() is visible before it is used as an index - confirm.
281 for (auto &KV : WeakAliases) {
282 Symbol *Sym = KV.first;
283 uint32_t Idx = KV.second;
284 checkAndSetWeakAlias(Symtab, this, Sym, Symbols[Idx]);
// Adds the symbol to the symbol table as undefined, under the name read from
// the object file. The last argument to addUndefined is Sym.isWeakExternal().
288 Symbol *ObjFile::createUndefined(COFFSymbolRef Sym) {
290 COFFObj->getSymbolName(Sym, Name);
291 return Symtab->addUndefined(Name, this, Sym.isWeakExternal());
// Creates the Symbol for a COFF symbol that initializeSymbols() found to be
// neither undefined nor weak external. Common symbols, absolute symbols,
// comdat leaders and section definition symbols are handled in turn;
// everything else ends up in createRegular().
//
// ComdatDefs is indexed by section number. An entry is set when the section
// definition of a non-associative comdat is seen and cleared again when the
// entry is consumed by the comdat-leader branch below.
//
// The result is an Optional; initializeSymbols() postpones the symbol when it
// is empty.
294 Optional<Symbol *> ObjFile::createDefined(
296 std::vector<const coff_aux_section_definition *> &ComdatDefs) {
298 if (Sym.isCommon()) {
299 auto *C = make<CommonChunk>(Sym);
301 COFFObj->getSymbolName(Sym, Name);
303 Symtab->addCommon(this, Name, Sym.getValue(), Sym.getGeneric(), C);
306 if (Sym.isAbsolute()) {
307 COFFObj->getSymbolName(Sym, Name);
308 // Skip special symbols.
309 if (Name == "@comp.id")
311 // COFF spec 5.10.1. The .sxdata section.
312 if (Name == "@feat.00") {
// Bit 0 of the @feat.00 value is tested here.
313 if (Sym.getValue() & 1)
// External absolute symbols go through the symbol table; the others are
// created directly.
317 if (Sym.isExternal())
318 return Symtab->addAbsolute(Name, Sym);
320 return make<DefinedAbsolute>(Name, Sym);
322 int32_t SectionNumber = Sym.getSectionNumber();
323 if (SectionNumber == llvm::COFF::IMAGE_SYM_DEBUG)
326 // Reserved section numbers don't have contents.
327 if (llvm::COFF::isReservedSectionNumber(SectionNumber))
328 fatal("broken object file: " + toString(this));
330 // This symbol references a section which is not present in the section table.
332 if ((uint32_t)SectionNumber >= SparseChunks.size())
333 fatal("broken object file: " + toString(this));
335 // Handle comdat leader symbols.
336 if (const coff_aux_section_definition *Def = ComdatDefs[SectionNumber]) {
337 ComdatDefs[SectionNumber] = nullptr;
// External leaders are registered by name through addComdat, which yields the
// leader symbol together with the Prevailing flag.
340 if (Sym.isExternal()) {
341 COFFObj->getSymbolName(Sym, Name);
342 std::tie(Leader, Prevailing) =
343 Symtab->addComdat(this, Name, Sym.getGeneric());
345 Leader = make<DefinedRegular>(this, /*Name*/ "", false,
346 /*IsExternal*/ false, Sym.getGeneric());
// NOTE(review): the pointer returned by readSection() is dereferenced below
// without a null check, while readAssociativeDefinition() does test the
// result - confirm that readSection() cannot return null on this path.
350 SectionChunk *C = readSection(SectionNumber, Def);
351 SparseChunks[SectionNumber] = C;
// Link the chunk and its leader symbol to each other.
352 C->Sym = cast<DefinedRegular>(Leader);
353 cast<DefinedRegular>(Leader)->Data = &C->Repl;
355 SparseChunks[SectionNumber] = nullptr;
360 // Read associative section definitions and prepare to handle the comdat
361 // leader symbol by setting the section's ComdatDefs pointer if we encounter a
362 // non-associative comdat.
363 if (SparseChunks[SectionNumber] == PendingComdat) {
364 if (auto *Def = Sym.getSectionDefinition()) {
365 if (Def->Selection == IMAGE_COMDAT_SELECT_ASSOCIATIVE)
366 readAssociativeDefinition(Sym, Def);
368 ComdatDefs[SectionNumber] = Def;
// The section may still be pending after the handling above.
372 if (SparseChunks[SectionNumber] == PendingComdat)
374 return createRegular(Sym);
// Reports the machine type of this object file: the value of
// COFFObj->getMachine() converted to MachineTypes on one path,
// IMAGE_FILE_MACHINE_UNKNOWN on the other.
377 MachineTypes ObjFile::getMachineType() {
379 return static_cast<MachineTypes>(COFFObj->getMachine());
380 return IMAGE_FILE_MACHINE_UNKNOWN;
// Tests whether S is non-empty and starts with one of the characters listed in
// the NUL-terminated string Chars.
// NOTE(review): presumably strips that single leading character, as the name
// and the "?@_" calls in ImportFile::parse suggest - confirm.
// strchr also matches the terminating NUL of Chars, so a leading '\0' in S
// counts as a match.
383 StringRef ltrim1(StringRef S, const char *Chars) {
384 if (!S.empty() && strchr(Chars, S[0]))
// Parses a short import library member: a coff_import_header followed by two
// NUL-terminated strings, the imported symbol name and the DLL name.
// Creates the "__imp_"-prefixed import symbol, an additional data symbol for
// IMPORT_CONST entries, and a thunk symbol for IMPORT_CODE entries.
389 void ImportFile::parse() {
390 const char *Buf = MB.getBufferStart();
391 const char *End = MB.getBufferEnd();
392 const auto *Hdr = reinterpret_cast<const coff_import_header *>(Buf);
394 // Check if the total size is valid.
// NOTE(review): Hdr->SizeOfData is read before anything visible here verifies
// that the buffer holds at least sizeof(*Hdr) bytes - confirm that callers
// guarantee this.
395 if ((size_t)(End - Buf) != (sizeof(*Hdr) + Hdr->SizeOfData))
396 fatal("broken import library");
398 // Read names and create an __imp_ symbol.
// Both StringRefs below are built from a const char *, so each name extends to
// the next NUL byte. The DLL name starts one byte past the end of Name.
399 StringRef Name = Saver.save(StringRef(Buf + sizeof(*Hdr)));
400 StringRef ImpName = Saver.save("__imp_" + Name);
401 const char *NameStart = Buf + sizeof(coff_import_header) + Name.size() + 1;
402 DLLName = StringRef(NameStart);
// The external name is derived from Name according to the header's name type.
404 switch (Hdr->getNameType()) {
411 case IMPORT_NAME_NOPREFIX:
412 ExtName = ltrim1(Name, "?@_");
414 case IMPORT_NAME_UNDECORATE:
415 ExtName = ltrim1(Name, "?@_");
// Keep only the part before the first '@'.
416 ExtName = ExtName.substr(0, ExtName.find('@'));
421 ExternalName = ExtName;
423 ImpSym = Symtab->addImportData(ImpName, this);
// For IMPORT_CONST the undecorated Name is added as import data as well; the
// returned symbol is deliberately discarded.
425 if (Hdr->getType() == llvm::COFF::IMPORT_CONST)
426 static_cast<void>(Symtab->addImportData(Name, this));
428 // If type is function, we need to create a thunk which jumps to an
429 // address pointed to by the __imp_ symbol. (This allows you to call
430 // DLL functions just like regular non-DLL functions.)
431 if (Hdr->getType() == llvm::COFF::IMPORT_CODE)
432 ThunkSym = Symtab->addImportThunk(Name, ImpSym, Hdr->Machine);
// Loads the buffer as an LTO input file and creates a Symbol for each of its
// symbols. The resulting symbols are appended to SymbolBodies, and the linker
// options stored in the bitcode are copied into Directives.
435 void BitcodeFile::parse() {
// The identifier given to the LTO input is ParentName followed by the buffer's
// own identifier.
436 Obj = check(lto::InputFile::create(MemoryBufferRef(
437 MB.getBuffer(), Saver.save(ParentName + MB.getBufferIdentifier()))));
// One addComdat() result per comdat table entry: a symbol plus a bool
// (presumably the "prevailing" flag, as in ObjFile::createDefined - confirm).
438 std::vector<std::pair<Symbol *, bool>> Comdat(Obj->getComdatTable().size());
439 for (size_t I = 0; I != Obj->getComdatTable().size(); ++I)
440 Comdat[I] = Symtab->addComdat(this, Saver.save(Obj->getComdatTable()[I]));
441 for (const lto::InputFile::Symbol &ObjSym : Obj->symbols()) {
442 StringRef SymName = Saver.save(ObjSym.getName());
443 int ComdatIndex = ObjSym.getComdatIndex();
445 if (ObjSym.isUndefined()) {
446 Sym = Symtab->addUndefined(SymName, this, false);
447 } else if (ObjSym.isCommon()) {
448 Sym = Symtab->addCommon(this, SymName, ObjSym.getCommonSize());
449 } else if (ObjSym.isWeak() && ObjSym.isIndirect()) {
// A weak indirect symbol becomes a weak undefined symbol whose alias is the
// fallback symbol named by the bitcode.
451 Sym = Symtab->addUndefined(SymName, this, true);
452 std::string Fallback = ObjSym.getCOFFWeakExternalFallback();
453 Symbol *Alias = Symtab->addUndefined(Saver.save(Fallback));
454 checkAndSetWeakAlias(Symtab, this, Sym, Alias);
// A ComdatIndex of -1 means the symbol is not in a comdat.
455 } else if (ComdatIndex != -1) {
// The symbol named like the comdat itself reuses the symbol returned by
// addComdat(); other members are added as regular or undefined symbols
// depending on the bool stored for that comdat.
456 if (SymName == Obj->getComdatTable()[ComdatIndex])
457 Sym = Comdat[ComdatIndex].first;
458 else if (Comdat[ComdatIndex].second)
459 Sym = Symtab->addRegular(this, SymName);
461 Sym = Symtab->addUndefined(SymName, this, false);
463 Sym = Symtab->addRegular(this, SymName);
465 SymbolBodies.push_back(Sym);
467 Directives = Obj->getCOFFLinkerOpts();
// Derives the COFF machine type from the architecture of the target triple
// reported by Obj. aarch64 is among the architectures handled, and
// IMAGE_FILE_MACHINE_UNKNOWN is among the possible results.
470 MachineTypes BitcodeFile::getMachineType() {
471 switch (Triple(Obj->getTargetTriple()).getArch()) {
478 case Triple::aarch64:
481 return IMAGE_FILE_MACHINE_UNKNOWN;
487 // Returns the last element of a path, which is supposed to be a filename.
// Both the backslash and the forward slash count as path separators; when one
// is found, the result is everything after the last separator.
488 static StringRef getBasename(StringRef Path) {
489 size_t Pos = Path.find_last_of("\\/");
// npos means the path contains no separator at all.
490 if (Pos == StringRef::npos)
492 return Path.substr(Pos + 1);
495 // Returns a string in the format of "foo.obj" or "bar.lib(foo.obj)".
496 std::string lld::toString(const coff::InputFile *File) {
499 if (File->ParentName.empty())
500 return File->getName();
502 return (getBasename(File->ParentName) + "(" + getBasename(File->getName()) +