1 //===-- LTOModule.cpp - LLVM Link Time Optimizer --------------------------===//
3 // The LLVM Compiler Infrastructure
5 // This file is distributed under the University of Illinois Open Source
6 // License. See LICENSE.TXT for details.
8 //===----------------------------------------------------------------------===//
10 // This file implements the Link Time Optimization library. This library is
11 // intended to be used by the linker to optimize code at link time.
13 //===----------------------------------------------------------------------===//
15 #include "llvm/LTO/legacy/LTOModule.h"
16 #include "llvm/ADT/Triple.h"
17 #include "llvm/Analysis/ObjectUtils.h"
18 #include "llvm/Bitcode/BitcodeReader.h"
19 #include "llvm/CodeGen/TargetLoweringObjectFile.h"
20 #include "llvm/CodeGen/TargetSubtargetInfo.h"
21 #include "llvm/IR/Constants.h"
22 #include "llvm/IR/LLVMContext.h"
23 #include "llvm/IR/Mangler.h"
24 #include "llvm/IR/Metadata.h"
25 #include "llvm/IR/Module.h"
26 #include "llvm/MC/MCExpr.h"
27 #include "llvm/MC/MCInst.h"
28 #include "llvm/MC/MCParser/MCAsmParser.h"
29 #include "llvm/MC/MCSection.h"
30 #include "llvm/MC/MCSubtargetInfo.h"
31 #include "llvm/MC/MCSymbol.h"
32 #include "llvm/MC/SubtargetFeature.h"
33 #include "llvm/Object/IRObjectFile.h"
34 #include "llvm/Object/ObjectFile.h"
35 #include "llvm/Support/FileSystem.h"
36 #include "llvm/Support/Host.h"
37 #include "llvm/Support/MemoryBuffer.h"
38 #include "llvm/Support/Path.h"
39 #include "llvm/Support/SourceMgr.h"
40 #include "llvm/Support/TargetRegistry.h"
41 #include "llvm/Support/TargetSelect.h"
42 #include "llvm/Transforms/Utils/GlobalStatus.h"
43 #include <system_error>
45 using namespace llvm::object;
/// Construct an LTOModule: moves the parsed module \p M into Mod, stores the
/// buffer reference \p MBRef and the target machine pointer \p TM, and then
/// registers the module with the symbol table.
47 LTOModule::LTOModule(std::unique_ptr<Module> M, MemoryBufferRef MBRef,
48 llvm::TargetMachine *TM)
49 : Mod(std::move(M)), MBRef(MBRef), _target(TM) {
// Feed the module to SymTab so its symbols can be enumerated later.
50 SymTab.addModule(Mod.get());
/// Destructor. The body is empty; members are released by their own
/// destructors.
53 LTOModule::~LTOModule() {}
55 /// isBitcodeFile - Returns 'true' if the file (or memory contents) is LLVM
/// bitcode. The range [Mem, Mem + Length) is wrapped in a MemoryBufferRef named
/// "<mem>" and passed to IRObjectFile::findBitcodeInMemBuffer.
57 bool LTOModule::isBitcodeFile(const void *Mem, size_t Length) {
58 Expected<MemoryBufferRef> BCData = IRObjectFile::findBitcodeInMemBuffer(
59 MemoryBufferRef(StringRef((const char *)Mem, Length), "<mem>"));
// The error carried by BCData is consumed (discarded) instead of reported.
// NOTE(review): the guard around this call and the return statements are not
// visible in this excerpt.
61 consumeError(BCData.takeError());
/// Path-based overload: reads the file at \p Path with MemoryBuffer::getFile
/// and looks for bitcode inside the resulting buffer.
67 bool LTOModule::isBitcodeFile(StringRef Path) {
68 ErrorOr<std::unique_ptr<MemoryBuffer>> BufferOrErr =
69 MemoryBuffer::getFile(Path);
// NOTE(review): BufferOrErr.get() below presumes the read succeeded; the
// failure check is not visible in this excerpt.
73 Expected<MemoryBufferRef> BCData = IRObjectFile::findBitcodeInMemBuffer(
74 BufferOrErr.get()->getMemBufferRef());
// The error carried by BCData is consumed (discarded) instead of reported.
76 consumeError(BCData.takeError());
/// Asks getBitcodeLTOInfo about the stored buffer reference (MBRef) and
/// returns the IsThinLTO flag of the result.
82 bool LTOModule::isThinLTO() {
83 Expected<BitcodeLTOInfo> Result = getBitcodeLTOInfo(MBRef);
// Errors are logged to errs() with an empty banner.
// NOTE(review): the condition guarding this error path is not visible in
// this excerpt.
85 logAllUnhandledErrors(Result.takeError(), errs(), "");
88 return Result->IsThinLTO;
/// Returns whether the target triple read from the bitcode found in \p Buffer
/// starts with \p TriplePrefix.
91 bool LTOModule::isBitcodeForTarget(MemoryBuffer *Buffer,
92 StringRef TriplePrefix) {
93 Expected<MemoryBufferRef> BCOrErr =
94 IRObjectFile::findBitcodeInMemBuffer(Buffer->getMemBufferRef());
// A failure to locate bitcode is consumed silently.
// NOTE(review): the guard and early return are not visible in this excerpt.
96 consumeError(BCOrErr.takeError());
// Read the triple; errors are emitted through Context.
// NOTE(review): the declaration of Context is not visible in this excerpt.
100 ErrorOr<std::string> TripleOrErr =
101 expectedToErrorOrAndEmitErrors(Context, getBitcodeTargetTriple(*BCOrErr));
// Prefix comparison, not an exact match.
104 return StringRef(*TripleOrErr).startswith(TriplePrefix);
/// Returns the producer string read (via getBitcodeProducerString) from the
/// bitcode found in \p Buffer.
107 std::string LTOModule::getProducerString(MemoryBuffer *Buffer) {
108 Expected<MemoryBufferRef> BCOrErr =
109 IRObjectFile::findBitcodeInMemBuffer(Buffer->getMemBufferRef());
// A failure to locate bitcode is consumed silently.
// NOTE(review): the guard and early return are not visible in this excerpt.
111 consumeError(BCOrErr.takeError());
// Errors while reading the producer string are emitted through Context.
// NOTE(review): the declaration of Context is not visible in this excerpt.
115 ErrorOr<std::string> ProducerOrErr = expectedToErrorOrAndEmitErrors(
116 Context, getBitcodeProducerString(*BCOrErr));
119 return *ProducerOrErr;
/// Create an LTOModule from the file at \p path. The file is read with
/// MemoryBuffer::getFile and handed to makeLTOModule with ShouldBeLazy = false.
/// A read failure is reported through \p Context.
122 ErrorOr<std::unique_ptr<LTOModule>>
123 LTOModule::createFromFile(LLVMContext &Context, StringRef path,
124 const TargetOptions &options) {
125 ErrorOr<std::unique_ptr<MemoryBuffer>> BufferOrErr =
126 MemoryBuffer::getFile(path);
127 if (std::error_code EC = BufferOrErr.getError()) {
128 Context.emitError(EC.message());
// NOTE(review): Buffer is a local owner, while the LTOModule constructor
// stores the MemoryBufferRef (MBRef) and isThinLTO() reads it later; confirm
// the reference is not used after this function returns.
131 std::unique_ptr<MemoryBuffer> Buffer = std::move(BufferOrErr.get());
132 return makeLTOModule(Buffer->getMemBufferRef(), options, Context,
133 /* ShouldBeLazy*/ false);
/// Create an LTOModule from an already-open file descriptor. Forwards to
/// createFromOpenFileSlice with \p size as the map size and an offset of 0.
136 ErrorOr<std::unique_ptr<LTOModule>>
137 LTOModule::createFromOpenFile(LLVMContext &Context, int fd, StringRef path,
138 size_t size, const TargetOptions &options) {
139 return createFromOpenFileSlice(Context, fd, path, size, 0, options);
/// Create an LTOModule from a slice of an already-open file: \p map_size bytes
/// starting at \p offset, obtained with MemoryBuffer::getOpenFileSlice. The
/// slice is handed to makeLTOModule with ShouldBeLazy = false. A read failure
/// is reported through \p Context.
142 ErrorOr<std::unique_ptr<LTOModule>>
143 LTOModule::createFromOpenFileSlice(LLVMContext &Context, int fd, StringRef path,
144 size_t map_size, off_t offset,
145 const TargetOptions &options) {
146 ErrorOr<std::unique_ptr<MemoryBuffer>> BufferOrErr =
147 MemoryBuffer::getOpenFileSlice(fd, path, map_size, offset);
148 if (std::error_code EC = BufferOrErr.getError()) {
149 Context.emitError(EC.message());
// NOTE(review): Buffer is a local owner, while the LTOModule constructor
// stores the MemoryBufferRef (MBRef) and isThinLTO() reads it later; confirm
// the reference is not used after this function returns.
152 std::unique_ptr<MemoryBuffer> Buffer = std::move(BufferOrErr.get());
153 return makeLTOModule(Buffer->getMemBufferRef(), options, Context,
154 /* ShouldBeLazy */ false);
/// Create an LTOModule from the caller-provided memory range
/// [mem, mem + length). The range is referenced, not copied, and is passed to
/// makeLTOModule with ShouldBeLazy = false.
// NOTE(review): the tail of the parameter list (which declares `path`) is not
// visible in this excerpt.
157 ErrorOr<std::unique_ptr<LTOModule>>
158 LTOModule::createFromBuffer(LLVMContext &Context, const void *mem,
159 size_t length, const TargetOptions &options,
161 StringRef Data((const char *)mem, length);
// `path` becomes the identifier of the buffer reference.
162 MemoryBufferRef Buffer(Data, path);
163 return makeLTOModule(Buffer, options, Context, /* ShouldBeLazy */ false);
/// Create an LTOModule from the memory range [mem, mem + length) inside a
/// context the caller hands over. The module is built lazily, and ownership of
/// \p Context is then moved into the new module's OwnedContext.
166 ErrorOr<std::unique_ptr<LTOModule>>
167 LTOModule::createInLocalContext(std::unique_ptr<LLVMContext> Context,
168 const void *mem, size_t length,
169 const TargetOptions &options, StringRef path) {
170 StringRef Data((const char *)mem, length);
171 MemoryBufferRef Buffer(Data, path);
172 // If we own a context, we know this is being used only for symbol extraction,
173 // not linking. Be lazy in that case.
174 ErrorOr<std::unique_ptr<LTOModule>> Ret =
175 makeLTOModule(Buffer, options, *Context, /* ShouldBeLazy */ true);
// Hand the context to the module.
// NOTE(review): the check that Ret holds a value and the final return are not
// visible in this excerpt.
177 (*Ret)->OwnedContext = std::move(Context);
/// File-local helper: locate the bitcode inside \p Buffer and parse it into a
/// Module, either fully (parseBitcodeFile) or lazily (getLazyBitcodeModule).
/// Errors are emitted through \p Context.
// NOTE(review): the rest of the parameter list and the condition selecting
// between the two return statements are not visible in this excerpt.
181 static ErrorOr<std::unique_ptr<Module>>
182 parseBitcodeFileImpl(MemoryBufferRef Buffer, LLVMContext &Context,
185 Expected<MemoryBufferRef> MBOrErr =
186 IRObjectFile::findBitcodeInMemBuffer(Buffer);
// No bitcode found: convert the Error to an error_code and report its message.
187 if (Error E = MBOrErr.takeError()) {
188 std::error_code EC = errorToErrorCode(std::move(E));
189 Context.emitError(EC.message());
194 // Parse the full file.
195 return expectedToErrorOrAndEmitErrors(Context,
196 parseBitcodeFile(*MBOrErr, Context));
// Lazy variant; metadata loading is requested to be lazy as well.
200 return expectedToErrorOrAndEmitErrors(
202 getLazyBitcodeModule(*MBOrErr, Context, true /*ShouldLazyLoadMetadata*/));
/// Shared factory behind the create* entry points. Parses \p Buffer into a
/// Module, resolves the target from the module's triple, creates a
/// TargetMachine for it and wraps everything in a new LTOModule.
// NOTE(review): several short lines are not visible in this excerpt,
// including the declarations of errMsg and CPU, the CPU assignments and some
// early returns.
205 ErrorOr<std::unique_ptr<LTOModule>>
206 LTOModule::makeLTOModule(MemoryBufferRef Buffer, const TargetOptions &options,
207 LLVMContext &Context, bool ShouldBeLazy) {
208 ErrorOr<std::unique_ptr<Module>> MOrErr =
209 parseBitcodeFileImpl(Buffer, Context, ShouldBeLazy);
210 if (std::error_code EC = MOrErr.getError())
212 std::unique_ptr<Module> &M = *MOrErr;
// Use the module's own triple, or the host default when it has none.
214 std::string TripleStr = M->getTargetTriple();
215 if (TripleStr.empty())
216 TripleStr = sys::getDefaultTargetTriple();
217 llvm::Triple Triple(TripleStr);
219 // find machine architecture for this module
221 const Target *march = TargetRegistry::lookupTarget(TripleStr, errMsg);
// A null module pointer is returned here (presumably when the target lookup
// fails; the guard is not visible in this excerpt).
223 return std::unique_ptr<LTOModule>(nullptr);
225 // construct LTOModule, hand over ownership of module and target
226 SubtargetFeatures Features;
227 Features.getDefaultSubtargetFeatures(Triple);
228 std::string FeatureStr = Features.getString();
229 // Set a default CPU for Darwin triples.
231 if (Triple.isOSDarwin()) {
232 if (Triple.getArch() == llvm::Triple::x86_64)
234 else if (Triple.getArch() == llvm::Triple::x86)
236 else if (Triple.getArch() == llvm::Triple::aarch64)
240 TargetMachine *target =
241 march->createTargetMachine(TripleStr, CPU, FeatureStr, options, None);
243 std::unique_ptr<LTOModule> Ret(new LTOModule(std::move(M), Buffer, target));
245 Ret->parseMetadata();
247 return std::move(Ret);
250 /// Create a MemoryBuffer from a memory range with an optional name.
/// The buffer refers to [mem, mem + length) through a StringRef.
251 std::unique_ptr<MemoryBuffer>
252 LTOModule::makeBuffer(const void *mem, size_t length, StringRef name) {
253 const char *startPtr = (const char*)mem;
// NOTE(review): the trailing `false` is presumably the RequiresNullTerminator
// argument of MemoryBuffer::getMemBuffer; confirm against its declaration.
254 return MemoryBuffer::getMemBuffer(StringRef(startPtr, length), name, false);
257 /// objcClassNameFromExpression - Get string that the data pointer points to.
/// Follows \p c (a ConstantExpr) to operand 0, expects a GlobalVariable whose
/// initializer is a C-string ConstantDataArray, and stores
/// ".objc_class_name_" + that string in \p name.
// NOTE(review): the return type line and the return statements are not
// visible in this excerpt; callers test the result as a boolean.
259 LTOModule::objcClassNameFromExpression(const Constant *c, std::string &name) {
260 if (const ConstantExpr *ce = dyn_cast<ConstantExpr>(c)) {
261 Constant *op = ce->getOperand(0);
262 if (GlobalVariable *gvn = dyn_cast<GlobalVariable>(op)) {
263 Constant *cn = gvn->getInitializer();
264 if (ConstantDataArray *ca = dyn_cast<ConstantDataArray>(cn)) {
265 if (ca->isCString()) {
266 name = (".objc_class_name_" + ca->getAsCString()).str();
275 /// addObjCClass - Parse i386/ppc ObjC class data structure.
/// Records the superclass name (operand 1) as an undefined symbol and the
/// class name (operand 2) as a defined data symbol.
276 void LTOModule::addObjCClass(const GlobalVariable *clgv) {
// NOTE(review): dyn_cast yields null when the initializer is not a
// ConstantStruct; the corresponding guard is not visible in this excerpt.
277 const ConstantStruct *c = dyn_cast<ConstantStruct>(clgv->getInitializer());
280 // second slot in __OBJC,__class is pointer to superclass name
281 std::string superclassName;
282 if (objcClassNameFromExpression(c->getOperand(1), superclassName)) {
// NOTE(review): the declaration that receives this insert result (IterBool)
// is not visible in this excerpt.
284 _undefines.insert(std::make_pair(superclassName, NameAndAttributes()));
// Fill in the entry only when the name was newly inserted.
285 if (IterBool.second) {
286 NameAndAttributes &info = IterBool.first->second;
287 info.name = IterBool.first->first();
288 info.attributes = LTO_SYMBOL_DEFINITION_UNDEFINED;
289 info.isFunction = false;
294 // third slot in __OBJC,__class is pointer to class name
295 std::string className;
296 if (objcClassNameFromExpression(c->getOperand(2), className)) {
297 auto Iter = _defines.insert(className).first;
// info.name refers to the key stored in _defines, not to the local string.
299 NameAndAttributes info;
300 info.name = Iter->first();
301 info.attributes = LTO_SYMBOL_PERMISSIONS_DATA |
302 LTO_SYMBOL_DEFINITION_REGULAR | LTO_SYMBOL_SCOPE_DEFAULT;
303 info.isFunction = false;
305 _symbols.push_back(info);
309 /// addObjCCategory - Parse i386/ppc ObjC category data structure.
/// Records the category's target class name (operand 1) as an undefined
/// symbol.
310 void LTOModule::addObjCCategory(const GlobalVariable *clgv) {
// NOTE(review): dyn_cast yields null when the initializer is not a
// ConstantStruct; the corresponding guard is not visible in this excerpt.
311 const ConstantStruct *c = dyn_cast<ConstantStruct>(clgv->getInitializer());
314 // second slot in __OBJC,__category is pointer to target class name
315 std::string targetclassName;
316 if (!objcClassNameFromExpression(c->getOperand(1), targetclassName))
// NOTE(review): the declaration that receives this insert result (IterBool)
// is not visible in this excerpt.
320 _undefines.insert(std::make_pair(targetclassName, NameAndAttributes()));
// IterBool.second is false when the name was already present.
322 if (!IterBool.second)
325 NameAndAttributes &info = IterBool.first->second;
326 info.name = IterBool.first->first();
327 info.attributes = LTO_SYMBOL_DEFINITION_UNDEFINED;
328 info.isFunction = false;
332 /// addObjCClassRef - Parse i386/ppc ObjC class list data structure.
/// Records the class name referenced by the initializer of \p clgv as an
/// undefined symbol.
333 void LTOModule::addObjCClassRef(const GlobalVariable *clgv) {
334 std::string targetclassName;
335 if (!objcClassNameFromExpression(clgv->getInitializer(), targetclassName))
// NOTE(review): the declaration that receives this insert result (IterBool)
// is not visible in this excerpt.
339 _undefines.insert(std::make_pair(targetclassName, NameAndAttributes()));
// IterBool.second is false when the name was already present.
341 if (!IterBool.second)
344 NameAndAttributes &info = IterBool.first->second;
345 info.name = IterBool.first->first();
346 info.attributes = LTO_SYMBOL_DEFINITION_UNDEFINED;
347 info.isFunction = false;
/// Symbol-table overload: prints the name of \p Sym into a local buffer and
/// forwards it, together with the underlying GlobalValue, to the
/// (StringRef, const GlobalValue *) overload.
351 void LTOModule::addDefinedDataSymbol(ModuleSymbolTable::Symbol Sym) {
352 SmallString<64> Buffer;
354 raw_svector_ostream OS(Buffer);
355 SymTab.printSymbolName(OS, Sym);
// get<> (unlike dyn_cast<>) presumes Sym holds a GlobalValue.
359 const GlobalValue *V = Sym.get<GlobalValue *>();
360 addDefinedDataSymbol(Buffer, V);
/// Record \p v as a defined data symbol named \p Name, then special-case
/// globals placed in the legacy "__OBJC,..." sections.
// NOTE(review): the bodies of the three section branches and the early return
// after the hasSection() test are not visible in this excerpt.
363 void LTOModule::addDefinedDataSymbol(StringRef Name, const GlobalValue *v) {
364 // Add to list of defined symbols.
365 addDefinedSymbol(Name, v, false);
367 if (!v->hasSection() /* || !isTargetDarwin */)
370 // Special case i386/ppc ObjC data structures in magic sections:
371 // The issue is that the old ObjC object format did some strange
372 // contortions to avoid real linker symbols. For instance, the
373 // ObjC class data structure is allocated statically in the executable
374 // that defines that class. That data structure contains a pointer to
375 // its superclass. But instead of just initializing that part of the
376 // struct to the address of its superclass, and letting the static and
377 // dynamic linkers do the rest, the runtime works by having that field
378 // instead point to a C-string that is the name of the superclass.
379 // At runtime the objc initialization updates that pointer and sets
380 // it to point to the actual super class. As far as the linker
381 // knows it is just a pointer to a string. But then someone wanted the
382 // linker to issue errors at build time if the superclass was not found.
383 // So they figured out a way in mach-o object format to use an absolute
384 // symbol (.objc_class_name_Foo = 0) and a floating reference
385 // (.reference .objc_class_name_Bar) to cause the linker to error when
386 // a class was missing.
387 // The following synthesizes the implicit .objc_* symbols for the linker
388 // from the ObjC data structures generated by the front end.
390 // special case if this data blob is an ObjC class definition
391 if (const GlobalVariable *GV = dyn_cast<GlobalVariable>(v)) {
392 StringRef Section = GV->getSection();
393 if (Section.startswith("__OBJC,__class,")) {
397 // special case if this data blob is an ObjC category definition
398 else if (Section.startswith("__OBJC,__category,")) {
402 // special case if this data blob is the list of referenced classes
403 else if (Section.startswith("__OBJC,__cls_refs,")) {
/// Symbol-table overload: prints the name of \p Sym into a local buffer and
/// forwards it, together with the underlying Function, to the
/// (StringRef, const Function *) overload.
409 void LTOModule::addDefinedFunctionSymbol(ModuleSymbolTable::Symbol Sym) {
410 SmallString<64> Buffer;
412 raw_svector_ostream OS(Buffer);
413 SymTab.printSymbolName(OS, Sym);
// cast<> (unlike dyn_cast<>) presumes the symbol is a Function.
417 const Function *F = cast<Function>(Sym.get<GlobalValue *>());
418 addDefinedFunctionSymbol(Buffer, F);
/// Record \p F as a defined symbol named \p Name, flagged as a function
/// (third argument of addDefinedSymbol is true).
421 void LTOModule::addDefinedFunctionSymbol(StringRef Name, const Function *F) {
422 // add to list of defined symbols
423 addDefinedSymbol(Name, F, true);
/// Compute the LTO attribute word for the definition \p def (alignment,
/// permissions, definition kind, scope, comdat and alias bits), intern
/// \p Name in _defines and append a NameAndAttributes entry to _symbols.
// NOTE(review): the last parameter (isFunction), several if/else lines and
// the assignments of info.name / info.symbol are not visible in this excerpt.
426 void LTOModule::addDefinedSymbol(StringRef Name, const GlobalValue *def,
428 // set alignment part; countTrailingZeros is used because log2() can have
// rounding errors
429 uint32_t align = def->getAlignment();
430 uint32_t attr = align ? countTrailingZeros(align) : 0;
432 // set permissions part
434 attr |= LTO_SYMBOL_PERMISSIONS_CODE;
436 const GlobalVariable *gv = dyn_cast<GlobalVariable>(def);
437 if (gv && gv->isConstant())
438 attr |= LTO_SYMBOL_PERMISSIONS_RODATA;
440 attr |= LTO_SYMBOL_PERMISSIONS_DATA;
443 // set definition part
444 if (def->hasWeakLinkage() || def->hasLinkOnceLinkage())
445 attr |= LTO_SYMBOL_DEFINITION_WEAK;
446 else if (def->hasCommonLinkage())
447 attr |= LTO_SYMBOL_DEFINITION_TENTATIVE;
449 attr |= LTO_SYMBOL_DEFINITION_REGULAR;
// set scope part
452 if (def->hasLocalLinkage())
453 // Ignore visibility if linkage is local.
454 attr |= LTO_SYMBOL_SCOPE_INTERNAL;
455 else if (def->hasHiddenVisibility())
456 attr |= LTO_SYMBOL_SCOPE_HIDDEN;
457 else if (def->hasProtectedVisibility())
458 attr |= LTO_SYMBOL_SCOPE_PROTECTED;
459 else if (canBeOmittedFromSymbolTable(def))
460 attr |= LTO_SYMBOL_SCOPE_DEFAULT_CAN_BE_HIDDEN;
462 attr |= LTO_SYMBOL_SCOPE_DEFAULT;
464 if (def->hasComdat())
465 attr |= LTO_SYMBOL_COMDAT;
467 if (isa<GlobalAlias>(def))
468 attr |= LTO_SYMBOL_ALIAS;
// Intern the name; the key stored in _defines backs the StringRef used below.
470 auto Iter = _defines.insert(Name).first;
472 // fill information structure
473 NameAndAttributes info;
474 StringRef NameRef = Iter->first();
// The stored key is expected to be NUL-terminated.
476 assert(NameRef.data()[NameRef.size()] == '\0');
477 info.attributes = attr;
478 info.isFunction = isFunction;
481 // add to table of symbols
482 _symbols.push_back(info);
485 /// addAsmGlobalSymbol - Add a global symbol from module-level ASM to the
/// set of defined symbols (_defines), using \p scope as its scope attribute.
// NOTE(review): several short lines (early return, else branches, the
// left-hand side of the attributes assignment) are not visible in this
// excerpt.
487 void LTOModule::addAsmGlobalSymbol(StringRef name,
488 lto_symbol_attributes scope) {
489 auto IterBool = _defines.insert(name);
491 // only add new define if not already defined
492 if (!IterBool.second)
// operator[] creates a default entry in _undefines if the name is absent.
495 NameAndAttributes &info = _undefines[IterBool.first->first()];
497 if (info.symbol == nullptr) {
498 // FIXME: This is trying to take care of module ASM like this:
500 // module asm ".zerofill __FOO, __foo, _bar_baz_qux, 0"
502 // but is gross and its mother dresses it funny. Have the ASM parser give us
503 // more details for this type of situation so that we're not guessing so
506 // fill information structure
507 info.name = IterBool.first->first();
509 LTO_SYMBOL_PERMISSIONS_DATA | LTO_SYMBOL_DEFINITION_REGULAR | scope;
510 info.isFunction = false;
511 info.symbol = nullptr;
513 // add to table of symbols
514 _symbols.push_back(info);
// The symbol already has an IR value: add it as a function or data
// definition.
519 addDefinedFunctionSymbol(info.name, cast<Function>(info.symbol));
521 addDefinedDataSymbol(info.name, info.symbol);
// Replace the scope bits of the entry just appended with the requested scope.
523 _symbols.back().attributes &= ~LTO_SYMBOL_SCOPE_MASK;
524 _symbols.back().attributes |= scope;
527 /// addAsmGlobalSymbolUndef - Add a global symbol from module-level ASM to the
/// set of undefined symbols (_undefines). The name is also appended to
/// _asm_undefines, whether or not it was already known.
529 void LTOModule::addAsmGlobalSymbolUndef(StringRef name) {
530 auto IterBool = _undefines.insert(std::make_pair(name, NameAndAttributes()));
// Uses the key stored in the map rather than the caller's StringRef.
532 _asm_undefines.push_back(IterBool.first->first());
534 // we already have the symbol
535 if (!IterBool.second)
538 uint32_t attr = LTO_SYMBOL_DEFINITION_UNDEFINED;
539 attr |= LTO_SYMBOL_SCOPE_DEFAULT;
540 NameAndAttributes &info = IterBool.first->second;
541 info.name = IterBool.first->first();
542 info.attributes = attr;
543 info.isFunction = false;
544 info.symbol = nullptr;
547 /// Add a symbol which isn't defined just yet to a list to be resolved later.
/// The printed name of \p Sym is inserted into _undefines. New entries are
/// marked weak-undefined or undefined depending on the declaration's linkage.
// NOTE(review): the second parameter (isFunc) and a few short lines (early
// return, else, info.symbol assignment) are not visible in this excerpt.
548 void LTOModule::addPotentialUndefinedSymbol(ModuleSymbolTable::Symbol Sym,
550 SmallString<64> name;
552 raw_svector_ostream OS(name);
553 SymTab.printSymbolName(OS, Sym);
557 auto IterBool = _undefines.insert(std::make_pair(name, NameAndAttributes()));
559 // we already have the symbol
560 if (!IterBool.second)
563 NameAndAttributes &info = IterBool.first->second;
565 info.name = IterBool.first->first();
// NOTE(review): dyn_cast yields null if Sym is not a GlobalValue, and decl is
// dereferenced below without a visible null check; confirm callers only pass
// GlobalValue symbols.
567 const GlobalValue *decl = Sym.dyn_cast<GlobalValue *>();
569 if (decl->hasExternalWeakLinkage())
570 info.attributes = LTO_SYMBOL_DEFINITION_WEAKUNDEF;
572 info.attributes = LTO_SYMBOL_DEFINITION_UNDEFINED;
574 info.isFunction = isFunc;
/// Walk every symbol in SymTab and classify it (module-ASM symbol, undefined
/// reference, function, global variable or alias) by calling the matching
/// add* helper. Afterwards, append to _symbols every undefined name that has
/// no definition.
// NOTE(review): the branch conditions and `continue` statements between the
// helper calls are not visible in this excerpt.
578 void LTOModule::parseSymbols() {
579 for (auto Sym : SymTab.symbols()) {
// GV is null for symbols that are not backed by an IR GlobalValue.
580 auto *GV = Sym.dyn_cast<GlobalValue *>();
581 uint32_t Flags = SymTab.getSymbolFlags(Sym);
582 if (Flags & object::BasicSymbolRef::SF_FormatSpecific)
585 bool IsUndefined = Flags & object::BasicSymbolRef::SF_Undefined;
// Symbols handled by name: print the name and register it as an ASM symbol.
588 SmallString<64> Buffer;
590 raw_svector_ostream OS(Buffer);
591 SymTab.printSymbolName(OS, Sym);
594 StringRef Name(Buffer);
597 addAsmGlobalSymbolUndef(Name);
598 else if (Flags & object::BasicSymbolRef::SF_Global)
599 addAsmGlobalSymbol(Name, LTO_SYMBOL_SCOPE_DEFAULT);
601 addAsmGlobalSymbol(Name, LTO_SYMBOL_SCOPE_INTERNAL);
// Symbols backed by a GlobalValue.
605 auto *F = dyn_cast<Function>(GV);
607 addPotentialUndefinedSymbol(Sym, F != nullptr);
612 addDefinedFunctionSymbol(Sym);
616 if (isa<GlobalVariable>(GV)) {
617 addDefinedDataSymbol(Sym);
// Anything left is expected to be an alias; it is added as data.
621 assert(isa<GlobalAlias>(GV));
622 addDefinedDataSymbol(Sym);
625 // make symbols for all undefines
626 for (StringMap<NameAndAttributes>::iterator u =_undefines.begin(),
627 e = _undefines.end(); u != e; ++u) {
628 // If this symbol also has a definition, then don't make an undefine because
629 // it is a tentative definition.
630 if (_defines.count(u->getKey())) continue;
631 NameAndAttributes info = u->getValue();
632 _symbols.push_back(info);
636 /// parseMetadata - Parse metadata from the module
/// Appends each string of the "llvm.linker.options" named metadata to
/// LinkerOpts, each preceded by a space. For COFF targets it also emits linker
/// flags for the recorded symbols.
// NOTE(review): the early return for non-COFF targets, the per-symbol guard
// and the declaration of M are not visible in this excerpt.
637 void LTOModule::parseMetadata() {
// Everything written to OS is appended to the LinkerOpts string.
638 raw_string_ostream OS(LinkerOpts);
641 if (NamedMDNode *LinkerOptions =
642 getModule().getNamedMetadata("llvm.linker.options")) {
643 for (unsigned i = 0, e = LinkerOptions->getNumOperands(); i != e; ++i) {
644 MDNode *MDOptions = LinkerOptions->getOperand(i);
645 for (unsigned ii = 0, ie = MDOptions->getNumOperands(); ii != ie; ++ii) {
// cast<> presumes every operand is an MDString.
646 MDString *MDOption = cast<MDString>(MDOptions->getOperand(ii));
647 OS << " " << MDOption->getString();
652 // Globals - we only need to do this for COFF.
653 const Triple TT(_target->getTargetTriple());
654 if (!TT.isOSBinFormatCOFF())
657 for (const NameAndAttributes &Sym : _symbols) {
660 emitLinkerFlagsForGlobalCOFF(OS, Sym.symbol, TT, M);
663 // Add other interesting metadata here.