1 //===-- ARMTargetMachine.cpp - Define TargetMachine for ARM ---------------===//
3 // The LLVM Compiler Infrastructure
5 // This file is distributed under the University of Illinois Open Source
6 // License. See LICENSE.TXT for details.
8 //===----------------------------------------------------------------------===//
11 //===----------------------------------------------------------------------===//
14 #include "ARMCallLowering.h"
15 #include "ARMInstructionSelector.h"
16 #include "ARMLegalizerInfo.h"
17 #include "ARMRegisterBankInfo.h"
18 #include "ARMSubtarget.h"
19 #include "ARMTargetMachine.h"
20 #include "ARMTargetObjectFile.h"
21 #include "ARMTargetTransformInfo.h"
22 #include "MCTargetDesc/ARMMCTargetDesc.h"
23 #include "llvm/ADT/Optional.h"
24 #include "llvm/ADT/STLExtras.h"
25 #include "llvm/ADT/StringRef.h"
26 #include "llvm/ADT/Triple.h"
27 #include "llvm/Analysis/TargetTransformInfo.h"
28 #include "llvm/CodeGen/ExecutionDepsFix.h"
29 #include "llvm/CodeGen/GlobalISel/CallLowering.h"
30 #include "llvm/CodeGen/GlobalISel/GISelAccessor.h"
31 #include "llvm/CodeGen/GlobalISel/IRTranslator.h"
32 #include "llvm/CodeGen/GlobalISel/InstructionSelect.h"
33 #include "llvm/CodeGen/GlobalISel/InstructionSelector.h"
34 #include "llvm/CodeGen/GlobalISel/Legalizer.h"
35 #include "llvm/CodeGen/GlobalISel/LegalizerInfo.h"
36 #include "llvm/CodeGen/GlobalISel/RegBankSelect.h"
37 #include "llvm/CodeGen/GlobalISel/RegisterBankInfo.h"
38 #include "llvm/CodeGen/MachineFunction.h"
39 #include "llvm/CodeGen/Passes.h"
40 #include "llvm/CodeGen/TargetPassConfig.h"
41 #include "llvm/IR/Attributes.h"
42 #include "llvm/IR/DataLayout.h"
43 #include "llvm/IR/Function.h"
44 #include "llvm/Pass.h"
45 #include "llvm/Support/CodeGen.h"
46 #include "llvm/Support/CommandLine.h"
47 #include "llvm/Support/ErrorHandling.h"
48 #include "llvm/Support/TargetParser.h"
49 #include "llvm/Support/TargetRegistry.h"
50 #include "llvm/Target/TargetLoweringObjectFile.h"
51 #include "llvm/Target/TargetOptions.h"
52 #include "llvm/Transforms/Scalar.h"
// Hidden command-line switches that tune the ARM pass pipeline built later in
// this file. Only part of each declaration is visible in this listing (the
// option type and initializer lines are not shown), so default values are not
// documented here.
//   -disable-a15-sd-optimization : checked in ARMPassConfig::addPreRegAlloc.
//   -arm-atomic-cfg-tidy         : checked in ARMPassConfig::addIRPasses.
//   -arm-load-store-opt          : checked in addPreRegAlloc and addPreSched2.
//   -arm-global-merge            : tri-state (cl::boolOrDefault), checked in
//                                  ARMPassConfig::addPreISel.
60 DisableA15SDOptimization("disable-a15-sd-optimization", cl::Hidden,
61 cl::desc("Inhibit optimization of S->D register accesses on A15"),
65 EnableAtomicTidy("arm-atomic-cfg-tidy", cl::Hidden,
66 cl::desc("Run SimplifyCFG after expanding atomic operations"
67 " to make use of cmpxchg flow-based information"),
71 EnableARMLoadStoreOpt("arm-load-store-opt", cl::Hidden,
72 cl::desc("Enable ARM load/store optimization pass"),
75 // FIXME: Unify control over GlobalMerge.
// boolOrDefault lets addPreISel tell "not given on the command line"
// (BOU_UNSET) apart from an explicit true/false.
76 static cl::opt<cl::boolOrDefault>
77 EnableGlobalMerge("arm-global-merge", cl::Hidden,
78 cl::desc("Enable the global merge pass"));
// Forward declaration of the initializer for the ARMExecutionDepsFix pass; it
// is called from LLVMInitializeARMTarget. Presumably the definition comes from
// the INITIALIZE_PASS(ARMExecutionDepsFix, ...) invocation later in this file
// -- TODO confirm.
81 void initializeARMExecutionDepsFixPass(PassRegistry&);
// C-linkage entry point that sets up the ARM backend: it registers a
// TargetMachine class for each of the four targets (ARM/Thumb, little/big
// endian) and then runs the pass initializers listed below against the
// registry returned by PassRegistry::getPassRegistry().
84 extern "C" void LLVMInitializeARMTarget() {
85 // Register the target.
86 RegisterTargetMachine<ARMLETargetMachine> X(getTheARMLETarget());
87 RegisterTargetMachine<ARMBETargetMachine> Y(getTheARMBETarget());
88 RegisterTargetMachine<ThumbLETargetMachine> A(getTheThumbLETarget());
89 RegisterTargetMachine<ThumbBETargetMachine> B(getTheThumbBETarget());
// Initialize GlobalISel and the ARM-specific passes defined by this backend.
91 PassRegistry &Registry = *PassRegistry::getPassRegistry();
92 initializeGlobalISel(Registry);
93 initializeARMLoadStoreOptPass(Registry);
94 initializeARMPreAllocLoadStoreOptPass(Registry);
95 initializeARMConstantIslandsPass(Registry);
96 initializeARMExecutionDepsFixPass(Registry);
// Creates the object-file lowering implementation that matches the triple's
// object format. The checks are ordered: a Mach-O triple gets the Mach-O
// lowering, otherwise a Windows triple gets the COFF lowering, and every other
// triple falls back to the ARM ELF lowering.
99 static std::unique_ptr<TargetLoweringObjectFile> createTLOF(const Triple &TT) {
100 if (TT.isOSBinFormatMachO())
101 return llvm::make_unique<TargetLoweringObjectFileMachO>();
102 if (TT.isOSWindows())
103 return llvm::make_unique<TargetLoweringObjectFileCOFF>();
104 return llvm::make_unique<ARMElfTargetObjectFile>();
// Chooses the ARM ABI variant for a target.
//
// \param TT      target triple; consulted only when no ABI name was given.
// \param CPU     CPU name; used to derive the architecture profile.
// \param Options target options; Options.MCOptions.getABIName() takes priority
//                over any triple-based default.
//
// An explicit ABI name is matched first. Without one, the default is picked
// from the object format, OS and environment of the triple.
107 static ARMBaseTargetMachine::ARMABI
108 computeTargetABI(const Triple &TT, StringRef CPU,
109 const TargetOptions &Options) {
// "aapcs16" must be tested for exact equality before the "aapcs" prefix test,
// because it would otherwise match that prefix.
110 if (Options.MCOptions.getABIName() == "aapcs16")
111 return ARMBaseTargetMachine::ARM_ABI_AAPCS16;
112 else if (Options.MCOptions.getABIName().startswith("aapcs"))
113 return ARMBaseTargetMachine::ARM_ABI_AAPCS;
114 else if (Options.MCOptions.getABIName().startswith("apcs"))
115 return ARMBaseTargetMachine::ARM_ABI_APCS;
// Reaching this point with a non-empty ABI name means the name was not
// recognized above.
117 assert(Options.MCOptions.getABIName().empty() &&
118 "Unknown target-abi option!");
120 ARMBaseTargetMachine::ARMABI TargetABI =
121 ARMBaseTargetMachine::ARM_ABI_UNKNOWN;
// The architecture name derived from the CPU is needed for the M-profile test
// in the Mach-O branch.
123 unsigned ArchKind = ARM::parseCPUArch(CPU);
124 StringRef ArchName = ARM::getArchName(ArchKind);
125 // FIXME: This is duplicated code from the front end and should be unified.
126 if (TT.isOSBinFormatMachO()) {
// NOTE(review): the isOSBinFormatMachO() term in the second clause is already
// implied by the enclosing if, so that clause reduces to the UnknownOS test.
127 if (TT.getEnvironment() == Triple::EABI ||
128 (TT.getOS() == Triple::UnknownOS && TT.isOSBinFormatMachO()) ||
129 ARM::parseArchProfile(ArchName) == ARM::PK_M) {
130 TargetABI = ARMBaseTargetMachine::ARM_ABI_AAPCS;
131 } else if (TT.isWatchABI()) {
132 TargetABI = ARMBaseTargetMachine::ARM_ABI_AAPCS16;
134 TargetABI = ARMBaseTargetMachine::ARM_ABI_APCS;
136 } else if (TT.isOSWindows()) {
137 // FIXME: this is invalid for WindowsCE
138 TargetABI = ARMBaseTargetMachine::ARM_ABI_AAPCS;
140 // Select the default based on the platform.
// NOTE(review): some case labels, the break statements and the default label
// of this switch are not visible in this listing; the assignments after the
// labels presumably belong to separate case groups -- confirm against the
// full file before editing.
141 switch (TT.getEnvironment()) {
142 case Triple::Android:
143 case Triple::GNUEABI:
144 case Triple::GNUEABIHF:
145 case Triple::MuslEABI:
146 case Triple::MuslEABIHF:
149 TargetABI = ARMBaseTargetMachine::ARM_ABI_AAPCS;
152 TargetABI = ARMBaseTargetMachine::ARM_ABI_APCS;
156 TargetABI = ARMBaseTargetMachine::ARM_ABI_APCS;
158 TargetABI = ARMBaseTargetMachine::ARM_ABI_AAPCS;
// Builds the data layout string for the target by appending components to Ret.
// Several components depend on the ABI returned by computeTargetABI(). The
// lines that append most of the literal components, and the final parameter of
// the signature, are not visible in this listing; the constructor call site
// passes isLittle as the fourth argument.
166 static std::string computeDataLayout(const Triple &TT, StringRef CPU,
167 const TargetOptions &Options,
169 auto ABI = computeTargetABI(TT, CPU, Options);
// The symbol-mangling component is derived from the triple.
179 Ret += DataLayout::getManglingComponent(TT);
181 // Pointers are 32 bits and aligned to 32 bits.
184 // ABIs other than APCS have 64 bit integers with natural alignment.
185 if (ABI != ARMBaseTargetMachine::ARM_ABI_APCS)
188 // We have 64 bits floats. The APCS ABI requires them to be aligned to 32
189 // bits, others to 64 bits. We always try to align to 64 bits.
190 if (ABI == ARMBaseTargetMachine::ARM_ABI_APCS)
193 // We have 128 and 64 bit vectors. The APCS ABI aligns them to 32 bits, others
194 // to 64. We always try to give them natural alignment.
// AAPCS16 matches neither branch below, so no vector component is appended
// for it here.
195 if (ABI == ARMBaseTargetMachine::ARM_ABI_APCS)
196 Ret += "-v64:32:64-v128:32:128";
197 else if (ABI != ARMBaseTargetMachine::ARM_ABI_AAPCS16)
198 Ret += "-v128:64:128";
200 // Try to align aggregates to 32 bits (the default is 64 bits, which has no
201 // particular hardware support on 32-bit ARM).
204 // Integer registers are 32 bits.
207 // The stack is 128 bit aligned on NaCl (the condition below also groups
208 // AAPCS16 with NaCl), 64 bit aligned on AAPCS and 32 bit aligned everywhere else.
209 if (TT.isOSNaCl() || ABI == ARMBaseTargetMachine::ARM_ABI_AAPCS16)
211 else if (ABI == ARMBaseTargetMachine::ARM_ABI_AAPCS)
// Resolves the relocation model to use for the target.
//
// \param TT target triple.
// \param RM relocation model requested by the caller, if any.
//
// The guard in front of the default-model return and the final return
// statement are not visible in this listing; presumably the default applies
// when RM holds no value and *RM is returned otherwise -- TODO confirm.
219 static Reloc::Model getEffectiveRelocModel(const Triple &TT,
220 Optional<Reloc::Model> RM) {
222 // Default relocation model on Darwin is PIC.
223 return TT.isOSBinFormatMachO() ? Reloc::PIC_ : Reloc::Static;
// The ROPI/RWPI models are only checked by an assert, so a non-ELF triple is
// not rejected in builds where asserts are compiled out.
225 if (*RM == Reloc::ROPI || *RM == Reloc::RWPI || *RM == Reloc::ROPI_RWPI)
226 assert(TT.isOSBinFormatELF() &&
227 "ROPI/RWPI currently only supported for ELF");
229 // DynamicNoPIC is only used on darwin.
// A DynamicNoPIC request on a non-Darwin triple is downgraded to Static.
230 if (*RM == Reloc::DynamicNoPIC && !TT.isOSDarwin())
231 return Reloc::Static;
236 /// Create an ARM architecture model.
/// The member initializer list derives the data layout string, the effective
/// relocation model, the target ABI and the object-file lowering from the
/// triple, CPU and options, and constructs the default Subtarget. The
/// constructor body then fills in the float ABI and EABI version when the
/// caller left them unspecified.
238 ARMBaseTargetMachine::ARMBaseTargetMachine(const Target &T, const Triple &TT,
239 StringRef CPU, StringRef FS,
240 const TargetOptions &Options,
241 Optional<Reloc::Model> RM,
243 CodeGenOpt::Level OL, bool isLittle)
244 : LLVMTargetMachine(T, computeDataLayout(TT, CPU, Options, isLittle), TT,
245 CPU, FS, Options, getEffectiveRelocModel(TT, RM), CM,
247 TargetABI(computeTargetABI(TT, CPU, Options)),
248 TLOF(createTLOF(getTargetTriple())),
249 Subtarget(TT, CPU, FS, *this, isLittle), isLittle(isLittle) {
// In both checks below the condition reads the Options parameter while the
// assignment writes the this->Options member.
251 // Default to triple-appropriate float ABI
252 if (Options.FloatABIType == FloatABI::Default)
253 this->Options.FloatABIType =
254 Subtarget.isTargetHardFloat() ? FloatABI::Hard : FloatABI::Soft;
256 // Default to triple-appropriate EABI
257 if (Options.EABIVersion == EABI::Default ||
258 Options.EABIVersion == EABI::Unknown) {
259 // musl is compatible with glibc with regard to EABI version
260 if (Subtarget.isTargetGNUAEABI() || Subtarget.isTargetMuslAEABI())
261 this->Options.EABIVersion = EABI::GNU;
// Fallback EABI version; the branch keyword that precedes this assignment is
// not visible in this listing (presumably an else).
263 this->Options.EABIVersion = EABI::EABI5;
// The destructor is defaulted here in the implementation file.
267 ARMBaseTargetMachine::~ARMBaseTargetMachine() = default;
// The accessor below is compiled only when LLVM_BUILD_GLOBAL_ISEL is defined.
269 #ifdef LLVM_BUILD_GLOBAL_ISEL
// GISelAccessor implementation that owns the four GlobalISel components of a
// subtarget through unique_ptr members. Each getter returns a non-owning raw
// pointer to the corresponding component, which is null until the member has
// been set.
272 struct ARMGISelActualAccessor : public GISelAccessor {
273 std::unique_ptr<CallLowering> CallLoweringInfo;
274 std::unique_ptr<InstructionSelector> InstSelector;
275 std::unique_ptr<LegalizerInfo> Legalizer;
276 std::unique_ptr<RegisterBankInfo> RegBankInfo;
278 const CallLowering *getCallLowering() const override {
279 return CallLoweringInfo.get();
282 const InstructionSelector *getInstructionSelector() const override {
283 return InstSelector.get();
286 const LegalizerInfo *getLegalizerInfo() const override {
287 return Legalizer.get();
290 const RegisterBankInfo *getRegBankInfo() const override {
291 return RegBankInfo.get();
295 } // end anonymous namespace
// Returns the subtarget to use for function F. The CPU and feature strings come
// from the function's "target-cpu" and "target-features" attributes when those
// are present, and "+soft-float" is appended to the features based on the
// "use-soft-float" attribute. Subtargets are stored in SubtargetMap under the
// key CPU + FS. Several lines are not visible in this listing: the return
// type, the fallback arms of both conditionals, the soft-float flag
// declaration and test, the guard around the creation code, and the final
// return.
299 ARMBaseTargetMachine::getSubtargetImpl(const Function &F) const {
300 Attribute CPUAttr = F.getFnAttribute("target-cpu");
301 Attribute FSAttr = F.getFnAttribute("target-features");
// Use the attribute's string value when the attribute exists.
303 std::string CPU = !CPUAttr.hasAttribute(Attribute::None)
304 ? CPUAttr.getValueAsString().str()
306 std::string FS = !FSAttr.hasAttribute(Attribute::None)
307 ? FSAttr.getValueAsString().str()
310 // FIXME: This is related to the code below to reset the target options,
311 // we need to know whether or not the soft float flag is set on the
312 // function before we can generate a subtarget. We also need to use
313 // it as a key for the subtarget since that can be the only difference
314 // between two functions.
316 F.getFnAttribute("use-soft-float").getValueAsString() == "true";
317 // If the soft float attribute is set on the function turn on the soft float
318 // subtarget feature.
// A comma separator is needed only when FS already lists other features.
320 FS += FS.empty() ? "+soft-float" : ",+soft-float";
// I refers to the map slot for this CPU/feature combination.
322 auto &I = SubtargetMap[CPU + FS];
324 // This needs to be done before we create a new subtarget since any
325 // creation will depend on the TM and the code generation flags on the
326 // function that reside in TargetOptions.
327 resetTargetOptions(F);
328 I = llvm::make_unique<ARMSubtarget>(TargetTriple, CPU, FS, *this, isLittle);
// Without GlobalISel support a plain GISelAccessor is attached; otherwise an
// ARMGISelActualAccessor is populated with the ARM GlobalISel components. The
// #else and #endif lines of this conditional are not visible in this listing.
330 #ifndef LLVM_BUILD_GLOBAL_ISEL
331 GISelAccessor *GISel = new GISelAccessor();
333 ARMGISelActualAccessor *GISel = new ARMGISelActualAccessor();
334 GISel->CallLoweringInfo.reset(new ARMCallLowering(*I->getTargetLowering()));
335 GISel->Legalizer.reset(new ARMLegalizerInfo(*I));
// RBI is kept as a raw pointer first so it can be passed to the instruction
// selector; ownership moves to GISel->RegBankInfo in the reset() call below.
337 auto *RBI = new ARMRegisterBankInfo(*I->getRegisterInfo());
339 // FIXME: At this point, we can't rely on Subtarget having RBI.
340 // It's awkward to mix passing RBI and the Subtarget; should we pass
342 GISel->InstSelector.reset(new ARMInstructionSelector(*I, *RBI));
344 GISel->RegBankInfo.reset(RBI);
// NOTE(review): presumably the subtarget takes ownership of the heap-allocated
// accessor -- confirm in ARMSubtarget::setGISelAccessor.
346 I->setGISelAccessor(*GISel);
// Returns a TargetIRAnalysis whose callback builds, for each function F, a
// TargetTransformInfo wrapping an ARMTTIImpl created from this target machine
// and F. The lambda captures this, so the returned analysis must not be used
// after this target machine has been destroyed.
351 TargetIRAnalysis ARMBaseTargetMachine::getTargetIRAnalysis() {
352 return TargetIRAnalysis([this](const Function &F) {
353 return TargetTransformInfo(ARMTTIImpl(this, F));
// Intentionally empty. NOTE(review): presumably this follows the LLVM
// convention of an out-of-line method that anchors the class's vtable to this
// file -- the declaration is not visible here.
357 void ARMTargetMachine::anchor() {}
// Target machine for ARM-mode code generation. Forwards all arguments to
// ARMBaseTargetMachine and then aborts with a fatal error if the default
// subtarget reports that it has no ARM-mode operations. The final parameter of
// the signature is not visible in this listing; isLittle is forwarded to the
// base class.
359 ARMTargetMachine::ARMTargetMachine(const Target &T, const Triple &TT,
360 StringRef CPU, StringRef FS,
361 const TargetOptions &Options,
362 Optional<Reloc::Model> RM,
363 CodeModel::Model CM, CodeGenOpt::Level OL,
365 : ARMBaseTargetMachine(T, TT, CPU, FS, Options, RM, CM, OL, isLittle) {
367 if (!Subtarget.hasARMOps())
368 report_fatal_error("CPU: '" + Subtarget.getCPUString() + "' does not "
369 "support ARM mode execution!");
// Intentionally empty; see the note on vtable anchoring conventions in LLVM
// (declaration not visible here).
372 void ARMLETargetMachine::anchor() {}
// Little-endian ARM target machine: forwards to ARMTargetMachine with the
// final (endianness) argument fixed to true.
374 ARMLETargetMachine::ARMLETargetMachine(const Target &T, const Triple &TT,
375 StringRef CPU, StringRef FS,
376 const TargetOptions &Options,
377 Optional<Reloc::Model> RM,
379 CodeGenOpt::Level OL)
380 : ARMTargetMachine(T, TT, CPU, FS, Options, RM, CM, OL, true) {}
// Intentionally empty; presumably a vtable anchor per LLVM convention
// (declaration not visible here).
382 void ARMBETargetMachine::anchor() {}
// Big-endian ARM target machine: forwards to ARMTargetMachine with the final
// (endianness) argument fixed to false.
384 ARMBETargetMachine::ARMBETargetMachine(const Target &T, const Triple &TT,
385 StringRef CPU, StringRef FS,
386 const TargetOptions &Options,
387 Optional<Reloc::Model> RM,
389 CodeGenOpt::Level OL)
390 : ARMTargetMachine(T, TT, CPU, FS, Options, RM, CM, OL, false) {}
// Intentionally empty; presumably a vtable anchor per LLVM convention
// (declaration not visible here).
392 void ThumbTargetMachine::anchor() {}
// Target machine for Thumb code generation. Forwards all arguments, including
// isLittle, to ARMBaseTargetMachine. The constructor body is not visible in
// this listing.
394 ThumbTargetMachine::ThumbTargetMachine(const Target &T, const Triple &TT,
395 StringRef CPU, StringRef FS,
396 const TargetOptions &Options,
397 Optional<Reloc::Model> RM,
399 CodeGenOpt::Level OL, bool isLittle)
400 : ARMBaseTargetMachine(T, TT, CPU, FS, Options, RM, CM, OL, isLittle) {
// Intentionally empty; presumably a vtable anchor per LLVM convention
// (declaration not visible here).
404 void ThumbLETargetMachine::anchor() {}
// Little-endian Thumb target machine: forwards to ThumbTargetMachine with
// isLittle fixed to true.
406 ThumbLETargetMachine::ThumbLETargetMachine(const Target &T, const Triple &TT,
407 StringRef CPU, StringRef FS,
408 const TargetOptions &Options,
409 Optional<Reloc::Model> RM,
411 CodeGenOpt::Level OL)
412 : ThumbTargetMachine(T, TT, CPU, FS, Options, RM, CM, OL, true) {}
// Intentionally empty; presumably a vtable anchor per LLVM convention
// (declaration not visible here).
414 void ThumbBETargetMachine::anchor() {}
// Big-endian Thumb target machine: forwards to ThumbTargetMachine with
// isLittle fixed to false.
416 ThumbBETargetMachine::ThumbBETargetMachine(const Target &T, const Triple &TT,
417 StringRef CPU, StringRef FS,
418 const TargetOptions &Options,
419 Optional<Reloc::Model> RM,
421 CodeGenOpt::Level OL)
422 : ThumbTargetMachine(T, TT, CPU, FS, Options, RM, CM, OL, false) {}
426 /// ARM Code Generator Pass Configuration Options.
/// Overrides the TargetPassConfig hooks declared below to insert ARM-specific
/// passes at the corresponding points of the codegen pipeline; the hook
/// bodies are defined further down in this file.
427 class ARMPassConfig : public TargetPassConfig {
429 ARMPassConfig(ARMBaseTargetMachine *TM, PassManagerBase &PM)
430 : TargetPassConfig(TM, PM) {}
// Convenience accessor returning the target machine as ARMBaseTargetMachine.
432 ARMBaseTargetMachine &getARMTargetMachine() const {
433 return getTM<ARMBaseTargetMachine>();
436 void addIRPasses() override;
437 bool addPreISel() override;
438 bool addInstSelector() override;
// The GlobalISel hooks are declared only when GlobalISel is built. The
// matching #endif is not visible in this listing.
439 #ifdef LLVM_BUILD_GLOBAL_ISEL
440 bool addIRTranslator() override;
441 bool addLegalizeMachineIR() override;
442 bool addRegBankSelect() override;
443 bool addGlobalInstructionSelect() override;
445 void addPreRegAlloc() override;
446 void addPreSched2() override;
447 void addPreEmitPass() override;
// ARM instantiation of the generic ExecutionDepsFix pass: the base class is
// constructed with this pass's ID and the ARM::DPRRegClass register class.
// The pass is added to the pipeline in ARMPassConfig::addPreSched2.
450 class ARMExecutionDepsFix : public ExecutionDepsFix {
453 ARMExecutionDepsFix() : ExecutionDepsFix(ID, ARM::DPRRegClass) {}
454 StringRef getPassName() const override {
455 return "ARM Execution Dependency Fix";
// Definition of the static pass identifier used in the constructor above.
458 char ARMExecutionDepsFix::ID;
460 } // end anonymous namespace
// Pass registration under the command-line name "arm-execution-deps-fix".
462 INITIALIZE_PASS(ARMExecutionDepsFix, "arm-execution-deps-fix",
463 "ARM Execution Dependency Fix", false, false)
// Creates the ARM pass configuration for the given pass manager. The object is
// heap-allocated and returned as a raw pointer; presumably the caller takes
// ownership -- TODO confirm against the TargetMachine interface.
465 TargetPassConfig *ARMBaseTargetMachine::createPassConfig(PassManagerBase &PM) {
466 return new ARMPassConfig(this, PM);
// Adds the IR-level passes that run before instruction selection: atomic
// lowering/expansion, an optional CFG clean-up after atomic expansion, the
// generic IR passes from TargetPassConfig, and interleaved-access matching
// when optimizing.
469 void ARMPassConfig::addIRPasses() {
// NOTE(review): a line between the two addPass calls below is not visible in
// this listing; presumably it is an else, so that only one of the two passes
// is added -- confirm against the full file.
470 if (TM->Options.ThreadModel == ThreadModel::Single)
471 addPass(createLowerAtomicPass());
473 addPass(createAtomicExpandPass(TM));
475 // Cmpxchg instructions are often used with a subsequent comparison to
476 // determine whether it succeeded. We can exploit existing control-flow in
477 // ldrex/strex loops to simplify this, but it needs tidying up.
// The predicate restricts the clean-up to functions whose subtarget has a data
// barrier and is not Thumb1-only.
478 if (TM->getOptLevel() != CodeGenOpt::None && EnableAtomicTidy)
479 addPass(createCFGSimplificationPass(-1, [this](const Function &F) {
480 const auto &ST = this->TM->getSubtarget<ARMSubtarget>(F);
481 return ST.hasAnyDataBarrier() && !ST.isThumb1Only();
484 TargetPassConfig::addIRPasses();
486 // Match interleaved memory accesses to ldN/stN intrinsics.
487 if (TM->getOptLevel() != CodeGenOpt::None)
488 addPass(createInterleavedAccessPass(TM));
// Adds the GlobalMerge pass ahead of instruction selection. The pass runs when
// -arm-global-merge is explicitly true, or when the option was not given and
// the optimization level is not None. The return statement is not visible in
// this listing.
491 bool ARMPassConfig::addPreISel() {
492 if ((TM->getOptLevel() != CodeGenOpt::None &&
493 EnableGlobalMerge == cl::BOU_UNSET) ||
494 EnableGlobalMerge == cl::BOU_TRUE) {
495 // FIXME: This is using the thumb1 only constant value for
496 // maximal global offset for merging globals. We may want
497 // to look into using the old value for non-thumb1 code of
498 // 4095 based on the TargetMachine, but this starts to become
499 // tricky when doing code gen per function.
// Size-only merging applies when the pass was enabled by default (option
// unset) and the optimization level is below Aggressive.
500 bool OnlyOptimizeForSize = (TM->getOptLevel() < CodeGenOpt::Aggressive) &&
501 (EnableGlobalMerge == cl::BOU_UNSET);
502 // Merging of extern globals is enabled by default on non-Mach-O as we
503 // expect it to be generally either beneficial or harmless. On Mach-O it
504 // is disabled as we emit the .subsections_via_symbols directive which
505 // means that merging extern globals is not safe.
506 bool MergeExternalByDefault = !TM->getTargetTriple().isOSBinFormatMachO();
// 127 is the maximal-offset value discussed in the FIXME above.
507 addPass(createGlobalMergePass(TM, 127, OnlyOptimizeForSize,
508 MergeExternalByDefault));
// Adds the ARM SelectionDAG instruction selector, created for this target
// machine at the current optimization level. The return statement is not
// visible in this listing.
514 bool ARMPassConfig::addInstSelector() {
515 addPass(createARMISelDag(getARMTargetMachine(), getOptLevel()));
// GlobalISel pipeline hooks, compiled only when LLVM_BUILD_GLOBAL_ISEL is
// defined. Each hook adds the corresponding generic GlobalISel pass. The
// return statements and the closing #endif are not visible in this listing.
519 #ifdef LLVM_BUILD_GLOBAL_ISEL
// IR -> generic machine IR translation.
520 bool ARMPassConfig::addIRTranslator() {
521 addPass(new IRTranslator());
// Legalization of generic machine IR.
525 bool ARMPassConfig::addLegalizeMachineIR() {
526 addPass(new Legalizer());
// Register bank assignment.
530 bool ARMPassConfig::addRegBankSelect() {
531 addPass(new RegBankSelect());
// Selection of target instructions from generic machine IR.
535 bool ARMPassConfig::addGlobalInstructionSelect() {
536 addPass(new InstructionSelect());
// Adds the ARM passes that run before register allocation. MLx expansion is
// added when the optimization level is not None. The pre-RA load/store
// optimizer is gated by -arm-load-store-opt, and the A15 S->D optimizer is
// skipped when -disable-a15-sd-optimization is set. Closing braces are not
// visible in this listing, so confirm which of the flag-gated passes sit
// inside the optimization-level check.
541 void ARMPassConfig::addPreRegAlloc() {
542 if (getOptLevel() != CodeGenOpt::None) {
543 addPass(createMLxExpansionPass());
545 if (EnableARMLoadStoreOpt)
546 addPass(createARMLoadStoreOptimizationPass(/* pre-register alloc */ true));
548 if (!DisableA15SDOptimization)
549 addPass(createA15SDOptimizerPass());
// Adds the ARM passes that run after register allocation and before the
// second scheduling pass: the load/store optimizer and execution-dependency
// fix, pseudo-instruction expansion, and the Thumb2 size reduction,
// if-conversion and IT-block passes. Closing braces are not visible in this
// listing, so the exact nesting under the optimization-level checks should be
// confirmed against the full file.
553 void ARMPassConfig::addPreSched2() {
554 if (getOptLevel() != CodeGenOpt::None) {
// Unlike addPreRegAlloc, the load/store optimizer is created here without the
// pre-register-alloc argument.
555 if (EnableARMLoadStoreOpt)
556 addPass(createARMLoadStoreOptimizationPass());
558 addPass(new ARMExecutionDepsFix());
561 // Expand some pseudo instructions into multiple instructions to allow
562 // proper scheduling.
563 addPass(createARMExpandPseudoPass());
565 if (getOptLevel() != CodeGenOpt::None) {
566 // in v8, IfConversion depends on Thumb instruction widths
// The predicate limits this early size reduction to functions whose subtarget
// reports restrictIT().
567 addPass(createThumb2SizeReductionPass([this](const Function &F) {
568 return this->TM->getSubtarget<ARMSubtarget>(F).restrictIT();
// If-conversion is skipped for Thumb1-only subtargets.
571 addPass(createIfConverter([](const MachineFunction &MF) {
572 return !MF.getSubtarget<ARMSubtarget>().isThumb1Only();
575 addPass(createThumb2ITBlockPass());
// Adds the final ARM passes before code emission: Thumb2 size reduction,
// unpacking of machine bundles for Thumb2 functions, barrier optimization when
// optimizing, and finally the constant island pass.
578 void ARMPassConfig::addPreEmitPass() {
579 addPass(createThumb2SizeReductionPass());
581 // The constant island pass works on unbundled instructions.
// Bundles are unpacked only for functions whose subtarget is Thumb2.
582 addPass(createUnpackMachineBundles([](const MachineFunction &MF) {
583 return MF.getSubtarget<ARMSubtarget>().isThumb2();
586 // Don't optimize barriers at -O0.
587 if (getOptLevel() != CodeGenOpt::None)
588 addPass(createARMOptimizeBarriersPass());
590 addPass(createARMConstantIslandPass());