contrib/llvm/lib/Target/PowerPC/PPCTargetMachine.cpp

   1 //===-- PPCTargetMachine.cpp - Define TargetMachine for PowerPC -----------===//
   2 //
   3 //                     The LLVM Compiler Infrastructure
   4 //
   5 // This file is distributed under the University of Illinois Open Source
   6 // License. See LICENSE.TXT for details.
   7 //
   8 //===----------------------------------------------------------------------===//
   9 //
  10 // Top-level implementation for the PowerPC target.
  11 //
  12 //===----------------------------------------------------------------------===//
  13
  14 #include "PPCTargetMachine.h"
  15 #include "PPC.h"
  16 #include "PPCTargetObjectFile.h"
  17 #include "PPCTargetTransformInfo.h"
  18 #include "llvm/CodeGen/LiveVariables.h"
  19 #include "llvm/CodeGen/Passes.h"
  20 #include "llvm/CodeGen/TargetPassConfig.h"
  21 #include "llvm/IR/Function.h"
  22 #include "llvm/IR/LegacyPassManager.h"
  23 #include "llvm/MC/MCStreamer.h"
  24 #include "llvm/Support/CommandLine.h"
  25 #include "llvm/Support/FormattedStream.h"
  26 #include "llvm/Support/TargetRegistry.h"
  27 #include "llvm/Target/TargetOptions.h"
  28 #include "llvm/Transforms/Scalar.h"
  29 using namespace llvm;
  30
  31 static cl::
  32 opt<bool> DisableCTRLoops("disable-ppc-ctrloops", cl::Hidden,
  33                         cl::desc("Disable CTR loops for PPC"));
  34
  35 static cl::
  36 opt<bool> DisablePreIncPrep("disable-ppc-preinc-prep", cl::Hidden,
  37                             cl::desc("Disable PPC loop preinc prep"));
  38
  39 static cl::opt<bool>
  40 VSXFMAMutateEarly("schedule-ppc-vsx-fma-mutation-early",
  41   cl::Hidden, cl::desc("Schedule VSX FMA instruction mutation early"));
  42
  43 static cl::
  44 opt<bool> DisableVSXSwapRemoval("disable-ppc-vsx-swap-removal", cl::Hidden,
  45                                 cl::desc("Disable VSX Swap Removal for PPC"));
  46
  47 static cl::
  48 opt<bool> DisableQPXLoadSplat("disable-ppc-qpx-load-splat", cl::Hidden,
  49                               cl::desc("Disable QPX load splat simplification"));
  50
  51 static cl::
  52 opt<bool> DisableMIPeephole("disable-ppc-peephole", cl::Hidden,
  53                             cl::desc("Disable machine peepholes for PPC"));
  54
  55 static cl::opt<bool>
  56 EnableGEPOpt("ppc-gep-opt", cl::Hidden,
  57              cl::desc("Enable optimizations on complex GEPs"),
  58              cl::init(true));
  59
  60 static cl::opt<bool>
  61 EnablePrefetch("enable-ppc-prefetching",
  62                   cl::desc("disable software prefetching on PPC"),
  63                   cl::init(false), cl::Hidden);
  64
  65 static cl::opt<bool>
  66 EnableExtraTOCRegDeps("enable-ppc-extra-toc-reg-deps",
  67                       cl::desc("Add extra TOC register dependencies"),
  68                       cl::init(true), cl::Hidden);
  69
  70 static cl::opt<bool>
  71 EnableMachineCombinerPass("ppc-machine-combiner",
  72                           cl::desc("Enable the machine combiner pass"),
  73                           cl::init(true), cl::Hidden);
  74
  75 extern "C" void LLVMInitializePowerPCTarget() {
  76   // Register the targets
  77   RegisterTargetMachine<PPC32TargetMachine> A(ThePPC32Target);
  78   RegisterTargetMachine<PPC64TargetMachine> B(ThePPC64Target);
  79   RegisterTargetMachine<PPC64TargetMachine> C(ThePPC64LETarget);
  80
  81   PassRegistry &PR = *PassRegistry::getPassRegistry();
  82   initializePPCBoolRetToIntPass(PR);
  83 }
  84
  85 /// Return the datalayout string of a subtarget.
  86 static std::string getDataLayoutString(const Triple &T) {
  87   bool is64Bit = T.getArch() == Triple::ppc64 || T.getArch() == Triple::ppc64le;
  88   std::string Ret;
  89
  90   // Most PPC* platforms are big endian, PPC64LE is little endian.
  91   if (T.getArch() == Triple::ppc64le)
  92     Ret = "e";
  93   else
  94     Ret = "E";
  95
  96   Ret += DataLayout::getManglingComponent(T);
  97
  98   // PPC32 has 32 bit pointers. The PS3 (OS Lv2) is a PPC64 machine with 32 bit
  99   // pointers.
 100   if (!is64Bit || T.getOS() == Triple::Lv2)
 101     Ret += "-p:32:32";
 102
 103   // Note, the alignment values for f64 and i64 on ppc64 in Darwin
 104   // documentation are wrong; these are correct (i.e. "what gcc does").
 105   if (is64Bit || !T.isOSDarwin())
 106     Ret += "-i64:64";
 107   else
 108     Ret += "-f64:32:64";
 109
 110   // PPC64 has 32 and 64 bit registers, PPC32 has only 32 bit ones.
 111   if (is64Bit)
 112     Ret += "-n32:64";
 113   else
 114     Ret += "-n32";
 115
 116   return Ret;
 117 }
 118
 119 static std::string computeFSAdditions(StringRef FS, CodeGenOpt::Level OL,
 120                                       const Triple &TT) {
 121   std::string FullFS = FS;
 122
 123   // Make sure 64-bit features are available when CPUname is generic
 124   if (TT.getArch() == Triple::ppc64 || TT.getArch() == Triple::ppc64le) {
 125     if (!FullFS.empty())
 126       FullFS = "+64bit," + FullFS;
 127     else
 128       FullFS = "+64bit";
 129   }
 130
 131   if (OL >= CodeGenOpt::Default) {
 132     if (!FullFS.empty())
 133       FullFS = "+crbits," + FullFS;
 134     else
 135       FullFS = "+crbits";
 136   }
 137
 138   if (OL != CodeGenOpt::None) {
 139     if (!FullFS.empty())
 140       FullFS = "+invariant-function-descriptors," + FullFS;
 141     else
 142       FullFS = "+invariant-function-descriptors";
 143   }
 144
 145   return FullFS;
 146 }
 147
 148 static std::unique_ptr<TargetLoweringObjectFile> createTLOF(const Triple &TT) {
 149   // If it isn't a Mach-O file then it's going to be a linux ELF
 150   // object file.
 151   if (TT.isOSDarwin())
 152     return make_unique<TargetLoweringObjectFileMachO>();
 153
 154   return make_unique<PPC64LinuxTargetObjectFile>();
 155 }
 156
 157 static PPCTargetMachine::PPCABI computeTargetABI(const Triple &TT,
 158                                                  const TargetOptions &Options) {
 159   if (Options.MCOptions.getABIName().startswith("elfv1"))
 160     return PPCTargetMachine::PPC_ABI_ELFv1;
 161   else if (Options.MCOptions.getABIName().startswith("elfv2"))
 162     return PPCTargetMachine::PPC_ABI_ELFv2;
 163
 164   assert(Options.MCOptions.getABIName().empty() &&
 165          "Unknown target-abi option!");
 166
 167   if (!TT.isMacOSX()) {
 168     switch (TT.getArch()) {
 169     case Triple::ppc64le:
 170       return PPCTargetMachine::PPC_ABI_ELFv2;
 171     case Triple::ppc64:
 172       return PPCTargetMachine::PPC_ABI_ELFv1;
 173     default:
 174       // Fallthrough.
 175       ;
 176     }
 177   }
 178   return PPCTargetMachine::PPC_ABI_UNKNOWN;
 179 }
 180
 181 static Reloc::Model getEffectiveRelocModel(const Triple &TT,
 182                                            Optional<Reloc::Model> RM) {
 183   if (!RM.hasValue()) {
 184     if (TT.isOSDarwin())
 185       return Reloc::DynamicNoPIC;
 186     return Reloc::Static;
 187   }
 188   return *RM;
 189 }
 190
 191 // The FeatureString here is a little subtle. We are modifying the feature
 192 // string with what are (currently) non-function specific overrides as it goes
 193 // into the LLVMTargetMachine constructor and then using the stored value in the
 194 // Subtarget constructor below it.
 195 PPCTargetMachine::PPCTargetMachine(const Target &T, const Triple &TT,
 196                                    StringRef CPU, StringRef FS,
 197                                    const TargetOptions &Options,
 198                                    Optional<Reloc::Model> RM,
 199                                    CodeModel::Model CM, CodeGenOpt::Level OL)
 200     : LLVMTargetMachine(T, getDataLayoutString(TT), TT, CPU,
 201                         computeFSAdditions(FS, OL, TT), Options,
 202                         getEffectiveRelocModel(TT, RM), CM, OL),
 203       TLOF(createTLOF(getTargetTriple())),
 204       TargetABI(computeTargetABI(TT, Options)),
 205       Subtarget(TargetTriple, CPU, computeFSAdditions(FS, OL, TT), *this) {
 206
 207   // For the estimates, convergence is quadratic, so we essentially double the
 208   // number of digits correct after every iteration. For both FRE and FRSQRTE,
 209   // the minimum architected relative accuracy is 2^-5. When hasRecipPrec(),
 210   // this is 2^-14. IEEE float has 23 digits and double has 52 digits.
 211   unsigned RefinementSteps = Subtarget.hasRecipPrec() ? 1 : 3,
 212            RefinementSteps64 = RefinementSteps + 1;
 213
 214   this->Options.Reciprocals.setDefaults("sqrtf", true, RefinementSteps);
 215   this->Options.Reciprocals.setDefaults("vec-sqrtf", true, RefinementSteps);
 216   this->Options.Reciprocals.setDefaults("divf", true, RefinementSteps);
 217   this->Options.Reciprocals.setDefaults("vec-divf", true, RefinementSteps);
 218
 219   this->Options.Reciprocals.setDefaults("sqrtd", true, RefinementSteps64);
 220   this->Options.Reciprocals.setDefaults("vec-sqrtd", true, RefinementSteps64);
 221   this->Options.Reciprocals.setDefaults("divd", true, RefinementSteps64);
 222   this->Options.Reciprocals.setDefaults("vec-divd", true, RefinementSteps64);
 223
 224   initAsmInfo();
 225 }
 226
 227 PPCTargetMachine::~PPCTargetMachine() {}
 228
 229 void PPC32TargetMachine::anchor() { }
 230
 231 PPC32TargetMachine::PPC32TargetMachine(const Target &T, const Triple &TT,
 232                                        StringRef CPU, StringRef FS,
 233                                        const TargetOptions &Options,
 234                                        Optional<Reloc::Model> RM,
 235                                        CodeModel::Model CM,
 236                                        CodeGenOpt::Level OL)
 237     : PPCTargetMachine(T, TT, CPU, FS, Options, RM, CM, OL) {}
 238
 239 void PPC64TargetMachine::anchor() { }
 240
 241 PPC64TargetMachine::PPC64TargetMachine(const Target &T, const Triple &TT,
 242                                        StringRef CPU, StringRef FS,
 243                                        const TargetOptions &Options,
 244                                        Optional<Reloc::Model> RM,
 245                                        CodeModel::Model CM,
 246                                        CodeGenOpt::Level OL)
 247     : PPCTargetMachine(T, TT, CPU, FS, Options, RM, CM, OL) {}
 248
 249 const PPCSubtarget *
 250 PPCTargetMachine::getSubtargetImpl(const Function &F) const {
 251   Attribute CPUAttr = F.getFnAttribute("target-cpu");
 252   Attribute FSAttr = F.getFnAttribute("target-features");
 253
 254   std::string CPU = !CPUAttr.hasAttribute(Attribute::None)
 255                         ? CPUAttr.getValueAsString().str()
 256                         : TargetCPU;
 257   std::string FS = !FSAttr.hasAttribute(Attribute::None)
 258                        ? FSAttr.getValueAsString().str()
 259                        : TargetFS;
 260
 261   // FIXME: This is related to the code below to reset the target options,
 262   // we need to know whether or not the soft float flag is set on the
 263   // function before we can generate a subtarget. We also need to use
 264   // it as a key for the subtarget since that can be the only difference
 265   // between two functions.
 266   bool SoftFloat =
 267       F.getFnAttribute("use-soft-float").getValueAsString() == "true";
 268   // If the soft float attribute is set on the function turn on the soft float
 269   // subtarget feature.
 270   if (SoftFloat)
 271     FS += FS.empty() ? "-hard-float" : ",-hard-float";
 272
 273   auto &I = SubtargetMap[CPU + FS];
 274   if (!I) {
 275     // This needs to be done before we create a new subtarget since any
 276     // creation will depend on the TM and the code generation flags on the
 277     // function that reside in TargetOptions.
 278     resetTargetOptions(F);
 279     I = llvm::make_unique<PPCSubtarget>(
 280         TargetTriple, CPU,
 281         // FIXME: It would be good to have the subtarget additions here
 282         // not necessary. Anything that turns them on/off (overrides) ends
 283         // up being put at the end of the feature string, but the defaults
 284         // shouldn't require adding them. Fixing this means pulling Feature64Bit
 285         // out of most of the target cpus in the .td file and making it set only
 286         // as part of initialization via the TargetTriple.
 287         computeFSAdditions(FS, getOptLevel(), getTargetTriple()), *this);
 288   }
 289   return I.get();
 290 }
 291
 292 //===----------------------------------------------------------------------===//
 293 // Pass Pipeline Configuration
 294 //===----------------------------------------------------------------------===//
 295
 296 namespace {
 297 /// PPC Code Generator Pass Configuration Options.
 298 class PPCPassConfig : public TargetPassConfig {
 299 public:
 300   PPCPassConfig(PPCTargetMachine *TM, PassManagerBase &PM)
 301     : TargetPassConfig(TM, PM) {}
 302
 303   PPCTargetMachine &getPPCTargetMachine() const {
 304     return getTM<PPCTargetMachine>();
 305   }
 306
 307   void addIRPasses() override;
 308   bool addPreISel() override;
 309   bool addILPOpts() override;
 310   bool addInstSelector() override;
 311   void addMachineSSAOptimization() override;
 312   void addPreRegAlloc() override;
 313   void addPreSched2() override;
 314   void addPreEmitPass() override;
 315 };
 316 } // namespace
 317
 318 TargetPassConfig *PPCTargetMachine::createPassConfig(PassManagerBase &PM) {
 319   return new PPCPassConfig(this, PM);
 320 }
 321
 322 void PPCPassConfig::addIRPasses() {
 323   if (TM->getOptLevel() != CodeGenOpt::None)
 324     addPass(createPPCBoolRetToIntPass());
 325   addPass(createAtomicExpandPass(&getPPCTargetMachine()));
 326
 327   // For the BG/Q (or if explicitly requested), add explicit data prefetch
 328   // intrinsics.
 329   bool UsePrefetching = TM->getTargetTriple().getVendor() == Triple::BGQ &&
 330                         getOptLevel() != CodeGenOpt::None;
 331   if (EnablePrefetch.getNumOccurrences() > 0)
 332     UsePrefetching = EnablePrefetch;
 333   if (UsePrefetching)
 334     addPass(createLoopDataPrefetchPass());
 335
 336   if (TM->getOptLevel() >= CodeGenOpt::Default && EnableGEPOpt) {
 337     // Call SeparateConstOffsetFromGEP pass to extract constants within indices
 338     // and lower a GEP with multiple indices to either arithmetic operations or
 339     // multiple GEPs with single index.
 340     addPass(createSeparateConstOffsetFromGEPPass(TM, true));
 341     // Call EarlyCSE pass to find and remove subexpressions in the lowered
 342     // result.
 343     addPass(createEarlyCSEPass());
 344     // Do loop invariant code motion in case part of the lowered result is
 345     // invariant.
 346     addPass(createLICMPass());
 347   }
 348
 349   TargetPassConfig::addIRPasses();
 350 }
 351
 352 bool PPCPassConfig::addPreISel() {
 353   if (!DisablePreIncPrep && getOptLevel() != CodeGenOpt::None)
 354     addPass(createPPCLoopPreIncPrepPass(getPPCTargetMachine()));
 355
 356   if (!DisableCTRLoops && getOptLevel() != CodeGenOpt::None)
 357     addPass(createPPCCTRLoops(getPPCTargetMachine()));
 358
 359   return false;
 360 }
 361
 362 bool PPCPassConfig::addILPOpts() {
 363   addPass(&EarlyIfConverterID);
 364
 365   if (EnableMachineCombinerPass)
 366     addPass(&MachineCombinerID);
 367
 368   return true;
 369 }
 370
 371 bool PPCPassConfig::addInstSelector() {
 372   // Install an instruction selector.
 373   addPass(createPPCISelDag(getPPCTargetMachine()));
 374
 375 #ifndef NDEBUG
 376   if (!DisableCTRLoops && getOptLevel() != CodeGenOpt::None)
 377     addPass(createPPCCTRLoopsVerify());
 378 #endif
 379
 380   addPass(createPPCVSXCopyPass());
 381   return false;
 382 }
 383
 384 void PPCPassConfig::addMachineSSAOptimization() {
 385   TargetPassConfig::addMachineSSAOptimization();
 386   // For little endian, remove where possible the vector swap instructions
 387   // introduced at code generation to normalize vector element order.
 388   if (TM->getTargetTriple().getArch() == Triple::ppc64le &&
 389       !DisableVSXSwapRemoval)
 390     addPass(createPPCVSXSwapRemovalPass());
 391   // Target-specific peephole cleanups performed after instruction
 392   // selection.
 393   if (!DisableMIPeephole) {
 394     addPass(createPPCMIPeepholePass());
 395     addPass(&DeadMachineInstructionElimID);
 396   }
 397 }
 398
 399 void PPCPassConfig::addPreRegAlloc() {
 400   if (getOptLevel() != CodeGenOpt::None) {
 401     initializePPCVSXFMAMutatePass(*PassRegistry::getPassRegistry());
 402     insertPass(VSXFMAMutateEarly ? &RegisterCoalescerID : &MachineSchedulerID,
 403                &PPCVSXFMAMutateID);
 404   }
 405
 406   // FIXME: We probably don't need to run these for -fPIE.
 407   if (getPPCTargetMachine().isPositionIndependent()) {
 408     // FIXME: LiveVariables should not be necessary here!
 409     // PPCTLSDYnamicCallPass uses LiveIntervals which previously dependet on
 410     // LiveVariables. This (unnecessary) dependency has been removed now,
 411     // however a stage-2 clang build fails without LiveVariables computed here.
 412     addPass(&LiveVariablesID, false);
 413     addPass(createPPCTLSDynamicCallPass());
 414   }
 415   if (EnableExtraTOCRegDeps)
 416     addPass(createPPCTOCRegDepsPass());
 417 }
 418
 419 void PPCPassConfig::addPreSched2() {
 420   if (getOptLevel() != CodeGenOpt::None) {
 421     addPass(&IfConverterID);
 422
 423     // This optimization must happen after anything that might do store-to-load
 424     // forwarding. Here we're after RA (and, thus, when spills are inserted)
 425     // but before post-RA scheduling.
 426     if (!DisableQPXLoadSplat)
 427       addPass(createPPCQPXLoadSplatPass());
 428   }
 429 }
 430
 431 void PPCPassConfig::addPreEmitPass() {
 432   if (getOptLevel() != CodeGenOpt::None)
 433     addPass(createPPCEarlyReturnPass(), false);
 434   // Must run branch selection immediately preceding the asm printer.
 435   addPass(createPPCBranchSelectionPass(), false);
 436 }
 437
 438 TargetIRAnalysis PPCTargetMachine::getTargetIRAnalysis() {
 439   return TargetIRAnalysis([this](const Function &F) {
 440     return TargetTransformInfo(PPCTTIImpl(this, F));
 441   });
 442 }