contrib/llvm/lib/Target/AArch64/AArch64Subtarget.cpp

   1 //===-- AArch64Subtarget.cpp - AArch64 Subtarget Information ----*- C++ -*-===//
   2 //
   3 //                     The LLVM Compiler Infrastructure
   4 //
   5 // This file is distributed under the University of Illinois Open Source
   6 // License. See LICENSE.TXT for details.
   7 //
   8 //===----------------------------------------------------------------------===//
   9 //
  10 // This file implements the AArch64 specific subclass of TargetSubtarget.
  11 //
  12 //===----------------------------------------------------------------------===//
  13
  14 #include "AArch64Subtarget.h"
  15
  16 #include "AArch64.h"
  17 #include "AArch64InstrInfo.h"
  18 #include "AArch64PBQPRegAlloc.h"
  19 #include "AArch64TargetMachine.h"
  20
  21 #include "AArch64CallLowering.h"
  22 #include "AArch64LegalizerInfo.h"
  23 #include "AArch64RegisterBankInfo.h"
  24 #include "llvm/CodeGen/GlobalISel/InstructionSelect.h"
  25 #include "llvm/CodeGen/MachineScheduler.h"
  26 #include "llvm/IR/GlobalValue.h"
  27
  28 using namespace llvm;
  29
  30 #define DEBUG_TYPE "aarch64-subtarget"
  31
  32 #define GET_SUBTARGETINFO_CTOR
  33 #define GET_SUBTARGETINFO_TARGET_DESC
  34 #include "AArch64GenSubtargetInfo.inc"
  35
  36 static cl::opt<bool>
  37 EnableEarlyIfConvert("aarch64-early-ifcvt", cl::desc("Enable the early if "
  38                      "converter pass"), cl::init(true), cl::Hidden);
  39
  40 // If OS supports TBI, use this flag to enable it.
  41 static cl::opt<bool>
  42 UseAddressTopByteIgnored("aarch64-use-tbi", cl::desc("Assume that top byte of "
  43                          "an address is ignored"), cl::init(false), cl::Hidden);
  44
  45 static cl::opt<bool>
  46     UseNonLazyBind("aarch64-enable-nonlazybind",
  47                    cl::desc("Call nonlazybind functions via direct GOT load"),
  48                    cl::init(false), cl::Hidden);
  49
  50 AArch64Subtarget &
  51 AArch64Subtarget::initializeSubtargetDependencies(StringRef FS,
  52                                                   StringRef CPUString) {
  53   // Determine default and user-specified characteristics
  54
  55   if (CPUString.empty())
  56     CPUString = "generic";
  57
  58   ParseSubtargetFeatures(CPUString, FS);
  59   initializeProperties();
  60
  61   return *this;
  62 }
  63
  64 void AArch64Subtarget::initializeProperties() {
  65   // Initialize CPU specific properties. We should add a tablegen feature for
  66   // this in the future so we can specify it together with the subtarget
  67   // features.
  68   switch (ARMProcFamily) {
  69   case Cyclone:
  70     CacheLineSize = 64;
  71     PrefetchDistance = 280;
  72     MinPrefetchStride = 2048;
  73     MaxPrefetchIterationsAhead = 3;
  74     break;
  75   case CortexA57:
  76     MaxInterleaveFactor = 4;
  77     PrefFunctionAlignment = 4;
  78     break;
  79   case ExynosM1:
  80     MaxInterleaveFactor = 4;
  81     MaxJumpTableSize = 8;
  82     PrefFunctionAlignment = 4;
  83     PrefLoopAlignment = 3;
  84     break;
  85   case Falkor:
  86     MaxInterleaveFactor = 4;
  87     // FIXME: remove this to enable 64-bit SLP if performance looks good.
  88     MinVectorRegisterBitWidth = 128;
  89     CacheLineSize = 128;
  90     PrefetchDistance = 820;
  91     MinPrefetchStride = 2048;
  92     MaxPrefetchIterationsAhead = 8;
  93     break;
  94   case Saphira:
  95     MaxInterleaveFactor = 4;
  96     // FIXME: remove this to enable 64-bit SLP if performance looks good.
  97     MinVectorRegisterBitWidth = 128;
  98     break;
  99   case Kryo:
 100     MaxInterleaveFactor = 4;
 101     VectorInsertExtractBaseCost = 2;
 102     CacheLineSize = 128;
 103     PrefetchDistance = 740;
 104     MinPrefetchStride = 1024;
 105     MaxPrefetchIterationsAhead = 11;
 106     // FIXME: remove this to enable 64-bit SLP if performance looks good.
 107     MinVectorRegisterBitWidth = 128;
 108     break;
 109   case ThunderX2T99:
 110     CacheLineSize = 64;
 111     PrefFunctionAlignment = 3;
 112     PrefLoopAlignment = 2;
 113     MaxInterleaveFactor = 4;
 114     PrefetchDistance = 128;
 115     MinPrefetchStride = 1024;
 116     MaxPrefetchIterationsAhead = 4;
 117     // FIXME: remove this to enable 64-bit SLP if performance looks good.
 118     MinVectorRegisterBitWidth = 128;
 119     break;
 120   case ThunderX:
 121   case ThunderXT88:
 122   case ThunderXT81:
 123   case ThunderXT83:
 124     CacheLineSize = 128;
 125     PrefFunctionAlignment = 3;
 126     PrefLoopAlignment = 2;
 127     // FIXME: remove this to enable 64-bit SLP if performance looks good.
 128     MinVectorRegisterBitWidth = 128;
 129     break;
 130   case CortexA35: break;
 131   case CortexA53:
 132     PrefFunctionAlignment = 3;
 133     break;
 134   case CortexA55: break;
 135   case CortexA72:
 136   case CortexA73:
 137   case CortexA75:
 138     PrefFunctionAlignment = 4;
 139     break;
 140   case Others: break;
 141   }
 142 }
 143
 144 AArch64Subtarget::AArch64Subtarget(const Triple &TT, const std::string &CPU,
 145                                    const std::string &FS,
 146                                    const TargetMachine &TM, bool LittleEndian)
 147     : AArch64GenSubtargetInfo(TT, CPU, FS),
 148       ReserveX18(TT.isOSDarwin() || TT.isOSWindows()), IsLittle(LittleEndian),
 149       TargetTriple(TT), FrameLowering(),
 150       InstrInfo(initializeSubtargetDependencies(FS, CPU)), TSInfo(),
 151       TLInfo(TM, *this) {
 152   CallLoweringInfo.reset(new AArch64CallLowering(*getTargetLowering()));
 153   Legalizer.reset(new AArch64LegalizerInfo(*this));
 154
 155   auto *RBI = new AArch64RegisterBankInfo(*getRegisterInfo());
 156
 157   // FIXME: At this point, we can't rely on Subtarget having RBI.
 158   // It's awkward to mix passing RBI and the Subtarget; should we pass
 159   // TII/TRI as well?
 160   InstSelector.reset(createAArch64InstructionSelector(
 161       *static_cast<const AArch64TargetMachine *>(&TM), *this, *RBI));
 162
 163   RegBankInfo.reset(RBI);
 164 }
 165
 166 const CallLowering *AArch64Subtarget::getCallLowering() const {
 167   return CallLoweringInfo.get();
 168 }
 169
 170 const InstructionSelector *AArch64Subtarget::getInstructionSelector() const {
 171   return InstSelector.get();
 172 }
 173
 174 const LegalizerInfo *AArch64Subtarget::getLegalizerInfo() const {
 175   return Legalizer.get();
 176 }
 177
 178 const RegisterBankInfo *AArch64Subtarget::getRegBankInfo() const {
 179   return RegBankInfo.get();
 180 }
 181
 182 /// Find the target operand flags that describe how a global value should be
 183 /// referenced for the current subtarget.
 184 unsigned char
 185 AArch64Subtarget::ClassifyGlobalReference(const GlobalValue *GV,
 186                                           const TargetMachine &TM) const {
 187   // MachO large model always goes via a GOT, simply to get a single 8-byte
 188   // absolute relocation on all global addresses.
 189   if (TM.getCodeModel() == CodeModel::Large && isTargetMachO())
 190     return AArch64II::MO_GOT;
 191
 192   unsigned Flags = GV->hasDLLImportStorageClass() ? AArch64II::MO_DLLIMPORT
 193                                                   : AArch64II::MO_NO_FLAG;
 194
 195   if (!TM.shouldAssumeDSOLocal(*GV->getParent(), GV))
 196     return AArch64II::MO_GOT | Flags;
 197
 198   // The small code model's direct accesses use ADRP, which cannot
 199   // necessarily produce the value 0 (if the code is above 4GB).
 200   if (useSmallAddressing() && GV->hasExternalWeakLinkage())
 201     return AArch64II::MO_GOT | Flags;
 202
 203   return Flags;
 204 }
 205
 206 unsigned char AArch64Subtarget::classifyGlobalFunctionReference(
 207     const GlobalValue *GV, const TargetMachine &TM) const {
 208   // MachO large model always goes via a GOT, because we don't have the
 209   // relocations available to do anything else..
 210   if (TM.getCodeModel() == CodeModel::Large && isTargetMachO() &&
 211       !GV->hasInternalLinkage())
 212     return AArch64II::MO_GOT;
 213
 214   // NonLazyBind goes via GOT unless we know it's available locally.
 215   auto *F = dyn_cast<Function>(GV);
 216   if (UseNonLazyBind && F && F->hasFnAttribute(Attribute::NonLazyBind) &&
 217       !TM.shouldAssumeDSOLocal(*GV->getParent(), GV))
 218     return AArch64II::MO_GOT;
 219
 220   return AArch64II::MO_NO_FLAG;
 221 }
 222
 223 void AArch64Subtarget::overrideSchedPolicy(MachineSchedPolicy &Policy,
 224                                            unsigned NumRegionInstrs) const {
 225   // LNT run (at least on Cyclone) showed reasonably significant gains for
 226   // bi-directional scheduling. 253.perlbmk.
 227   Policy.OnlyTopDown = false;
 228   Policy.OnlyBottomUp = false;
 229   // Enabling or Disabling the latency heuristic is a close call: It seems to
 230   // help nearly no benchmark on out-of-order architectures, on the other hand
 231   // it regresses register pressure on a few benchmarking.
 232   Policy.DisableLatencyHeuristic = DisableLatencySchedHeuristic;
 233 }
 234
 235 bool AArch64Subtarget::enableEarlyIfConversion() const {
 236   return EnableEarlyIfConvert;
 237 }
 238
 239 bool AArch64Subtarget::supportsAddressTopByteIgnored() const {
 240   if (!UseAddressTopByteIgnored)
 241     return false;
 242
 243   if (TargetTriple.isiOS()) {
 244     unsigned Major, Minor, Micro;
 245     TargetTriple.getiOSVersion(Major, Minor, Micro);
 246     return Major >= 8;
 247   }
 248
 249   return false;
 250 }
 251
 252 std::unique_ptr<PBQPRAConstraint>
 253 AArch64Subtarget::getCustomPBQPConstraints() const {
 254   return balanceFPOps() ? llvm::make_unique<A57ChainingConstraint>() : nullptr;
 255 }