contrib/llvm/lib/Target/AArch64/AArch64Subtarget.cpp

   1 //===-- AArch64Subtarget.cpp - AArch64 Subtarget Information ----*- C++ -*-===//
   2 //
   3 //                     The LLVM Compiler Infrastructure
   4 //
   5 // This file is distributed under the University of Illinois Open Source
   6 // License. See LICENSE.TXT for details.
   7 //
   8 //===----------------------------------------------------------------------===//
   9 //
  10 // This file implements the AArch64 specific subclass of TargetSubtarget.
  11 //
  12 //===----------------------------------------------------------------------===//
  13
  14 #include "AArch64Subtarget.h"
  15
  16 #include "AArch64.h"
  17 #include "AArch64InstrInfo.h"
  18 #include "AArch64PBQPRegAlloc.h"
  19 #include "AArch64TargetMachine.h"
  20
  21 #include "AArch64CallLowering.h"
  22 #include "AArch64LegalizerInfo.h"
  23 #include "AArch64RegisterBankInfo.h"
  24 #include "llvm/CodeGen/GlobalISel/InstructionSelect.h"
  25 #include "llvm/CodeGen/MachineScheduler.h"
  26 #include "llvm/IR/GlobalValue.h"
  27 #include "llvm/Support/TargetParser.h"
  28
  29 using namespace llvm;
  30
  31 #define DEBUG_TYPE "aarch64-subtarget"
  32
  33 #define GET_SUBTARGETINFO_CTOR
  34 #define GET_SUBTARGETINFO_TARGET_DESC
  35 #include "AArch64GenSubtargetInfo.inc"
  36
  37 static cl::opt<bool>
  38 EnableEarlyIfConvert("aarch64-early-ifcvt", cl::desc("Enable the early if "
  39                      "converter pass"), cl::init(true), cl::Hidden);
  40
  41 // If OS supports TBI, use this flag to enable it.
  42 static cl::opt<bool>
  43 UseAddressTopByteIgnored("aarch64-use-tbi", cl::desc("Assume that top byte of "
  44                          "an address is ignored"), cl::init(false), cl::Hidden);
  45
  46 static cl::opt<bool>
  47     UseNonLazyBind("aarch64-enable-nonlazybind",
  48                    cl::desc("Call nonlazybind functions via direct GOT load"),
  49                    cl::init(false), cl::Hidden);
  50
  51 AArch64Subtarget &
  52 AArch64Subtarget::initializeSubtargetDependencies(StringRef FS,
  53                                                   StringRef CPUString) {
  54   // Determine default and user-specified characteristics
  55
  56   if (CPUString.empty())
  57     CPUString = "generic";
  58
  59   ParseSubtargetFeatures(CPUString, FS);
  60   initializeProperties();
  61
  62   return *this;
  63 }
  64
  65 void AArch64Subtarget::initializeProperties() {
  66   // Initialize CPU specific properties. We should add a tablegen feature for
  67   // this in the future so we can specify it together with the subtarget
  68   // features.
  69   switch (ARMProcFamily) {
  70   case Cyclone:
  71     CacheLineSize = 64;
  72     PrefetchDistance = 280;
  73     MinPrefetchStride = 2048;
  74     MaxPrefetchIterationsAhead = 3;
  75     break;
  76   case CortexA57:
  77     MaxInterleaveFactor = 4;
  78     PrefFunctionAlignment = 4;
  79     break;
  80   case ExynosM1:
  81     MaxInterleaveFactor = 4;
  82     MaxJumpTableSize = 8;
  83     PrefFunctionAlignment = 4;
  84     PrefLoopAlignment = 3;
  85     break;
  86   case ExynosM3:
  87     MaxInterleaveFactor = 4;
  88     MaxJumpTableSize = 20;
  89     PrefFunctionAlignment = 5;
  90     PrefLoopAlignment = 4;
  91     break;
  92   case Falkor:
  93     MaxInterleaveFactor = 4;
  94     // FIXME: remove this to enable 64-bit SLP if performance looks good.
  95     MinVectorRegisterBitWidth = 128;
  96     CacheLineSize = 128;
  97     PrefetchDistance = 820;
  98     MinPrefetchStride = 2048;
  99     MaxPrefetchIterationsAhead = 8;
 100     break;
 101   case Saphira:
 102     MaxInterleaveFactor = 4;
 103     // FIXME: remove this to enable 64-bit SLP if performance looks good.
 104     MinVectorRegisterBitWidth = 128;
 105     break;
 106   case Kryo:
 107     MaxInterleaveFactor = 4;
 108     VectorInsertExtractBaseCost = 2;
 109     CacheLineSize = 128;
 110     PrefetchDistance = 740;
 111     MinPrefetchStride = 1024;
 112     MaxPrefetchIterationsAhead = 11;
 113     // FIXME: remove this to enable 64-bit SLP if performance looks good.
 114     MinVectorRegisterBitWidth = 128;
 115     break;
 116   case ThunderX2T99:
 117     CacheLineSize = 64;
 118     PrefFunctionAlignment = 3;
 119     PrefLoopAlignment = 2;
 120     MaxInterleaveFactor = 4;
 121     PrefetchDistance = 128;
 122     MinPrefetchStride = 1024;
 123     MaxPrefetchIterationsAhead = 4;
 124     // FIXME: remove this to enable 64-bit SLP if performance looks good.
 125     MinVectorRegisterBitWidth = 128;
 126     break;
 127   case ThunderX:
 128   case ThunderXT88:
 129   case ThunderXT81:
 130   case ThunderXT83:
 131     CacheLineSize = 128;
 132     PrefFunctionAlignment = 3;
 133     PrefLoopAlignment = 2;
 134     // FIXME: remove this to enable 64-bit SLP if performance looks good.
 135     MinVectorRegisterBitWidth = 128;
 136     break;
 137   case CortexA35: break;
 138   case CortexA53:
 139     PrefFunctionAlignment = 3;
 140     break;
 141   case CortexA55: break;
 142   case CortexA72:
 143   case CortexA73:
 144   case CortexA75:
 145     PrefFunctionAlignment = 4;
 146     break;
 147   case Others: break;
 148   }
 149 }
 150
 151 AArch64Subtarget::AArch64Subtarget(const Triple &TT, const std::string &CPU,
 152                                    const std::string &FS,
 153                                    const TargetMachine &TM, bool LittleEndian)
 154     : AArch64GenSubtargetInfo(TT, CPU, FS),
 155       ReserveX18(AArch64::isX18ReservedByDefault(TT)), IsLittle(LittleEndian),
 156       TargetTriple(TT), FrameLowering(),
 157       InstrInfo(initializeSubtargetDependencies(FS, CPU)), TSInfo(),
 158       TLInfo(TM, *this) {
 159   CallLoweringInfo.reset(new AArch64CallLowering(*getTargetLowering()));
 160   Legalizer.reset(new AArch64LegalizerInfo(*this));
 161
 162   auto *RBI = new AArch64RegisterBankInfo(*getRegisterInfo());
 163
 164   // FIXME: At this point, we can't rely on Subtarget having RBI.
 165   // It's awkward to mix passing RBI and the Subtarget; should we pass
 166   // TII/TRI as well?
 167   InstSelector.reset(createAArch64InstructionSelector(
 168       *static_cast<const AArch64TargetMachine *>(&TM), *this, *RBI));
 169
 170   RegBankInfo.reset(RBI);
 171 }
 172
 173 const CallLowering *AArch64Subtarget::getCallLowering() const {
 174   return CallLoweringInfo.get();
 175 }
 176
 177 const InstructionSelector *AArch64Subtarget::getInstructionSelector() const {
 178   return InstSelector.get();
 179 }
 180
 181 const LegalizerInfo *AArch64Subtarget::getLegalizerInfo() const {
 182   return Legalizer.get();
 183 }
 184
 185 const RegisterBankInfo *AArch64Subtarget::getRegBankInfo() const {
 186   return RegBankInfo.get();
 187 }
 188
 189 /// Find the target operand flags that describe how a global value should be
 190 /// referenced for the current subtarget.
 191 unsigned char
 192 AArch64Subtarget::ClassifyGlobalReference(const GlobalValue *GV,
 193                                           const TargetMachine &TM) const {
 194   // MachO large model always goes via a GOT, simply to get a single 8-byte
 195   // absolute relocation on all global addresses.
 196   if (TM.getCodeModel() == CodeModel::Large && isTargetMachO())
 197     return AArch64II::MO_GOT;
 198
 199   unsigned Flags = GV->hasDLLImportStorageClass() ? AArch64II::MO_DLLIMPORT
 200                                                   : AArch64II::MO_NO_FLAG;
 201
 202   if (!TM.shouldAssumeDSOLocal(*GV->getParent(), GV))
 203     return AArch64II::MO_GOT | Flags;
 204
 205   // The small code model's direct accesses use ADRP, which cannot
 206   // necessarily produce the value 0 (if the code is above 4GB).
 207   if (useSmallAddressing() && GV->hasExternalWeakLinkage())
 208     return AArch64II::MO_GOT | Flags;
 209
 210   return Flags;
 211 }
 212
 213 unsigned char AArch64Subtarget::classifyGlobalFunctionReference(
 214     const GlobalValue *GV, const TargetMachine &TM) const {
 215   // MachO large model always goes via a GOT, because we don't have the
 216   // relocations available to do anything else..
 217   if (TM.getCodeModel() == CodeModel::Large && isTargetMachO() &&
 218       !GV->hasInternalLinkage())
 219     return AArch64II::MO_GOT;
 220
 221   // NonLazyBind goes via GOT unless we know it's available locally.
 222   auto *F = dyn_cast<Function>(GV);
 223   if (UseNonLazyBind && F && F->hasFnAttribute(Attribute::NonLazyBind) &&
 224       !TM.shouldAssumeDSOLocal(*GV->getParent(), GV))
 225     return AArch64II::MO_GOT;
 226
 227   return AArch64II::MO_NO_FLAG;
 228 }
 229
 230 void AArch64Subtarget::overrideSchedPolicy(MachineSchedPolicy &Policy,
 231                                            unsigned NumRegionInstrs) const {
 232   // LNT run (at least on Cyclone) showed reasonably significant gains for
 233   // bi-directional scheduling. 253.perlbmk.
 234   Policy.OnlyTopDown = false;
 235   Policy.OnlyBottomUp = false;
 236   // Enabling or Disabling the latency heuristic is a close call: It seems to
 237   // help nearly no benchmark on out-of-order architectures, on the other hand
 238   // it regresses register pressure on a few benchmarking.
 239   Policy.DisableLatencyHeuristic = DisableLatencySchedHeuristic;
 240 }
 241
 242 bool AArch64Subtarget::enableEarlyIfConversion() const {
 243   return EnableEarlyIfConvert;
 244 }
 245
 246 bool AArch64Subtarget::supportsAddressTopByteIgnored() const {
 247   if (!UseAddressTopByteIgnored)
 248     return false;
 249
 250   if (TargetTriple.isiOS()) {
 251     unsigned Major, Minor, Micro;
 252     TargetTriple.getiOSVersion(Major, Minor, Micro);
 253     return Major >= 8;
 254   }
 255
 256   return false;
 257 }
 258
 259 std::unique_ptr<PBQPRAConstraint>
 260 AArch64Subtarget::getCustomPBQPConstraints() const {
 261   return balanceFPOps() ? llvm::make_unique<A57ChainingConstraint>() : nullptr;
 262 }
 263
 264 void AArch64Subtarget::mirFileLoaded(MachineFunction &MF) const {
 265   // We usually compute max call frame size after ISel. Do the computation now
 266   // if the .mir file didn't specify it. Note that this will probably give you
 267   // bogus values after PEI has eliminated the callframe setup/destroy pseudo
 268   // instructions, specify explicitely if you need it to be correct.
 269   MachineFrameInfo &MFI = MF.getFrameInfo();
 270   if (!MFI.isMaxCallFrameSizeComputed())
 271     MFI.computeMaxCallFrameSize(MF);
 272 }