contrib/llvm-project/llvm/lib/Target/AArch64/AArch64Subtarget.cpp

   1 //===-- AArch64Subtarget.cpp - AArch64 Subtarget Information ----*- C++ -*-===//
   2 //
   3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
   4 // See https://llvm.org/LICENSE.txt for license information.
   5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
   6 //
   7 //===----------------------------------------------------------------------===//
   8 //
   9 // This file implements the AArch64 specific subclass of TargetSubtarget.
  10 //
  11 //===----------------------------------------------------------------------===//
  12
  13 #include "AArch64Subtarget.h"
  14
  15 #include "AArch64.h"
  16 #include "AArch64CallLowering.h"
  17 #include "AArch64InstrInfo.h"
  18 #include "AArch64LegalizerInfo.h"
  19 #include "AArch64PBQPRegAlloc.h"
  20 #include "AArch64RegisterBankInfo.h"
  21 #include "AArch64TargetMachine.h"
  22 #include "MCTargetDesc/AArch64AddressingModes.h"
  23 #include "llvm/CodeGen/GlobalISel/InstructionSelect.h"
  24 #include "llvm/CodeGen/MachineScheduler.h"
  25 #include "llvm/IR/GlobalValue.h"
  26 #include "llvm/Support/TargetParser.h"
  27
  28 using namespace llvm;
  29
  30 #define DEBUG_TYPE "aarch64-subtarget"
  31
  32 #define GET_SUBTARGETINFO_CTOR
  33 #define GET_SUBTARGETINFO_TARGET_DESC
  34 #include "AArch64GenSubtargetInfo.inc"
  35
  36 static cl::opt<bool>
  37 EnableEarlyIfConvert("aarch64-early-ifcvt", cl::desc("Enable the early if "
  38                      "converter pass"), cl::init(true), cl::Hidden);
  39
  40 // If OS supports TBI, use this flag to enable it.
  41 static cl::opt<bool>
  42 UseAddressTopByteIgnored("aarch64-use-tbi", cl::desc("Assume that top byte of "
  43                          "an address is ignored"), cl::init(false), cl::Hidden);
  44
  45 static cl::opt<bool>
  46     UseNonLazyBind("aarch64-enable-nonlazybind",
  47                    cl::desc("Call nonlazybind functions via direct GOT load"),
  48                    cl::init(false), cl::Hidden);
  49
  50 AArch64Subtarget &
  51 AArch64Subtarget::initializeSubtargetDependencies(StringRef FS,
  52                                                   StringRef CPUString) {
  53   // Determine default and user-specified characteristics
  54
  55   if (CPUString.empty())
  56     CPUString = "generic";
  57
  58   ParseSubtargetFeatures(CPUString, FS);
  59   initializeProperties();
  60
  61   return *this;
  62 }
  63
  64 void AArch64Subtarget::initializeProperties() {
  65   // Initialize CPU specific properties. We should add a tablegen feature for
  66   // this in the future so we can specify it together with the subtarget
  67   // features.
  68   switch (ARMProcFamily) {
  69   case Others:
  70     break;
  71   case CortexA35:
  72     break;
  73   case CortexA53:
  74     PrefFunctionAlignment = 3;
  75     break;
  76   case CortexA55:
  77     break;
  78   case CortexA57:
  79     MaxInterleaveFactor = 4;
  80     PrefFunctionAlignment = 4;
  81     break;
  82   case CortexA72:
  83   case CortexA73:
  84   case CortexA75:
  85   case CortexA76:
  86     PrefFunctionAlignment = 4;
  87     break;
  88   case Cyclone:
  89     CacheLineSize = 64;
  90     PrefetchDistance = 280;
  91     MinPrefetchStride = 2048;
  92     MaxPrefetchIterationsAhead = 3;
  93     break;
  94   case ExynosM1:
  95     MaxInterleaveFactor = 4;
  96     MaxJumpTableSize = 8;
  97     PrefFunctionAlignment = 4;
  98     PrefLoopAlignment = 3;
  99     break;
 100   case ExynosM3:
 101     MaxInterleaveFactor = 4;
 102     MaxJumpTableSize = 20;
 103     PrefFunctionAlignment = 5;
 104     PrefLoopAlignment = 4;
 105     break;
 106   case Falkor:
 107     MaxInterleaveFactor = 4;
 108     // FIXME: remove this to enable 64-bit SLP if performance looks good.
 109     MinVectorRegisterBitWidth = 128;
 110     CacheLineSize = 128;
 111     PrefetchDistance = 820;
 112     MinPrefetchStride = 2048;
 113     MaxPrefetchIterationsAhead = 8;
 114     break;
 115   case Kryo:
 116     MaxInterleaveFactor = 4;
 117     VectorInsertExtractBaseCost = 2;
 118     CacheLineSize = 128;
 119     PrefetchDistance = 740;
 120     MinPrefetchStride = 1024;
 121     MaxPrefetchIterationsAhead = 11;
 122     // FIXME: remove this to enable 64-bit SLP if performance looks good.
 123     MinVectorRegisterBitWidth = 128;
 124     break;
 125   case Saphira:
 126     MaxInterleaveFactor = 4;
 127     // FIXME: remove this to enable 64-bit SLP if performance looks good.
 128     MinVectorRegisterBitWidth = 128;
 129     break;
 130   case ThunderX2T99:
 131     CacheLineSize = 64;
 132     PrefFunctionAlignment = 3;
 133     PrefLoopAlignment = 2;
 134     MaxInterleaveFactor = 4;
 135     PrefetchDistance = 128;
 136     MinPrefetchStride = 1024;
 137     MaxPrefetchIterationsAhead = 4;
 138     // FIXME: remove this to enable 64-bit SLP if performance looks good.
 139     MinVectorRegisterBitWidth = 128;
 140     break;
 141   case ThunderX:
 142   case ThunderXT88:
 143   case ThunderXT81:
 144   case ThunderXT83:
 145     CacheLineSize = 128;
 146     PrefFunctionAlignment = 3;
 147     PrefLoopAlignment = 2;
 148     // FIXME: remove this to enable 64-bit SLP if performance looks good.
 149     MinVectorRegisterBitWidth = 128;
 150     break;
 151   case TSV110:
 152     CacheLineSize = 64;
 153     PrefFunctionAlignment = 4;
 154     PrefLoopAlignment = 2;
 155     break;
 156   }
 157 }
 158
 159 AArch64Subtarget::AArch64Subtarget(const Triple &TT, const std::string &CPU,
 160                                    const std::string &FS,
 161                                    const TargetMachine &TM, bool LittleEndian)
 162     : AArch64GenSubtargetInfo(TT, CPU, FS),
 163       ReserveXRegister(AArch64::GPR64commonRegClass.getNumRegs()),
 164       CustomCallSavedXRegs(AArch64::GPR64commonRegClass.getNumRegs()),
 165       IsLittle(LittleEndian),
 166       TargetTriple(TT), FrameLowering(),
 167       InstrInfo(initializeSubtargetDependencies(FS, CPU)), TSInfo(),
 168       TLInfo(TM, *this) {
 169   if (AArch64::isX18ReservedByDefault(TT))
 170     ReserveXRegister.set(18);
 171
 172   CallLoweringInfo.reset(new AArch64CallLowering(*getTargetLowering()));
 173   Legalizer.reset(new AArch64LegalizerInfo(*this));
 174
 175   auto *RBI = new AArch64RegisterBankInfo(*getRegisterInfo());
 176
 177   // FIXME: At this point, we can't rely on Subtarget having RBI.
 178   // It's awkward to mix passing RBI and the Subtarget; should we pass
 179   // TII/TRI as well?
 180   InstSelector.reset(createAArch64InstructionSelector(
 181       *static_cast<const AArch64TargetMachine *>(&TM), *this, *RBI));
 182
 183   RegBankInfo.reset(RBI);
 184 }
 185
 186 const CallLowering *AArch64Subtarget::getCallLowering() const {
 187   return CallLoweringInfo.get();
 188 }
 189
 190 const InstructionSelector *AArch64Subtarget::getInstructionSelector() const {
 191   return InstSelector.get();
 192 }
 193
 194 const LegalizerInfo *AArch64Subtarget::getLegalizerInfo() const {
 195   return Legalizer.get();
 196 }
 197
 198 const RegisterBankInfo *AArch64Subtarget::getRegBankInfo() const {
 199   return RegBankInfo.get();
 200 }
 201
 202 /// Find the target operand flags that describe how a global value should be
 203 /// referenced for the current subtarget.
 204 unsigned char
 205 AArch64Subtarget::ClassifyGlobalReference(const GlobalValue *GV,
 206                                           const TargetMachine &TM) const {
 207   // MachO large model always goes via a GOT, simply to get a single 8-byte
 208   // absolute relocation on all global addresses.
 209   if (TM.getCodeModel() == CodeModel::Large && isTargetMachO())
 210     return AArch64II::MO_GOT;
 211
 212   if (!TM.shouldAssumeDSOLocal(*GV->getParent(), GV)) {
 213     if (GV->hasDLLImportStorageClass())
 214       return AArch64II::MO_GOT | AArch64II::MO_DLLIMPORT;
 215     if (getTargetTriple().isOSWindows())
 216       return AArch64II::MO_GOT | AArch64II::MO_COFFSTUB;
 217     return AArch64II::MO_GOT;
 218   }
 219
 220   // The small code model's direct accesses use ADRP, which cannot
 221   // necessarily produce the value 0 (if the code is above 4GB).
 222   // Same for the tiny code model, where we have a pc relative LDR.
 223   if ((useSmallAddressing() || TM.getCodeModel() == CodeModel::Tiny) &&
 224       GV->hasExternalWeakLinkage())
 225     return AArch64II::MO_GOT;
 226
 227   return AArch64II::MO_NO_FLAG;
 228 }
 229
 230 unsigned char AArch64Subtarget::classifyGlobalFunctionReference(
 231     const GlobalValue *GV, const TargetMachine &TM) const {
 232   // MachO large model always goes via a GOT, because we don't have the
 233   // relocations available to do anything else..
 234   if (TM.getCodeModel() == CodeModel::Large && isTargetMachO() &&
 235       !GV->hasInternalLinkage())
 236     return AArch64II::MO_GOT;
 237
 238   // NonLazyBind goes via GOT unless we know it's available locally.
 239   auto *F = dyn_cast<Function>(GV);
 240   if (UseNonLazyBind && F && F->hasFnAttribute(Attribute::NonLazyBind) &&
 241       !TM.shouldAssumeDSOLocal(*GV->getParent(), GV))
 242     return AArch64II::MO_GOT;
 243
 244   return AArch64II::MO_NO_FLAG;
 245 }
 246
 247 void AArch64Subtarget::overrideSchedPolicy(MachineSchedPolicy &Policy,
 248                                            unsigned NumRegionInstrs) const {
 249   // LNT run (at least on Cyclone) showed reasonably significant gains for
 250   // bi-directional scheduling. 253.perlbmk.
 251   Policy.OnlyTopDown = false;
 252   Policy.OnlyBottomUp = false;
 253   // Enabling or Disabling the latency heuristic is a close call: It seems to
 254   // help nearly no benchmark on out-of-order architectures, on the other hand
 255   // it regresses register pressure on a few benchmarking.
 256   Policy.DisableLatencyHeuristic = DisableLatencySchedHeuristic;
 257 }
 258
 259 bool AArch64Subtarget::enableEarlyIfConversion() const {
 260   return EnableEarlyIfConvert;
 261 }
 262
 263 bool AArch64Subtarget::supportsAddressTopByteIgnored() const {
 264   if (!UseAddressTopByteIgnored)
 265     return false;
 266
 267   if (TargetTriple.isiOS()) {
 268     unsigned Major, Minor, Micro;
 269     TargetTriple.getiOSVersion(Major, Minor, Micro);
 270     return Major >= 8;
 271   }
 272
 273   return false;
 274 }
 275
 276 std::unique_ptr<PBQPRAConstraint>
 277 AArch64Subtarget::getCustomPBQPConstraints() const {
 278   return balanceFPOps() ? llvm::make_unique<A57ChainingConstraint>() : nullptr;
 279 }
 280
 281 void AArch64Subtarget::mirFileLoaded(MachineFunction &MF) const {
 282   // We usually compute max call frame size after ISel. Do the computation now
 283   // if the .mir file didn't specify it. Note that this will probably give you
 284   // bogus values after PEI has eliminated the callframe setup/destroy pseudo
 285   // instructions, specify explicitly if you need it to be correct.
 286   MachineFrameInfo &MFI = MF.getFrameInfo();
 287   if (!MFI.isMaxCallFrameSizeComputed())
 288     MFI.computeMaxCallFrameSize(MF);
 289 }