contrib/llvm/lib/Target/AMDGPU/AMDGPUSubtarget.cpp

   1 //===-- AMDGPUSubtarget.cpp - AMDGPU Subtarget Information ----------------===//
   2 //
   3 //                     The LLVM Compiler Infrastructure
   4 //
   5 // This file is distributed under the University of Illinois Open Source
   6 // License. See LICENSE.TXT for details.
   7 //
   8 //===----------------------------------------------------------------------===//
   9 //
  10 /// \file
  11 /// \brief Implements the AMDGPU specific subclass of TargetSubtarget.
  12 //
  13 //===----------------------------------------------------------------------===//
  14
  15 #include "AMDGPUSubtarget.h"
  16 #include "R600ISelLowering.h"
  17 #include "R600InstrInfo.h"
  18 #include "SIFrameLowering.h"
  19 #include "SIISelLowering.h"
  20 #include "SIInstrInfo.h"
  21 #include "SIMachineFunctionInfo.h"
  22 #include "llvm/ADT/SmallString.h"
  23 #include "llvm/CodeGen/MachineScheduler.h"
  24
  25 using namespace llvm;
  26
  27 #define DEBUG_TYPE "amdgpu-subtarget"
  28
  29 #define GET_SUBTARGETINFO_ENUM
  30 #define GET_SUBTARGETINFO_TARGET_DESC
  31 #define GET_SUBTARGETINFO_CTOR
  32 #include "AMDGPUGenSubtargetInfo.inc"
  33
  34 AMDGPUSubtarget::~AMDGPUSubtarget() {}
  35
  36 AMDGPUSubtarget &
  37 AMDGPUSubtarget::initializeSubtargetDependencies(const Triple &TT,
  38                                                  StringRef GPU, StringRef FS) {
  39   // Determine default and user-specified characteristics
  40   // On SI+, we want FP64 denormals to be on by default. FP32 denormals can be
  41   // enabled, but some instructions do not respect them and they run at the
  42   // double precision rate, so don't enable by default.
  43   //
  44   // We want to be able to turn these off, but making this a subtarget feature
  45   // for SI has the unhelpful behavior that it unsets everything else if you
  46   // disable it.
  47
  48   SmallString<256> FullFS("+promote-alloca,+fp64-denormals,+load-store-opt,");
  49   if (isAmdHsaOS()) // Turn on FlatForGlobal for HSA.
  50     FullFS += "+flat-for-global,+unaligned-buffer-access,";
  51   FullFS += FS;
  52
  53   ParseSubtargetFeatures(GPU, FullFS);
  54
  55   // FIXME: I don't think think Evergreen has any useful support for
  56   // denormals, but should be checked. Should we issue a warning somewhere
  57   // if someone tries to enable these?
  58   if (getGeneration() <= AMDGPUSubtarget::NORTHERN_ISLANDS) {
  59     FP32Denormals = false;
  60     FP64Denormals = false;
  61   }
  62
  63   // Set defaults if needed.
  64   if (MaxPrivateElementSize == 0)
  65     MaxPrivateElementSize = 4;
  66
  67   return *this;
  68 }
  69
  70 AMDGPUSubtarget::AMDGPUSubtarget(const Triple &TT, StringRef GPU, StringRef FS,
  71                                  const TargetMachine &TM)
  72   : AMDGPUGenSubtargetInfo(TT, GPU, FS),
  73     TargetTriple(TT),
  74     Gen(TT.getArch() == Triple::amdgcn ? SOUTHERN_ISLANDS : R600),
  75     IsaVersion(ISAVersion0_0_0),
  76     WavefrontSize(64),
  77     LocalMemorySize(0),
  78     LDSBankCount(0),
  79     MaxPrivateElementSize(0),
  80
  81     FastFMAF32(false),
  82     HalfRate64Ops(false),
  83
  84     FP32Denormals(false),
  85     FP64Denormals(false),
  86     FPExceptions(false),
  87     FlatForGlobal(false),
  88     UnalignedBufferAccess(false),
  89
  90     EnableXNACK(false),
  91     DebuggerInsertNops(false),
  92     DebuggerReserveRegs(false),
  93     DebuggerEmitPrologue(false),
  94
  95     EnableVGPRSpilling(false),
  96     EnablePromoteAlloca(false),
  97     EnableLoadStoreOpt(false),
  98     EnableUnsafeDSOffsetFolding(false),
  99     EnableSIScheduler(false),
 100     DumpCode(false),
 101
 102     FP64(false),
 103     IsGCN(false),
 104     GCN1Encoding(false),
 105     GCN3Encoding(false),
 106     CIInsts(false),
 107     SGPRInitBug(false),
 108     HasSMemRealTime(false),
 109     Has16BitInsts(false),
 110     FlatAddressSpace(false),
 111
 112     R600ALUInst(false),
 113     CaymanISA(false),
 114     CFALUBug(false),
 115     HasVertexCache(false),
 116     TexVTXClauseSize(0),
 117
 118     FeatureDisable(false),
 119     InstrItins(getInstrItineraryForCPU(GPU)) {
 120   initializeSubtargetDependencies(TT, GPU, FS);
 121 }
 122
 123 // FIXME: These limits are for SI. Did they change with the larger maximum LDS
 124 // size?
 125 unsigned AMDGPUSubtarget::getMaxLocalMemSizeWithWaveCount(unsigned NWaves) const {
 126   switch (NWaves) {
 127   case 10:
 128     return 1638;
 129   case 9:
 130     return 1820;
 131   case 8:
 132     return 2048;
 133   case 7:
 134     return 2340;
 135   case 6:
 136     return 2730;
 137   case 5:
 138     return 3276;
 139   case 4:
 140     return 4096;
 141   case 3:
 142     return 5461;
 143   case 2:
 144     return 8192;
 145   default:
 146     return getLocalMemorySize();
 147   }
 148 }
 149
 150 unsigned AMDGPUSubtarget::getOccupancyWithLocalMemSize(uint32_t Bytes) const {
 151   if (Bytes <= 1638)
 152     return 10;
 153
 154   if (Bytes <= 1820)
 155     return 9;
 156
 157   if (Bytes <= 2048)
 158     return 8;
 159
 160   if (Bytes <= 2340)
 161     return 7;
 162
 163   if (Bytes <= 2730)
 164     return 6;
 165
 166   if (Bytes <= 3276)
 167     return 5;
 168
 169   if (Bytes <= 4096)
 170     return 4;
 171
 172   if (Bytes <= 5461)
 173     return 3;
 174
 175   if (Bytes <= 8192)
 176     return 2;
 177
 178   return 1;
 179 }
 180
 181 R600Subtarget::R600Subtarget(const Triple &TT, StringRef GPU, StringRef FS,
 182                              const TargetMachine &TM) :
 183   AMDGPUSubtarget(TT, GPU, FS, TM),
 184   InstrInfo(*this),
 185   FrameLowering(TargetFrameLowering::StackGrowsUp, getStackAlignment(), 0),
 186   TLInfo(TM, *this) {}
 187
 188 SISubtarget::SISubtarget(const Triple &TT, StringRef GPU, StringRef FS,
 189                          const TargetMachine &TM) :
 190   AMDGPUSubtarget(TT, GPU, FS, TM),
 191   InstrInfo(*this),
 192   FrameLowering(TargetFrameLowering::StackGrowsUp, getStackAlignment(), 0),
 193   TLInfo(TM, *this),
 194   GISel() {}
 195
 196 unsigned R600Subtarget::getStackEntrySize() const {
 197   switch (getWavefrontSize()) {
 198   case 16:
 199     return 8;
 200   case 32:
 201     return hasCaymanISA() ? 4 : 8;
 202   case 64:
 203     return 4;
 204   default:
 205     llvm_unreachable("Illegal wavefront size.");
 206   }
 207 }
 208
 209 void SISubtarget::overrideSchedPolicy(MachineSchedPolicy &Policy,
 210                                       unsigned NumRegionInstrs) const {
 211   // Track register pressure so the scheduler can try to decrease
 212   // pressure once register usage is above the threshold defined by
 213   // SIRegisterInfo::getRegPressureSetLimit()
 214   Policy.ShouldTrackPressure = true;
 215
 216   // Enabling both top down and bottom up scheduling seems to give us less
 217   // register spills than just using one of these approaches on its own.
 218   Policy.OnlyTopDown = false;
 219   Policy.OnlyBottomUp = false;
 220
 221   // Enabling ShouldTrackLaneMasks crashes the SI Machine Scheduler.
 222   if (!enableSIScheduler())
 223     Policy.ShouldTrackLaneMasks = true;
 224 }
 225
 226 bool SISubtarget::isVGPRSpillingEnabled(const Function& F) const {
 227   return EnableVGPRSpilling || !AMDGPU::isShader(F.getCallingConv());
 228 }
 229
 230 unsigned SISubtarget::getAmdKernelCodeChipID() const {
 231   switch (getGeneration()) {
 232   case SEA_ISLANDS:
 233     return 12;
 234   default:
 235     llvm_unreachable("ChipID unknown");
 236   }
 237 }
 238
 239 AMDGPU::IsaVersion SISubtarget::getIsaVersion() const {
 240   return AMDGPU::getIsaVersion(getFeatureBits());
 241 }