1 //=====-- AMDGPUSubtarget.h - Define Subtarget for AMDGPU ------*- C++ -*-====//
3 // The LLVM Compiler Infrastructure
5 // This file is distributed under the University of Illinois Open Source
6 // License. See LICENSE.TXT for details.
8 //==-----------------------------------------------------------------------===//
11 /// \brief AMDGPU specific subclass of TargetSubtarget.
13 //===----------------------------------------------------------------------===//
15 #ifndef LLVM_LIB_TARGET_AMDGPU_AMDGPUSUBTARGET_H
16 #define LLVM_LIB_TARGET_AMDGPU_AMDGPUSUBTARGET_H
19 #include "R600InstrInfo.h"
20 #include "R600ISelLowering.h"
21 #include "R600FrameLowering.h"
22 #include "SIInstrInfo.h"
23 #include "SIISelLowering.h"
24 #include "SIFrameLowering.h"
25 #include "Utils/AMDGPUBaseInfo.h"
26 #include "llvm/CodeGen/GlobalISel/GISelAccessor.h"
27 #include "llvm/Target/TargetSubtargetInfo.h"
29 #define GET_SUBTARGETINFO_HEADER
30 #include "AMDGPUGenSubtargetInfo.inc"
34 class SIMachineFunctionInfo;
// Common AMDGPU subtarget class, derived from the tablegen-generated
// AMDGPUGenSubtargetInfo. It stores feature flags and exposes capability
// queries, most of which are decided by comparing getGeneration() against a
// hardware generation enumerator.
// NOTE(review): parts of this class (closing braces, some bodies and member
// declarations) are not visible in this listing; the comments below describe
// only the statements that are shown.
37 class AMDGPUSubtarget : public AMDGPUGenSubtargetInfo {
59 // Basic subtarget description.
63 unsigned WavefrontSize;
66 unsigned MaxPrivateElementSize;
68 // Possibly statically set by tablegen, but may want to be overridden.
72 // Dynamically set bits that enable features.
77 bool UnalignedBufferAccess;
79 bool DebuggerInsertNops;
80 bool DebuggerReserveRegs;
81 bool DebuggerEmitPrologue;
84 bool EnableVGPRSpilling;
85 bool EnablePromoteAlloca;
86 bool EnableLoadStoreOpt;
87 bool EnableUnsafeDSOffsetFolding;
88 bool EnableSIScheduler;
91 // Static subtarget properties set by tablegen.
100 bool FlatAddressSpace;
105 short TexVTXClauseSize;
107 // Dummy feature to use for assembler in tablegen.
110 InstrItineraryData InstrItins;
// Construction, destruction and feature-string driven initialization.
113 AMDGPUSubtarget(const Triple &TT, StringRef GPU, StringRef FS,
114 const TargetMachine &TM);
115 virtual ~AMDGPUSubtarget();
116 AMDGPUSubtarget &initializeSubtargetDependencies(const Triple &TT,
117 StringRef GPU, StringRef FS);
// Accessors for the target-specific info objects. They are only declared
// here; the inline definitions at the end of this header forward to the
// SISubtarget or R600Subtarget implementation based on getGeneration().
119 const AMDGPUInstrInfo *getInstrInfo() const override;
120 const AMDGPUFrameLowering *getFrameLowering() const override;
121 const AMDGPUTargetLowering *getTargetLowering() const override;
122 const AMDGPURegisterInfo *getRegisterInfo() const override;
124 const InstrItineraryData *getInstrItineraryData() const override {
128 void ParseSubtargetFeatures(StringRef CPU, StringRef FS);
// True when the OS component of the target triple is AMDHSA.
130 bool isAmdHsaOS() const {
131 return TargetTriple.getOS() == Triple::AMDHSA;
134 Generation getGeneration() const {
138 unsigned getWavefrontSize() const {
139 return WavefrontSize;
142 int getLocalMemorySize() const {
143 return LocalMemorySize;
146 int getLDSBankCount() const {
150 unsigned getMaxPrivateElementSize() const {
151 return MaxPrivateElementSize;
154 bool hasHWFP64() const {
158 bool hasFastFMAF32() const {
162 bool hasHalfRate64Ops() const {
163 return HalfRate64Ops;
// Capability queries keyed on the hardware generation.
// Addr64 is reported only for generations before VOLCANIC_ISLANDS.
166 bool hasAddr64() const {
167 return (getGeneration() < VOLCANIC_ISLANDS);
170 bool hasBFE() const {
171 return (getGeneration() >= EVERGREEN);
174 bool hasBFI() const {
175 return (getGeneration() >= EVERGREEN);
178 bool hasBFM() const {
// NOTE(review): two generation thresholds are visible below (EVERGREEN and
// SOUTHERN_ISLANDS); presumably Size selects between them, but the selecting
// condition is not visible in this listing -- confirm against the full file.
182 bool hasBCNT(unsigned Size) const {
184 return (getGeneration() >= EVERGREEN);
187 return (getGeneration() >= SOUTHERN_ISLANDS);
192 bool hasMulU24() const {
193 return (getGeneration() >= EVERGREEN);
196 bool hasMulI24() const {
197 return (getGeneration() >= SOUTHERN_ISLANDS ||
201 bool hasFFBL() const {
202 return (getGeneration() >= EVERGREEN);
205 bool hasFFBH() const {
206 return (getGeneration() >= EVERGREEN);
209 bool hasCARRY() const {
210 return (getGeneration() >= EVERGREEN);
213 bool hasBORROW() const {
214 return (getGeneration() >= EVERGREEN);
217 bool hasCaymanISA() const {
// Plain getters for the dynamically set enable bits.
221 bool isPromoteAllocaEnabled() const {
222 return EnablePromoteAlloca;
225 bool unsafeDSOffsetFoldingEnabled() const {
226 return EnableUnsafeDSOffsetFolding;
229 bool dumpCode() const {
233 /// Return the amount of LDS that can be used that will not restrict the
234 /// occupancy lower than WaveCount.
235 unsigned getMaxLocalMemSizeWithWaveCount(unsigned WaveCount) const;
237 /// Inverse of getMaxLocalMemSizeWithWaveCount. Return the maximum wavecount
238 /// if the given LDS memory size is the only constraint.
239 unsigned getOccupancyWithLocalMemSize(uint32_t Bytes) const;
242 bool hasFP32Denormals() const {
243 return FP32Denormals;
246 bool hasFP64Denormals() const {
247 return FP64Denormals;
250 bool hasFPExceptions() const {
254 bool useFlatForGlobal() const {
255 return FlatForGlobal;
258 bool hasUnalignedBufferAccess() const {
259 return UnalignedBufferAccess;
262 bool isXNACKEnabled() const {
266 unsigned getMaxWavesPerCU() const {
267 if (getGeneration() >= AMDGPUSubtarget::SOUTHERN_ISLANDS)
270 // FIXME: Not sure what this is for other subtargets.
274 /// \brief Returns the offset in bytes from the start of the input buffer
275 /// of the first explicit kernel argument.
// The offset is 0 when isAmdHsaOS() is true and 36 otherwise.
276 unsigned getExplicitKernelArgOffset() const {
277 return isAmdHsaOS() ? 0 : 36;
280 unsigned getStackAlignment() const {
281 // Scratch is allocated in 256 dword per wave blocks.
// NOTE(review): presumably 4 bytes per dword times 256 dwords, divided by the
// wavefront size -- confirm the unit of the returned value.
282 return 4 * 256 / getWavefrontSize();
285 bool enableMachineScheduler() const override {
289 bool enableSubRegLiveness() const override {
// Subtarget for the R600 family. It holds the R600 instruction info, frame
// lowering and target lowering objects by value and overrides the base-class
// accessors to return them with their R600-specific types.
// NOTE(review): closing braces and some bodies are not visible in this
// listing; comments describe only the statements that are shown.
294 class R600Subtarget final : public AMDGPUSubtarget {
296 R600InstrInfo InstrInfo;
297 R600FrameLowering FrameLowering;
298 R600TargetLowering TLInfo;
301 R600Subtarget(const Triple &TT, StringRef CPU, StringRef FS,
302 const TargetMachine &TM);
304 const R600InstrInfo *getInstrInfo() const override {
// Returns the address of the FrameLowering member.
308 const R600FrameLowering *getFrameLowering() const override {
309 return &FrameLowering;
312 const R600TargetLowering *getTargetLowering() const override {
// The register info is obtained from the instruction info object rather than
// being stored separately in this class.
316 const R600RegisterInfo *getRegisterInfo() const override {
317 return &InstrInfo.getRegisterInfo();
320 bool hasCFAluBug() const {
// Plain getters for feature values referenced by this class.
324 bool hasVertexCache() const {
325 return HasVertexCache;
328 short getTexVTXClauseSize() const {
329 return TexVTXClauseSize;
332 unsigned getStackEntrySize() const;
// Subtarget for the SI family. It holds the SI instruction info, frame
// lowering and target lowering objects by value, plus an optional GlobalISel
// accessor, and exposes getters for SI-related feature flags.
// NOTE(review): closing braces and some bodies are not visible in this
// listing; comments describe only the statements that are shown.
335 class SISubtarget final : public AMDGPUSubtarget {
338 FIXED_SGPR_COUNT_FOR_INIT_BUG = 80
342 SIInstrInfo InstrInfo;
343 SIFrameLowering FrameLowering;
344 SITargetLowering TLInfo;
// Owning pointer to the GlobalISel accessor; null until setGISelAccessor()
// has been called.
345 std::unique_ptr<GISelAccessor> GISel;
348 SISubtarget(const Triple &TT, StringRef CPU, StringRef FS,
349 const TargetMachine &TM);
351 const SIInstrInfo *getInstrInfo() const override {
// Returns the address of the FrameLowering member.
355 const SIFrameLowering *getFrameLowering() const override {
356 return &FrameLowering;
359 const SITargetLowering *getTargetLowering() const override {
// Asserts that an accessor has been installed, then forwards to it.
363 const CallLowering *getCallLowering() const override {
364 assert(GISel && "Access to GlobalISel APIs not set");
365 return GISel->getCallLowering();
// The register info is obtained from the instruction info object.
368 const SIRegisterInfo *getRegisterInfo() const override {
369 return &InstrInfo.getRegisterInfo();
// Stores the address of the passed accessor in the owning unique_ptr, so this
// subtarget will delete it when the pointer is reset or destroyed.
// NOTE(review): callers must therefore pass an object whose ownership can be
// transferred (not a stack or otherwise-owned object) -- confirm at call sites.
372 void setGISelAccessor(GISelAccessor &GISel) {
373 this->GISel.reset(&GISel);
376 void overrideSchedPolicy(MachineSchedPolicy &Policy,
377 unsigned NumRegionInstrs) const override;
379 bool isVGPRSpillingEnabled(const Function& F) const;
381 unsigned getAmdKernelCodeChipID() const;
383 AMDGPU::IsaVersion getIsaVersion() const;
385 unsigned getMaxNumUserSGPRs() const {
// Plain getters for feature flags.
389 bool hasFlatAddressSpace() const {
390 return FlatAddressSpace;
393 bool hasSMemRealTime() const {
394 return HasSMemRealTime;
397 bool has16BitInsts() const {
398 return Has16BitInsts;
401 bool enableSIScheduler() const {
402 return EnableSIScheduler;
// True only when all three debugger features (insert nops, reserve registers,
// emit prologue) are enabled.
405 bool debuggerSupported() const {
406 return debuggerInsertNops() && debuggerReserveRegs() &&
407 debuggerEmitPrologue();
410 bool debuggerInsertNops() const {
411 return DebuggerInsertNops;
414 bool debuggerReserveRegs() const {
415 return DebuggerReserveRegs;
418 bool debuggerEmitPrologue() const {
419 return DebuggerEmitPrologue;
422 bool loadStoreOptEnabled() const {
423 return EnableLoadStoreOpt;
426 bool hasSGPRInitBug() const {
// Returns the instruction info of the concrete subtarget. Generations at or
// above SOUTHERN_ISLANDS are downcast to SISubtarget, all others to
// R600Subtarget, and the call is forwarded to that class's getInstrInfo().
// The static_cast relies on the generation identifying the dynamic type.
432 inline const AMDGPUInstrInfo *AMDGPUSubtarget::getInstrInfo() const {
433 if (getGeneration() >= SOUTHERN_ISLANDS)
434 return static_cast<const SISubtarget *>(this)->getInstrInfo();
436 return static_cast<const R600Subtarget *>(this)->getInstrInfo();
// Returns the frame lowering of the concrete subtarget. Generations at or
// above SOUTHERN_ISLANDS are downcast to SISubtarget, all others to
// R600Subtarget, and the call is forwarded to that class's
// getFrameLowering().
439 inline const AMDGPUFrameLowering *AMDGPUSubtarget::getFrameLowering() const {
440 if (getGeneration() >= SOUTHERN_ISLANDS)
441 return static_cast<const SISubtarget *>(this)->getFrameLowering();
443 return static_cast<const R600Subtarget *>(this)->getFrameLowering();
// Returns the target lowering of the concrete subtarget. Generations at or
// above SOUTHERN_ISLANDS are downcast to SISubtarget, all others to
// R600Subtarget, and the call is forwarded to that class's
// getTargetLowering().
446 inline const AMDGPUTargetLowering *AMDGPUSubtarget::getTargetLowering() const {
447 if (getGeneration() >= SOUTHERN_ISLANDS)
448 return static_cast<const SISubtarget *>(this)->getTargetLowering();
450 return static_cast<const R600Subtarget *>(this)->getTargetLowering();
// Returns the register info of the concrete subtarget. Generations at or
// above SOUTHERN_ISLANDS are downcast to SISubtarget, all others to
// R600Subtarget, and the call is forwarded to that class's
// getRegisterInfo().
453 inline const AMDGPURegisterInfo *AMDGPUSubtarget::getRegisterInfo() const {
454 if (getGeneration() >= SOUTHERN_ISLANDS)
455 return static_cast<const SISubtarget *>(this)->getRegisterInfo();
457 return static_cast<const R600Subtarget *>(this)->getRegisterInfo();
460 } // End namespace llvm