//===- AMDGPUTargetTransformInfo.h - AMDGPU specific TTI --------*- C++ -*-===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
/// \file
/// This file declares a TargetTransformInfo::Concept conforming object
/// specific to the AMDGPU target machine. It uses the target's detailed
/// information to provide more precise answers to certain TTI queries, while
/// letting the target-independent and default TTI implementations handle the
/// rest.
//
//===----------------------------------------------------------------------===//
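//
// Usage sketch (illustrative only, not part of this header): passes reach
// these implementations through the generic TargetTransformInfo wrapper
// rather than instantiating them directly, e.g.:
//
//   TargetTransformInfo TTI = TM->getTargetTransformInfo(F);
//   if (TTI.hasBranchDivergence()) {
//     // Make divergence-aware decisions for AMDGPU code.
//   }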

#ifndef LLVM_LIB_TARGET_AMDGPU_AMDGPUTARGETTRANSFORMINFO_H
#define LLVM_LIB_TARGET_AMDGPU_AMDGPUTARGETTRANSFORMINFO_H

#include "AMDGPU.h"
#include "AMDGPUSubtarget.h"
#include "AMDGPUTargetMachine.h"
#include "MCTargetDesc/AMDGPUMCTargetDesc.h"
#include "Utils/AMDGPUBaseInfo.h"
#include "llvm/ADT/ArrayRef.h"
#include "llvm/Analysis/TargetTransformInfo.h"
#include "llvm/CodeGen/BasicTTIImpl.h"
#include "llvm/IR/Function.h"
#include "llvm/MC/SubtargetFeature.h"
#include "llvm/Support/MathExtras.h"
#include <cassert>

namespace llvm {

class AMDGPUTargetLowering;
class Loop;
class ScalarEvolution;
class Type;
class Value;

class AMDGPUTTIImpl final : public BasicTTIImplBase<AMDGPUTTIImpl> {
  using BaseT = BasicTTIImplBase<AMDGPUTTIImpl>;
  using TTI = TargetTransformInfo;

  friend BaseT;

  Triple TargetTriple;

public:
  explicit AMDGPUTTIImpl(const AMDGPUTargetMachine *TM, const Function &F)
    : BaseT(TM, F.getParent()->getDataLayout()),
      TargetTriple(TM->getTargetTriple()) {}

  void getUnrollingPreferences(Loop *L, ScalarEvolution &SE,
                               TTI::UnrollingPreferences &UP);
};

class GCNTTIImpl final : public BasicTTIImplBase<GCNTTIImpl> {
  using BaseT = BasicTTIImplBase<GCNTTIImpl>;
  using TTI = TargetTransformInfo;

  friend BaseT;

  const GCNSubtarget *ST;
  const AMDGPUTargetLowering *TLI;
  AMDGPUTTIImpl CommonTTI;
  bool IsGraphicsShader;

  const FeatureBitset InlineFeatureIgnoreList = {
    // Codegen control options which don't matter.
    AMDGPU::FeatureEnableLoadStoreOpt,
    AMDGPU::FeatureEnableSIScheduler,
    AMDGPU::FeatureEnableUnsafeDSOffsetFolding,
    AMDGPU::FeatureFlatForGlobal,
    AMDGPU::FeaturePromoteAlloca,
    AMDGPU::FeatureUnalignedBufferAccess,
    AMDGPU::FeatureUnalignedScratchAccess,

    AMDGPU::FeatureAutoWaitcntBeforeBarrier,

    // Property of the kernel/environment which can't actually differ.
    AMDGPU::FeatureSGPRInitBug,
    AMDGPU::FeatureXNACK,
    AMDGPU::FeatureTrapHandler,
    AMDGPU::FeatureCodeObjectV3,

    // The default assumption needs to be that ECC is enabled, but no directly
    // exposed operations depend on it, so it can be safely ignored when
    // checking inline compatibility.
    AMDGPU::FeatureSRAMECC,

    // Perf-tuning features
    AMDGPU::FeatureFastFMAF32,
    AMDGPU::HalfRate64Ops
  };
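
  // Sketch of how this list is meant to be used (illustrative; the actual
  // logic lives in areInlineCompatible() in AMDGPUTargetTransformInfo.cpp):
  // the ignored bits are masked out before comparing caller and callee
  // subtarget features, so mismatches in them do not block inlining:
  //
  //   FeatureBitset RealCallerBits = CallerBits & ~InlineFeatureIgnoreList;
  //   FeatureBitset RealCalleeBits = CalleeBits & ~InlineFeatureIgnoreList;
  //   return (RealCallerBits & RealCalleeBits) == RealCalleeBits;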

  const GCNSubtarget *getST() const { return ST; }
  const AMDGPUTargetLowering *getTLI() const { return TLI; }

  static inline int getFullRateInstrCost() {
    return TargetTransformInfo::TCC_Basic;
  }

  static inline int getHalfRateInstrCost() {
    return 2 * TargetTransformInfo::TCC_Basic;
  }

  // TODO: The size is usually 8 bytes, but takes 4x as many cycles. Maybe
  // should be 2 or 4.
  static inline int getQuarterRateInstrCost() {
    return 3 * TargetTransformInfo::TCC_Basic;
  }

  // On some parts, normal fp64 operations are half rate, and others
  // quarter. This also applies to some integer operations.
  inline int get64BitInstrCost() const {
    return ST->hasHalfRate64Ops() ?
      getHalfRateInstrCost() : getQuarterRateInstrCost();
  }
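
  // Worked example (illustrative only): since TCC_Basic == 1 in
  // TargetTransformInfo, a 64-bit operation costs 2 on a subtarget with the
  // HalfRate64Ops feature and 3 otherwise:
  //
  //   int Cost = get64BitInstrCost(); // hasHalfRate64Ops() ? 2 : 3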

public:
  explicit GCNTTIImpl(const AMDGPUTargetMachine *TM, const Function &F)
    : BaseT(TM, F.getParent()->getDataLayout()),
      ST(static_cast<const GCNSubtarget*>(TM->getSubtargetImpl(F))),
      TLI(ST->getTargetLowering()),
      CommonTTI(TM, F),
      IsGraphicsShader(AMDGPU::isShader(F.getCallingConv())) {}

  bool hasBranchDivergence() { return true; }

  void getUnrollingPreferences(Loop *L, ScalarEvolution &SE,
                               TTI::UnrollingPreferences &UP);

  TTI::PopcntSupportKind getPopcntSupport(unsigned TyWidth) {
    assert(isPowerOf2_32(TyWidth) && "Ty width must be power of 2");
    return TTI::PSK_FastHardware;
  }

  unsigned getHardwareNumberOfRegisters(bool Vector) const;
  unsigned getNumberOfRegisters(bool Vector) const;
  unsigned getRegisterBitWidth(bool Vector) const;
  unsigned getMinVectorRegisterBitWidth() const;
  unsigned getLoadVectorFactor(unsigned VF, unsigned LoadSize,
                               unsigned ChainSizeInBytes,
                               VectorType *VecTy) const;
  unsigned getStoreVectorFactor(unsigned VF, unsigned StoreSize,
                                unsigned ChainSizeInBytes,
                                VectorType *VecTy) const;
  unsigned getLoadStoreVecRegBitWidth(unsigned AddrSpace) const;

  bool isLegalToVectorizeMemChain(unsigned ChainSizeInBytes,
                                  unsigned Alignment,
                                  unsigned AddrSpace) const;
  bool isLegalToVectorizeLoadChain(unsigned ChainSizeInBytes,
                                   unsigned Alignment,
                                   unsigned AddrSpace) const;
  bool isLegalToVectorizeStoreChain(unsigned ChainSizeInBytes,
                                    unsigned Alignment,
                                    unsigned AddrSpace) const;

  unsigned getMaxInterleaveFactor(unsigned VF);

  bool getTgtMemIntrinsic(IntrinsicInst *Inst, MemIntrinsicInfo &Info) const;

  int getArithmeticInstrCost(
    unsigned Opcode, Type *Ty,
    TTI::OperandValueKind Opd1Info = TTI::OK_AnyValue,
    TTI::OperandValueKind Opd2Info = TTI::OK_AnyValue,
    TTI::OperandValueProperties Opd1PropInfo = TTI::OP_None,
    TTI::OperandValueProperties Opd2PropInfo = TTI::OP_None,
    ArrayRef<const Value *> Args = ArrayRef<const Value *>());

  unsigned getCFInstrCost(unsigned Opcode);

  int getVectorInstrCost(unsigned Opcode, Type *ValTy, unsigned Index);
  bool isSourceOfDivergence(const Value *V) const;
  bool isAlwaysUniform(const Value *V) const;

  unsigned getFlatAddressSpace() const {
    // Don't bother running the InferAddressSpaces pass on graphics shaders,
    // which don't use flat addressing.
    if (IsGraphicsShader)
      return -1;
    return AMDGPUAS::FLAT_ADDRESS;
  }
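
  // Sketch (assumption, not code from this header): a consumer such as the
  // InferAddressSpaces pass treats ~0u as "no flat address space", so the
  // return of -1 above (which wraps to ~0u as an unsigned) effectively
  // disables address-space inference for graphics shaders:
  //
  //   if (TTI.getFlatAddressSpace() == ~0u)
  //     return false; // no flat address space to infer into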

  unsigned getVectorSplitCost() { return 0; }

  unsigned getShuffleCost(TTI::ShuffleKind Kind, Type *Tp, int Index,
                          Type *SubTp);

  bool areInlineCompatible(const Function *Caller,
                           const Function *Callee) const;

  unsigned getInliningThresholdMultiplier() { return 7; }

  int getInlinerVectorBonusPercent() { return 0; }

  int getArithmeticReductionCost(unsigned Opcode,
                                 Type *Ty,
                                 bool IsPairwise);
  int getMinMaxReductionCost(Type *Ty, Type *CondTy,
                             bool IsPairwiseForm,
                             bool IsUnsigned);
};

class R600TTIImpl final : public BasicTTIImplBase<R600TTIImpl> {
  using BaseT = BasicTTIImplBase<R600TTIImpl>;
  using TTI = TargetTransformInfo;

  friend BaseT;

  const R600Subtarget *ST;
  const AMDGPUTargetLowering *TLI;
  AMDGPUTTIImpl CommonTTI;

public:
  explicit R600TTIImpl(const AMDGPUTargetMachine *TM, const Function &F)
    : BaseT(TM, F.getParent()->getDataLayout()),
      ST(static_cast<const R600Subtarget*>(TM->getSubtargetImpl(F))),
      TLI(ST->getTargetLowering()),
      CommonTTI(TM, F) {}

  const R600Subtarget *getST() const { return ST; }
  const AMDGPUTargetLowering *getTLI() const { return TLI; }

  void getUnrollingPreferences(Loop *L, ScalarEvolution &SE,
                               TTI::UnrollingPreferences &UP);
  unsigned getHardwareNumberOfRegisters(bool Vec) const;
  unsigned getNumberOfRegisters(bool Vec) const;
  unsigned getRegisterBitWidth(bool Vector) const;
  unsigned getMinVectorRegisterBitWidth() const;
  unsigned getLoadStoreVecRegBitWidth(unsigned AddrSpace) const;
  bool isLegalToVectorizeMemChain(unsigned ChainSizeInBytes, unsigned Alignment,
                                  unsigned AddrSpace) const;
  bool isLegalToVectorizeLoadChain(unsigned ChainSizeInBytes,
                                   unsigned Alignment,
                                   unsigned AddrSpace) const;
  bool isLegalToVectorizeStoreChain(unsigned ChainSizeInBytes,
                                    unsigned Alignment,
                                    unsigned AddrSpace) const;
  unsigned getMaxInterleaveFactor(unsigned VF);
  unsigned getCFInstrCost(unsigned Opcode);
  int getVectorInstrCost(unsigned Opcode, Type *ValTy, unsigned Index);
};

} // end namespace llvm

#endif // LLVM_LIB_TARGET_AMDGPU_AMDGPUTARGETTRANSFORMINFO_H