1 //===--- AMDGPU.cpp - Implement AMDGPU target feature support -------------===//
3 // The LLVM Compiler Infrastructure
5 // This file is distributed under the University of Illinois Open Source
6 // License. See LICENSE.TXT for details.
8 //===----------------------------------------------------------------------===//
10 // This file implements AMDGPU TargetInfo objects.
12 //===----------------------------------------------------------------------===//
15 #include "clang/Basic/Builtins.h"
16 #include "clang/Basic/CodeGenOptions.h"
17 #include "clang/Basic/LangOptions.h"
18 #include "clang/Basic/MacroBuilder.h"
19 #include "clang/Basic/TargetBuiltins.h"
20 #include "llvm/ADT/StringSwitch.h"
22 using namespace clang;
23 using namespace clang::targets;
28 // If you edit the description strings, make sure you update
29 // getPointerWidthV().
// LLVM data layout description string for the R600 family. The
// AMDGPUTargetInfo constructor passes it to resetDataLayout() when the triple
// is not AMDGCN. "p:32:32" gives the default address space 32-bit pointers and
// the trailing "A5" makes address space 5 the alloca address space (the
// constructor asserts that this equals Private).
31 static const char *const DataLayoutStringR600 =
32 "e-p:32:32-i64:64-v16:16-v24:32-v32:32-v48:64-v96:128"
33 "-v192:256-v256:256-v512:512-v1024:1024-v2048:2048-n32:64-S32-A5";
// LLVM data layout description string for the AMDGCN family. The
// AMDGPUTargetInfo constructor passes it to resetDataLayout() when the triple
// is AMDGCN. Pointers default to 64 bits ("p:64:64"); the p1..p6 entries give
// per-address-space pointer sizes (64 bits for 1 and 4, 32 bits for 2, 3, 5
// and 6). The vector/integer alignment suffix is the same as in the R600
// string, including "A5" for the alloca address space.
35 static const char *const DataLayoutStringAMDGCN =
36 "e-p:64:64-p1:64:64-p2:32:32-p3:32:32-p4:64:64-p5:32:32-p6:32:32"
37 "-i64:64-v16:16-v24:32-v32:32-v48:64-v96:128"
38 "-v192:256-v256:256-v512:512-v1024:1024-v2048:2048-n32:64-S32-A5";
// Address space map installed by setAddressSpaceMap() when DefaultIsPrivate is
// false. Each initializer is the target address space for the language address
// space named in its trailing comment; the order of the entries is significant.
40 const LangASMap AMDGPUTargetInfo::AMDGPUDefIsGenMap = {
42 Global, // opencl_global
43 Local, // opencl_local
44 Constant, // opencl_constant
45 Private, // opencl_private
46 Generic, // opencl_generic
47 Global, // cuda_device
48 Constant, // cuda_constant
// Address space map installed by setAddressSpaceMap() when DefaultIsPrivate is
// true. Each initializer is the target address space for the language address
// space named in its trailing comment; the order of the entries is significant.
52 const LangASMap AMDGPUTargetInfo::AMDGPUDefIsPrivMap = {
54 Global, // opencl_global
55 Local, // opencl_local
56 Constant, // opencl_constant
57 Private, // opencl_private
58 Generic, // opencl_generic
59 Global, // cuda_device
60 Constant, // cuda_constant
63 } // namespace targets
// Table of AMDGPU builtin descriptors, exposed through getTargetBuiltins().
// The entries are generated by expanding the BUILTIN / TARGET_BUILTIN macros
// over every record in BuiltinsAMDGPU.def.
66 const Builtin::Info AMDGPUTargetInfo::BuiltinInfo[] = {
// BUILTIN: the builtin's name is the stringified ID; both pointer fields that
// are not TYPE/ATTRS are null, i.e. no required feature is recorded.
67 #define BUILTIN(ID, TYPE, ATTRS) \
68 {#ID, TYPE, ATTRS, nullptr, ALL_LANGUAGES, nullptr},
// TARGET_BUILTIN: same as BUILTIN, but the last field carries FEATURE.
69 #define TARGET_BUILTIN(ID, TYPE, ATTRS, FEATURE) \
70 {#ID, TYPE, ATTRS, nullptr, ALL_LANGUAGES, FEATURE},
71 #include "clang/Basic/BuiltinsAMDGPU.def"
// Register names returned by getGCCRegNames(). The table lists, in order:
// "v0".."v255", "s0".."s127", and then the named registers exec, vcc, scc, m0
// and flat_scratch followed by the _lo/_hi halves of exec, vcc and
// flat_scratch.
74 const char *const AMDGPUTargetInfo::GCCRegNames[] = {
75 "v0", "v1", "v2", "v3", "v4", "v5", "v6", "v7", "v8",
76 "v9", "v10", "v11", "v12", "v13", "v14", "v15", "v16", "v17",
77 "v18", "v19", "v20", "v21", "v22", "v23", "v24", "v25", "v26",
78 "v27", "v28", "v29", "v30", "v31", "v32", "v33", "v34", "v35",
79 "v36", "v37", "v38", "v39", "v40", "v41", "v42", "v43", "v44",
80 "v45", "v46", "v47", "v48", "v49", "v50", "v51", "v52", "v53",
81 "v54", "v55", "v56", "v57", "v58", "v59", "v60", "v61", "v62",
82 "v63", "v64", "v65", "v66", "v67", "v68", "v69", "v70", "v71",
83 "v72", "v73", "v74", "v75", "v76", "v77", "v78", "v79", "v80",
84 "v81", "v82", "v83", "v84", "v85", "v86", "v87", "v88", "v89",
85 "v90", "v91", "v92", "v93", "v94", "v95", "v96", "v97", "v98",
86 "v99", "v100", "v101", "v102", "v103", "v104", "v105", "v106", "v107",
87 "v108", "v109", "v110", "v111", "v112", "v113", "v114", "v115", "v116",
88 "v117", "v118", "v119", "v120", "v121", "v122", "v123", "v124", "v125",
89 "v126", "v127", "v128", "v129", "v130", "v131", "v132", "v133", "v134",
90 "v135", "v136", "v137", "v138", "v139", "v140", "v141", "v142", "v143",
91 "v144", "v145", "v146", "v147", "v148", "v149", "v150", "v151", "v152",
92 "v153", "v154", "v155", "v156", "v157", "v158", "v159", "v160", "v161",
93 "v162", "v163", "v164", "v165", "v166", "v167", "v168", "v169", "v170",
94 "v171", "v172", "v173", "v174", "v175", "v176", "v177", "v178", "v179",
95 "v180", "v181", "v182", "v183", "v184", "v185", "v186", "v187", "v188",
96 "v189", "v190", "v191", "v192", "v193", "v194", "v195", "v196", "v197",
97 "v198", "v199", "v200", "v201", "v202", "v203", "v204", "v205", "v206",
98 "v207", "v208", "v209", "v210", "v211", "v212", "v213", "v214", "v215",
99 "v216", "v217", "v218", "v219", "v220", "v221", "v222", "v223", "v224",
100 "v225", "v226", "v227", "v228", "v229", "v230", "v231", "v232", "v233",
101 "v234", "v235", "v236", "v237", "v238", "v239", "v240", "v241", "v242",
102 "v243", "v244", "v245", "v246", "v247", "v248", "v249", "v250", "v251",
103 "v252", "v253", "v254", "v255", "s0", "s1", "s2", "s3", "s4",
104 "s5", "s6", "s7", "s8", "s9", "s10", "s11", "s12", "s13",
105 "s14", "s15", "s16", "s17", "s18", "s19", "s20", "s21", "s22",
106 "s23", "s24", "s25", "s26", "s27", "s28", "s29", "s30", "s31",
107 "s32", "s33", "s34", "s35", "s36", "s37", "s38", "s39", "s40",
108 "s41", "s42", "s43", "s44", "s45", "s46", "s47", "s48", "s49",
109 "s50", "s51", "s52", "s53", "s54", "s55", "s56", "s57", "s58",
110 "s59", "s60", "s61", "s62", "s63", "s64", "s65", "s66", "s67",
111 "s68", "s69", "s70", "s71", "s72", "s73", "s74", "s75", "s76",
112 "s77", "s78", "s79", "s80", "s81", "s82", "s83", "s84", "s85",
113 "s86", "s87", "s88", "s89", "s90", "s91", "s92", "s93", "s94",
114 "s95", "s96", "s97", "s98", "s99", "s100", "s101", "s102", "s103",
115 "s104", "s105", "s106", "s107", "s108", "s109", "s110", "s111", "s112",
116 "s113", "s114", "s115", "s116", "s117", "s118", "s119", "s120", "s121",
117 "s122", "s123", "s124", "s125", "s126", "s127", "exec", "vcc", "scc",
118 "m0", "flat_scratch", "exec_lo", "exec_hi", "vcc_lo", "vcc_hi",
119 "flat_scratch_lo", "flat_scratch_hi"
122 ArrayRef<const char *> AMDGPUTargetInfo::getGCCRegNames() const {
123 return llvm::makeArrayRef(GCCRegNames);
// Seeds Features with target feature flags selected from the CPU name and then
// hands over to TargetInfo::initFeatureMap(), whose result is returned.
//
// Features   - map that receives the enabled feature flags.
// Diags      - forwarded unchanged to TargetInfo::initFeatureMap().
// CPU        - processor name; parsed with parseArchAMDGCN() in the AMDGCN
//              branch and with parseArchR600() in the second switch.
// FeatureVec - forwarded unchanged to TargetInfo::initFeatureMap().
126 bool AMDGPUTargetInfo::initFeatureMap(
127 llvm::StringMap<bool> &Features, DiagnosticsEngine &Diags, StringRef CPU,
128 const std::vector<std::string> &FeatureVec) const {
130 using namespace llvm::AMDGPU;
132 // XXX - What does the member GPU mean if device name string passed here?
// Note: the GPU kind is re-derived from the CPU argument below rather than
// read from a member.
133 if (isAMDGCN(getTriple())) {
137 switch (llvm::AMDGPU::parseArchAMDGCN(CPU)) {
// Feature flags turned on for AMDGCN processors. Which GPU kinds reach which
// of these assignments depends on the switch's case labels.
139 Features["dl-insts"] = true;
140 Features["dot-insts"] = true;
146 Features["gfx9-insts"] = true;
152 Features["vi-insts"] = true;
153 Features["16-bit-insts"] = true;
154 Features["dpp"] = true;
155 Features["s-memrealtime"] = true;
162 Features["ci-insts"] = true;
// Presumably the fallback arm for a GPU kind with no handling -- confirm.
170 llvm_unreachable("Unhandled GPU!");
// Second switch: keyed on the R600 interpretation of the CPU name.
176 switch (llvm::AMDGPU::parseArchR600(CPU)) {
181 // TODO: Add fp64 when implemented.
// Presumably the fallback arm for a GPU kind with no handling -- confirm.
197 llvm_unreachable("Unhandled GPU!");
// Let the generic implementation finish the map using the same arguments.
201 return TargetInfo::initFeatureMap(Features, Diags, CPU, FeatureVec);
// Appends default denormal-handling features to TargetOpts.Features, unless
// the user already wrote an explicit setting for them in
// TargetOpts.FeaturesAsWritten.
//
// CGOpts     - only CGOpts.FlushDenorm is read here.
// TargetOpts - FeaturesAsWritten is scanned; Features may be appended to.
204 void AMDGPUTargetInfo::adjustTargetOptions(const CodeGenOptions &CGOpts,
205 TargetOptions &TargetOpts) const {
// Despite their names, these flags record whether the user explicitly
// mentioned the feature (with either '+' or '-'), not whether it is enabled.
206 bool hasFP32Denormals = false;
207 bool hasFP64Denormals = false;
209 for (auto &I : TargetOpts.FeaturesAsWritten) {
210 if (I == "+fp32-denormals" || I == "-fp32-denormals")
211 hasFP32Denormals = true;
212 if (I == "+fp64-fp16-denormals" || I == "-fp64-fp16-denormals")
213 hasFP64Denormals = true;
// No explicit fp32 setting: add "+fp32-denormals" only when the target has
// fast FMAF and full-rate fp32 denormals and CGOpts.FlushDenorm is not set;
// otherwise add "-fp32-denormals".
215 if (!hasFP32Denormals)
216 TargetOpts.Features.push_back(
217 (Twine(hasFastFMAF() && hasFullRateDenormalsF32() && !CGOpts.FlushDenorm
218 ? '+' : '-') + Twine("fp32-denormals"))
220 // Never flush fp64 or fp16 denorms unless the user asked for it explicitly.
221 if (!hasFP64Denormals && hasFP64())
222 TargetOpts.Features.push_back("+fp64-fp16-denormals");
225 void AMDGPUTargetInfo::fillValidCPUList(
226 SmallVectorImpl<StringRef> &Values) const {
227 if (isAMDGCN(getTriple()))
228 llvm::AMDGPU::fillValidArchListAMDGCN(Values);
230 llvm::AMDGPU::fillValidArchListR600(Values);
233 void AMDGPUTargetInfo::setAddressSpaceMap(bool DefaultIsPrivate) {
234 AddrSpaceMap = DefaultIsPrivate ? &AMDGPUDefIsPrivMap : &AMDGPUDefIsGenMap;
// Constructs the target description for an AMDGPU triple.
//
// Triple - target triple; isAMDGCN(Triple) picks between the AMDGCN and R600
//          variants of every family-specific choice below.
// Opts   - only Opts.CPU is read here, to determine the GPU kind.
237 AMDGPUTargetInfo::AMDGPUTargetInfo(const llvm::Triple &Triple,
238 const TargetOptions &Opts)
239 : TargetInfo(Triple),
// GPUKind: Opts.CPU parsed with the parser that matches the triple's family.
240 GPUKind(isAMDGCN(Triple) ?
241 llvm::AMDGPU::parseArchAMDGCN(Opts.CPU) :
242 llvm::AMDGPU::parseArchR600(Opts.CPU)),
// GPUFeatures: architecture attributes looked up for the GPUKind just parsed.
243 GPUFeatures(isAMDGCN(Triple) ?
244 llvm::AMDGPU::getArchAttrAMDGCN(GPUKind) :
245 llvm::AMDGPU::getArchAttrR600(GPUKind)) {
// Install the family's data layout string, then check that it declares the
// alloca address space this class expects.
246 resetDataLayout(isAMDGCN(getTriple()) ? DataLayoutStringAMDGCN
247 : DataLayoutStringR600);
248 assert(DataLayout->getAllocaAddrSpace() == Private);
// Choose the initial address space map; an OS of Mesa3D is one of the
// conditions that requests the default-is-private map.
250 setAddressSpaceMap(Triple.getOS() == llvm::Triple::Mesa3D ||
252 UseAddrSpaceMapMangling = true;
254 // Set pointer width and alignment for target address space 0.
255 PointerWidth = PointerAlign = DataLayout->getPointerSizeInBits();
// With 64-bit pointers, make 'long' 64 bits wide and base the size,
// pointer-difference and integer-pointer types on it.
256 if (getMaxPointerWidth() == 64) {
257 LongWidth = LongAlign = 64;
258 SizeType = UnsignedLong;
259 PtrDiffType = SignedLong;
260 IntPtrType = SignedLong;
// Both atomic width limits are set to 64 bits.
263 MaxAtomicPromoteWidth = MaxAtomicInlineWidth = 64;
266 void AMDGPUTargetInfo::adjust(LangOptions &Opts) {
267 TargetInfo::adjust(Opts);
268 // ToDo: There are still a few places using default address space as private
269 // address space in OpenCL, which needs to be cleaned up, then Opts.OpenCL
270 // can be removed from the following line.
271 setAddressSpaceMap(/*DefaultIsPrivate=*/Opts.OpenCL ||
272 !isAMDGCN(getTriple()));
275 ArrayRef<Builtin::Info> AMDGPUTargetInfo::getTargetBuiltins() const {
276 return llvm::makeArrayRef(BuiltinInfo, clang::AMDGPU::LastTSBuiltin -
277 Builtin::FirstTSBuiltin);
// Registers the predefined preprocessor macros for this target with Builder.
//
// Opts    - language options (not read by any statement shown below).
// Builder - receives every macro definition.
280 void AMDGPUTargetInfo::getTargetDefines(const LangOptions &Opts,
281 MacroBuilder &Builder) const {
// Vendor and architecture macros.
282 Builder.defineMacro("__AMD__");
283 Builder.defineMacro("__AMDGPU__");
// Family macros: __AMDGCN__ is defined for AMDGCN triples.
285 if (isAMDGCN(getTriple()))
286 Builder.defineMacro("__AMDGCN__");
288 Builder.defineMacro("__R600__");
// When a concrete GPU was selected, also define "__<canonical arch name>__",
// using the name lookup that matches the triple's family.
290 if (GPUKind != llvm::AMDGPU::GK_NONE) {
291 StringRef CanonName = isAMDGCN(getTriple()) ?
292 getArchNameAMDGCN(GPUKind) : getArchNameR600(GPUKind);
293 Builder.defineMacro(Twine("__") + Twine(CanonName) + Twine("__"));
296 // TODO: __HAS_FMAF__, __HAS_LDEXPF__, __HAS_FP64__ are deprecated and will be
297 // removed in the near future.
// Capability macros: the deprecated __HAS_*__ names plus the FP_FAST_FMAF /
// FP_FAST_FMA macros.
299 Builder.defineMacro("__HAS_FMAF__");
301 Builder.defineMacro("FP_FAST_FMAF");
303 Builder.defineMacro("__HAS_LDEXPF__");
305 Builder.defineMacro("__HAS_FP64__");
307 Builder.defineMacro("FP_FAST_FMA");