1 //===--- AMDGPU.cpp - Implement AMDGPU target feature support -------------===//
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
7 //===----------------------------------------------------------------------===//
9 // This file implements AMDGPU TargetInfo objects.
11 //===----------------------------------------------------------------------===//
14 #include "clang/Basic/Builtins.h"
15 #include "clang/Basic/CodeGenOptions.h"
16 #include "clang/Basic/LangOptions.h"
17 #include "clang/Basic/MacroBuilder.h"
18 #include "clang/Basic/TargetBuiltins.h"
19 #include "llvm/ADT/StringSwitch.h"
20 #include "llvm/Frontend/OpenMP/OMPGridValues.h"
21 #include "llvm/IR/DataLayout.h"
23 using namespace clang;
24 using namespace clang::targets;
29 // If you edit the description strings, make sure you update
30 // getPointerWidthV().
// LLVM data layout description that the AMDGPUTargetInfo constructor passes to
// resetDataLayout() when the triple is not amdgcn. The trailing "A5" component
// names the alloca address space, which the constructor asserts is Private.
32 static const char *const DataLayoutStringR600 =
33 "e-p:32:32-i64:64-v16:16-v24:32-v32:32-v48:64-v96:128"
34 "-v192:256-v256:256-v512:512-v1024:1024-v2048:2048-n32:64-S32-A5";
// LLVM data layout description that the AMDGPUTargetInfo constructor passes to
// resetDataLayout() when the triple is amdgcn. Unlike the R600 string it lists
// separate pointer specifications (p1..p6) for individual address spaces.
36 static const char *const DataLayoutStringAMDGCN =
37 "e-p:64:64-p1:64:64-p2:32:32-p3:32:32-p4:64:64-p5:32:32-p6:32:32"
38 "-i64:64-v16:16-v24:32-v32:32-v48:64-v96:128"
39 "-v192:256-v256:256-v512:512-v1024:1024-v2048:2048-n32:64-S32-A5"
// Language-address-space map that setAddressSpaceMap() installs when
// DefaultIsPrivate is false. Each entry's trailing comment names the language
// address space that the entry stands for.
42 const LangASMap AMDGPUTargetInfo::AMDGPUDefIsGenMap = {
44 Global, // opencl_global
45 Local, // opencl_local
46 Constant, // opencl_constant
47 Private, // opencl_private
48 Generic, // opencl_generic
49 Global, // cuda_device
50 Constant, // cuda_constant
52 Generic, // ptr32_sptr
53 Generic, // ptr32_uptr
// Language-address-space map that setAddressSpaceMap() installs when
// DefaultIsPrivate is true. Each entry's trailing comment names the language
// address space that the entry stands for.
// NOTE(review): presumably this differs from AMDGPUDefIsGenMap only in how the
// default address space is mapped (going by the two names) -- confirm.
57 const LangASMap AMDGPUTargetInfo::AMDGPUDefIsPrivMap = {
59 Global, // opencl_global
60 Local, // opencl_local
61 Constant, // opencl_constant
62 Private, // opencl_private
63 Generic, // opencl_generic
64 Global, // cuda_device
65 Constant, // cuda_constant
67 Generic, // ptr32_sptr
68 Generic, // ptr32_uptr
72 } // namespace targets
// Descriptor table for the AMDGPU builtins, produced by expanding every entry
// of BuiltinsAMDGPU.def through the two macros below. BUILTIN entries store a
// null feature string as their last field; TARGET_BUILTIN entries store
// FEATURE there instead. getTargetBuiltins() exposes this table.
75 const Builtin::Info AMDGPUTargetInfo::BuiltinInfo[] = {
76 #define BUILTIN(ID, TYPE, ATTRS) \
77 {#ID, TYPE, ATTRS, nullptr, ALL_LANGUAGES, nullptr},
78 #define TARGET_BUILTIN(ID, TYPE, ATTRS, FEATURE) \
79 {#ID, TYPE, ATTRS, nullptr, ALL_LANGUAGES, FEATURE},
80 #include "clang/Basic/BuiltinsAMDGPU.def"
// Register names exposed through getGCCRegNames(). The table lists, in order:
// v0-v255, s0-s127, the special registers (exec, vcc, scc, m0, flat_scratch,
// followed by the _lo/_hi halves of exec, vcc and flat_scratch), and a0-a255.
83 const char *const AMDGPUTargetInfo::GCCRegNames[] = {
84 "v0", "v1", "v2", "v3", "v4", "v5", "v6", "v7", "v8",
85 "v9", "v10", "v11", "v12", "v13", "v14", "v15", "v16", "v17",
86 "v18", "v19", "v20", "v21", "v22", "v23", "v24", "v25", "v26",
87 "v27", "v28", "v29", "v30", "v31", "v32", "v33", "v34", "v35",
88 "v36", "v37", "v38", "v39", "v40", "v41", "v42", "v43", "v44",
89 "v45", "v46", "v47", "v48", "v49", "v50", "v51", "v52", "v53",
90 "v54", "v55", "v56", "v57", "v58", "v59", "v60", "v61", "v62",
91 "v63", "v64", "v65", "v66", "v67", "v68", "v69", "v70", "v71",
92 "v72", "v73", "v74", "v75", "v76", "v77", "v78", "v79", "v80",
93 "v81", "v82", "v83", "v84", "v85", "v86", "v87", "v88", "v89",
94 "v90", "v91", "v92", "v93", "v94", "v95", "v96", "v97", "v98",
95 "v99", "v100", "v101", "v102", "v103", "v104", "v105", "v106", "v107",
96 "v108", "v109", "v110", "v111", "v112", "v113", "v114", "v115", "v116",
97 "v117", "v118", "v119", "v120", "v121", "v122", "v123", "v124", "v125",
98 "v126", "v127", "v128", "v129", "v130", "v131", "v132", "v133", "v134",
99 "v135", "v136", "v137", "v138", "v139", "v140", "v141", "v142", "v143",
100 "v144", "v145", "v146", "v147", "v148", "v149", "v150", "v151", "v152",
101 "v153", "v154", "v155", "v156", "v157", "v158", "v159", "v160", "v161",
102 "v162", "v163", "v164", "v165", "v166", "v167", "v168", "v169", "v170",
103 "v171", "v172", "v173", "v174", "v175", "v176", "v177", "v178", "v179",
104 "v180", "v181", "v182", "v183", "v184", "v185", "v186", "v187", "v188",
105 "v189", "v190", "v191", "v192", "v193", "v194", "v195", "v196", "v197",
106 "v198", "v199", "v200", "v201", "v202", "v203", "v204", "v205", "v206",
107 "v207", "v208", "v209", "v210", "v211", "v212", "v213", "v214", "v215",
108 "v216", "v217", "v218", "v219", "v220", "v221", "v222", "v223", "v224",
109 "v225", "v226", "v227", "v228", "v229", "v230", "v231", "v232", "v233",
110 "v234", "v235", "v236", "v237", "v238", "v239", "v240", "v241", "v242",
111 "v243", "v244", "v245", "v246", "v247", "v248", "v249", "v250", "v251",
112 "v252", "v253", "v254", "v255", "s0", "s1", "s2", "s3", "s4",
113 "s5", "s6", "s7", "s8", "s9", "s10", "s11", "s12", "s13",
114 "s14", "s15", "s16", "s17", "s18", "s19", "s20", "s21", "s22",
115 "s23", "s24", "s25", "s26", "s27", "s28", "s29", "s30", "s31",
116 "s32", "s33", "s34", "s35", "s36", "s37", "s38", "s39", "s40",
117 "s41", "s42", "s43", "s44", "s45", "s46", "s47", "s48", "s49",
118 "s50", "s51", "s52", "s53", "s54", "s55", "s56", "s57", "s58",
119 "s59", "s60", "s61", "s62", "s63", "s64", "s65", "s66", "s67",
120 "s68", "s69", "s70", "s71", "s72", "s73", "s74", "s75", "s76",
121 "s77", "s78", "s79", "s80", "s81", "s82", "s83", "s84", "s85",
122 "s86", "s87", "s88", "s89", "s90", "s91", "s92", "s93", "s94",
123 "s95", "s96", "s97", "s98", "s99", "s100", "s101", "s102", "s103",
124 "s104", "s105", "s106", "s107", "s108", "s109", "s110", "s111", "s112",
125 "s113", "s114", "s115", "s116", "s117", "s118", "s119", "s120", "s121",
126 "s122", "s123", "s124", "s125", "s126", "s127", "exec", "vcc", "scc",
127 "m0", "flat_scratch", "exec_lo", "exec_hi", "vcc_lo", "vcc_hi",
128 "flat_scratch_lo", "flat_scratch_hi",
129 "a0", "a1", "a2", "a3", "a4", "a5", "a6", "a7", "a8",
130 "a9", "a10", "a11", "a12", "a13", "a14", "a15", "a16", "a17",
131 "a18", "a19", "a20", "a21", "a22", "a23", "a24", "a25", "a26",
132 "a27", "a28", "a29", "a30", "a31", "a32", "a33", "a34", "a35",
133 "a36", "a37", "a38", "a39", "a40", "a41", "a42", "a43", "a44",
134 "a45", "a46", "a47", "a48", "a49", "a50", "a51", "a52", "a53",
135 "a54", "a55", "a56", "a57", "a58", "a59", "a60", "a61", "a62",
136 "a63", "a64", "a65", "a66", "a67", "a68", "a69", "a70", "a71",
137 "a72", "a73", "a74", "a75", "a76", "a77", "a78", "a79", "a80",
138 "a81", "a82", "a83", "a84", "a85", "a86", "a87", "a88", "a89",
139 "a90", "a91", "a92", "a93", "a94", "a95", "a96", "a97", "a98",
140 "a99", "a100", "a101", "a102", "a103", "a104", "a105", "a106", "a107",
141 "a108", "a109", "a110", "a111", "a112", "a113", "a114", "a115", "a116",
142 "a117", "a118", "a119", "a120", "a121", "a122", "a123", "a124", "a125",
143 "a126", "a127", "a128", "a129", "a130", "a131", "a132", "a133", "a134",
144 "a135", "a136", "a137", "a138", "a139", "a140", "a141", "a142", "a143",
145 "a144", "a145", "a146", "a147", "a148", "a149", "a150", "a151", "a152",
146 "a153", "a154", "a155", "a156", "a157", "a158", "a159", "a160", "a161",
147 "a162", "a163", "a164", "a165", "a166", "a167", "a168", "a169", "a170",
148 "a171", "a172", "a173", "a174", "a175", "a176", "a177", "a178", "a179",
149 "a180", "a181", "a182", "a183", "a184", "a185", "a186", "a187", "a188",
150 "a189", "a190", "a191", "a192", "a193", "a194", "a195", "a196", "a197",
151 "a198", "a199", "a200", "a201", "a202", "a203", "a204", "a205", "a206",
152 "a207", "a208", "a209", "a210", "a211", "a212", "a213", "a214", "a215",
153 "a216", "a217", "a218", "a219", "a220", "a221", "a222", "a223", "a224",
154 "a225", "a226", "a227", "a228", "a229", "a230", "a231", "a232", "a233",
155 "a234", "a235", "a236", "a237", "a238", "a239", "a240", "a241", "a242",
156 "a243", "a244", "a245", "a246", "a247", "a248", "a249", "a250", "a251",
157 "a252", "a253", "a254", "a255"
// Returns the GCCRegNames table wrapped in an ArrayRef.
160 ArrayRef<const char *> AMDGPUTargetInfo::getGCCRegNames() const {
161 return llvm::makeArrayRef(GCCRegNames);
// Seeds Features with GPU-specific default features and then hands off to
// TargetInfo::initFeatureMap(), whose result is returned unchanged.
//
// For an amdgcn triple the defaults are chosen by switching on
// parseArchAMDGCN(CPU); a second switch is driven by parseArchR600(CPU).
// Every feature written here is set to true. Both switches contain an
// llvm_unreachable("Unhandled GPU!") for GPU kinds that are not handled.
//
// Features   - feature map to populate (modified in place).
// Diags      - forwarded to TargetInfo::initFeatureMap().
// CPU        - GPU name that is parsed into a GPU kind.
// FeatureVec - forwarded to TargetInfo::initFeatureMap().
164 bool AMDGPUTargetInfo::initFeatureMap(
165 llvm::StringMap<bool> &Features, DiagnosticsEngine &Diags, StringRef CPU,
166 const std::vector<std::string> &FeatureVec) const {
168 using namespace llvm::AMDGPU;
170 // XXX - What does the member GPU mean if device name string passed here?
171 if (isAMDGCN(getTriple())) {
172 switch (llvm::AMDGPU::parseArchAMDGCN(CPU)) {
174 Features["ci-insts"] = true;
175 Features["dot1-insts"] = true;
176 Features["dot2-insts"] = true;
177 Features["dot5-insts"] = true;
178 Features["dot6-insts"] = true;
179 Features["dl-insts"] = true;
180 Features["flat-address-space"] = true;
181 Features["16-bit-insts"] = true;
182 Features["dpp"] = true;
183 Features["gfx8-insts"] = true;
184 Features["gfx9-insts"] = true;
185 Features["gfx10-insts"] = true;
186 Features["gfx10-3-insts"] = true;
187 Features["s-memrealtime"] = true;
191 Features["dot1-insts"] = true;
192 Features["dot2-insts"] = true;
193 Features["dot5-insts"] = true;
194 Features["dot6-insts"] = true;
197 Features["dl-insts"] = true;
198 Features["ci-insts"] = true;
199 Features["flat-address-space"] = true;
200 Features["16-bit-insts"] = true;
201 Features["dpp"] = true;
202 Features["gfx8-insts"] = true;
203 Features["gfx9-insts"] = true;
204 Features["gfx10-insts"] = true;
205 Features["s-memrealtime"] = true;
208 Features["dot3-insts"] = true;
209 Features["dot4-insts"] = true;
210 Features["dot5-insts"] = true;
211 Features["dot6-insts"] = true;
212 Features["mai-insts"] = true;
215 Features["dl-insts"] = true;
216 Features["dot1-insts"] = true;
217 Features["dot2-insts"] = true;
223 Features["gfx9-insts"] = true;
229 Features["gfx8-insts"] = true;
230 Features["16-bit-insts"] = true;
231 Features["dpp"] = true;
232 Features["s-memrealtime"] = true;
239 Features["ci-insts"] = true;
240 Features["flat-address-space"] = true;
248 llvm_unreachable("Unhandled GPU!");
254 switch (llvm::AMDGPU::parseArchR600(CPU)) {
259 // TODO: Add fp64 when implemented.
275 llvm_unreachable("Unhandled GPU!");
279 return TargetInfo::initFeatureMap(Features, Diags, CPU, FeatureVec);
// Fills Values with the recognised GPU names by delegating to the llvm::AMDGPU
// helpers fillValidArchListAMDGCN() and fillValidArchListR600(); the AMDGCN
// helper is used when the triple is amdgcn.
282 void AMDGPUTargetInfo::fillValidCPUList(
283 SmallVectorImpl<StringRef> &Values) const {
284 if (isAMDGCN(getTriple()))
285 llvm::AMDGPU::fillValidArchListAMDGCN(Values);
287 llvm::AMDGPU::fillValidArchListR600(Values);
// Points AddrSpaceMap at AMDGPUDefIsPrivMap when DefaultIsPrivate is true and
// at AMDGPUDefIsGenMap otherwise.
290 void AMDGPUTargetInfo::setAddressSpaceMap(bool DefaultIsPrivate) {
291 AddrSpaceMap = DefaultIsPrivate ? &AMDGPUDefIsPrivMap : &AMDGPUDefIsGenMap;
// Builds the target description for Triple.
//
// Opts.CPU is parsed into GPUKind, and GPUFeatures is looked up from it, using
// the AMDGCN or the R600 helpers depending on the triple. The body then
// installs the matching data layout string and configures the address-space
// map, half-type support, pointer width/alignment and atomic widths.
294 AMDGPUTargetInfo::AMDGPUTargetInfo(const llvm::Triple &Triple,
295 const TargetOptions &Opts)
296 : TargetInfo(Triple),
297 GPUKind(isAMDGCN(Triple) ?
298 llvm::AMDGPU::parseArchAMDGCN(Opts.CPU) :
299 llvm::AMDGPU::parseArchR600(Opts.CPU)),
300 GPUFeatures(isAMDGCN(Triple) ?
301 llvm::AMDGPU::getArchAttrAMDGCN(GPUKind) :
302 llvm::AMDGPU::getArchAttrR600(GPUKind)) {
303 resetDataLayout(isAMDGCN(getTriple()) ? DataLayoutStringAMDGCN
304 : DataLayoutStringR600);
// The alloca address space named by the data layout must be Private.
305 assert(DataLayout->getAllocaAddrSpace() == Private);
306 GridValues = llvm::omp::AMDGPUGpuGridValues;
308 setAddressSpaceMap(Triple.getOS() == llvm::Triple::Mesa3D ||
310 UseAddrSpaceMapMangling = true;
312 HasLegalHalfType = true;
315 // Set pointer width and alignment for target address space 0.
316 PointerWidth = PointerAlign = DataLayout->getPointerSizeInBits();
// With 64-bit pointers, long becomes 64 bits wide and the size, pointer
// difference and integer-pointer types are all based on long.
317 if (getMaxPointerWidth() == 64) {
318 LongWidth = LongAlign = 64;
319 SizeType = UnsignedLong;
320 PtrDiffType = SignedLong;
321 IntPtrType = SignedLong;
324 MaxAtomicPromoteWidth = MaxAtomicInlineWidth = 64;
// Applies the base-class adjustment for Opts and then re-selects the
// address-space map: DefaultIsPrivate is true when compiling OpenCL or when
// the triple is not amdgcn.
327 void AMDGPUTargetInfo::adjust(LangOptions &Opts) {
328 TargetInfo::adjust(Opts);
329 // ToDo: There are still a few places using default address space as private
330 // address space in OpenCL, which needs to be cleaned up, then Opts.OpenCL
331 // can be removed from the following line.
332 setAddressSpaceMap(/*DefaultIsPrivate=*/Opts.OpenCL ||
333 !isAMDGCN(getTriple()));
// Returns the BuiltinInfo table as an ArrayRef whose length is
// clang::AMDGPU::LastTSBuiltin - Builtin::FirstTSBuiltin.
336 ArrayRef<Builtin::Info> AMDGPUTargetInfo::getTargetBuiltins() const {
337 return llvm::makeArrayRef(BuiltinInfo, clang::AMDGPU::LastTSBuiltin -
338 Builtin::FirstTSBuiltin);
// Registers the AMDGPU predefined macros on Builder.
//
// __AMD__ and __AMDGPU__ are always defined, and __AMDGCN__ is defined when
// the triple is amdgcn. When GPUKind is not GK_NONE, a macro named after the
// canonical GPU name wrapped in double underscores is defined as well. The
// remaining defineMacro calls cover __R600__, __HAS_FMAF__, FP_FAST_FMAF,
// __HAS_LDEXPF__, __HAS_FP64__ and FP_FAST_FMA.
// NOTE(review): confirm the conditions under which each of those is emitted.
341 void AMDGPUTargetInfo::getTargetDefines(const LangOptions &Opts,
342 MacroBuilder &Builder) const {
343 Builder.defineMacro("__AMD__");
344 Builder.defineMacro("__AMDGPU__");
346 if (isAMDGCN(getTriple()))
347 Builder.defineMacro("__AMDGCN__");
349 Builder.defineMacro("__R600__");
351 if (GPUKind != llvm::AMDGPU::GK_NONE) {
352 StringRef CanonName = isAMDGCN(getTriple()) ?
353 getArchNameAMDGCN(GPUKind) : getArchNameR600(GPUKind);
354 Builder.defineMacro(Twine("__") + Twine(CanonName) + Twine("__"));
357 // TODO: __HAS_FMAF__, __HAS_LDEXPF__, __HAS_FP64__ are deprecated and will be
358 // removed in the near future.
360 Builder.defineMacro("__HAS_FMAF__");
362 Builder.defineMacro("FP_FAST_FMAF");
364 Builder.defineMacro("__HAS_LDEXPF__");
366 Builder.defineMacro("__HAS_FP64__");
368 Builder.defineMacro("FP_FAST_FMA");
371 void AMDGPUTargetInfo::setAuxTarget(const TargetInfo *Aux) {
372 assert(HalfFormat == Aux->HalfFormat);
373 assert(FloatFormat == Aux->FloatFormat);
374 assert(DoubleFormat == Aux->DoubleFormat);
376 // On x86_64 long double is 80-bit extended precision format, which is
377 // not supported by AMDGPU. 128-bit floating point format is also not
378 // supported by AMDGPU. Therefore keep its own format for these two types.
379 auto SaveLongDoubleFormat = LongDoubleFormat;
380 auto SaveFloat128Format = Float128Format;
382 LongDoubleFormat = SaveLongDoubleFormat;
383 Float128Format = SaveFloat128Format;
384 // For certain builtin types supported on the host target, claim they are
385 // supported here too so that host code compiles during device compilation.
387 // FIXME: As a side effect, we also accept `__float128` uses in the device
388 // code. To reject these builtin types supported in the host target but not in
389 // the device target, one approach would be to support a `device_builtin`
390 // attribute so that we could tell device builtin types from host ones. This
391 // also solves the different representations of the same builtin type, such
392 // as `size_t` in the MSVC environment.
393 if (Aux->hasFloat128Type()) {
395 Float128Format = DoubleFormat;