1 //===--- AMDGPU.cpp - Implement AMDGPU target feature support -------------===//
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
7 //===----------------------------------------------------------------------===//
9 // This file implements AMDGPU TargetInfo objects.
11 //===----------------------------------------------------------------------===//
14 #include "clang/Basic/Builtins.h"
15 #include "clang/Basic/CodeGenOptions.h"
16 #include "clang/Basic/LangOptions.h"
17 #include "clang/Basic/MacroBuilder.h"
18 #include "clang/Basic/TargetBuiltins.h"
19 #include "llvm/ADT/StringSwitch.h"
20 #include "llvm/IR/DataLayout.h"
22 using namespace clang;
23 using namespace clang::targets;
28 // If you edit the description strings, make sure you update
29 // getPointerWidthV().
// Data layout description used for R600 targets. The AMDGPUTargetInfo
// constructor hands this string to resetDataLayout() whenever the triple is
// not amdgcn. In LLVM data layout syntax it declares a little-endian target
// ("e") with 32-bit default pointers ("p:32:32") and allocas placed in
// address space 5 ("A5"); the constructor asserts that the alloca address
// space equals Private.
31 static const char *const DataLayoutStringR600 =
32 "e-p:32:32-i64:64-v16:16-v24:32-v32:32-v48:64-v96:128"
33 "-v192:256-v256:256-v512:512-v1024:1024-v2048:2048-n32:64-S32-A5";
// Data layout description used for amdgcn targets. The AMDGPUTargetInfo
// constructor hands this string to resetDataLayout() when isAMDGCN(triple)
// holds. The default pointer and address spaces 1 and 4 are 64 bits wide,
// while address spaces 2, 3, 5 and 6 use 32-bit pointers. Like the R600
// string it places allocas in address space 5 ("A5").
35 static const char *const DataLayoutStringAMDGCN =
36 "e-p:64:64-p1:64:64-p2:32:32-p3:32:32-p4:64:64-p5:32:32-p6:32:32"
37 "-i64:64-v16:16-v24:32-v32:32-v48:64-v96:128"
38 "-v192:256-v256:256-v512:512-v1024:1024-v2048:2048-n32:64-S32-A5"
// Table translating language-level address spaces to AMDGPU target address
// spaces. setAddressSpaceMap() installs this table when its DefaultIsPrivate
// argument is false. Each entry's trailing comment names the language
// address space that the slot corresponds to.
41 const LangASMap AMDGPUTargetInfo::AMDGPUDefIsGenMap = {
43 Global, // opencl_global
44 Local, // opencl_local
45 Constant, // opencl_constant
46 Private, // opencl_private
47 Generic, // opencl_generic
48 Global, // cuda_device
49 Constant, // cuda_constant
51 Generic, // ptr32_sptr
52 Generic, // ptr32_uptr
// Table translating language-level address spaces to AMDGPU target address
// spaces. setAddressSpaceMap() installs this table when its DefaultIsPrivate
// argument is true. Each entry's trailing comment names the language
// address space that the slot corresponds to.
56 const LangASMap AMDGPUTargetInfo::AMDGPUDefIsPrivMap = {
58 Global, // opencl_global
59 Local, // opencl_local
60 Constant, // opencl_constant
61 Private, // opencl_private
62 Generic, // opencl_generic
63 Global, // cuda_device
64 Constant, // cuda_constant
66 Generic, // ptr32_sptr
67 Generic, // ptr32_uptr
71 } // namespace targets
// Table of AMDGPU builtin descriptors, produced by expanding the BUILTIN and
// TARGET_BUILTIN macros over clang/Basic/BuiltinsAMDGPU.def. Both macros
// stringify the builtin ID and mark the entry ALL_LANGUAGES; they differ only
// in the last field, which is nullptr for BUILTIN and the FEATURE argument
// for TARGET_BUILTIN. getTargetBuiltins() exposes this array to callers.
74 const Builtin::Info AMDGPUTargetInfo::BuiltinInfo[] = {
75 #define BUILTIN(ID, TYPE, ATTRS) \
76 {#ID, TYPE, ATTRS, nullptr, ALL_LANGUAGES, nullptr},
77 #define TARGET_BUILTIN(ID, TYPE, ATTRS, FEATURE) \
78 {#ID, TYPE, ATTRS, nullptr, ALL_LANGUAGES, FEATURE},
79 #include "clang/Basic/BuiltinsAMDGPU.def"
// Register names reported by getGCCRegNames(). The list holds v0 through
// v255, then s0 through s127, followed by the special registers exec, vcc,
// scc, m0 and flat_scratch together with the _lo/_hi halves of exec, vcc and
// flat_scratch.
82 const char *const AMDGPUTargetInfo::GCCRegNames[] = {
83 "v0", "v1", "v2", "v3", "v4", "v5", "v6", "v7", "v8",
84 "v9", "v10", "v11", "v12", "v13", "v14", "v15", "v16", "v17",
85 "v18", "v19", "v20", "v21", "v22", "v23", "v24", "v25", "v26",
86 "v27", "v28", "v29", "v30", "v31", "v32", "v33", "v34", "v35",
87 "v36", "v37", "v38", "v39", "v40", "v41", "v42", "v43", "v44",
88 "v45", "v46", "v47", "v48", "v49", "v50", "v51", "v52", "v53",
89 "v54", "v55", "v56", "v57", "v58", "v59", "v60", "v61", "v62",
90 "v63", "v64", "v65", "v66", "v67", "v68", "v69", "v70", "v71",
91 "v72", "v73", "v74", "v75", "v76", "v77", "v78", "v79", "v80",
92 "v81", "v82", "v83", "v84", "v85", "v86", "v87", "v88", "v89",
93 "v90", "v91", "v92", "v93", "v94", "v95", "v96", "v97", "v98",
94 "v99", "v100", "v101", "v102", "v103", "v104", "v105", "v106", "v107",
95 "v108", "v109", "v110", "v111", "v112", "v113", "v114", "v115", "v116",
96 "v117", "v118", "v119", "v120", "v121", "v122", "v123", "v124", "v125",
97 "v126", "v127", "v128", "v129", "v130", "v131", "v132", "v133", "v134",
98 "v135", "v136", "v137", "v138", "v139", "v140", "v141", "v142", "v143",
99 "v144", "v145", "v146", "v147", "v148", "v149", "v150", "v151", "v152",
100 "v153", "v154", "v155", "v156", "v157", "v158", "v159", "v160", "v161",
101 "v162", "v163", "v164", "v165", "v166", "v167", "v168", "v169", "v170",
102 "v171", "v172", "v173", "v174", "v175", "v176", "v177", "v178", "v179",
103 "v180", "v181", "v182", "v183", "v184", "v185", "v186", "v187", "v188",
104 "v189", "v190", "v191", "v192", "v193", "v194", "v195", "v196", "v197",
105 "v198", "v199", "v200", "v201", "v202", "v203", "v204", "v205", "v206",
106 "v207", "v208", "v209", "v210", "v211", "v212", "v213", "v214", "v215",
107 "v216", "v217", "v218", "v219", "v220", "v221", "v222", "v223", "v224",
108 "v225", "v226", "v227", "v228", "v229", "v230", "v231", "v232", "v233",
109 "v234", "v235", "v236", "v237", "v238", "v239", "v240", "v241", "v242",
110 "v243", "v244", "v245", "v246", "v247", "v248", "v249", "v250", "v251",
111 "v252", "v253", "v254", "v255", "s0", "s1", "s2", "s3", "s4",
112 "s5", "s6", "s7", "s8", "s9", "s10", "s11", "s12", "s13",
113 "s14", "s15", "s16", "s17", "s18", "s19", "s20", "s21", "s22",
114 "s23", "s24", "s25", "s26", "s27", "s28", "s29", "s30", "s31",
115 "s32", "s33", "s34", "s35", "s36", "s37", "s38", "s39", "s40",
116 "s41", "s42", "s43", "s44", "s45", "s46", "s47", "s48", "s49",
117 "s50", "s51", "s52", "s53", "s54", "s55", "s56", "s57", "s58",
118 "s59", "s60", "s61", "s62", "s63", "s64", "s65", "s66", "s67",
119 "s68", "s69", "s70", "s71", "s72", "s73", "s74", "s75", "s76",
120 "s77", "s78", "s79", "s80", "s81", "s82", "s83", "s84", "s85",
121 "s86", "s87", "s88", "s89", "s90", "s91", "s92", "s93", "s94",
122 "s95", "s96", "s97", "s98", "s99", "s100", "s101", "s102", "s103",
123 "s104", "s105", "s106", "s107", "s108", "s109", "s110", "s111", "s112",
124 "s113", "s114", "s115", "s116", "s117", "s118", "s119", "s120", "s121",
125 "s122", "s123", "s124", "s125", "s126", "s127", "exec", "vcc", "scc",
126 "m0", "flat_scratch", "exec_lo", "exec_hi", "vcc_lo", "vcc_hi",
127 "flat_scratch_lo", "flat_scratch_hi"
/// Returns a non-owning view over the static GCCRegNames table.
130 ArrayRef<const char *> AMDGPUTargetInfo::getGCCRegNames() const {
131 return llvm::makeArrayRef(GCCRegNames);
/// Seeds \p Features with the default target features for \p CPU and then
/// defers to TargetInfo::initFeatureMap.
///
/// \param Features   feature-name to enabled-flag map that is filled in.
/// \param Diags      diagnostics engine, forwarded to the base implementation.
/// \param CPU        device name; parsed with parseArchAMDGCN for amdgcn
///                   triples and with parseArchR600 on the other path.
/// \param FeatureVec feature strings, forwarded to the base implementation.
/// \returns whatever TargetInfo::initFeatureMap returns.
///
/// A GPU kind that the switch does not handle reaches llvm_unreachable.
134 bool AMDGPUTargetInfo::initFeatureMap(
135 llvm::StringMap<bool> &Features, DiagnosticsEngine &Diags, StringRef CPU,
136 const std::vector<std::string> &FeatureVec) const {
138 using namespace llvm::AMDGPU;
140 // XXX - What does the member GPU mean if device name string passed here?
141 if (isAMDGCN(getTriple())) {
// Turn on the instruction-set features that belong to the parsed GPU kind.
// NOTE(review): the groups below look cumulative from newer to older
// generations (gfx10, gfx9, gfx8, ci) -- confirm against the case labels.
142 switch (llvm::AMDGPU::parseArchAMDGCN(CPU)) {
145 Features["dot1-insts"] = true;
146 Features["dot2-insts"] = true;
147 Features["dot5-insts"] = true;
148 Features["dot6-insts"] = true;
151 Features["dl-insts"] = true;
152 Features["ci-insts"] = true;
153 Features["flat-address-space"] = true;
154 Features["16-bit-insts"] = true;
155 Features["dpp"] = true;
156 Features["gfx8-insts"] = true;
157 Features["gfx9-insts"] = true;
158 Features["gfx10-insts"] = true;
159 Features["s-memrealtime"] = true;
162 Features["dot3-insts"] = true;
163 Features["dot4-insts"] = true;
164 Features["dot5-insts"] = true;
165 Features["dot6-insts"] = true;
168 Features["dl-insts"] = true;
169 Features["dot1-insts"] = true;
170 Features["dot2-insts"] = true;
176 Features["gfx9-insts"] = true;
182 Features["gfx8-insts"] = true;
183 Features["16-bit-insts"] = true;
184 Features["dpp"] = true;
185 Features["s-memrealtime"] = true;
192 Features["ci-insts"] = true;
193 Features["flat-address-space"] = true;
201 llvm_unreachable("Unhandled GPU!");
// R600 path: the CPU name is parsed with the R600 parser instead.
207 switch (llvm::AMDGPU::parseArchR600(CPU)) {
212 // TODO: Add fp64 when implemented.
228 llvm_unreachable("Unhandled GPU!");
// Hand the seeded map, together with FeatureVec, to the generic
// implementation and return its result.
232 return TargetInfo::initFeatureMap(Features, Diags, CPU, FeatureVec);
/// Appends default denormal-handling features to \p TargetOpts.Features for
/// every denormal feature the user did not spell out explicitly.
///
/// \param CGOpts     code generation options; only FlushDenorm is read.
/// \param TargetOpts FeaturesAsWritten is scanned, Features is appended to.
235 void AMDGPUTargetInfo::adjustTargetOptions(const CodeGenOptions &CGOpts,
236 TargetOptions &TargetOpts) const {
// These flags record whether the user mentioned the feature at all (with
// either sign), not whether denormals end up enabled.
237 bool hasFP32Denormals = false;
238 bool hasFP64Denormals = false;
240 for (auto &I : TargetOpts.FeaturesAsWritten) {
241 if (I == "+fp32-denormals" || I == "-fp32-denormals")
242 hasFP32Denormals = true;
243 if (I == "+fp64-fp16-denormals" || I == "-fp64-fp16-denormals")
244 hasFP64Denormals = true;
// No explicit fp32 choice: enable fp32 denormals only when the target has
// fast FMAF and full-rate fp32 denormals and flushing was not requested;
// otherwise disable them.
246 if (!hasFP32Denormals)
247 TargetOpts.Features.push_back(
248 (Twine(hasFastFMAF() && hasFullRateDenormalsF32() && !CGOpts.FlushDenorm
249 ? '+' : '-') + Twine("fp32-denormals"))
251 // By default, never flush fp64 or fp16 denormals (when fp64 is supported).
252 if (!hasFP64Denormals && hasFP64())
253 TargetOpts.Features.push_back("+fp64-fp16-denormals");
/// Fills \p Values with valid CPU names using the LLVM arch-list helpers.
/// isAMDGCN(getTriple()) selects the AMDGCN list; the R600 list helper is
/// the alternative.
256 void AMDGPUTargetInfo::fillValidCPUList(
257 SmallVectorImpl<StringRef> &Values) const {
258 if (isAMDGCN(getTriple()))
259 llvm::AMDGPU::fillValidArchListAMDGCN(Values);
261 llvm::AMDGPU::fillValidArchListR600(Values);
/// Points AddrSpaceMap at one of the two static address space tables:
/// AMDGPUDefIsPrivMap when \p DefaultIsPrivate is true, AMDGPUDefIsGenMap
/// otherwise.
264 void AMDGPUTargetInfo::setAddressSpaceMap(bool DefaultIsPrivate) {
265 AddrSpaceMap = DefaultIsPrivate ? &AMDGPUDefIsPrivMap : &AMDGPUDefIsGenMap;
/// Constructs the target description for \p Triple.
///
/// GPUKind is parsed from Opts.CPU with the AMDGCN or R600 parser depending
/// on the triple, and GPUFeatures is looked up from that kind with the
/// matching getArchAttr helper. The body then installs the data layout, the
/// address space map, and the pointer, long and atomic widths.
///
/// \param Triple target triple; decides between the amdgcn and R600 paths.
/// \param Opts   target options; only Opts.CPU is read here.
268 AMDGPUTargetInfo::AMDGPUTargetInfo(const llvm::Triple &Triple,
269 const TargetOptions &Opts)
270 : TargetInfo(Triple),
271 GPUKind(isAMDGCN(Triple) ?
272 llvm::AMDGPU::parseArchAMDGCN(Opts.CPU) :
273 llvm::AMDGPU::parseArchR600(Opts.CPU)),
274 GPUFeatures(isAMDGCN(Triple) ?
275 llvm::AMDGPU::getArchAttrAMDGCN(GPUKind) :
276 llvm::AMDGPU::getArchAttrR600(GPUKind)) {
277 resetDataLayout(isAMDGCN(getTriple()) ? DataLayoutStringAMDGCN
278 : DataLayoutStringR600);
// Both layout strings are expected to put allocas in the Private space.
279 assert(DataLayout->getAllocaAddrSpace() == Private);
// Choose the initial address space map; a Mesa3D OS is one of the
// conditions that selects the default-is-private table.
281 setAddressSpaceMap(Triple.getOS() == llvm::Triple::Mesa3D ||
283 UseAddrSpaceMapMangling = true;
285 HasLegalHalfType = true;
288 // Set pointer width and alignment for target address space 0.
289 PointerWidth = PointerAlign = DataLayout->getPointerSizeInBits();
// With 64-bit pointers, long becomes 64 bits wide and the size,
// pointer-difference and intptr types are all based on long.
290 if (getMaxPointerWidth() == 64) {
291 LongWidth = LongAlign = 64;
292 SizeType = UnsignedLong;
293 PtrDiffType = SignedLong;
294 IntPtrType = SignedLong;
297 MaxAtomicPromoteWidth = MaxAtomicInlineWidth = 64;
/// Applies the generic TargetInfo::adjust and then re-selects the address
/// space map from the language options: the default-is-private table is
/// used for OpenCL or for any non-amdgcn triple.
///
/// \param Opts language options, forwarded to TargetInfo::adjust; Opts.OpenCL
///             is read afterwards.
300 void AMDGPUTargetInfo::adjust(LangOptions &Opts) {
301 TargetInfo::adjust(Opts);
302 // ToDo: There are still a few places using default address space as private
303 // address space in OpenCL, which needs to be cleaned up, then Opts.OpenCL
304 // can be removed from the following line.
305 setAddressSpaceMap(/*DefaultIsPrivate=*/Opts.OpenCL ||
306 !isAMDGCN(getTriple()));
/// Returns a non-owning view over the BuiltinInfo table. The element count
/// is the distance between clang::AMDGPU::LastTSBuiltin and
/// Builtin::FirstTSBuiltin.
309 ArrayRef<Builtin::Info> AMDGPUTargetInfo::getTargetBuiltins() const {
310 return llvm::makeArrayRef(BuiltinInfo, clang::AMDGPU::LastTSBuiltin -
311 Builtin::FirstTSBuiltin);
/// Emits the AMDGPU predefined macros into \p Builder.
///
/// __AMD__ and __AMDGPU__ are defined first, followed by an
/// architecture-family macro (__AMDGCN__ or __R600__). When a concrete GPU
/// kind is known, a macro named after its canonical arch name wrapped in
/// double underscores is added. Capability macros follow.
///
/// \param Opts    language options; not read by the lines shown here.
/// \param Builder receives the macro definitions.
314 void AMDGPUTargetInfo::getTargetDefines(const LangOptions &Opts,
315 MacroBuilder &Builder) const {
316 Builder.defineMacro("__AMD__");
317 Builder.defineMacro("__AMDGPU__");
319 if (isAMDGCN(getTriple()))
320 Builder.defineMacro("__AMDGCN__");
322 Builder.defineMacro("__R600__");
// Per-device macro: skipped when no GPU kind was parsed (GK_NONE).
324 if (GPUKind != llvm::AMDGPU::GK_NONE) {
325 StringRef CanonName = isAMDGCN(getTriple()) ?
326 getArchNameAMDGCN(GPUKind) : getArchNameR600(GPUKind);
327 Builder.defineMacro(Twine("__") + Twine(CanonName) + Twine("__"));
330 // TODO: __HAS_FMAF__, __HAS_LDEXPF__, __HAS_FP64__ are deprecated and will be
331 // removed in the near future.
// NOTE(review): each capability macro below is presumably guarded by the
// matching hardware query -- confirm against the surrounding conditions.
333 Builder.defineMacro("__HAS_FMAF__");
335 Builder.defineMacro("FP_FAST_FMAF");
337 Builder.defineMacro("__HAS_LDEXPF__");
339 Builder.defineMacro("__HAS_FP64__");
341 Builder.defineMacro("FP_FAST_FMA");
/// Reconciles this target with the auxiliary target \p Aux.
///
/// Asserts that the half, float and double formats already agree with
/// \p Aux, and keeps this target's own LongDoubleFormat and Float128Format
/// by saving them up front and writing them back at the end.
///
/// \param Aux auxiliary target description; dereferenced unconditionally by
///            the assertions, so it must not be null.
344 void AMDGPUTargetInfo::setAuxTarget(const TargetInfo *Aux) {
345 assert(HalfFormat == Aux->HalfFormat);
346 assert(FloatFormat == Aux->FloatFormat);
347 assert(DoubleFormat == Aux->DoubleFormat);
349 // On x86_64 long double is 80-bit extended precision format, which is
350 // not supported by AMDGPU. 128-bit floating point format is also not
351 // supported by AMDGPU. Therefore keep its own format for these two types.
352 auto SaveLongDoubleFormat = LongDoubleFormat;
353 auto SaveFloat128Format = Float128Format;
355 LongDoubleFormat = SaveLongDoubleFormat;
356 Float128Format = SaveFloat128Format;