1 //===--- AMDGPU.cpp - Implement AMDGPU target feature support -------------===//
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
7 //===----------------------------------------------------------------------===//
9 // This file implements AMDGPU TargetInfo objects.
11 //===----------------------------------------------------------------------===//
14 #include "clang/Basic/Builtins.h"
15 #include "clang/Basic/CodeGenOptions.h"
16 #include "clang/Basic/LangOptions.h"
17 #include "clang/Basic/MacroBuilder.h"
18 #include "clang/Basic/TargetBuiltins.h"
19 #include "llvm/ADT/StringSwitch.h"
20 #include "llvm/IR/DataLayout.h"
22 using namespace clang;
23 using namespace clang::targets;
28 // If you edit the description strings, make sure you update
29 // getPointerWidthV().
// Data layout description string used for non-AMDGCN (R600) triples; the
// AMDGPUTargetInfo constructor passes it to resetDataLayout(). The adjacent
// literals below are concatenated by the compiler into one string.
// The "-A5" suffix names the alloca address space, which the constructor
// asserts to be equal to Private.
31 static const char *const DataLayoutStringR600 =
32 "e-p:32:32-i64:64-v16:16-v24:32-v32:32-v48:64-v96:128"
33 "-v192:256-v256:256-v512:512-v1024:1024-v2048:2048-n32:64-S32-A5";
// Data layout description string used for AMDGCN triples; the
// AMDGPUTargetInfo constructor passes it to resetDataLayout(). Besides the
// default pointer spec ("p:64:64") it carries explicit pointer specs for
// address spaces 1 through 6 ("p1" ... "p6").
// NOTE(review): no terminating ';' is visible after the last literal below,
// so the string may continue with a further literal that is not shown here --
// confirm before relying on this as the complete layout.
35 static const char *const DataLayoutStringAMDGCN =
36 "e-p:64:64-p1:64:64-p2:32:32-p3:32:32-p4:64:64-p5:32:32-p6:32:32"
37 "-i64:64-v16:16-v24:32-v32:32-v48:64-v96:128"
38 "-v192:256-v256:256-v512:512-v1024:1024-v2048:2048-n32:64-S32-A5"
// Address space table installed by setAddressSpaceMap() when its
// DefaultIsPrivate argument is false. Each entry is a target address space;
// the trailing comment on each line names the language address space that the
// entry stands for.
// NOTE(review): the initializer is not fully visible here (no closing "};",
// and the entries shown are identical to those of AMDGPUDefIsPrivMap), so the
// entry that distinguishes the two maps is presumably one that is not shown --
// confirm against the complete initializer.
41 const LangASMap AMDGPUTargetInfo::AMDGPUDefIsGenMap = {
43 Global, // opencl_global
44 Local, // opencl_local
45 Constant, // opencl_constant
46 Private, // opencl_private
47 Generic, // opencl_generic
48 Global, // cuda_device
49 Constant, // cuda_constant
// Address space table installed by setAddressSpaceMap() when its
// DefaultIsPrivate argument is true. Each entry is a target address space;
// the trailing comment on each line names the language address space that the
// entry stands for.
// NOTE(review): the initializer is not fully visible here (no closing "};",
// and the entries shown are identical to those of AMDGPUDefIsGenMap), so the
// entry that distinguishes the two maps is presumably one that is not shown --
// confirm against the complete initializer.
53 const LangASMap AMDGPUTargetInfo::AMDGPUDefIsPrivMap = {
55 Global, // opencl_global
56 Local, // opencl_local
57 Constant, // opencl_constant
58 Private, // opencl_private
59 Generic, // opencl_generic
60 Global, // cuda_device
61 Constant, // cuda_constant
64 } // namespace targets
// Table of builtin descriptors, produced by expanding the two macros below
// over every entry of clang/Basic/BuiltinsAMDGPU.def. getTargetBuiltins()
// exposes this array to callers.
// NOTE(review): the closing "};" of the initializer (and any #undef of the
// helper macros) is not visible in this view.
67 const Builtin::Info AMDGPUTargetInfo::BuiltinInfo[] = {
// BUILTIN: the stringified ID, TYPE and ATTRS are taken from the .def entry;
// the fourth and last fields are nullptr and the fifth is ALL_LANGUAGES.
68 #define BUILTIN(ID, TYPE, ATTRS) \
69 {#ID, TYPE, ATTRS, nullptr, ALL_LANGUAGES, nullptr},
// TARGET_BUILTIN: same as BUILTIN except that the last field carries the
// FEATURE argument from the .def entry instead of nullptr.
70 #define TARGET_BUILTIN(ID, TYPE, ATTRS, FEATURE) \
71 {#ID, TYPE, ATTRS, nullptr, ALL_LANGUAGES, FEATURE},
72 #include "clang/Basic/BuiltinsAMDGPU.def"
// Register name table returned by getGCCRegNames(). In order it lists
// "v0".."v255", then "s0".."s127", then the named registers "exec", "vcc",
// "scc", "m0" and "flat_scratch", and finally the "_lo"/"_hi" variants of
// exec, vcc and flat_scratch.
// NOTE(review): the closing "};" of the initializer is not visible in this
// view.
75 const char *const AMDGPUTargetInfo::GCCRegNames[] = {
76 "v0", "v1", "v2", "v3", "v4", "v5", "v6", "v7", "v8",
77 "v9", "v10", "v11", "v12", "v13", "v14", "v15", "v16", "v17",
78 "v18", "v19", "v20", "v21", "v22", "v23", "v24", "v25", "v26",
79 "v27", "v28", "v29", "v30", "v31", "v32", "v33", "v34", "v35",
80 "v36", "v37", "v38", "v39", "v40", "v41", "v42", "v43", "v44",
81 "v45", "v46", "v47", "v48", "v49", "v50", "v51", "v52", "v53",
82 "v54", "v55", "v56", "v57", "v58", "v59", "v60", "v61", "v62",
83 "v63", "v64", "v65", "v66", "v67", "v68", "v69", "v70", "v71",
84 "v72", "v73", "v74", "v75", "v76", "v77", "v78", "v79", "v80",
85 "v81", "v82", "v83", "v84", "v85", "v86", "v87", "v88", "v89",
86 "v90", "v91", "v92", "v93", "v94", "v95", "v96", "v97", "v98",
87 "v99", "v100", "v101", "v102", "v103", "v104", "v105", "v106", "v107",
88 "v108", "v109", "v110", "v111", "v112", "v113", "v114", "v115", "v116",
89 "v117", "v118", "v119", "v120", "v121", "v122", "v123", "v124", "v125",
90 "v126", "v127", "v128", "v129", "v130", "v131", "v132", "v133", "v134",
91 "v135", "v136", "v137", "v138", "v139", "v140", "v141", "v142", "v143",
92 "v144", "v145", "v146", "v147", "v148", "v149", "v150", "v151", "v152",
93 "v153", "v154", "v155", "v156", "v157", "v158", "v159", "v160", "v161",
94 "v162", "v163", "v164", "v165", "v166", "v167", "v168", "v169", "v170",
95 "v171", "v172", "v173", "v174", "v175", "v176", "v177", "v178", "v179",
96 "v180", "v181", "v182", "v183", "v184", "v185", "v186", "v187", "v188",
97 "v189", "v190", "v191", "v192", "v193", "v194", "v195", "v196", "v197",
98 "v198", "v199", "v200", "v201", "v202", "v203", "v204", "v205", "v206",
99 "v207", "v208", "v209", "v210", "v211", "v212", "v213", "v214", "v215",
100 "v216", "v217", "v218", "v219", "v220", "v221", "v222", "v223", "v224",
101 "v225", "v226", "v227", "v228", "v229", "v230", "v231", "v232", "v233",
102 "v234", "v235", "v236", "v237", "v238", "v239", "v240", "v241", "v242",
103 "v243", "v244", "v245", "v246", "v247", "v248", "v249", "v250", "v251",
104 "v252", "v253", "v254", "v255", "s0", "s1", "s2", "s3", "s4",
105 "s5", "s6", "s7", "s8", "s9", "s10", "s11", "s12", "s13",
106 "s14", "s15", "s16", "s17", "s18", "s19", "s20", "s21", "s22",
107 "s23", "s24", "s25", "s26", "s27", "s28", "s29", "s30", "s31",
108 "s32", "s33", "s34", "s35", "s36", "s37", "s38", "s39", "s40",
109 "s41", "s42", "s43", "s44", "s45", "s46", "s47", "s48", "s49",
110 "s50", "s51", "s52", "s53", "s54", "s55", "s56", "s57", "s58",
111 "s59", "s60", "s61", "s62", "s63", "s64", "s65", "s66", "s67",
112 "s68", "s69", "s70", "s71", "s72", "s73", "s74", "s75", "s76",
113 "s77", "s78", "s79", "s80", "s81", "s82", "s83", "s84", "s85",
114 "s86", "s87", "s88", "s89", "s90", "s91", "s92", "s93", "s94",
115 "s95", "s96", "s97", "s98", "s99", "s100", "s101", "s102", "s103",
116 "s104", "s105", "s106", "s107", "s108", "s109", "s110", "s111", "s112",
117 "s113", "s114", "s115", "s116", "s117", "s118", "s119", "s120", "s121",
118 "s122", "s123", "s124", "s125", "s126", "s127", "exec", "vcc", "scc",
119 "m0", "flat_scratch", "exec_lo", "exec_hi", "vcc_lo", "vcc_hi",
120 "flat_scratch_lo", "flat_scratch_hi"
// Returns a non-owning ArrayRef view over the static GCCRegNames table.
123 ArrayRef<const char *> AMDGPUTargetInfo::getGCCRegNames() const {
124 return llvm::makeArrayRef(GCCRegNames);
// Seeds Features with the feature flags implied by the processor name CPU and
// then hands over to TargetInfo::initFeatureMap(), whose result is returned.
//
// For an AMDGCN triple the name is parsed with parseArchAMDGCN(), otherwise
// with parseArchR600(); each parse result is dispatched through a switch whose
// arms set groups of Features[...] entries to true.
//
// \param Features   map of feature name -> enabled flag, filled in place.
// \param Diags      forwarded unchanged to TargetInfo::initFeatureMap().
// \param CPU        processor name to parse.
// \param FeatureVec forwarded unchanged to TargetInfo::initFeatureMap().
//
// NOTE(review): the `case` labels, the `break`/fallthrough statements and the
// `else` that separates the AMDGCN and R600 switches are not visible in this
// view, so which processor selects which group of flags below cannot be
// determined from here -- confirm against the complete function.
127 bool AMDGPUTargetInfo::initFeatureMap(
128 llvm::StringMap<bool> &Features, DiagnosticsEngine &Diags, StringRef CPU,
129 const std::vector<std::string> &FeatureVec) const {
131 using namespace llvm::AMDGPU;
133 // XXX - What does the member GPU mean if a device name string is passed here?
134 if (isAMDGCN(getTriple())) {
135 switch (llvm::AMDGPU::parseArchAMDGCN(CPU)) {
138 Features["dot1-insts"] = true;
139 Features["dot2-insts"] = true;
140 Features["dot5-insts"] = true;
141 Features["dot6-insts"] = true;
144 Features["dl-insts"] = true;
145 Features["ci-insts"] = true;
146 Features["flat-address-space"] = true;
147 Features["16-bit-insts"] = true;
148 Features["dpp"] = true;
149 Features["gfx8-insts"] = true;
150 Features["gfx9-insts"] = true;
151 Features["gfx10-insts"] = true;
152 Features["s-memrealtime"] = true;
155 Features["dot3-insts"] = true;
156 Features["dot4-insts"] = true;
157 Features["dot5-insts"] = true;
158 Features["dot6-insts"] = true;
161 Features["dl-insts"] = true;
162 Features["dot1-insts"] = true;
163 Features["dot2-insts"] = true;
169 Features["gfx9-insts"] = true;
175 Features["gfx8-insts"] = true;
176 Features["16-bit-insts"] = true;
177 Features["dpp"] = true;
178 Features["s-memrealtime"] = true;
185 Features["ci-insts"] = true;
186 Features["flat-address-space"] = true;
// An AMDGCN parse result with no matching arm is treated as a programming
// error.
194 llvm_unreachable("Unhandled GPU!");
// R600 path: the visible part of this switch sets no feature flags.
200 switch (llvm::AMDGPU::parseArchR600(CPU)) {
205 // TODO: Add fp64 when implemented.
221 llvm_unreachable("Unhandled GPU!");
// Let the generic implementation process FeatureVec on top of the defaults
// seeded above.
225 return TargetInfo::initFeatureMap(Features, Diags, CPU, FeatureVec);
// Appends default denormal-handling features to TargetOpts.Features unless
// the corresponding feature was already written explicitly (with either a
// '+' or a '-' prefix) in TargetOpts.FeaturesAsWritten.
//
// \param CGOpts     supplies FlushDenorm, which forces "-fp32-denormals".
// \param TargetOpts feature lists; FeaturesAsWritten is read, Features is
//                   appended to.
228 void AMDGPUTargetInfo::adjustTargetOptions(const CodeGenOptions &CGOpts,
229 TargetOptions &TargetOpts) const {
// Despite their names, these flags record whether the feature was *mentioned*
// explicitly (enabled or disabled), not whether denormals are enabled.
230 bool hasFP32Denormals = false;
231 bool hasFP64Denormals = false;
233 for (auto &I : TargetOpts.FeaturesAsWritten) {
234 if (I == "+fp32-denormals" || I == "-fp32-denormals")
235 hasFP32Denormals = true;
236 if (I == "+fp64-fp16-denormals" || I == "-fp64-fp16-denormals")
237 hasFP64Denormals = true;
// No explicit fp32 setting: choose '+' only when hasFastFMAF() and
// hasFullRateDenormalsF32() both hold and FlushDenorm is off; otherwise '-'.
// NOTE(review): the tail of this push_back expression (how the Twine is
// turned into a string, and the closing parenthesis) is not visible here.
239 if (!hasFP32Denormals)
240 TargetOpts.Features.push_back(
241 (Twine(hasFastFMAF() && hasFullRateDenormalsF32() && !CGOpts.FlushDenorm
242 ? '+' : '-') + Twine("fp32-denormals"))
244 // Never flush fp64 or fp16 denormals.
245 if (!hasFP64Denormals && hasFP64())
246 TargetOpts.Features.push_back("+fp64-fp16-denormals");
// Fills Values with the processor names accepted for this target by
// delegating to the llvm::AMDGPU arch-list helpers.
//
// \param Values output vector handed to the helper(s).
//
// NOTE(review): no `else` is visible between the two calls below; as shown,
// the R600 list would be added regardless of the triple. Presumably the R600
// call is the alternative branch -- confirm against the complete function.
249 void AMDGPUTargetInfo::fillValidCPUList(
250 SmallVectorImpl<StringRef> &Values) const {
251 if (isAMDGCN(getTriple()))
252 llvm::AMDGPU::fillValidArchListAMDGCN(Values);
254 llvm::AMDGPU::fillValidArchListR600(Values);
// Points AddrSpaceMap at one of the two static address space tables.
//
// \param DefaultIsPrivate when true AMDGPUDefIsPrivMap is installed, otherwise
//                         AMDGPUDefIsGenMap.
257 void AMDGPUTargetInfo::setAddressSpaceMap(bool DefaultIsPrivate) {
258 AddrSpaceMap = DefaultIsPrivate ? &AMDGPUDefIsPrivMap : &AMDGPUDefIsGenMap;
// Constructs the target description for an AMDGCN or R600 triple.
//
// \param Triple target triple; isAMDGCN(Triple) selects between the AMDGCN
//               and R600 variants of every choice made below.
// \param Opts   target options; only Opts.CPU is read in the visible code.
261 AMDGPUTargetInfo::AMDGPUTargetInfo(const llvm::Triple &Triple,
262 const TargetOptions &Opts)
263 : TargetInfo(Triple),
// GPUKind is the result of parsing Opts.CPU with the parser that matches the
// triple; GPUFeatures is the arch attribute value looked up for that kind.
264 GPUKind(isAMDGCN(Triple) ?
265 llvm::AMDGPU::parseArchAMDGCN(Opts.CPU) :
266 llvm::AMDGPU::parseArchR600(Opts.CPU)),
267 GPUFeatures(isAMDGCN(Triple) ?
268 llvm::AMDGPU::getArchAttrAMDGCN(GPUKind) :
269 llvm::AMDGPU::getArchAttrR600(GPUKind)) {
270 resetDataLayout(isAMDGCN(getTriple()) ? DataLayoutStringAMDGCN
271 : DataLayoutStringR600);
// Both layout strings are expected to place allocas in the Private address
// space.
272 assert(DataLayout->getAllocaAddrSpace() == Private);
// The private-default map is chosen when the OS is Mesa3D or a second
// condition holds.
// NOTE(review): the right-hand operand of the `||` below is not visible in
// this view -- confirm the full condition.
274 setAddressSpaceMap(Triple.getOS() == llvm::Triple::Mesa3D ||
276 UseAddrSpaceMapMangling = true;
278 HasLegalHalfType = true;
281 // Set pointer width and alignment for target address space 0.
282 PointerWidth = PointerAlign = DataLayout->getPointerSizeInBits();
// With 64-bit maximum pointer width, `long` becomes 64 bits wide and the
// size/ptrdiff/intptr types are all based on `long`.
283 if (getMaxPointerWidth() == 64) {
284 LongWidth = LongAlign = 64;
285 SizeType = UnsignedLong;
286 PtrDiffType = SignedLong;
287 IntPtrType = SignedLong;
290 MaxAtomicPromoteWidth = MaxAtomicInlineWidth = 64;
// Applies the generic TargetInfo::adjust() for Opts and then re-selects the
// address space map: the private-default map is used when Opts.OpenCL is set
// or the triple is not AMDGCN; otherwise the other map is used.
//
// \param Opts language options; forwarded to the base class, and Opts.OpenCL
//             is read here.
293 void AMDGPUTargetInfo::adjust(LangOptions &Opts) {
294 TargetInfo::adjust(Opts);
295 // ToDo: There are still a few places using default address space as private
296 // address space in OpenCL, which needs to be cleaned up, then Opts.OpenCL
297 // can be removed from the following line.
298 setAddressSpaceMap(/*DefaultIsPrivate=*/Opts.OpenCL ||
299 !isAMDGCN(getTriple()));
// Returns a non-owning view over BuiltinInfo. The length passed to
// makeArrayRef is clang::AMDGPU::LastTSBuiltin - Builtin::FirstTSBuiltin.
302 ArrayRef<Builtin::Info> AMDGPUTargetInfo::getTargetBuiltins() const {
303 return llvm::makeArrayRef(BuiltinInfo, clang::AMDGPU::LastTSBuiltin -
304 Builtin::FirstTSBuiltin);
// Emits the predefined macros for this target through Builder.
//
// \param Opts    language options (not read in the visible code).
// \param Builder receives one defineMacro() call per macro.
//
// NOTE(review): several control-flow lines are not visible in this view: the
// `else` that presumably separates __AMDGCN__ from __R600__, and the
// conditions guarding the feature macros at the end. As shown, every macro
// below would be defined unconditionally -- confirm against the complete
// function.
307 void AMDGPUTargetInfo::getTargetDefines(const LangOptions &Opts,
308 MacroBuilder &Builder) const {
// Defined for every AMDGPU target.
309 Builder.defineMacro("__AMD__");
310 Builder.defineMacro("__AMDGPU__");
312 if (isAMDGCN(getTriple()))
313 Builder.defineMacro("__AMDGCN__");
315 Builder.defineMacro("__R600__");
// For a known GPU kind, define "__<canonical arch name>__", using the arch
// name lookup that matches the triple.
317 if (GPUKind != llvm::AMDGPU::GK_NONE) {
318 StringRef CanonName = isAMDGCN(getTriple()) ?
319 getArchNameAMDGCN(GPUKind) : getArchNameR600(GPUKind);
320 Builder.defineMacro(Twine("__") + Twine(CanonName) + Twine("__"));
323 // TODO: __HAS_FMAF__, __HAS_LDEXPF__, __HAS_FP64__ are deprecated and will be
324 // removed in the near future.
326 Builder.defineMacro("__HAS_FMAF__");
328 Builder.defineMacro("FP_FAST_FMAF");
330 Builder.defineMacro("__HAS_LDEXPF__");
332 Builder.defineMacro("__HAS_FP64__");
334 Builder.defineMacro("FP_FAST_FMA");
// Associates this target with the auxiliary target Aux while keeping this
// target's own long double and 128-bit floating point formats.
//
// \param Aux auxiliary target; dereferenced by the asserts below, so it must
//            not be null.
337 void AMDGPUTargetInfo::setAuxTarget(const TargetInfo *Aux) {
// Half, float and double must already agree between the two targets.
338 assert(HalfFormat == Aux->HalfFormat);
339 assert(FloatFormat == Aux->FloatFormat);
340 assert(DoubleFormat == Aux->DoubleFormat);
342 // On x86_64 long double is 80-bit extended precision format, which is
343 // not supported by AMDGPU. 128-bit floating point format is also not
344 // supported by AMDGPU. Therefore keep its own format for these two types.
345 auto SaveLongDoubleFormat = LongDoubleFormat;
346 auto SaveFloat128Format = Float128Format;
// NOTE(review): the statement that runs between the save above and the
// restore below is not visible in this view; as shown, the save/restore pair
// has no effect. Presumably a step that takes settings from Aux overwrites
// these two fields here -- confirm against the complete function.
348 LongDoubleFormat = SaveLongDoubleFormat;
349 Float128Format = SaveFloat128Format;