contrib/llvm-project/clang/lib/Basic/Targets/AMDGPU.cpp

   1 //===--- AMDGPU.cpp - Implement AMDGPU target feature support -------------===//
   2 //
   3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
   4 // See https://llvm.org/LICENSE.txt for license information.
   5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
   6 //
   7 //===----------------------------------------------------------------------===//
   8 //
   9 // This file implements AMDGPU TargetInfo objects.
  10 //
  11 //===----------------------------------------------------------------------===//
  12
  13 #include "AMDGPU.h"
  14 #include "clang/Basic/Builtins.h"
  15 #include "clang/Basic/CodeGenOptions.h"
  16 #include "clang/Basic/LangOptions.h"
  17 #include "clang/Basic/MacroBuilder.h"
  18 #include "clang/Basic/TargetBuiltins.h"
  19 #include "llvm/ADT/StringSwitch.h"
  20 #include "llvm/IR/DataLayout.h"
  21
  22 using namespace clang;
  23 using namespace clang::targets;
  24
  25 namespace clang {
  26 namespace targets {
  27
  28 // If you edit the description strings, make sure you update
  29 // getPointerWidthV().
  30
  31 static const char *const DataLayoutStringR600 =
  32     "e-p:32:32-i64:64-v16:16-v24:32-v32:32-v48:64-v96:128"
  33     "-v192:256-v256:256-v512:512-v1024:1024-v2048:2048-n32:64-S32-A5";
  34
  35 static const char *const DataLayoutStringAMDGCN =
  36     "e-p:64:64-p1:64:64-p2:32:32-p3:32:32-p4:64:64-p5:32:32-p6:32:32"
  37     "-i64:64-v16:16-v24:32-v32:32-v48:64-v96:128"
  38     "-v192:256-v256:256-v512:512-v1024:1024-v2048:2048-n32:64-S32-A5"
  39     "-ni:7";
  40
  41 const LangASMap AMDGPUTargetInfo::AMDGPUDefIsGenMap = {
  42     Generic,  // Default
  43     Global,   // opencl_global
  44     Local,    // opencl_local
  45     Constant, // opencl_constant
  46     Private,  // opencl_private
  47     Generic,  // opencl_generic
  48     Global,   // cuda_device
  49     Constant, // cuda_constant
  50     Local,    // cuda_shared
  51     Generic,  // ptr32_sptr
  52     Generic,  // ptr32_uptr
  53     Generic   // ptr64
  54 };
  55
  56 const LangASMap AMDGPUTargetInfo::AMDGPUDefIsPrivMap = {
  57     Private,  // Default
  58     Global,   // opencl_global
  59     Local,    // opencl_local
  60     Constant, // opencl_constant
  61     Private,  // opencl_private
  62     Generic,  // opencl_generic
  63     Global,   // cuda_device
  64     Constant, // cuda_constant
  65     Local,    // cuda_shared
  66     Generic,  // ptr32_sptr
  67     Generic,  // ptr32_uptr
  68     Generic   // ptr64
  69
  70 };
  71 } // namespace targets
  72 } // namespace clang
  73
// Descriptor table for the AMDGPU builtins. The entries are generated by
// including BuiltinsAMDGPU.def with BUILTIN and TARGET_BUILTIN defined as
// below, so every builtin listed in that file expands to one initializer here.
const Builtin::Info AMDGPUTargetInfo::BuiltinInfo[] = {
// Plain builtin: the last field (which TARGET_BUILTIN fills with FEATURE) is
// left as nullptr.
#define BUILTIN(ID, TYPE, ATTRS)                                               \
  {#ID, TYPE, ATTRS, nullptr, ALL_LANGUAGES, nullptr},
// Builtin that additionally records a FEATURE string in the last field.
#define TARGET_BUILTIN(ID, TYPE, ATTRS, FEATURE)                               \
  {#ID, TYPE, ATTRS, nullptr, ALL_LANGUAGES, FEATURE},
#include "clang/Basic/BuiltinsAMDGPU.def"
};
  81
  82 const char *const AMDGPUTargetInfo::GCCRegNames[] = {
  83   "v0", "v1", "v2", "v3", "v4", "v5", "v6", "v7", "v8",
  84   "v9", "v10", "v11", "v12", "v13", "v14", "v15", "v16", "v17",
  85   "v18", "v19", "v20", "v21", "v22", "v23", "v24", "v25", "v26",
  86   "v27", "v28", "v29", "v30", "v31", "v32", "v33", "v34", "v35",
  87   "v36", "v37", "v38", "v39", "v40", "v41", "v42", "v43", "v44",
  88   "v45", "v46", "v47", "v48", "v49", "v50", "v51", "v52", "v53",
  89   "v54", "v55", "v56", "v57", "v58", "v59", "v60", "v61", "v62",
  90   "v63", "v64", "v65", "v66", "v67", "v68", "v69", "v70", "v71",
  91   "v72", "v73", "v74", "v75", "v76", "v77", "v78", "v79", "v80",
  92   "v81", "v82", "v83", "v84", "v85", "v86", "v87", "v88", "v89",
  93   "v90", "v91", "v92", "v93", "v94", "v95", "v96", "v97", "v98",
  94   "v99", "v100", "v101", "v102", "v103", "v104", "v105", "v106", "v107",
  95   "v108", "v109", "v110", "v111", "v112", "v113", "v114", "v115", "v116",
  96   "v117", "v118", "v119", "v120", "v121", "v122", "v123", "v124", "v125",
  97   "v126", "v127", "v128", "v129", "v130", "v131", "v132", "v133", "v134",
  98   "v135", "v136", "v137", "v138", "v139", "v140", "v141", "v142", "v143",
  99   "v144", "v145", "v146", "v147", "v148", "v149", "v150", "v151", "v152",
 100   "v153", "v154", "v155", "v156", "v157", "v158", "v159", "v160", "v161",
 101   "v162", "v163", "v164", "v165", "v166", "v167", "v168", "v169", "v170",
 102   "v171", "v172", "v173", "v174", "v175", "v176", "v177", "v178", "v179",
 103   "v180", "v181", "v182", "v183", "v184", "v185", "v186", "v187", "v188",
 104   "v189", "v190", "v191", "v192", "v193", "v194", "v195", "v196", "v197",
 105   "v198", "v199", "v200", "v201", "v202", "v203", "v204", "v205", "v206",
 106   "v207", "v208", "v209", "v210", "v211", "v212", "v213", "v214", "v215",
 107   "v216", "v217", "v218", "v219", "v220", "v221", "v222", "v223", "v224",
 108   "v225", "v226", "v227", "v228", "v229", "v230", "v231", "v232", "v233",
 109   "v234", "v235", "v236", "v237", "v238", "v239", "v240", "v241", "v242",
 110   "v243", "v244", "v245", "v246", "v247", "v248", "v249", "v250", "v251",
 111   "v252", "v253", "v254", "v255", "s0", "s1", "s2", "s3", "s4",
 112   "s5", "s6", "s7", "s8", "s9", "s10", "s11", "s12", "s13",
 113   "s14", "s15", "s16", "s17", "s18", "s19", "s20", "s21", "s22",
 114   "s23", "s24", "s25", "s26", "s27", "s28", "s29", "s30", "s31",
 115   "s32", "s33", "s34", "s35", "s36", "s37", "s38", "s39", "s40",
 116   "s41", "s42", "s43", "s44", "s45", "s46", "s47", "s48", "s49",
 117   "s50", "s51", "s52", "s53", "s54", "s55", "s56", "s57", "s58",
 118   "s59", "s60", "s61", "s62", "s63", "s64", "s65", "s66", "s67",
 119   "s68", "s69", "s70", "s71", "s72", "s73", "s74", "s75", "s76",
 120   "s77", "s78", "s79", "s80", "s81", "s82", "s83", "s84", "s85",
 121   "s86", "s87", "s88", "s89", "s90", "s91", "s92", "s93", "s94",
 122   "s95", "s96", "s97", "s98", "s99", "s100", "s101", "s102", "s103",
 123   "s104", "s105", "s106", "s107", "s108", "s109", "s110", "s111", "s112",
 124   "s113", "s114", "s115", "s116", "s117", "s118", "s119", "s120", "s121",
 125   "s122", "s123", "s124", "s125", "s126", "s127", "exec", "vcc", "scc",
 126   "m0", "flat_scratch", "exec_lo", "exec_hi", "vcc_lo", "vcc_hi",
 127   "flat_scratch_lo", "flat_scratch_hi"
 128 };
 129
 130 ArrayRef<const char *> AMDGPUTargetInfo::getGCCRegNames() const {
 131   return llvm::makeArrayRef(GCCRegNames);
 132 }
 133
 134 bool AMDGPUTargetInfo::initFeatureMap(
 135     llvm::StringMap<bool> &Features, DiagnosticsEngine &Diags, StringRef CPU,
 136     const std::vector<std::string> &FeatureVec) const {
 137
 138   using namespace llvm::AMDGPU;
 139
 140   // XXX - What does the member GPU mean if device name string passed here?
 141   if (isAMDGCN(getTriple())) {
 142     switch (llvm::AMDGPU::parseArchAMDGCN(CPU)) {
 143     case GK_GFX1012:
 144     case GK_GFX1011:
 145       Features["dot1-insts"] = true;
 146       Features["dot2-insts"] = true;
 147       Features["dot5-insts"] = true;
 148       Features["dot6-insts"] = true;
 149       LLVM_FALLTHROUGH;
 150     case GK_GFX1010:
 151       Features["dl-insts"] = true;
 152       Features["ci-insts"] = true;
 153       Features["flat-address-space"] = true;
 154       Features["16-bit-insts"] = true;
 155       Features["dpp"] = true;
 156       Features["gfx8-insts"] = true;
 157       Features["gfx9-insts"] = true;
 158       Features["gfx10-insts"] = true;
 159       Features["s-memrealtime"] = true;
 160       break;
 161     case GK_GFX908:
 162       Features["dot3-insts"] = true;
 163       Features["dot4-insts"] = true;
 164       Features["dot5-insts"] = true;
 165       Features["dot6-insts"] = true;
 166       LLVM_FALLTHROUGH;
 167     case GK_GFX906:
 168       Features["dl-insts"] = true;
 169       Features["dot1-insts"] = true;
 170       Features["dot2-insts"] = true;
 171       LLVM_FALLTHROUGH;
 172     case GK_GFX909:
 173     case GK_GFX904:
 174     case GK_GFX902:
 175     case GK_GFX900:
 176       Features["gfx9-insts"] = true;
 177       LLVM_FALLTHROUGH;
 178     case GK_GFX810:
 179     case GK_GFX803:
 180     case GK_GFX802:
 181     case GK_GFX801:
 182       Features["gfx8-insts"] = true;
 183       Features["16-bit-insts"] = true;
 184       Features["dpp"] = true;
 185       Features["s-memrealtime"] = true;
 186       LLVM_FALLTHROUGH;
 187     case GK_GFX704:
 188     case GK_GFX703:
 189     case GK_GFX702:
 190     case GK_GFX701:
 191     case GK_GFX700:
 192       Features["ci-insts"] = true;
 193       Features["flat-address-space"] = true;
 194       LLVM_FALLTHROUGH;
 195     case GK_GFX601:
 196     case GK_GFX600:
 197       break;
 198     case GK_NONE:
 199       break;
 200     default:
 201       llvm_unreachable("Unhandled GPU!");
 202     }
 203   } else {
 204     if (CPU.empty())
 205       CPU = "r600";
 206
 207     switch (llvm::AMDGPU::parseArchR600(CPU)) {
 208     case GK_CAYMAN:
 209     case GK_CYPRESS:
 210     case GK_RV770:
 211     case GK_RV670:
 212       // TODO: Add fp64 when implemented.
 213       break;
 214     case GK_TURKS:
 215     case GK_CAICOS:
 216     case GK_BARTS:
 217     case GK_SUMO:
 218     case GK_REDWOOD:
 219     case GK_JUNIPER:
 220     case GK_CEDAR:
 221     case GK_RV730:
 222     case GK_RV710:
 223     case GK_RS880:
 224     case GK_R630:
 225     case GK_R600:
 226       break;
 227     default:
 228       llvm_unreachable("Unhandled GPU!");
 229     }
 230   }
 231
 232   return TargetInfo::initFeatureMap(Features, Diags, CPU, FeatureVec);
 233 }
 234
 235 void AMDGPUTargetInfo::adjustTargetOptions(const CodeGenOptions &CGOpts,
 236                                            TargetOptions &TargetOpts) const {
 237   bool hasFP32Denormals = false;
 238   bool hasFP64Denormals = false;
 239
 240   for (auto &I : TargetOpts.FeaturesAsWritten) {
 241     if (I == "+fp32-denormals" || I == "-fp32-denormals")
 242       hasFP32Denormals = true;
 243     if (I == "+fp64-fp16-denormals" || I == "-fp64-fp16-denormals")
 244       hasFP64Denormals = true;
 245   }
 246   if (!hasFP32Denormals)
 247     TargetOpts.Features.push_back(
 248       (Twine(hasFastFMAF() && hasFullRateDenormalsF32() && !CGOpts.FlushDenorm
 249              ? '+' : '-') + Twine("fp32-denormals"))
 250             .str());
 251   // Always do not flush fp64 or fp16 denorms.
 252   if (!hasFP64Denormals && hasFP64())
 253     TargetOpts.Features.push_back("+fp64-fp16-denormals");
 254 }
 255
 256 void AMDGPUTargetInfo::fillValidCPUList(
 257     SmallVectorImpl<StringRef> &Values) const {
 258   if (isAMDGCN(getTriple()))
 259     llvm::AMDGPU::fillValidArchListAMDGCN(Values);
 260   else
 261     llvm::AMDGPU::fillValidArchListR600(Values);
 262 }
 263
 264 void AMDGPUTargetInfo::setAddressSpaceMap(bool DefaultIsPrivate) {
 265   AddrSpaceMap = DefaultIsPrivate ? &AMDGPUDefIsPrivMap : &AMDGPUDefIsGenMap;
 266 }
 267
 268 AMDGPUTargetInfo::AMDGPUTargetInfo(const llvm::Triple &Triple,
 269                                    const TargetOptions &Opts)
 270     : TargetInfo(Triple),
 271       GPUKind(isAMDGCN(Triple) ?
 272               llvm::AMDGPU::parseArchAMDGCN(Opts.CPU) :
 273               llvm::AMDGPU::parseArchR600(Opts.CPU)),
 274       GPUFeatures(isAMDGCN(Triple) ?
 275                   llvm::AMDGPU::getArchAttrAMDGCN(GPUKind) :
 276                   llvm::AMDGPU::getArchAttrR600(GPUKind)) {
 277   resetDataLayout(isAMDGCN(getTriple()) ? DataLayoutStringAMDGCN
 278                                         : DataLayoutStringR600);
 279   assert(DataLayout->getAllocaAddrSpace() == Private);
 280
 281   setAddressSpaceMap(Triple.getOS() == llvm::Triple::Mesa3D ||
 282                      !isAMDGCN(Triple));
 283   UseAddrSpaceMapMangling = true;
 284
 285   HasLegalHalfType = true;
 286   HasFloat16 = true;
 287
 288   // Set pointer width and alignment for target address space 0.
 289   PointerWidth = PointerAlign = DataLayout->getPointerSizeInBits();
 290   if (getMaxPointerWidth() == 64) {
 291     LongWidth = LongAlign = 64;
 292     SizeType = UnsignedLong;
 293     PtrDiffType = SignedLong;
 294     IntPtrType = SignedLong;
 295   }
 296
 297   MaxAtomicPromoteWidth = MaxAtomicInlineWidth = 64;
 298 }
 299
 300 void AMDGPUTargetInfo::adjust(LangOptions &Opts) {
 301   TargetInfo::adjust(Opts);
 302   // ToDo: There are still a few places using default address space as private
 303   // address space in OpenCL, which needs to be cleaned up, then Opts.OpenCL
 304   // can be removed from the following line.
 305   setAddressSpaceMap(/*DefaultIsPrivate=*/Opts.OpenCL ||
 306                      !isAMDGCN(getTriple()));
 307 }
 308
 309 ArrayRef<Builtin::Info> AMDGPUTargetInfo::getTargetBuiltins() const {
 310   return llvm::makeArrayRef(BuiltinInfo, clang::AMDGPU::LastTSBuiltin -
 311                                              Builtin::FirstTSBuiltin);
 312 }
 313
 314 void AMDGPUTargetInfo::getTargetDefines(const LangOptions &Opts,
 315                                         MacroBuilder &Builder) const {
 316   Builder.defineMacro("__AMD__");
 317   Builder.defineMacro("__AMDGPU__");
 318
 319   if (isAMDGCN(getTriple()))
 320     Builder.defineMacro("__AMDGCN__");
 321   else
 322     Builder.defineMacro("__R600__");
 323
 324   if (GPUKind != llvm::AMDGPU::GK_NONE) {
 325     StringRef CanonName = isAMDGCN(getTriple()) ?
 326       getArchNameAMDGCN(GPUKind) : getArchNameR600(GPUKind);
 327     Builder.defineMacro(Twine("__") + Twine(CanonName) + Twine("__"));
 328   }
 329
 330   // TODO: __HAS_FMAF__, __HAS_LDEXPF__, __HAS_FP64__ are deprecated and will be
 331   // removed in the near future.
 332   if (hasFMAF())
 333     Builder.defineMacro("__HAS_FMAF__");
 334   if (hasFastFMAF())
 335     Builder.defineMacro("FP_FAST_FMAF");
 336   if (hasLDEXPF())
 337     Builder.defineMacro("__HAS_LDEXPF__");
 338   if (hasFP64())
 339     Builder.defineMacro("__HAS_FP64__");
 340   if (hasFastFMA())
 341     Builder.defineMacro("FP_FAST_FMA");
 342 }
 343
 344 void AMDGPUTargetInfo::setAuxTarget(const TargetInfo *Aux) {
 345   assert(HalfFormat == Aux->HalfFormat);
 346   assert(FloatFormat == Aux->FloatFormat);
 347   assert(DoubleFormat == Aux->DoubleFormat);
 348
 349   // On x86_64 long double is 80-bit extended precision format, which is
 350   // not supported by AMDGPU. 128-bit floating point format is also not
 351   // supported by AMDGPU. Therefore keep its own format for these two types.
 352   auto SaveLongDoubleFormat = LongDoubleFormat;
 353   auto SaveFloat128Format = Float128Format;
 354   copyAuxTarget(Aux);
 355   LongDoubleFormat = SaveLongDoubleFormat;
 356   Float128Format = SaveFloat128Format;
 357 }