lib/Basic/Targets/AMDGPU.cpp

   1 //===--- AMDGPU.cpp - Implement AMDGPU target feature support -------------===//
   2 //
   3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
   4 // See https://llvm.org/LICENSE.txt for license information.
   5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
   6 //
   7 //===----------------------------------------------------------------------===//
   8 //
   9 // This file implements AMDGPU TargetInfo objects.
  10 //
  11 //===----------------------------------------------------------------------===//
  12
  13 #include "AMDGPU.h"
  14 #include "clang/Basic/Builtins.h"
  15 #include "clang/Basic/CodeGenOptions.h"
  16 #include "clang/Basic/LangOptions.h"
  17 #include "clang/Basic/MacroBuilder.h"
  18 #include "clang/Basic/TargetBuiltins.h"
  19 #include "llvm/ADT/StringSwitch.h"
  20 #include "llvm/IR/DataLayout.h"
  21
  22 using namespace clang;
  23 using namespace clang::targets;
  24
  25 namespace clang {
  26 namespace targets {
  27
  28 // If you edit the description strings, make sure you update
  29 // getPointerWidthV().
  30
/// LLVM data layout description installed by the AMDGPUTargetInfo constructor
/// when the triple is not amdgcn (the R600 family). It declares 32-bit
/// pointers ("p:32:32") and ends with "A5", the alloca address space that the
/// constructor asserts to be equal to Private.
static const char *const DataLayoutStringR600 =
    "e-p:32:32-i64:64-v16:16-v24:32-v32:32-v48:64-v96:128"
    "-v192:256-v256:256-v512:512-v1024:1024-v2048:2048-n32:64-S32-A5";
  34
/// LLVM data layout description installed by the AMDGPUTargetInfo constructor
/// when the triple is amdgcn. The default pointer is 64 bits wide ("p:64:64"),
/// address spaces 1-6 carry their own pointer sizes ("p1" .. "p6"), the alloca
/// address space is 5 ("A5"), and address space 7 is listed as non-integral
/// ("-ni:7").
static const char *const DataLayoutStringAMDGCN =
    "e-p:64:64-p1:64:64-p2:32:32-p3:32:32-p4:64:64-p5:32:32-p6:32:32"
    "-i64:64-v16:16-v24:32-v32:32-v48:64-v96:128"
    "-v192:256-v256:256-v512:512-v1024:1024-v2048:2048-n32:64-S32-A5"
    "-ni:7";
  40
/// Address space map selected by setAddressSpaceMap(false): the language's
/// default address space maps to the target's Generic address space.
///
/// The initializers are positional; the trailing comment on each entry names
/// the language address space that slot stands for, so the order must not be
/// changed.
const LangASMap AMDGPUTargetInfo::AMDGPUDefIsGenMap = {
    Generic,  // Default
    Global,   // opencl_global
    Local,    // opencl_local
    Constant, // opencl_constant
    Private,  // opencl_private
    Generic,  // opencl_generic
    Global,   // cuda_device
    Constant, // cuda_constant
    Local     // cuda_shared
};
  52
/// Address space map selected by setAddressSpaceMap(true): the language's
/// default address space maps to the target's Private address space. Every
/// other entry is the same as in AMDGPUDefIsGenMap.
///
/// The initializers are positional; the trailing comment on each entry names
/// the language address space that slot stands for, so the order must not be
/// changed.
const LangASMap AMDGPUTargetInfo::AMDGPUDefIsPrivMap = {
    Private,  // Default
    Global,   // opencl_global
    Local,    // opencl_local
    Constant, // opencl_constant
    Private,  // opencl_private
    Generic,  // opencl_generic
    Global,   // cuda_device
    Constant, // cuda_constant
    Local     // cuda_shared
};
  64 } // namespace targets
  65 } // namespace clang
  66
/// Table of AMDGPU builtins, produced by expanding every BUILTIN and
/// TARGET_BUILTIN entry of BuiltinsAMDGPU.def into one Builtin::Info
/// initializer. getTargetBuiltins() exposes this array to callers.
const Builtin::Info AMDGPUTargetInfo::BuiltinInfo[] = {
// Plain builtin: the last initializer (where TARGET_BUILTIN puts FEATURE) is
// nullptr.
#define BUILTIN(ID, TYPE, ATTRS)                                               \
  {#ID, TYPE, ATTRS, nullptr, ALL_LANGUAGES, nullptr},
// Builtin that additionally records the FEATURE string as its last initializer.
#define TARGET_BUILTIN(ID, TYPE, ATTRS, FEATURE)                               \
  {#ID, TYPE, ATTRS, nullptr, ALL_LANGUAGES, FEATURE},
#include "clang/Basic/BuiltinsAMDGPU.def"
};
  74
/// Register names handed out by getGCCRegNames(): v0-v255, then s0-s127, then
/// the named registers (exec, vcc, scc, m0, flat_scratch) followed by the
/// _lo/_hi halves of exec, vcc and flat_scratch.
const char *const AMDGPUTargetInfo::GCCRegNames[] = {
  "v0", "v1", "v2", "v3", "v4", "v5", "v6", "v7", "v8",
  "v9", "v10", "v11", "v12", "v13", "v14", "v15", "v16", "v17",
  "v18", "v19", "v20", "v21", "v22", "v23", "v24", "v25", "v26",
  "v27", "v28", "v29", "v30", "v31", "v32", "v33", "v34", "v35",
  "v36", "v37", "v38", "v39", "v40", "v41", "v42", "v43", "v44",
  "v45", "v46", "v47", "v48", "v49", "v50", "v51", "v52", "v53",
  "v54", "v55", "v56", "v57", "v58", "v59", "v60", "v61", "v62",
  "v63", "v64", "v65", "v66", "v67", "v68", "v69", "v70", "v71",
  "v72", "v73", "v74", "v75", "v76", "v77", "v78", "v79", "v80",
  "v81", "v82", "v83", "v84", "v85", "v86", "v87", "v88", "v89",
  "v90", "v91", "v92", "v93", "v94", "v95", "v96", "v97", "v98",
  "v99", "v100", "v101", "v102", "v103", "v104", "v105", "v106", "v107",
  "v108", "v109", "v110", "v111", "v112", "v113", "v114", "v115", "v116",
  "v117", "v118", "v119", "v120", "v121", "v122", "v123", "v124", "v125",
  "v126", "v127", "v128", "v129", "v130", "v131", "v132", "v133", "v134",
  "v135", "v136", "v137", "v138", "v139", "v140", "v141", "v142", "v143",
  "v144", "v145", "v146", "v147", "v148", "v149", "v150", "v151", "v152",
  "v153", "v154", "v155", "v156", "v157", "v158", "v159", "v160", "v161",
  "v162", "v163", "v164", "v165", "v166", "v167", "v168", "v169", "v170",
  "v171", "v172", "v173", "v174", "v175", "v176", "v177", "v178", "v179",
  "v180", "v181", "v182", "v183", "v184", "v185", "v186", "v187", "v188",
  "v189", "v190", "v191", "v192", "v193", "v194", "v195", "v196", "v197",
  "v198", "v199", "v200", "v201", "v202", "v203", "v204", "v205", "v206",
  "v207", "v208", "v209", "v210", "v211", "v212", "v213", "v214", "v215",
  "v216", "v217", "v218", "v219", "v220", "v221", "v222", "v223", "v224",
  "v225", "v226", "v227", "v228", "v229", "v230", "v231", "v232", "v233",
  "v234", "v235", "v236", "v237", "v238", "v239", "v240", "v241", "v242",
  "v243", "v244", "v245", "v246", "v247", "v248", "v249", "v250", "v251",
  "v252", "v253", "v254", "v255", "s0", "s1", "s2", "s3", "s4",
  "s5", "s6", "s7", "s8", "s9", "s10", "s11", "s12", "s13",
  "s14", "s15", "s16", "s17", "s18", "s19", "s20", "s21", "s22",
  "s23", "s24", "s25", "s26", "s27", "s28", "s29", "s30", "s31",
  "s32", "s33", "s34", "s35", "s36", "s37", "s38", "s39", "s40",
  "s41", "s42", "s43", "s44", "s45", "s46", "s47", "s48", "s49",
  "s50", "s51", "s52", "s53", "s54", "s55", "s56", "s57", "s58",
  "s59", "s60", "s61", "s62", "s63", "s64", "s65", "s66", "s67",
  "s68", "s69", "s70", "s71", "s72", "s73", "s74", "s75", "s76",
  "s77", "s78", "s79", "s80", "s81", "s82", "s83", "s84", "s85",
  "s86", "s87", "s88", "s89", "s90", "s91", "s92", "s93", "s94",
  "s95", "s96", "s97", "s98", "s99", "s100", "s101", "s102", "s103",
  "s104", "s105", "s106", "s107", "s108", "s109", "s110", "s111", "s112",
  "s113", "s114", "s115", "s116", "s117", "s118", "s119", "s120", "s121",
  "s122", "s123", "s124", "s125", "s126", "s127", "exec", "vcc", "scc",
  "m0", "flat_scratch", "exec_lo", "exec_hi", "vcc_lo", "vcc_hi",
  "flat_scratch_lo", "flat_scratch_hi"
};
 122
 123 ArrayRef<const char *> AMDGPUTargetInfo::getGCCRegNames() const {
 124   return llvm::makeArrayRef(GCCRegNames);
 125 }
 126
 127 bool AMDGPUTargetInfo::initFeatureMap(
 128     llvm::StringMap<bool> &Features, DiagnosticsEngine &Diags, StringRef CPU,
 129     const std::vector<std::string> &FeatureVec) const {
 130
 131   using namespace llvm::AMDGPU;
 132
 133   // XXX - What does the member GPU mean if device name string passed here?
 134   if (isAMDGCN(getTriple())) {
 135     switch (llvm::AMDGPU::parseArchAMDGCN(CPU)) {
 136     case GK_GFX1012:
 137     case GK_GFX1011:
 138       Features["dot1-insts"] = true;
 139       Features["dot2-insts"] = true;
 140       Features["dot5-insts"] = true;
 141       Features["dot6-insts"] = true;
 142       LLVM_FALLTHROUGH;
 143     case GK_GFX1010:
 144       Features["dl-insts"] = true;
 145       Features["ci-insts"] = true;
 146       Features["flat-address-space"] = true;
 147       Features["16-bit-insts"] = true;
 148       Features["dpp"] = true;
 149       Features["gfx8-insts"] = true;
 150       Features["gfx9-insts"] = true;
 151       Features["gfx10-insts"] = true;
 152       Features["s-memrealtime"] = true;
 153       break;
 154     case GK_GFX908:
 155       Features["dot3-insts"] = true;
 156       Features["dot4-insts"] = true;
 157       Features["dot5-insts"] = true;
 158       Features["dot6-insts"] = true;
 159       LLVM_FALLTHROUGH;
 160     case GK_GFX906:
 161       Features["dl-insts"] = true;
 162       Features["dot1-insts"] = true;
 163       Features["dot2-insts"] = true;
 164       LLVM_FALLTHROUGH;
 165     case GK_GFX909:
 166     case GK_GFX904:
 167     case GK_GFX902:
 168     case GK_GFX900:
 169       Features["gfx9-insts"] = true;
 170       LLVM_FALLTHROUGH;
 171     case GK_GFX810:
 172     case GK_GFX803:
 173     case GK_GFX802:
 174     case GK_GFX801:
 175       Features["gfx8-insts"] = true;
 176       Features["16-bit-insts"] = true;
 177       Features["dpp"] = true;
 178       Features["s-memrealtime"] = true;
 179       LLVM_FALLTHROUGH;
 180     case GK_GFX704:
 181     case GK_GFX703:
 182     case GK_GFX702:
 183     case GK_GFX701:
 184     case GK_GFX700:
 185       Features["ci-insts"] = true;
 186       Features["flat-address-space"] = true;
 187       LLVM_FALLTHROUGH;
 188     case GK_GFX601:
 189     case GK_GFX600:
 190       break;
 191     case GK_NONE:
 192       break;
 193     default:
 194       llvm_unreachable("Unhandled GPU!");
 195     }
 196   } else {
 197     if (CPU.empty())
 198       CPU = "r600";
 199
 200     switch (llvm::AMDGPU::parseArchR600(CPU)) {
 201     case GK_CAYMAN:
 202     case GK_CYPRESS:
 203     case GK_RV770:
 204     case GK_RV670:
 205       // TODO: Add fp64 when implemented.
 206       break;
 207     case GK_TURKS:
 208     case GK_CAICOS:
 209     case GK_BARTS:
 210     case GK_SUMO:
 211     case GK_REDWOOD:
 212     case GK_JUNIPER:
 213     case GK_CEDAR:
 214     case GK_RV730:
 215     case GK_RV710:
 216     case GK_RS880:
 217     case GK_R630:
 218     case GK_R600:
 219       break;
 220     default:
 221       llvm_unreachable("Unhandled GPU!");
 222     }
 223   }
 224
 225   return TargetInfo::initFeatureMap(Features, Diags, CPU, FeatureVec);
 226 }
 227
 228 void AMDGPUTargetInfo::adjustTargetOptions(const CodeGenOptions &CGOpts,
 229                                            TargetOptions &TargetOpts) const {
 230   bool hasFP32Denormals = false;
 231   bool hasFP64Denormals = false;
 232
 233   for (auto &I : TargetOpts.FeaturesAsWritten) {
 234     if (I == "+fp32-denormals" || I == "-fp32-denormals")
 235       hasFP32Denormals = true;
 236     if (I == "+fp64-fp16-denormals" || I == "-fp64-fp16-denormals")
 237       hasFP64Denormals = true;
 238   }
 239   if (!hasFP32Denormals)
 240     TargetOpts.Features.push_back(
 241       (Twine(hasFastFMAF() && hasFullRateDenormalsF32() && !CGOpts.FlushDenorm
 242              ? '+' : '-') + Twine("fp32-denormals"))
 243             .str());
 244   // Always do not flush fp64 or fp16 denorms.
 245   if (!hasFP64Denormals && hasFP64())
 246     TargetOpts.Features.push_back("+fp64-fp16-denormals");
 247 }
 248
 249 void AMDGPUTargetInfo::fillValidCPUList(
 250     SmallVectorImpl<StringRef> &Values) const {
 251   if (isAMDGCN(getTriple()))
 252     llvm::AMDGPU::fillValidArchListAMDGCN(Values);
 253   else
 254     llvm::AMDGPU::fillValidArchListR600(Values);
 255 }
 256
 257 void AMDGPUTargetInfo::setAddressSpaceMap(bool DefaultIsPrivate) {
 258   AddrSpaceMap = DefaultIsPrivate ? &AMDGPUDefIsPrivMap : &AMDGPUDefIsGenMap;
 259 }
 260
 261 AMDGPUTargetInfo::AMDGPUTargetInfo(const llvm::Triple &Triple,
 262                                    const TargetOptions &Opts)
 263     : TargetInfo(Triple),
 264       GPUKind(isAMDGCN(Triple) ?
 265               llvm::AMDGPU::parseArchAMDGCN(Opts.CPU) :
 266               llvm::AMDGPU::parseArchR600(Opts.CPU)),
 267       GPUFeatures(isAMDGCN(Triple) ?
 268                   llvm::AMDGPU::getArchAttrAMDGCN(GPUKind) :
 269                   llvm::AMDGPU::getArchAttrR600(GPUKind)) {
 270   resetDataLayout(isAMDGCN(getTriple()) ? DataLayoutStringAMDGCN
 271                                         : DataLayoutStringR600);
 272   assert(DataLayout->getAllocaAddrSpace() == Private);
 273
 274   setAddressSpaceMap(Triple.getOS() == llvm::Triple::Mesa3D ||
 275                      !isAMDGCN(Triple));
 276   UseAddrSpaceMapMangling = true;
 277
 278   HasLegalHalfType = true;
 279   HasFloat16 = true;
 280
 281   // Set pointer width and alignment for target address space 0.
 282   PointerWidth = PointerAlign = DataLayout->getPointerSizeInBits();
 283   if (getMaxPointerWidth() == 64) {
 284     LongWidth = LongAlign = 64;
 285     SizeType = UnsignedLong;
 286     PtrDiffType = SignedLong;
 287     IntPtrType = SignedLong;
 288   }
 289
 290   MaxAtomicPromoteWidth = MaxAtomicInlineWidth = 64;
 291 }
 292
 293 void AMDGPUTargetInfo::adjust(LangOptions &Opts) {
 294   TargetInfo::adjust(Opts);
 295   // ToDo: There are still a few places using default address space as private
 296   // address space in OpenCL, which needs to be cleaned up, then Opts.OpenCL
 297   // can be removed from the following line.
 298   setAddressSpaceMap(/*DefaultIsPrivate=*/Opts.OpenCL ||
 299                      !isAMDGCN(getTriple()));
 300 }
 301
 302 ArrayRef<Builtin::Info> AMDGPUTargetInfo::getTargetBuiltins() const {
 303   return llvm::makeArrayRef(BuiltinInfo, clang::AMDGPU::LastTSBuiltin -
 304                                              Builtin::FirstTSBuiltin);
 305 }
 306
 307 void AMDGPUTargetInfo::getTargetDefines(const LangOptions &Opts,
 308                                         MacroBuilder &Builder) const {
 309   Builder.defineMacro("__AMD__");
 310   Builder.defineMacro("__AMDGPU__");
 311
 312   if (isAMDGCN(getTriple()))
 313     Builder.defineMacro("__AMDGCN__");
 314   else
 315     Builder.defineMacro("__R600__");
 316
 317   if (GPUKind != llvm::AMDGPU::GK_NONE) {
 318     StringRef CanonName = isAMDGCN(getTriple()) ?
 319       getArchNameAMDGCN(GPUKind) : getArchNameR600(GPUKind);
 320     Builder.defineMacro(Twine("__") + Twine(CanonName) + Twine("__"));
 321   }
 322
 323   // TODO: __HAS_FMAF__, __HAS_LDEXPF__, __HAS_FP64__ are deprecated and will be
 324   // removed in the near future.
 325   if (hasFMAF())
 326     Builder.defineMacro("__HAS_FMAF__");
 327   if (hasFastFMAF())
 328     Builder.defineMacro("FP_FAST_FMAF");
 329   if (hasLDEXPF())
 330     Builder.defineMacro("__HAS_LDEXPF__");
 331   if (hasFP64())
 332     Builder.defineMacro("__HAS_FP64__");
 333   if (hasFastFMA())
 334     Builder.defineMacro("FP_FAST_FMA");
 335 }
 336
 337 void AMDGPUTargetInfo::setAuxTarget(const TargetInfo *Aux) {
 338   assert(HalfFormat == Aux->HalfFormat);
 339   assert(FloatFormat == Aux->FloatFormat);
 340   assert(DoubleFormat == Aux->DoubleFormat);
 341
 342   // On x86_64 long double is 80-bit extended precision format, which is
 343   // not supported by AMDGPU. 128-bit floating point format is also not
 344   // supported by AMDGPU. Therefore keep its own format for these two types.
 345   auto SaveLongDoubleFormat = LongDoubleFormat;
 346   auto SaveFloat128Format = Float128Format;
 347   copyAuxTarget(Aux);
 348   LongDoubleFormat = SaveLongDoubleFormat;
 349   Float128Format = SaveFloat128Format;
 350 }