1 //===--- AMDGPUHSAMetadataStreamer.cpp --------------------------*- C++ -*-===//
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
7 //===----------------------------------------------------------------------===//
10 /// AMDGPU HSA Metadata Streamer.
13 //===----------------------------------------------------------------------===//
15 #include "AMDGPUHSAMetadataStreamer.h"
17 #include "GCNSubtarget.h"
18 #include "MCTargetDesc/AMDGPUTargetStreamer.h"
19 #include "SIMachineFunctionInfo.h"
20 #include "SIProgramInfo.h"
21 #include "llvm/IR/Module.h"
24 static std::pair<Type *, Align> getArgumentTypeAlign(const Argument &Arg,
25 const DataLayout &DL) {
26 Type *Ty = Arg.getType();
28 if (Arg.hasByRefAttr()) {
29 Ty = Arg.getParamByRefType();
30 ArgAlign = Arg.getParamAlign();
34 ArgAlign = DL.getABITypeAlign(Ty);
36 return std::pair(Ty, *ArgAlign);
41 static cl::opt<bool> DumpHSAMetadata(
42 "amdgpu-dump-hsa-metadata",
43 cl::desc("Dump AMDGPU HSA Metadata"));
44 static cl::opt<bool> VerifyHSAMetadata(
45 "amdgpu-verify-hsa-metadata",
46 cl::desc("Verify AMDGPU HSA Metadata"));
51 //===----------------------------------------------------------------------===//
52 // HSAMetadataStreamerV2
53 //===----------------------------------------------------------------------===//
54 void MetadataStreamerYamlV2::dump(StringRef HSAMetadataString) const {
55 errs() << "AMDGPU HSA Metadata:\n" << HSAMetadataString << '\n';
58 void MetadataStreamerYamlV2::verify(StringRef HSAMetadataString) const {
59 errs() << "AMDGPU HSA Metadata Parser Test: ";
61 HSAMD::Metadata FromHSAMetadataString;
62 if (fromString(HSAMetadataString, FromHSAMetadataString)) {
67 std::string ToHSAMetadataString;
68 if (toString(FromHSAMetadataString, ToHSAMetadataString)) {
73 errs() << (HSAMetadataString == ToHSAMetadataString ? "PASS" : "FAIL")
75 if (HSAMetadataString != ToHSAMetadataString) {
76 errs() << "Original input: " << HSAMetadataString << '\n'
77 << "Produced output: " << ToHSAMetadataString << '\n';
82 MetadataStreamerYamlV2::getAccessQualifier(StringRef AccQual) const {
84 return AccessQualifier::Unknown;
86 return StringSwitch<AccessQualifier>(AccQual)
87 .Case("read_only", AccessQualifier::ReadOnly)
88 .Case("write_only", AccessQualifier::WriteOnly)
89 .Case("read_write", AccessQualifier::ReadWrite)
90 .Default(AccessQualifier::Default);
94 MetadataStreamerYamlV2::getAddressSpaceQualifier(unsigned AddressSpace) const {
95 switch (AddressSpace) {
96 case AMDGPUAS::PRIVATE_ADDRESS:
97 return AddressSpaceQualifier::Private;
98 case AMDGPUAS::GLOBAL_ADDRESS:
99 return AddressSpaceQualifier::Global;
100 case AMDGPUAS::CONSTANT_ADDRESS:
101 return AddressSpaceQualifier::Constant;
102 case AMDGPUAS::LOCAL_ADDRESS:
103 return AddressSpaceQualifier::Local;
104 case AMDGPUAS::FLAT_ADDRESS:
105 return AddressSpaceQualifier::Generic;
106 case AMDGPUAS::REGION_ADDRESS:
107 return AddressSpaceQualifier::Region;
109 return AddressSpaceQualifier::Unknown;
113 ValueKind MetadataStreamerYamlV2::getValueKind(Type *Ty, StringRef TypeQual,
114 StringRef BaseTypeName) const {
115 if (TypeQual.contains("pipe"))
116 return ValueKind::Pipe;
118 return StringSwitch<ValueKind>(BaseTypeName)
119 .Case("image1d_t", ValueKind::Image)
120 .Case("image1d_array_t", ValueKind::Image)
121 .Case("image1d_buffer_t", ValueKind::Image)
122 .Case("image2d_t", ValueKind::Image)
123 .Case("image2d_array_t", ValueKind::Image)
124 .Case("image2d_array_depth_t", ValueKind::Image)
125 .Case("image2d_array_msaa_t", ValueKind::Image)
126 .Case("image2d_array_msaa_depth_t", ValueKind::Image)
127 .Case("image2d_depth_t", ValueKind::Image)
128 .Case("image2d_msaa_t", ValueKind::Image)
129 .Case("image2d_msaa_depth_t", ValueKind::Image)
130 .Case("image3d_t", ValueKind::Image)
131 .Case("sampler_t", ValueKind::Sampler)
132 .Case("queue_t", ValueKind::Queue)
133 .Default(isa<PointerType>(Ty) ?
134 (Ty->getPointerAddressSpace() ==
135 AMDGPUAS::LOCAL_ADDRESS ?
136 ValueKind::DynamicSharedPointer :
137 ValueKind::GlobalBuffer) :
141 std::string MetadataStreamerYamlV2::getTypeName(Type *Ty, bool Signed) const {
142 switch (Ty->getTypeID()) {
143 case Type::IntegerTyID: {
145 return (Twine('u') + getTypeName(Ty, true)).str();
147 auto BitWidth = Ty->getIntegerBitWidth();
158 return (Twine('i') + Twine(BitWidth)).str();
163 case Type::FloatTyID:
165 case Type::DoubleTyID:
167 case Type::FixedVectorTyID: {
168 auto VecTy = cast<FixedVectorType>(Ty);
169 auto ElTy = VecTy->getElementType();
170 auto NumElements = VecTy->getNumElements();
171 return (Twine(getTypeName(ElTy, Signed)) + Twine(NumElements)).str();
178 std::vector<uint32_t>
179 MetadataStreamerYamlV2::getWorkGroupDimensions(MDNode *Node) const {
180 std::vector<uint32_t> Dims;
181 if (Node->getNumOperands() != 3)
184 for (auto &Op : Node->operands())
185 Dims.push_back(mdconst::extract<ConstantInt>(Op)->getZExtValue());
189 Kernel::CodeProps::Metadata MetadataStreamerYamlV2::getHSACodeProps(
190 const MachineFunction &MF, const SIProgramInfo &ProgramInfo) const {
191 const GCNSubtarget &STM = MF.getSubtarget<GCNSubtarget>();
192 const SIMachineFunctionInfo &MFI = *MF.getInfo<SIMachineFunctionInfo>();
193 HSAMD::Kernel::CodeProps::Metadata HSACodeProps;
194 const Function &F = MF.getFunction();
196 assert(F.getCallingConv() == CallingConv::AMDGPU_KERNEL ||
197 F.getCallingConv() == CallingConv::SPIR_KERNEL);
199 Align MaxKernArgAlign;
200 HSACodeProps.mKernargSegmentSize = STM.getKernArgSegmentSize(F,
202 HSACodeProps.mKernargSegmentAlign =
203 std::max(MaxKernArgAlign, Align(4)).value();
205 HSACodeProps.mGroupSegmentFixedSize = ProgramInfo.LDSSize;
206 HSACodeProps.mPrivateSegmentFixedSize = ProgramInfo.ScratchSize;
207 HSACodeProps.mWavefrontSize = STM.getWavefrontSize();
208 HSACodeProps.mNumSGPRs = ProgramInfo.NumSGPR;
209 HSACodeProps.mNumVGPRs = ProgramInfo.NumVGPR;
210 HSACodeProps.mMaxFlatWorkGroupSize = MFI.getMaxFlatWorkGroupSize();
211 HSACodeProps.mIsDynamicCallStack = ProgramInfo.DynamicCallStack;
212 HSACodeProps.mIsXNACKEnabled = STM.isXNACKEnabled();
213 HSACodeProps.mNumSpilledSGPRs = MFI.getNumSpilledSGPRs();
214 HSACodeProps.mNumSpilledVGPRs = MFI.getNumSpilledVGPRs();
219 Kernel::DebugProps::Metadata MetadataStreamerYamlV2::getHSADebugProps(
220 const MachineFunction &MF, const SIProgramInfo &ProgramInfo) const {
221 return HSAMD::Kernel::DebugProps::Metadata();
224 void MetadataStreamerYamlV2::emitVersion() {
225 auto &Version = HSAMetadata.mVersion;
227 Version.push_back(VersionMajorV2);
228 Version.push_back(VersionMinorV2);
231 void MetadataStreamerYamlV2::emitPrintf(const Module &Mod) {
232 auto &Printf = HSAMetadata.mPrintf;
234 auto Node = Mod.getNamedMetadata("llvm.printf.fmts");
238 for (auto *Op : Node->operands())
239 if (Op->getNumOperands())
241 std::string(cast<MDString>(Op->getOperand(0))->getString()));
244 void MetadataStreamerYamlV2::emitKernelLanguage(const Function &Func) {
245 auto &Kernel = HSAMetadata.mKernels.back();
247 // TODO: What about other languages?
248 auto Node = Func.getParent()->getNamedMetadata("opencl.ocl.version");
249 if (!Node || !Node->getNumOperands())
251 auto Op0 = Node->getOperand(0);
252 if (Op0->getNumOperands() <= 1)
255 Kernel.mLanguage = "OpenCL C";
256 Kernel.mLanguageVersion.push_back(
257 mdconst::extract<ConstantInt>(Op0->getOperand(0))->getZExtValue());
258 Kernel.mLanguageVersion.push_back(
259 mdconst::extract<ConstantInt>(Op0->getOperand(1))->getZExtValue());
262 void MetadataStreamerYamlV2::emitKernelAttrs(const Function &Func) {
263 auto &Attrs = HSAMetadata.mKernels.back().mAttrs;
265 if (auto Node = Func.getMetadata("reqd_work_group_size"))
266 Attrs.mReqdWorkGroupSize = getWorkGroupDimensions(Node);
267 if (auto Node = Func.getMetadata("work_group_size_hint"))
268 Attrs.mWorkGroupSizeHint = getWorkGroupDimensions(Node);
269 if (auto Node = Func.getMetadata("vec_type_hint")) {
270 Attrs.mVecTypeHint = getTypeName(
271 cast<ValueAsMetadata>(Node->getOperand(0))->getType(),
272 mdconst::extract<ConstantInt>(Node->getOperand(1))->getZExtValue());
274 if (Func.hasFnAttribute("runtime-handle")) {
275 Attrs.mRuntimeHandle =
276 Func.getFnAttribute("runtime-handle").getValueAsString().str();
280 void MetadataStreamerYamlV2::emitKernelArgs(const Function &Func,
281 const GCNSubtarget &ST) {
282 for (auto &Arg : Func.args())
285 emitHiddenKernelArgs(Func, ST);
288 void MetadataStreamerYamlV2::emitKernelArg(const Argument &Arg) {
289 auto Func = Arg.getParent();
290 auto ArgNo = Arg.getArgNo();
294 Node = Func->getMetadata("kernel_arg_name");
295 if (Node && ArgNo < Node->getNumOperands())
296 Name = cast<MDString>(Node->getOperand(ArgNo))->getString();
297 else if (Arg.hasName())
298 Name = Arg.getName();
301 Node = Func->getMetadata("kernel_arg_type");
302 if (Node && ArgNo < Node->getNumOperands())
303 TypeName = cast<MDString>(Node->getOperand(ArgNo))->getString();
305 StringRef BaseTypeName;
306 Node = Func->getMetadata("kernel_arg_base_type");
307 if (Node && ArgNo < Node->getNumOperands())
308 BaseTypeName = cast<MDString>(Node->getOperand(ArgNo))->getString();
311 if (Arg.getType()->isPointerTy() && Arg.onlyReadsMemory() &&
312 Arg.hasNoAliasAttr()) {
313 AccQual = "read_only";
315 Node = Func->getMetadata("kernel_arg_access_qual");
316 if (Node && ArgNo < Node->getNumOperands())
317 AccQual = cast<MDString>(Node->getOperand(ArgNo))->getString();
321 Node = Func->getMetadata("kernel_arg_type_qual");
322 if (Node && ArgNo < Node->getNumOperands())
323 TypeQual = cast<MDString>(Node->getOperand(ArgNo))->getString();
325 const DataLayout &DL = Func->getParent()->getDataLayout();
327 MaybeAlign PointeeAlign;
328 if (auto PtrTy = dyn_cast<PointerType>(Arg.getType())) {
329 if (PtrTy->getAddressSpace() == AMDGPUAS::LOCAL_ADDRESS) {
330 // FIXME: Should report this for all address spaces
331 PointeeAlign = Arg.getParamAlign().valueOrOne();
337 std::tie(ArgTy, ArgAlign) = getArgumentTypeAlign(Arg, DL);
339 emitKernelArg(DL, ArgTy, ArgAlign,
340 getValueKind(ArgTy, TypeQual, BaseTypeName), PointeeAlign, Name,
341 TypeName, BaseTypeName, AccQual, TypeQual);
344 void MetadataStreamerYamlV2::emitKernelArg(
345 const DataLayout &DL, Type *Ty, Align Alignment, ValueKind ValueKind,
346 MaybeAlign PointeeAlign, StringRef Name, StringRef TypeName,
347 StringRef BaseTypeName, StringRef AccQual, StringRef TypeQual) {
348 HSAMetadata.mKernels.back().mArgs.push_back(Kernel::Arg::Metadata());
349 auto &Arg = HSAMetadata.mKernels.back().mArgs.back();
351 Arg.mName = std::string(Name);
352 Arg.mTypeName = std::string(TypeName);
353 Arg.mSize = DL.getTypeAllocSize(Ty);
354 Arg.mAlign = Alignment.value();
355 Arg.mValueKind = ValueKind;
356 Arg.mPointeeAlign = PointeeAlign ? PointeeAlign->value() : 0;
358 if (auto PtrTy = dyn_cast<PointerType>(Ty))
359 Arg.mAddrSpaceQual = getAddressSpaceQualifier(PtrTy->getAddressSpace());
361 Arg.mAccQual = getAccessQualifier(AccQual);
363 // TODO: Emit Arg.mActualAccQual.
365 SmallVector<StringRef, 1> SplitTypeQuals;
366 TypeQual.split(SplitTypeQuals, " ", -1, false);
367 for (StringRef Key : SplitTypeQuals) {
368 auto P = StringSwitch<bool*>(Key)
369 .Case("const", &Arg.mIsConst)
370 .Case("restrict", &Arg.mIsRestrict)
371 .Case("volatile", &Arg.mIsVolatile)
372 .Case("pipe", &Arg.mIsPipe)
379 void MetadataStreamerYamlV2::emitHiddenKernelArgs(const Function &Func,
380 const GCNSubtarget &ST) {
381 unsigned HiddenArgNumBytes = ST.getImplicitArgNumBytes(Func);
382 if (!HiddenArgNumBytes)
385 auto &DL = Func.getParent()->getDataLayout();
386 auto Int64Ty = Type::getInt64Ty(Func.getContext());
388 if (HiddenArgNumBytes >= 8)
389 emitKernelArg(DL, Int64Ty, Align(8), ValueKind::HiddenGlobalOffsetX);
390 if (HiddenArgNumBytes >= 16)
391 emitKernelArg(DL, Int64Ty, Align(8), ValueKind::HiddenGlobalOffsetY);
392 if (HiddenArgNumBytes >= 24)
393 emitKernelArg(DL, Int64Ty, Align(8), ValueKind::HiddenGlobalOffsetZ);
395 auto Int8PtrTy = Type::getInt8PtrTy(Func.getContext(),
396 AMDGPUAS::GLOBAL_ADDRESS);
398 if (HiddenArgNumBytes >= 32) {
399 // We forbid the use of features requiring hostcall when compiling OpenCL
400 // before code object V5, which makes the mutual exclusion between the
401 // "printf buffer" and "hostcall buffer" here sound.
402 if (Func.getParent()->getNamedMetadata("llvm.printf.fmts"))
403 emitKernelArg(DL, Int8PtrTy, Align(8), ValueKind::HiddenPrintfBuffer);
404 else if (!Func.hasFnAttribute("amdgpu-no-hostcall-ptr"))
405 emitKernelArg(DL, Int8PtrTy, Align(8), ValueKind::HiddenHostcallBuffer);
407 emitKernelArg(DL, Int8PtrTy, Align(8), ValueKind::HiddenNone);
410 // Emit "default queue" and "completion action" arguments if enqueue kernel is
411 // used, otherwise emit dummy "none" arguments.
412 if (HiddenArgNumBytes >= 40) {
413 if (!Func.hasFnAttribute("amdgpu-no-default-queue")) {
414 emitKernelArg(DL, Int8PtrTy, Align(8), ValueKind::HiddenDefaultQueue);
416 emitKernelArg(DL, Int8PtrTy, Align(8), ValueKind::HiddenNone);
420 if (HiddenArgNumBytes >= 48) {
421 if (!Func.hasFnAttribute("amdgpu-no-completion-action") &&
422 // FIXME: Hack for runtime bug if we fail to optimize this out
423 Func.hasFnAttribute("calls-enqueue-kernel")) {
424 emitKernelArg(DL, Int8PtrTy, Align(8), ValueKind::HiddenCompletionAction);
426 emitKernelArg(DL, Int8PtrTy, Align(8), ValueKind::HiddenNone);
430 // Emit the pointer argument for multi-grid object.
431 if (HiddenArgNumBytes >= 56) {
432 if (!Func.hasFnAttribute("amdgpu-no-multigrid-sync-arg"))
433 emitKernelArg(DL, Int8PtrTy, Align(8), ValueKind::HiddenMultiGridSyncArg);
435 emitKernelArg(DL, Int8PtrTy, Align(8), ValueKind::HiddenNone);
439 bool MetadataStreamerYamlV2::emitTo(AMDGPUTargetStreamer &TargetStreamer) {
440 return TargetStreamer.EmitHSAMetadata(getHSAMetadata());
443 void MetadataStreamerYamlV2::begin(const Module &Mod,
444 const IsaInfo::AMDGPUTargetID &TargetID) {
449 void MetadataStreamerYamlV2::end() {
450 std::string HSAMetadataString;
451 if (toString(HSAMetadata, HSAMetadataString))
455 dump(HSAMetadataString);
456 if (VerifyHSAMetadata)
457 verify(HSAMetadataString);
460 void MetadataStreamerYamlV2::emitKernel(const MachineFunction &MF,
461 const SIProgramInfo &ProgramInfo) {
462 auto &Func = MF.getFunction();
463 if (Func.getCallingConv() != CallingConv::AMDGPU_KERNEL)
466 auto CodeProps = getHSACodeProps(MF, ProgramInfo);
467 auto DebugProps = getHSADebugProps(MF, ProgramInfo);
469 HSAMetadata.mKernels.push_back(Kernel::Metadata());
470 auto &Kernel = HSAMetadata.mKernels.back();
472 const GCNSubtarget &ST = MF.getSubtarget<GCNSubtarget>();
473 Kernel.mName = std::string(Func.getName());
474 Kernel.mSymbolName = (Twine(Func.getName()) + Twine("@kd")).str();
475 emitKernelLanguage(Func);
476 emitKernelAttrs(Func);
477 emitKernelArgs(Func, ST);
478 HSAMetadata.mKernels.back().mCodeProps = CodeProps;
479 HSAMetadata.mKernels.back().mDebugProps = DebugProps;
482 //===----------------------------------------------------------------------===//
483 // HSAMetadataStreamerV3
484 //===----------------------------------------------------------------------===//
486 void MetadataStreamerMsgPackV3::dump(StringRef HSAMetadataString) const {
487 errs() << "AMDGPU HSA Metadata:\n" << HSAMetadataString << '\n';
490 void MetadataStreamerMsgPackV3::verify(StringRef HSAMetadataString) const {
491 errs() << "AMDGPU HSA Metadata Parser Test: ";
493 msgpack::Document FromHSAMetadataString;
495 if (!FromHSAMetadataString.fromYAML(HSAMetadataString)) {
500 std::string ToHSAMetadataString;
501 raw_string_ostream StrOS(ToHSAMetadataString);
502 FromHSAMetadataString.toYAML(StrOS);
504 errs() << (HSAMetadataString == StrOS.str() ? "PASS" : "FAIL") << '\n';
505 if (HSAMetadataString != ToHSAMetadataString) {
506 errs() << "Original input: " << HSAMetadataString << '\n'
507 << "Produced output: " << StrOS.str() << '\n';
511 std::optional<StringRef>
512 MetadataStreamerMsgPackV3::getAccessQualifier(StringRef AccQual) const {
513 return StringSwitch<std::optional<StringRef>>(AccQual)
514 .Case("read_only", StringRef("read_only"))
515 .Case("write_only", StringRef("write_only"))
516 .Case("read_write", StringRef("read_write"))
517 .Default(std::nullopt);
520 std::optional<StringRef> MetadataStreamerMsgPackV3::getAddressSpaceQualifier(
521 unsigned AddressSpace) const {
522 switch (AddressSpace) {
523 case AMDGPUAS::PRIVATE_ADDRESS:
524 return StringRef("private");
525 case AMDGPUAS::GLOBAL_ADDRESS:
526 return StringRef("global");
527 case AMDGPUAS::CONSTANT_ADDRESS:
528 return StringRef("constant");
529 case AMDGPUAS::LOCAL_ADDRESS:
530 return StringRef("local");
531 case AMDGPUAS::FLAT_ADDRESS:
532 return StringRef("generic");
533 case AMDGPUAS::REGION_ADDRESS:
534 return StringRef("region");
541 MetadataStreamerMsgPackV3::getValueKind(Type *Ty, StringRef TypeQual,
542 StringRef BaseTypeName) const {
543 if (TypeQual.contains("pipe"))
546 return StringSwitch<StringRef>(BaseTypeName)
547 .Case("image1d_t", "image")
548 .Case("image1d_array_t", "image")
549 .Case("image1d_buffer_t", "image")
550 .Case("image2d_t", "image")
551 .Case("image2d_array_t", "image")
552 .Case("image2d_array_depth_t", "image")
553 .Case("image2d_array_msaa_t", "image")
554 .Case("image2d_array_msaa_depth_t", "image")
555 .Case("image2d_depth_t", "image")
556 .Case("image2d_msaa_t", "image")
557 .Case("image2d_msaa_depth_t", "image")
558 .Case("image3d_t", "image")
559 .Case("sampler_t", "sampler")
560 .Case("queue_t", "queue")
561 .Default(isa<PointerType>(Ty)
562 ? (Ty->getPointerAddressSpace() == AMDGPUAS::LOCAL_ADDRESS
563 ? "dynamic_shared_pointer"
568 std::string MetadataStreamerMsgPackV3::getTypeName(Type *Ty,
570 switch (Ty->getTypeID()) {
571 case Type::IntegerTyID: {
573 return (Twine('u') + getTypeName(Ty, true)).str();
575 auto BitWidth = Ty->getIntegerBitWidth();
586 return (Twine('i') + Twine(BitWidth)).str();
591 case Type::FloatTyID:
593 case Type::DoubleTyID:
595 case Type::FixedVectorTyID: {
596 auto VecTy = cast<FixedVectorType>(Ty);
597 auto ElTy = VecTy->getElementType();
598 auto NumElements = VecTy->getNumElements();
599 return (Twine(getTypeName(ElTy, Signed)) + Twine(NumElements)).str();
606 msgpack::ArrayDocNode
607 MetadataStreamerMsgPackV3::getWorkGroupDimensions(MDNode *Node) const {
608 auto Dims = HSAMetadataDoc->getArrayNode();
609 if (Node->getNumOperands() != 3)
612 for (auto &Op : Node->operands())
613 Dims.push_back(Dims.getDocument()->getNode(
614 uint64_t(mdconst::extract<ConstantInt>(Op)->getZExtValue())));
618 void MetadataStreamerMsgPackV3::emitVersion() {
619 auto Version = HSAMetadataDoc->getArrayNode();
620 Version.push_back(Version.getDocument()->getNode(VersionMajorV3));
621 Version.push_back(Version.getDocument()->getNode(VersionMinorV3));
622 getRootMetadata("amdhsa.version") = Version;
625 void MetadataStreamerMsgPackV3::emitPrintf(const Module &Mod) {
626 auto Node = Mod.getNamedMetadata("llvm.printf.fmts");
630 auto Printf = HSAMetadataDoc->getArrayNode();
631 for (auto *Op : Node->operands())
632 if (Op->getNumOperands())
633 Printf.push_back(Printf.getDocument()->getNode(
634 cast<MDString>(Op->getOperand(0))->getString(), /*Copy=*/true));
635 getRootMetadata("amdhsa.printf") = Printf;
638 void MetadataStreamerMsgPackV3::emitKernelLanguage(const Function &Func,
639 msgpack::MapDocNode Kern) {
640 // TODO: What about other languages?
641 auto Node = Func.getParent()->getNamedMetadata("opencl.ocl.version");
642 if (!Node || !Node->getNumOperands())
644 auto Op0 = Node->getOperand(0);
645 if (Op0->getNumOperands() <= 1)
648 Kern[".language"] = Kern.getDocument()->getNode("OpenCL C");
649 auto LanguageVersion = Kern.getDocument()->getArrayNode();
650 LanguageVersion.push_back(Kern.getDocument()->getNode(
651 mdconst::extract<ConstantInt>(Op0->getOperand(0))->getZExtValue()));
652 LanguageVersion.push_back(Kern.getDocument()->getNode(
653 mdconst::extract<ConstantInt>(Op0->getOperand(1))->getZExtValue()));
654 Kern[".language_version"] = LanguageVersion;
657 void MetadataStreamerMsgPackV3::emitKernelAttrs(const Function &Func,
658 msgpack::MapDocNode Kern) {
660 if (auto Node = Func.getMetadata("reqd_work_group_size"))
661 Kern[".reqd_workgroup_size"] = getWorkGroupDimensions(Node);
662 if (auto Node = Func.getMetadata("work_group_size_hint"))
663 Kern[".workgroup_size_hint"] = getWorkGroupDimensions(Node);
664 if (auto Node = Func.getMetadata("vec_type_hint")) {
665 Kern[".vec_type_hint"] = Kern.getDocument()->getNode(
667 cast<ValueAsMetadata>(Node->getOperand(0))->getType(),
668 mdconst::extract<ConstantInt>(Node->getOperand(1))->getZExtValue()),
671 if (Func.hasFnAttribute("runtime-handle")) {
672 Kern[".device_enqueue_symbol"] = Kern.getDocument()->getNode(
673 Func.getFnAttribute("runtime-handle").getValueAsString().str(),
676 if (Func.hasFnAttribute("device-init"))
677 Kern[".kind"] = Kern.getDocument()->getNode("init");
678 else if (Func.hasFnAttribute("device-fini"))
679 Kern[".kind"] = Kern.getDocument()->getNode("fini");
682 void MetadataStreamerMsgPackV3::emitKernelArgs(const MachineFunction &MF,
683 msgpack::MapDocNode Kern) {
684 auto &Func = MF.getFunction();
686 auto Args = HSAMetadataDoc->getArrayNode();
687 for (auto &Arg : Func.args())
688 emitKernelArg(Arg, Offset, Args);
690 emitHiddenKernelArgs(MF, Offset, Args);
692 Kern[".args"] = Args;
695 void MetadataStreamerMsgPackV3::emitKernelArg(const Argument &Arg,
697 msgpack::ArrayDocNode Args) {
698 auto Func = Arg.getParent();
699 auto ArgNo = Arg.getArgNo();
703 Node = Func->getMetadata("kernel_arg_name");
704 if (Node && ArgNo < Node->getNumOperands())
705 Name = cast<MDString>(Node->getOperand(ArgNo))->getString();
706 else if (Arg.hasName())
707 Name = Arg.getName();
710 Node = Func->getMetadata("kernel_arg_type");
711 if (Node && ArgNo < Node->getNumOperands())
712 TypeName = cast<MDString>(Node->getOperand(ArgNo))->getString();
714 StringRef BaseTypeName;
715 Node = Func->getMetadata("kernel_arg_base_type");
716 if (Node && ArgNo < Node->getNumOperands())
717 BaseTypeName = cast<MDString>(Node->getOperand(ArgNo))->getString();
720 if (Arg.getType()->isPointerTy() && Arg.onlyReadsMemory() &&
721 Arg.hasNoAliasAttr()) {
722 AccQual = "read_only";
724 Node = Func->getMetadata("kernel_arg_access_qual");
725 if (Node && ArgNo < Node->getNumOperands())
726 AccQual = cast<MDString>(Node->getOperand(ArgNo))->getString();
730 Node = Func->getMetadata("kernel_arg_type_qual");
731 if (Node && ArgNo < Node->getNumOperands())
732 TypeQual = cast<MDString>(Node->getOperand(ArgNo))->getString();
734 const DataLayout &DL = Func->getParent()->getDataLayout();
736 MaybeAlign PointeeAlign;
737 Type *Ty = Arg.hasByRefAttr() ? Arg.getParamByRefType() : Arg.getType();
739 // FIXME: Need to distinguish in memory alignment from pointer alignment.
740 if (auto PtrTy = dyn_cast<PointerType>(Ty)) {
741 if (PtrTy->getAddressSpace() == AMDGPUAS::LOCAL_ADDRESS)
742 PointeeAlign = Arg.getParamAlign().valueOrOne();
745 // There's no distinction between byval aggregates and raw aggregates.
748 std::tie(ArgTy, ArgAlign) = getArgumentTypeAlign(Arg, DL);
750 emitKernelArg(DL, ArgTy, ArgAlign,
751 getValueKind(ArgTy, TypeQual, BaseTypeName), Offset, Args,
752 PointeeAlign, Name, TypeName, BaseTypeName, AccQual, TypeQual);
755 void MetadataStreamerMsgPackV3::emitKernelArg(
756 const DataLayout &DL, Type *Ty, Align Alignment, StringRef ValueKind,
757 unsigned &Offset, msgpack::ArrayDocNode Args, MaybeAlign PointeeAlign,
758 StringRef Name, StringRef TypeName, StringRef BaseTypeName,
759 StringRef AccQual, StringRef TypeQual) {
760 auto Arg = Args.getDocument()->getMapNode();
763 Arg[".name"] = Arg.getDocument()->getNode(Name, /*Copy=*/true);
764 if (!TypeName.empty())
765 Arg[".type_name"] = Arg.getDocument()->getNode(TypeName, /*Copy=*/true);
766 auto Size = DL.getTypeAllocSize(Ty);
767 Arg[".size"] = Arg.getDocument()->getNode(Size);
768 Offset = alignTo(Offset, Alignment);
769 Arg[".offset"] = Arg.getDocument()->getNode(Offset);
771 Arg[".value_kind"] = Arg.getDocument()->getNode(ValueKind, /*Copy=*/true);
773 Arg[".pointee_align"] = Arg.getDocument()->getNode(PointeeAlign->value());
775 if (auto PtrTy = dyn_cast<PointerType>(Ty))
776 if (auto Qualifier = getAddressSpaceQualifier(PtrTy->getAddressSpace()))
777 // Limiting address space to emit only for a certain ValueKind.
778 if (ValueKind == "global_buffer" || ValueKind == "dynamic_shared_pointer")
779 Arg[".address_space"] = Arg.getDocument()->getNode(*Qualifier,
782 if (auto AQ = getAccessQualifier(AccQual))
783 Arg[".access"] = Arg.getDocument()->getNode(*AQ, /*Copy=*/true);
785 // TODO: Emit Arg[".actual_access"].
787 SmallVector<StringRef, 1> SplitTypeQuals;
788 TypeQual.split(SplitTypeQuals, " ", -1, false);
789 for (StringRef Key : SplitTypeQuals) {
791 Arg[".is_const"] = Arg.getDocument()->getNode(true);
792 else if (Key == "restrict")
793 Arg[".is_restrict"] = Arg.getDocument()->getNode(true);
794 else if (Key == "volatile")
795 Arg[".is_volatile"] = Arg.getDocument()->getNode(true);
796 else if (Key == "pipe")
797 Arg[".is_pipe"] = Arg.getDocument()->getNode(true);
803 void MetadataStreamerMsgPackV3::emitHiddenKernelArgs(
804 const MachineFunction &MF, unsigned &Offset, msgpack::ArrayDocNode Args) {
805 auto &Func = MF.getFunction();
806 const GCNSubtarget &ST = MF.getSubtarget<GCNSubtarget>();
808 unsigned HiddenArgNumBytes = ST.getImplicitArgNumBytes(Func);
809 if (!HiddenArgNumBytes)
812 const Module *M = Func.getParent();
813 auto &DL = M->getDataLayout();
814 auto Int64Ty = Type::getInt64Ty(Func.getContext());
816 Offset = alignTo(Offset, ST.getAlignmentForImplicitArgPtr());
818 if (HiddenArgNumBytes >= 8)
819 emitKernelArg(DL, Int64Ty, Align(8), "hidden_global_offset_x", Offset,
821 if (HiddenArgNumBytes >= 16)
822 emitKernelArg(DL, Int64Ty, Align(8), "hidden_global_offset_y", Offset,
824 if (HiddenArgNumBytes >= 24)
825 emitKernelArg(DL, Int64Ty, Align(8), "hidden_global_offset_z", Offset,
829 Type::getInt8PtrTy(Func.getContext(), AMDGPUAS::GLOBAL_ADDRESS);
831 if (HiddenArgNumBytes >= 32) {
832 // We forbid the use of features requiring hostcall when compiling OpenCL
833 // before code object V5, which makes the mutual exclusion between the
834 // "printf buffer" and "hostcall buffer" here sound.
835 if (M->getNamedMetadata("llvm.printf.fmts"))
836 emitKernelArg(DL, Int8PtrTy, Align(8), "hidden_printf_buffer", Offset,
838 else if (!Func.hasFnAttribute("amdgpu-no-hostcall-ptr"))
839 emitKernelArg(DL, Int8PtrTy, Align(8), "hidden_hostcall_buffer", Offset,
842 emitKernelArg(DL, Int8PtrTy, Align(8), "hidden_none", Offset, Args);
845 // Emit "default queue" and "completion action" arguments if enqueue kernel is
846 // used, otherwise emit dummy "none" arguments.
847 if (HiddenArgNumBytes >= 40) {
848 if (!Func.hasFnAttribute("amdgpu-no-default-queue")) {
849 emitKernelArg(DL, Int8PtrTy, Align(8), "hidden_default_queue", Offset,
852 emitKernelArg(DL, Int8PtrTy, Align(8), "hidden_none", Offset, Args);
856 if (HiddenArgNumBytes >= 48) {
857 if (!Func.hasFnAttribute("amdgpu-no-completion-action") &&
858 // FIXME: Hack for runtime bug if we fail to optimize this out
859 Func.hasFnAttribute("calls-enqueue-kernel")) {
860 emitKernelArg(DL, Int8PtrTy, Align(8), "hidden_completion_action", Offset,
863 emitKernelArg(DL, Int8PtrTy, Align(8), "hidden_none", Offset, Args);
867 // Emit the pointer argument for multi-grid object.
868 if (HiddenArgNumBytes >= 56) {
869 if (!Func.hasFnAttribute("amdgpu-no-multigrid-sync-arg")) {
870 emitKernelArg(DL, Int8PtrTy, Align(8), "hidden_multigrid_sync_arg", Offset,
873 emitKernelArg(DL, Int8PtrTy, Align(8), "hidden_none", Offset, Args);
878 msgpack::MapDocNode MetadataStreamerMsgPackV3::getHSAKernelProps(
879 const MachineFunction &MF, const SIProgramInfo &ProgramInfo) const {
880 const GCNSubtarget &STM = MF.getSubtarget<GCNSubtarget>();
881 const SIMachineFunctionInfo &MFI = *MF.getInfo<SIMachineFunctionInfo>();
882 const Function &F = MF.getFunction();
884 auto Kern = HSAMetadataDoc->getMapNode();
886 Align MaxKernArgAlign;
887 Kern[".kernarg_segment_size"] = Kern.getDocument()->getNode(
888 STM.getKernArgSegmentSize(F, MaxKernArgAlign));
889 Kern[".group_segment_fixed_size"] =
890 Kern.getDocument()->getNode(ProgramInfo.LDSSize);
891 Kern[".private_segment_fixed_size"] =
892 Kern.getDocument()->getNode(ProgramInfo.ScratchSize);
893 if (AMDGPU::getAmdhsaCodeObjectVersion() >= 5)
894 Kern[".uses_dynamic_stack"] =
895 Kern.getDocument()->getNode(ProgramInfo.DynamicCallStack);
896 if (AMDGPU::getAmdhsaCodeObjectVersion() >= 5 && STM.supportsWGP())
897 Kern[".workgroup_processor_mode"] =
898 Kern.getDocument()->getNode(ProgramInfo.WgpMode);
900 // FIXME: The metadata treats the minimum as 16?
901 Kern[".kernarg_segment_align"] =
902 Kern.getDocument()->getNode(std::max(Align(4), MaxKernArgAlign).value());
903 Kern[".wavefront_size"] =
904 Kern.getDocument()->getNode(STM.getWavefrontSize());
905 Kern[".sgpr_count"] = Kern.getDocument()->getNode(ProgramInfo.NumSGPR);
906 Kern[".vgpr_count"] = Kern.getDocument()->getNode(ProgramInfo.NumVGPR);
908 // Only add AGPR count to metadata for supported devices
909 if (STM.hasMAIInsts()) {
910 Kern[".agpr_count"] = Kern.getDocument()->getNode(ProgramInfo.NumAccVGPR);
913 Kern[".max_flat_workgroup_size"] =
914 Kern.getDocument()->getNode(MFI.getMaxFlatWorkGroupSize());
915 Kern[".sgpr_spill_count"] =
916 Kern.getDocument()->getNode(MFI.getNumSpilledSGPRs());
917 Kern[".vgpr_spill_count"] =
918 Kern.getDocument()->getNode(MFI.getNumSpilledVGPRs());
923 bool MetadataStreamerMsgPackV3::emitTo(AMDGPUTargetStreamer &TargetStreamer) {
924 return TargetStreamer.EmitHSAMetadata(*HSAMetadataDoc, true);
927 void MetadataStreamerMsgPackV3::begin(const Module &Mod,
928 const IsaInfo::AMDGPUTargetID &TargetID) {
931 getRootMetadata("amdhsa.kernels") = HSAMetadataDoc->getArrayNode();
934 void MetadataStreamerMsgPackV3::end() {
935 std::string HSAMetadataString;
936 raw_string_ostream StrOS(HSAMetadataString);
937 HSAMetadataDoc->toYAML(StrOS);
941 if (VerifyHSAMetadata)
945 void MetadataStreamerMsgPackV3::emitKernel(const MachineFunction &MF,
946 const SIProgramInfo &ProgramInfo) {
947 auto &Func = MF.getFunction();
948 auto Kern = getHSAKernelProps(MF, ProgramInfo);
950 assert(Func.getCallingConv() == CallingConv::AMDGPU_KERNEL ||
951 Func.getCallingConv() == CallingConv::SPIR_KERNEL);
954 getRootMetadata("amdhsa.kernels").getArray(/*Convert=*/true);
957 Kern[".name"] = Kern.getDocument()->getNode(Func.getName());
958 Kern[".symbol"] = Kern.getDocument()->getNode(
959 (Twine(Func.getName()) + Twine(".kd")).str(), /*Copy=*/true);
960 emitKernelLanguage(Func, Kern);
961 emitKernelAttrs(Func, Kern);
962 emitKernelArgs(MF, Kern);
965 Kernels.push_back(Kern);
968 //===----------------------------------------------------------------------===//
969 // HSAMetadataStreamerV4
970 //===----------------------------------------------------------------------===//
972 void MetadataStreamerMsgPackV4::emitVersion() {
973 auto Version = HSAMetadataDoc->getArrayNode();
974 Version.push_back(Version.getDocument()->getNode(VersionMajorV4));
975 Version.push_back(Version.getDocument()->getNode(VersionMinorV4));
976 getRootMetadata("amdhsa.version") = Version;
979 void MetadataStreamerMsgPackV4::emitTargetID(
980 const IsaInfo::AMDGPUTargetID &TargetID) {
981 getRootMetadata("amdhsa.target") =
982 HSAMetadataDoc->getNode(TargetID.toString(), /*Copy=*/true);
985 void MetadataStreamerMsgPackV4::begin(const Module &Mod,
986 const IsaInfo::AMDGPUTargetID &TargetID) {
988 emitTargetID(TargetID);
990 getRootMetadata("amdhsa.kernels") = HSAMetadataDoc->getArrayNode();
993 //===----------------------------------------------------------------------===//
994 // HSAMetadataStreamerV5
995 //===----------------------------------------------------------------------===//
997 void MetadataStreamerMsgPackV5::emitVersion() {
998 auto Version = HSAMetadataDoc->getArrayNode();
999 Version.push_back(Version.getDocument()->getNode(VersionMajorV5));
1000 Version.push_back(Version.getDocument()->getNode(VersionMinorV5));
1001 getRootMetadata("amdhsa.version") = Version;
1004 void MetadataStreamerMsgPackV5::emitHiddenKernelArgs(
1005 const MachineFunction &MF, unsigned &Offset, msgpack::ArrayDocNode Args) {
1006 auto &Func = MF.getFunction();
1007 const GCNSubtarget &ST = MF.getSubtarget<GCNSubtarget>();
1009 // No implicit kernel argument is used.
1010 if (ST.getImplicitArgNumBytes(Func) == 0)
1013 const Module *M = Func.getParent();
1014 auto &DL = M->getDataLayout();
1015 const SIMachineFunctionInfo &MFI = *MF.getInfo<SIMachineFunctionInfo>();
1017 auto Int64Ty = Type::getInt64Ty(Func.getContext());
1018 auto Int32Ty = Type::getInt32Ty(Func.getContext());
1019 auto Int16Ty = Type::getInt16Ty(Func.getContext());
1021 Offset = alignTo(Offset, ST.getAlignmentForImplicitArgPtr());
1022 emitKernelArg(DL, Int32Ty, Align(4), "hidden_block_count_x", Offset, Args);
1023 emitKernelArg(DL, Int32Ty, Align(4), "hidden_block_count_y", Offset, Args);
1024 emitKernelArg(DL, Int32Ty, Align(4), "hidden_block_count_z", Offset, Args);
1026 emitKernelArg(DL, Int16Ty, Align(2), "hidden_group_size_x", Offset, Args);
1027 emitKernelArg(DL, Int16Ty, Align(2), "hidden_group_size_y", Offset, Args);
1028 emitKernelArg(DL, Int16Ty, Align(2), "hidden_group_size_z", Offset, Args);
1030 emitKernelArg(DL, Int16Ty, Align(2), "hidden_remainder_x", Offset, Args);
1031 emitKernelArg(DL, Int16Ty, Align(2), "hidden_remainder_y", Offset, Args);
1032 emitKernelArg(DL, Int16Ty, Align(2), "hidden_remainder_z", Offset, Args);
1034 // Reserved for hidden_tool_correlation_id.
1037 Offset += 8; // Reserved.
1039 emitKernelArg(DL, Int64Ty, Align(8), "hidden_global_offset_x", Offset, Args);
1040 emitKernelArg(DL, Int64Ty, Align(8), "hidden_global_offset_y", Offset, Args);
1041 emitKernelArg(DL, Int64Ty, Align(8), "hidden_global_offset_z", Offset, Args);
1043 emitKernelArg(DL, Int16Ty, Align(2), "hidden_grid_dims", Offset, Args);
1045 Offset += 6; // Reserved.
1047 Type::getInt8PtrTy(Func.getContext(), AMDGPUAS::GLOBAL_ADDRESS);
1049 if (M->getNamedMetadata("llvm.printf.fmts")) {
1050 emitKernelArg(DL, Int8PtrTy, Align(8), "hidden_printf_buffer", Offset,
1053 Offset += 8; // Skipped.
1056 if (!Func.hasFnAttribute("amdgpu-no-hostcall-ptr")) {
1057 emitKernelArg(DL, Int8PtrTy, Align(8), "hidden_hostcall_buffer", Offset,
1060 Offset += 8; // Skipped.
1063 if (!Func.hasFnAttribute("amdgpu-no-multigrid-sync-arg")) {
1064 emitKernelArg(DL, Int8PtrTy, Align(8), "hidden_multigrid_sync_arg", Offset,
1067 Offset += 8; // Skipped.
1070 if (!Func.hasFnAttribute("amdgpu-no-heap-ptr"))
1071 emitKernelArg(DL, Int8PtrTy, Align(8), "hidden_heap_v1", Offset, Args);
1073 Offset += 8; // Skipped.
1075 if (!Func.hasFnAttribute("amdgpu-no-default-queue")) {
1076 emitKernelArg(DL, Int8PtrTy, Align(8), "hidden_default_queue", Offset,
1079 Offset += 8; // Skipped.
1082 if (!Func.hasFnAttribute("amdgpu-no-completion-action") &&
1083 // FIXME: Hack for runtime bug
1084 Func.hasFnAttribute("calls-enqueue-kernel")) {
1085 emitKernelArg(DL, Int8PtrTy, Align(8), "hidden_completion_action", Offset,
1088 Offset += 8; // Skipped.
1091 Offset += 72; // Reserved.
1093 // hidden_private_base and hidden_shared_base are only when the subtarget has
1095 if (!ST.hasApertureRegs()) {
1096 emitKernelArg(DL, Int32Ty, Align(4), "hidden_private_base", Offset, Args);
1097 emitKernelArg(DL, Int32Ty, Align(4), "hidden_shared_base", Offset, Args);
1099 Offset += 8; // Skipped.
1102 if (MFI.hasQueuePtr())
1103 emitKernelArg(DL, Int8PtrTy, Align(8), "hidden_queue_ptr", Offset, Args);
1106 void MetadataStreamerMsgPackV5::emitKernelAttrs(const Function &Func,
1107 msgpack::MapDocNode Kern) {
1108 MetadataStreamerMsgPackV3::emitKernelAttrs(Func, Kern);
1110 if (Func.getFnAttribute("uniform-work-group-size").getValueAsBool())
1111 Kern[".uniform_work_group_size"] = Kern.getDocument()->getNode(1);
1115 } // end namespace HSAMD
1116 } // end namespace AMDGPU
1117 } // end namespace llvm