1 //===--- AMDGPUHSAMetadataStreamer.cpp --------------------------*- C++ -*-===//
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
7 //===----------------------------------------------------------------------===//
10 /// AMDGPU HSA Metadata Streamer.
13 //===----------------------------------------------------------------------===//
15 #include "AMDGPUHSAMetadataStreamer.h"
17 #include "AMDGPUSubtarget.h"
18 #include "MCTargetDesc/AMDGPUTargetStreamer.h"
19 #include "SIMachineFunctionInfo.h"
20 #include "SIProgramInfo.h"
21 #include "Utils/AMDGPUBaseInfo.h"
22 #include "llvm/ADT/StringSwitch.h"
23 #include "llvm/IR/Constants.h"
24 #include "llvm/IR/Module.h"
25 #include "llvm/Support/raw_ostream.h"
29 static cl::opt<bool> DumpHSAMetadata(
30 "amdgpu-dump-hsa-metadata",
31 cl::desc("Dump AMDGPU HSA Metadata"));
32 static cl::opt<bool> VerifyHSAMetadata(
33 "amdgpu-verify-hsa-metadata",
34 cl::desc("Verify AMDGPU HSA Metadata"));
39 //===----------------------------------------------------------------------===//
40 // HSAMetadataStreamerV2
41 //===----------------------------------------------------------------------===//
42 void MetadataStreamerV2::dump(StringRef HSAMetadataString) const {
43 errs() << "AMDGPU HSA Metadata:\n" << HSAMetadataString << '\n';
46 void MetadataStreamerV2::verify(StringRef HSAMetadataString) const {
47 errs() << "AMDGPU HSA Metadata Parser Test: ";
49 HSAMD::Metadata FromHSAMetadataString;
50 if (fromString(std::string(HSAMetadataString), FromHSAMetadataString)) {
55 std::string ToHSAMetadataString;
56 if (toString(FromHSAMetadataString, ToHSAMetadataString)) {
61 errs() << (HSAMetadataString == ToHSAMetadataString ? "PASS" : "FAIL")
63 if (HSAMetadataString != ToHSAMetadataString) {
64 errs() << "Original input: " << HSAMetadataString << '\n'
65 << "Produced output: " << ToHSAMetadataString << '\n';
70 MetadataStreamerV2::getAccessQualifier(StringRef AccQual) const {
72 return AccessQualifier::Unknown;
74 return StringSwitch<AccessQualifier>(AccQual)
75 .Case("read_only", AccessQualifier::ReadOnly)
76 .Case("write_only", AccessQualifier::WriteOnly)
77 .Case("read_write", AccessQualifier::ReadWrite)
78 .Default(AccessQualifier::Default);
82 MetadataStreamerV2::getAddressSpaceQualifier(
83 unsigned AddressSpace) const {
84 switch (AddressSpace) {
85 case AMDGPUAS::PRIVATE_ADDRESS:
86 return AddressSpaceQualifier::Private;
87 case AMDGPUAS::GLOBAL_ADDRESS:
88 return AddressSpaceQualifier::Global;
89 case AMDGPUAS::CONSTANT_ADDRESS:
90 return AddressSpaceQualifier::Constant;
91 case AMDGPUAS::LOCAL_ADDRESS:
92 return AddressSpaceQualifier::Local;
93 case AMDGPUAS::FLAT_ADDRESS:
94 return AddressSpaceQualifier::Generic;
95 case AMDGPUAS::REGION_ADDRESS:
96 return AddressSpaceQualifier::Region;
98 return AddressSpaceQualifier::Unknown;
102 ValueKind MetadataStreamerV2::getValueKind(Type *Ty, StringRef TypeQual,
103 StringRef BaseTypeName) const {
104 if (TypeQual.find("pipe") != StringRef::npos)
105 return ValueKind::Pipe;
107 return StringSwitch<ValueKind>(BaseTypeName)
108 .Case("image1d_t", ValueKind::Image)
109 .Case("image1d_array_t", ValueKind::Image)
110 .Case("image1d_buffer_t", ValueKind::Image)
111 .Case("image2d_t", ValueKind::Image)
112 .Case("image2d_array_t", ValueKind::Image)
113 .Case("image2d_array_depth_t", ValueKind::Image)
114 .Case("image2d_array_msaa_t", ValueKind::Image)
115 .Case("image2d_array_msaa_depth_t", ValueKind::Image)
116 .Case("image2d_depth_t", ValueKind::Image)
117 .Case("image2d_msaa_t", ValueKind::Image)
118 .Case("image2d_msaa_depth_t", ValueKind::Image)
119 .Case("image3d_t", ValueKind::Image)
120 .Case("sampler_t", ValueKind::Sampler)
121 .Case("queue_t", ValueKind::Queue)
122 .Default(isa<PointerType>(Ty) ?
123 (Ty->getPointerAddressSpace() ==
124 AMDGPUAS::LOCAL_ADDRESS ?
125 ValueKind::DynamicSharedPointer :
126 ValueKind::GlobalBuffer) :
130 std::string MetadataStreamerV2::getTypeName(Type *Ty, bool Signed) const {
131 switch (Ty->getTypeID()) {
132 case Type::IntegerTyID: {
134 return (Twine('u') + getTypeName(Ty, true)).str();
136 auto BitWidth = Ty->getIntegerBitWidth();
147 return (Twine('i') + Twine(BitWidth)).str();
152 case Type::FloatTyID:
154 case Type::DoubleTyID:
156 case Type::FixedVectorTyID: {
157 auto VecTy = cast<FixedVectorType>(Ty);
158 auto ElTy = VecTy->getElementType();
159 auto NumElements = VecTy->getNumElements();
160 return (Twine(getTypeName(ElTy, Signed)) + Twine(NumElements)).str();
167 std::vector<uint32_t>
168 MetadataStreamerV2::getWorkGroupDimensions(MDNode *Node) const {
169 std::vector<uint32_t> Dims;
170 if (Node->getNumOperands() != 3)
173 for (auto &Op : Node->operands())
174 Dims.push_back(mdconst::extract<ConstantInt>(Op)->getZExtValue());
178 Kernel::CodeProps::Metadata
179 MetadataStreamerV2::getHSACodeProps(const MachineFunction &MF,
180 const SIProgramInfo &ProgramInfo) const {
181 const GCNSubtarget &STM = MF.getSubtarget<GCNSubtarget>();
182 const SIMachineFunctionInfo &MFI = *MF.getInfo<SIMachineFunctionInfo>();
183 HSAMD::Kernel::CodeProps::Metadata HSACodeProps;
184 const Function &F = MF.getFunction();
186 assert(F.getCallingConv() == CallingConv::AMDGPU_KERNEL ||
187 F.getCallingConv() == CallingConv::SPIR_KERNEL);
189 Align MaxKernArgAlign;
190 HSACodeProps.mKernargSegmentSize = STM.getKernArgSegmentSize(F,
192 HSACodeProps.mGroupSegmentFixedSize = ProgramInfo.LDSSize;
193 HSACodeProps.mPrivateSegmentFixedSize = ProgramInfo.ScratchSize;
194 HSACodeProps.mKernargSegmentAlign =
195 std::max(MaxKernArgAlign, Align(4)).value();
196 HSACodeProps.mWavefrontSize = STM.getWavefrontSize();
197 HSACodeProps.mNumSGPRs = ProgramInfo.NumSGPR;
198 HSACodeProps.mNumVGPRs = ProgramInfo.NumVGPR;
199 HSACodeProps.mMaxFlatWorkGroupSize = MFI.getMaxFlatWorkGroupSize();
200 HSACodeProps.mIsDynamicCallStack = ProgramInfo.DynamicCallStack;
201 HSACodeProps.mIsXNACKEnabled = STM.isXNACKEnabled();
202 HSACodeProps.mNumSpilledSGPRs = MFI.getNumSpilledSGPRs();
203 HSACodeProps.mNumSpilledVGPRs = MFI.getNumSpilledVGPRs();
208 Kernel::DebugProps::Metadata
209 MetadataStreamerV2::getHSADebugProps(const MachineFunction &MF,
210 const SIProgramInfo &ProgramInfo) const {
211 return HSAMD::Kernel::DebugProps::Metadata();
214 void MetadataStreamerV2::emitVersion() {
215 auto &Version = HSAMetadata.mVersion;
217 Version.push_back(VersionMajor);
218 Version.push_back(VersionMinor);
221 void MetadataStreamerV2::emitPrintf(const Module &Mod) {
222 auto &Printf = HSAMetadata.mPrintf;
224 auto Node = Mod.getNamedMetadata("llvm.printf.fmts");
228 for (auto Op : Node->operands())
229 if (Op->getNumOperands())
231 std::string(cast<MDString>(Op->getOperand(0))->getString()));
234 void MetadataStreamerV2::emitKernelLanguage(const Function &Func) {
235 auto &Kernel = HSAMetadata.mKernels.back();
237 // TODO: What about other languages?
238 auto Node = Func.getParent()->getNamedMetadata("opencl.ocl.version");
239 if (!Node || !Node->getNumOperands())
241 auto Op0 = Node->getOperand(0);
242 if (Op0->getNumOperands() <= 1)
245 Kernel.mLanguage = "OpenCL C";
246 Kernel.mLanguageVersion.push_back(
247 mdconst::extract<ConstantInt>(Op0->getOperand(0))->getZExtValue());
248 Kernel.mLanguageVersion.push_back(
249 mdconst::extract<ConstantInt>(Op0->getOperand(1))->getZExtValue());
252 void MetadataStreamerV2::emitKernelAttrs(const Function &Func) {
253 auto &Attrs = HSAMetadata.mKernels.back().mAttrs;
255 if (auto Node = Func.getMetadata("reqd_work_group_size"))
256 Attrs.mReqdWorkGroupSize = getWorkGroupDimensions(Node);
257 if (auto Node = Func.getMetadata("work_group_size_hint"))
258 Attrs.mWorkGroupSizeHint = getWorkGroupDimensions(Node);
259 if (auto Node = Func.getMetadata("vec_type_hint")) {
260 Attrs.mVecTypeHint = getTypeName(
261 cast<ValueAsMetadata>(Node->getOperand(0))->getType(),
262 mdconst::extract<ConstantInt>(Node->getOperand(1))->getZExtValue());
264 if (Func.hasFnAttribute("runtime-handle")) {
265 Attrs.mRuntimeHandle =
266 Func.getFnAttribute("runtime-handle").getValueAsString().str();
270 void MetadataStreamerV2::emitKernelArgs(const Function &Func) {
271 for (auto &Arg : Func.args())
274 emitHiddenKernelArgs(Func);
277 void MetadataStreamerV2::emitKernelArg(const Argument &Arg) {
278 auto Func = Arg.getParent();
279 auto ArgNo = Arg.getArgNo();
283 Node = Func->getMetadata("kernel_arg_name");
284 if (Node && ArgNo < Node->getNumOperands())
285 Name = cast<MDString>(Node->getOperand(ArgNo))->getString();
286 else if (Arg.hasName())
287 Name = Arg.getName();
290 Node = Func->getMetadata("kernel_arg_type");
291 if (Node && ArgNo < Node->getNumOperands())
292 TypeName = cast<MDString>(Node->getOperand(ArgNo))->getString();
294 StringRef BaseTypeName;
295 Node = Func->getMetadata("kernel_arg_base_type");
296 if (Node && ArgNo < Node->getNumOperands())
297 BaseTypeName = cast<MDString>(Node->getOperand(ArgNo))->getString();
300 if (Arg.getType()->isPointerTy() && Arg.onlyReadsMemory() &&
301 Arg.hasNoAliasAttr()) {
302 AccQual = "read_only";
304 Node = Func->getMetadata("kernel_arg_access_qual");
305 if (Node && ArgNo < Node->getNumOperands())
306 AccQual = cast<MDString>(Node->getOperand(ArgNo))->getString();
310 Node = Func->getMetadata("kernel_arg_type_qual");
311 if (Node && ArgNo < Node->getNumOperands())
312 TypeQual = cast<MDString>(Node->getOperand(ArgNo))->getString();
314 Type *Ty = Arg.getType();
315 const DataLayout &DL = Func->getParent()->getDataLayout();
317 MaybeAlign PointeeAlign;
318 if (auto PtrTy = dyn_cast<PointerType>(Ty)) {
319 if (PtrTy->getAddressSpace() == AMDGPUAS::LOCAL_ADDRESS) {
320 PointeeAlign = DL.getValueOrABITypeAlignment(Arg.getParamAlign(),
321 PtrTy->getElementType());
325 emitKernelArg(DL, Ty, getValueKind(Arg.getType(), TypeQual, BaseTypeName),
326 PointeeAlign, Name, TypeName, BaseTypeName, AccQual, TypeQual);
329 void MetadataStreamerV2::emitKernelArg(const DataLayout &DL, Type *Ty,
331 MaybeAlign PointeeAlign, StringRef Name,
333 StringRef BaseTypeName,
334 StringRef AccQual, StringRef TypeQual) {
335 HSAMetadata.mKernels.back().mArgs.push_back(Kernel::Arg::Metadata());
336 auto &Arg = HSAMetadata.mKernels.back().mArgs.back();
338 Arg.mName = std::string(Name);
339 Arg.mTypeName = std::string(TypeName);
340 Arg.mSize = DL.getTypeAllocSize(Ty);
341 Arg.mAlign = DL.getABITypeAlign(Ty).value();
342 Arg.mValueKind = ValueKind;
343 Arg.mPointeeAlign = PointeeAlign ? PointeeAlign->value() : 0;
345 if (auto PtrTy = dyn_cast<PointerType>(Ty))
346 Arg.mAddrSpaceQual = getAddressSpaceQualifier(PtrTy->getAddressSpace());
348 Arg.mAccQual = getAccessQualifier(AccQual);
350 // TODO: Emit Arg.mActualAccQual.
352 SmallVector<StringRef, 1> SplitTypeQuals;
353 TypeQual.split(SplitTypeQuals, " ", -1, false);
354 for (StringRef Key : SplitTypeQuals) {
355 auto P = StringSwitch<bool*>(Key)
356 .Case("const", &Arg.mIsConst)
357 .Case("restrict", &Arg.mIsRestrict)
358 .Case("volatile", &Arg.mIsVolatile)
359 .Case("pipe", &Arg.mIsPipe)
366 void MetadataStreamerV2::emitHiddenKernelArgs(const Function &Func) {
367 int HiddenArgNumBytes =
368 getIntegerAttribute(Func, "amdgpu-implicitarg-num-bytes", 0);
370 if (!HiddenArgNumBytes)
373 auto &DL = Func.getParent()->getDataLayout();
374 auto Int64Ty = Type::getInt64Ty(Func.getContext());
376 if (HiddenArgNumBytes >= 8)
377 emitKernelArg(DL, Int64Ty, ValueKind::HiddenGlobalOffsetX);
378 if (HiddenArgNumBytes >= 16)
379 emitKernelArg(DL, Int64Ty, ValueKind::HiddenGlobalOffsetY);
380 if (HiddenArgNumBytes >= 24)
381 emitKernelArg(DL, Int64Ty, ValueKind::HiddenGlobalOffsetZ);
383 auto Int8PtrTy = Type::getInt8PtrTy(Func.getContext(),
384 AMDGPUAS::GLOBAL_ADDRESS);
386 // Emit "printf buffer" argument if printf is used, otherwise emit dummy
388 if (HiddenArgNumBytes >= 32) {
389 if (Func.getParent()->getNamedMetadata("llvm.printf.fmts"))
390 emitKernelArg(DL, Int8PtrTy, ValueKind::HiddenPrintfBuffer);
391 else if (Func.getParent()->getFunction("__ockl_hostcall_internal")) {
392 // The printf runtime binding pass should have ensured that hostcall and
393 // printf are not used in the same module.
394 assert(!Func.getParent()->getNamedMetadata("llvm.printf.fmts"));
395 emitKernelArg(DL, Int8PtrTy, ValueKind::HiddenHostcallBuffer);
397 emitKernelArg(DL, Int8PtrTy, ValueKind::HiddenNone);
400 // Emit "default queue" and "completion action" arguments if enqueue kernel is
401 // used, otherwise emit dummy "none" arguments.
402 if (HiddenArgNumBytes >= 48) {
403 if (Func.hasFnAttribute("calls-enqueue-kernel")) {
404 emitKernelArg(DL, Int8PtrTy, ValueKind::HiddenDefaultQueue);
405 emitKernelArg(DL, Int8PtrTy, ValueKind::HiddenCompletionAction);
407 emitKernelArg(DL, Int8PtrTy, ValueKind::HiddenNone);
408 emitKernelArg(DL, Int8PtrTy, ValueKind::HiddenNone);
412 // Emit the pointer argument for multi-grid object.
413 if (HiddenArgNumBytes >= 56)
414 emitKernelArg(DL, Int8PtrTy, ValueKind::HiddenMultiGridSyncArg);
417 bool MetadataStreamerV2::emitTo(AMDGPUTargetStreamer &TargetStreamer) {
418 return TargetStreamer.EmitHSAMetadata(getHSAMetadata());
421 void MetadataStreamerV2::begin(const Module &Mod) {
426 void MetadataStreamerV2::end() {
427 std::string HSAMetadataString;
428 if (toString(HSAMetadata, HSAMetadataString))
432 dump(HSAMetadataString);
433 if (VerifyHSAMetadata)
434 verify(HSAMetadataString);
437 void MetadataStreamerV2::emitKernel(const MachineFunction &MF,
438 const SIProgramInfo &ProgramInfo) {
439 auto &Func = MF.getFunction();
440 if (Func.getCallingConv() != CallingConv::AMDGPU_KERNEL)
443 auto CodeProps = getHSACodeProps(MF, ProgramInfo);
444 auto DebugProps = getHSADebugProps(MF, ProgramInfo);
446 HSAMetadata.mKernels.push_back(Kernel::Metadata());
447 auto &Kernel = HSAMetadata.mKernels.back();
449 Kernel.mName = std::string(Func.getName());
450 Kernel.mSymbolName = (Twine(Func.getName()) + Twine("@kd")).str();
451 emitKernelLanguage(Func);
452 emitKernelAttrs(Func);
453 emitKernelArgs(Func);
454 HSAMetadata.mKernels.back().mCodeProps = CodeProps;
455 HSAMetadata.mKernels.back().mDebugProps = DebugProps;
458 //===----------------------------------------------------------------------===//
459 // HSAMetadataStreamerV3
460 //===----------------------------------------------------------------------===//
462 void MetadataStreamerV3::dump(StringRef HSAMetadataString) const {
463 errs() << "AMDGPU HSA Metadata:\n" << HSAMetadataString << '\n';
466 void MetadataStreamerV3::verify(StringRef HSAMetadataString) const {
467 errs() << "AMDGPU HSA Metadata Parser Test: ";
469 msgpack::Document FromHSAMetadataString;
471 if (!FromHSAMetadataString.fromYAML(HSAMetadataString)) {
476 std::string ToHSAMetadataString;
477 raw_string_ostream StrOS(ToHSAMetadataString);
478 FromHSAMetadataString.toYAML(StrOS);
480 errs() << (HSAMetadataString == StrOS.str() ? "PASS" : "FAIL") << '\n';
481 if (HSAMetadataString != ToHSAMetadataString) {
482 errs() << "Original input: " << HSAMetadataString << '\n'
483 << "Produced output: " << StrOS.str() << '\n';
488 MetadataStreamerV3::getAccessQualifier(StringRef AccQual) const {
489 return StringSwitch<Optional<StringRef>>(AccQual)
490 .Case("read_only", StringRef("read_only"))
491 .Case("write_only", StringRef("write_only"))
492 .Case("read_write", StringRef("read_write"))
497 MetadataStreamerV3::getAddressSpaceQualifier(unsigned AddressSpace) const {
498 switch (AddressSpace) {
499 case AMDGPUAS::PRIVATE_ADDRESS:
500 return StringRef("private");
501 case AMDGPUAS::GLOBAL_ADDRESS:
502 return StringRef("global");
503 case AMDGPUAS::CONSTANT_ADDRESS:
504 return StringRef("constant");
505 case AMDGPUAS::LOCAL_ADDRESS:
506 return StringRef("local");
507 case AMDGPUAS::FLAT_ADDRESS:
508 return StringRef("generic");
509 case AMDGPUAS::REGION_ADDRESS:
510 return StringRef("region");
516 StringRef MetadataStreamerV3::getValueKind(Type *Ty, StringRef TypeQual,
517 StringRef BaseTypeName) const {
518 if (TypeQual.find("pipe") != StringRef::npos)
521 return StringSwitch<StringRef>(BaseTypeName)
522 .Case("image1d_t", "image")
523 .Case("image1d_array_t", "image")
524 .Case("image1d_buffer_t", "image")
525 .Case("image2d_t", "image")
526 .Case("image2d_array_t", "image")
527 .Case("image2d_array_depth_t", "image")
528 .Case("image2d_array_msaa_t", "image")
529 .Case("image2d_array_msaa_depth_t", "image")
530 .Case("image2d_depth_t", "image")
531 .Case("image2d_msaa_t", "image")
532 .Case("image2d_msaa_depth_t", "image")
533 .Case("image3d_t", "image")
534 .Case("sampler_t", "sampler")
535 .Case("queue_t", "queue")
536 .Default(isa<PointerType>(Ty)
537 ? (Ty->getPointerAddressSpace() == AMDGPUAS::LOCAL_ADDRESS
538 ? "dynamic_shared_pointer"
543 std::string MetadataStreamerV3::getTypeName(Type *Ty, bool Signed) const {
544 switch (Ty->getTypeID()) {
545 case Type::IntegerTyID: {
547 return (Twine('u') + getTypeName(Ty, true)).str();
549 auto BitWidth = Ty->getIntegerBitWidth();
560 return (Twine('i') + Twine(BitWidth)).str();
565 case Type::FloatTyID:
567 case Type::DoubleTyID:
569 case Type::FixedVectorTyID: {
570 auto VecTy = cast<FixedVectorType>(Ty);
571 auto ElTy = VecTy->getElementType();
572 auto NumElements = VecTy->getNumElements();
573 return (Twine(getTypeName(ElTy, Signed)) + Twine(NumElements)).str();
580 msgpack::ArrayDocNode
581 MetadataStreamerV3::getWorkGroupDimensions(MDNode *Node) const {
582 auto Dims = HSAMetadataDoc->getArrayNode();
583 if (Node->getNumOperands() != 3)
586 for (auto &Op : Node->operands())
587 Dims.push_back(Dims.getDocument()->getNode(
588 uint64_t(mdconst::extract<ConstantInt>(Op)->getZExtValue())));
592 void MetadataStreamerV3::emitVersion() {
593 auto Version = HSAMetadataDoc->getArrayNode();
594 Version.push_back(Version.getDocument()->getNode(VersionMajor));
595 Version.push_back(Version.getDocument()->getNode(VersionMinor));
596 getRootMetadata("amdhsa.version") = Version;
599 void MetadataStreamerV3::emitPrintf(const Module &Mod) {
600 auto Node = Mod.getNamedMetadata("llvm.printf.fmts");
604 auto Printf = HSAMetadataDoc->getArrayNode();
605 for (auto Op : Node->operands())
606 if (Op->getNumOperands())
607 Printf.push_back(Printf.getDocument()->getNode(
608 cast<MDString>(Op->getOperand(0))->getString(), /*Copy=*/true));
609 getRootMetadata("amdhsa.printf") = Printf;
612 void MetadataStreamerV3::emitKernelLanguage(const Function &Func,
613 msgpack::MapDocNode Kern) {
614 // TODO: What about other languages?
615 auto Node = Func.getParent()->getNamedMetadata("opencl.ocl.version");
616 if (!Node || !Node->getNumOperands())
618 auto Op0 = Node->getOperand(0);
619 if (Op0->getNumOperands() <= 1)
622 Kern[".language"] = Kern.getDocument()->getNode("OpenCL C");
623 auto LanguageVersion = Kern.getDocument()->getArrayNode();
624 LanguageVersion.push_back(Kern.getDocument()->getNode(
625 mdconst::extract<ConstantInt>(Op0->getOperand(0))->getZExtValue()));
626 LanguageVersion.push_back(Kern.getDocument()->getNode(
627 mdconst::extract<ConstantInt>(Op0->getOperand(1))->getZExtValue()));
628 Kern[".language_version"] = LanguageVersion;
631 void MetadataStreamerV3::emitKernelAttrs(const Function &Func,
632 msgpack::MapDocNode Kern) {
634 if (auto Node = Func.getMetadata("reqd_work_group_size"))
635 Kern[".reqd_workgroup_size"] = getWorkGroupDimensions(Node);
636 if (auto Node = Func.getMetadata("work_group_size_hint"))
637 Kern[".workgroup_size_hint"] = getWorkGroupDimensions(Node);
638 if (auto Node = Func.getMetadata("vec_type_hint")) {
639 Kern[".vec_type_hint"] = Kern.getDocument()->getNode(
641 cast<ValueAsMetadata>(Node->getOperand(0))->getType(),
642 mdconst::extract<ConstantInt>(Node->getOperand(1))->getZExtValue()),
645 if (Func.hasFnAttribute("runtime-handle")) {
646 Kern[".device_enqueue_symbol"] = Kern.getDocument()->getNode(
647 Func.getFnAttribute("runtime-handle").getValueAsString().str(),
652 void MetadataStreamerV3::emitKernelArgs(const Function &Func,
653 msgpack::MapDocNode Kern) {
655 auto Args = HSAMetadataDoc->getArrayNode();
656 for (auto &Arg : Func.args())
657 emitKernelArg(Arg, Offset, Args);
659 emitHiddenKernelArgs(Func, Offset, Args);
661 Kern[".args"] = Args;
664 void MetadataStreamerV3::emitKernelArg(const Argument &Arg, unsigned &Offset,
665 msgpack::ArrayDocNode Args) {
666 auto Func = Arg.getParent();
667 auto ArgNo = Arg.getArgNo();
671 Node = Func->getMetadata("kernel_arg_name");
672 if (Node && ArgNo < Node->getNumOperands())
673 Name = cast<MDString>(Node->getOperand(ArgNo))->getString();
674 else if (Arg.hasName())
675 Name = Arg.getName();
678 Node = Func->getMetadata("kernel_arg_type");
679 if (Node && ArgNo < Node->getNumOperands())
680 TypeName = cast<MDString>(Node->getOperand(ArgNo))->getString();
682 StringRef BaseTypeName;
683 Node = Func->getMetadata("kernel_arg_base_type");
684 if (Node && ArgNo < Node->getNumOperands())
685 BaseTypeName = cast<MDString>(Node->getOperand(ArgNo))->getString();
688 if (Arg.getType()->isPointerTy() && Arg.onlyReadsMemory() &&
689 Arg.hasNoAliasAttr()) {
690 AccQual = "read_only";
692 Node = Func->getMetadata("kernel_arg_access_qual");
693 if (Node && ArgNo < Node->getNumOperands())
694 AccQual = cast<MDString>(Node->getOperand(ArgNo))->getString();
698 Node = Func->getMetadata("kernel_arg_type_qual");
699 if (Node && ArgNo < Node->getNumOperands())
700 TypeQual = cast<MDString>(Node->getOperand(ArgNo))->getString();
702 Type *Ty = Arg.getType();
703 const DataLayout &DL = Func->getParent()->getDataLayout();
705 MaybeAlign PointeeAlign;
706 if (auto PtrTy = dyn_cast<PointerType>(Ty)) {
707 if (PtrTy->getAddressSpace() == AMDGPUAS::LOCAL_ADDRESS) {
708 PointeeAlign = DL.getValueOrABITypeAlignment(Arg.getParamAlign(),
709 PtrTy->getElementType());
713 emitKernelArg(Func->getParent()->getDataLayout(), Arg.getType(),
714 getValueKind(Arg.getType(), TypeQual, BaseTypeName), Offset,
715 Args, PointeeAlign, Name, TypeName, BaseTypeName, AccQual,
719 void MetadataStreamerV3::emitKernelArg(const DataLayout &DL, Type *Ty,
720 StringRef ValueKind, unsigned &Offset,
721 msgpack::ArrayDocNode Args,
722 MaybeAlign PointeeAlign, StringRef Name,
724 StringRef BaseTypeName,
725 StringRef AccQual, StringRef TypeQual) {
726 auto Arg = Args.getDocument()->getMapNode();
729 Arg[".name"] = Arg.getDocument()->getNode(Name, /*Copy=*/true);
730 if (!TypeName.empty())
731 Arg[".type_name"] = Arg.getDocument()->getNode(TypeName, /*Copy=*/true);
732 auto Size = DL.getTypeAllocSize(Ty);
733 Align Alignment = DL.getABITypeAlign(Ty);
734 Arg[".size"] = Arg.getDocument()->getNode(Size);
735 Offset = alignTo(Offset, Alignment);
736 Arg[".offset"] = Arg.getDocument()->getNode(Offset);
738 Arg[".value_kind"] = Arg.getDocument()->getNode(ValueKind, /*Copy=*/true);
740 Arg[".pointee_align"] = Arg.getDocument()->getNode(PointeeAlign->value());
742 if (auto PtrTy = dyn_cast<PointerType>(Ty))
743 if (auto Qualifier = getAddressSpaceQualifier(PtrTy->getAddressSpace()))
744 Arg[".address_space"] = Arg.getDocument()->getNode(*Qualifier, /*Copy=*/true);
746 if (auto AQ = getAccessQualifier(AccQual))
747 Arg[".access"] = Arg.getDocument()->getNode(*AQ, /*Copy=*/true);
749 // TODO: Emit Arg[".actual_access"].
751 SmallVector<StringRef, 1> SplitTypeQuals;
752 TypeQual.split(SplitTypeQuals, " ", -1, false);
753 for (StringRef Key : SplitTypeQuals) {
755 Arg[".is_const"] = Arg.getDocument()->getNode(true);
756 else if (Key == "restrict")
757 Arg[".is_restrict"] = Arg.getDocument()->getNode(true);
758 else if (Key == "volatile")
759 Arg[".is_volatile"] = Arg.getDocument()->getNode(true);
760 else if (Key == "pipe")
761 Arg[".is_pipe"] = Arg.getDocument()->getNode(true);
767 void MetadataStreamerV3::emitHiddenKernelArgs(const Function &Func,
769 msgpack::ArrayDocNode Args) {
770 int HiddenArgNumBytes =
771 getIntegerAttribute(Func, "amdgpu-implicitarg-num-bytes", 0);
773 if (!HiddenArgNumBytes)
776 auto &DL = Func.getParent()->getDataLayout();
777 auto Int64Ty = Type::getInt64Ty(Func.getContext());
779 if (HiddenArgNumBytes >= 8)
780 emitKernelArg(DL, Int64Ty, "hidden_global_offset_x", Offset, Args);
781 if (HiddenArgNumBytes >= 16)
782 emitKernelArg(DL, Int64Ty, "hidden_global_offset_y", Offset, Args);
783 if (HiddenArgNumBytes >= 24)
784 emitKernelArg(DL, Int64Ty, "hidden_global_offset_z", Offset, Args);
787 Type::getInt8PtrTy(Func.getContext(), AMDGPUAS::GLOBAL_ADDRESS);
789 // Emit "printf buffer" argument if printf is used, otherwise emit dummy
791 if (HiddenArgNumBytes >= 32) {
792 if (Func.getParent()->getNamedMetadata("llvm.printf.fmts"))
793 emitKernelArg(DL, Int8PtrTy, "hidden_printf_buffer", Offset, Args);
794 else if (Func.getParent()->getFunction("__ockl_hostcall_internal")) {
795 // The printf runtime binding pass should have ensured that hostcall and
796 // printf are not used in the same module.
797 assert(!Func.getParent()->getNamedMetadata("llvm.printf.fmts"));
798 emitKernelArg(DL, Int8PtrTy, "hidden_hostcall_buffer", Offset, Args);
800 emitKernelArg(DL, Int8PtrTy, "hidden_none", Offset, Args);
803 // Emit "default queue" and "completion action" arguments if enqueue kernel is
804 // used, otherwise emit dummy "none" arguments.
805 if (HiddenArgNumBytes >= 48) {
806 if (Func.hasFnAttribute("calls-enqueue-kernel")) {
807 emitKernelArg(DL, Int8PtrTy, "hidden_default_queue", Offset, Args);
808 emitKernelArg(DL, Int8PtrTy, "hidden_completion_action", Offset, Args);
810 emitKernelArg(DL, Int8PtrTy, "hidden_none", Offset, Args);
811 emitKernelArg(DL, Int8PtrTy, "hidden_none", Offset, Args);
815 // Emit the pointer argument for multi-grid object.
816 if (HiddenArgNumBytes >= 56)
817 emitKernelArg(DL, Int8PtrTy, "hidden_multigrid_sync_arg", Offset, Args);
821 MetadataStreamerV3::getHSAKernelProps(const MachineFunction &MF,
822 const SIProgramInfo &ProgramInfo) const {
823 const GCNSubtarget &STM = MF.getSubtarget<GCNSubtarget>();
824 const SIMachineFunctionInfo &MFI = *MF.getInfo<SIMachineFunctionInfo>();
825 const Function &F = MF.getFunction();
827 auto Kern = HSAMetadataDoc->getMapNode();
829 Align MaxKernArgAlign;
830 Kern[".kernarg_segment_size"] = Kern.getDocument()->getNode(
831 STM.getKernArgSegmentSize(F, MaxKernArgAlign));
832 Kern[".group_segment_fixed_size"] =
833 Kern.getDocument()->getNode(ProgramInfo.LDSSize);
834 Kern[".private_segment_fixed_size"] =
835 Kern.getDocument()->getNode(ProgramInfo.ScratchSize);
836 Kern[".kernarg_segment_align"] =
837 Kern.getDocument()->getNode(std::max(Align(4), MaxKernArgAlign).value());
838 Kern[".wavefront_size"] =
839 Kern.getDocument()->getNode(STM.getWavefrontSize());
840 Kern[".sgpr_count"] = Kern.getDocument()->getNode(ProgramInfo.NumSGPR);
841 Kern[".vgpr_count"] = Kern.getDocument()->getNode(ProgramInfo.NumVGPR);
842 Kern[".max_flat_workgroup_size"] =
843 Kern.getDocument()->getNode(MFI.getMaxFlatWorkGroupSize());
844 Kern[".sgpr_spill_count"] =
845 Kern.getDocument()->getNode(MFI.getNumSpilledSGPRs());
846 Kern[".vgpr_spill_count"] =
847 Kern.getDocument()->getNode(MFI.getNumSpilledVGPRs());
852 bool MetadataStreamerV3::emitTo(AMDGPUTargetStreamer &TargetStreamer) {
853 return TargetStreamer.EmitHSAMetadata(*HSAMetadataDoc, true);
856 void MetadataStreamerV3::begin(const Module &Mod) {
859 getRootMetadata("amdhsa.kernels") = HSAMetadataDoc->getArrayNode();
862 void MetadataStreamerV3::end() {
863 std::string HSAMetadataString;
864 raw_string_ostream StrOS(HSAMetadataString);
865 HSAMetadataDoc->toYAML(StrOS);
869 if (VerifyHSAMetadata)
873 void MetadataStreamerV3::emitKernel(const MachineFunction &MF,
874 const SIProgramInfo &ProgramInfo) {
875 auto &Func = MF.getFunction();
876 auto Kern = getHSAKernelProps(MF, ProgramInfo);
878 assert(Func.getCallingConv() == CallingConv::AMDGPU_KERNEL ||
879 Func.getCallingConv() == CallingConv::SPIR_KERNEL);
882 getRootMetadata("amdhsa.kernels").getArray(/*Convert=*/true);
885 Kern[".name"] = Kern.getDocument()->getNode(Func.getName());
886 Kern[".symbol"] = Kern.getDocument()->getNode(
887 (Twine(Func.getName()) + Twine(".kd")).str(), /*Copy=*/true);
888 emitKernelLanguage(Func, Kern);
889 emitKernelAttrs(Func, Kern);
890 emitKernelArgs(Func, Kern);
893 Kernels.push_back(Kern);
896 } // end namespace HSAMD
897 } // end namespace AMDGPU
898 } // end namespace llvm