1 //===--- AMDGPUHSAMetadataStreamer.cpp --------------------------*- C++ -*-===//
3 // The LLVM Compiler Infrastructure
5 // This file is distributed under the University of Illinois Open Source
6 // License. See LICENSE.TXT for details.
8 //===----------------------------------------------------------------------===//
11 /// AMDGPU HSA Metadata Streamer.
14 //===----------------------------------------------------------------------===//
16 #include "AMDGPUHSAMetadataStreamer.h"
18 #include "AMDGPUSubtarget.h"
19 #include "MCTargetDesc/AMDGPUTargetStreamer.h"
20 #include "SIMachineFunctionInfo.h"
21 #include "SIProgramInfo.h"
22 #include "Utils/AMDGPUBaseInfo.h"
23 #include "llvm/ADT/StringSwitch.h"
24 #include "llvm/IR/Constants.h"
25 #include "llvm/IR/Module.h"
26 #include "llvm/Support/raw_ostream.h"
30 static cl::opt<bool> DumpHSAMetadata(
31 "amdgpu-dump-hsa-metadata",
32 cl::desc("Dump AMDGPU HSA Metadata"));
33 static cl::opt<bool> VerifyHSAMetadata(
34 "amdgpu-verify-hsa-metadata",
35 cl::desc("Verify AMDGPU HSA Metadata"));
40 //===----------------------------------------------------------------------===//
41 // HSAMetadataStreamerV2
42 //===----------------------------------------------------------------------===//
43 void MetadataStreamerV2::dump(StringRef HSAMetadataString) const {
44 errs() << "AMDGPU HSA Metadata:\n" << HSAMetadataString << '\n';
47 void MetadataStreamerV2::verify(StringRef HSAMetadataString) const {
48 errs() << "AMDGPU HSA Metadata Parser Test: ";
50 HSAMD::Metadata FromHSAMetadataString;
51 if (fromString(HSAMetadataString, FromHSAMetadataString)) {
56 std::string ToHSAMetadataString;
57 if (toString(FromHSAMetadataString, ToHSAMetadataString)) {
62 errs() << (HSAMetadataString == ToHSAMetadataString ? "PASS" : "FAIL")
64 if (HSAMetadataString != ToHSAMetadataString) {
65 errs() << "Original input: " << HSAMetadataString << '\n'
66 << "Produced output: " << ToHSAMetadataString << '\n';
71 MetadataStreamerV2::getAccessQualifier(StringRef AccQual) const {
73 return AccessQualifier::Unknown;
75 return StringSwitch<AccessQualifier>(AccQual)
76 .Case("read_only", AccessQualifier::ReadOnly)
77 .Case("write_only", AccessQualifier::WriteOnly)
78 .Case("read_write", AccessQualifier::ReadWrite)
79 .Default(AccessQualifier::Default);
83 MetadataStreamerV2::getAddressSpaceQualifier(
84 unsigned AddressSpace) const {
85 switch (AddressSpace) {
86 case AMDGPUAS::PRIVATE_ADDRESS:
87 return AddressSpaceQualifier::Private;
88 case AMDGPUAS::GLOBAL_ADDRESS:
89 return AddressSpaceQualifier::Global;
90 case AMDGPUAS::CONSTANT_ADDRESS:
91 return AddressSpaceQualifier::Constant;
92 case AMDGPUAS::LOCAL_ADDRESS:
93 return AddressSpaceQualifier::Local;
94 case AMDGPUAS::FLAT_ADDRESS:
95 return AddressSpaceQualifier::Generic;
96 case AMDGPUAS::REGION_ADDRESS:
97 return AddressSpaceQualifier::Region;
99 return AddressSpaceQualifier::Unknown;
103 ValueKind MetadataStreamerV2::getValueKind(Type *Ty, StringRef TypeQual,
104 StringRef BaseTypeName) const {
105 if (TypeQual.find("pipe") != StringRef::npos)
106 return ValueKind::Pipe;
108 return StringSwitch<ValueKind>(BaseTypeName)
109 .Case("image1d_t", ValueKind::Image)
110 .Case("image1d_array_t", ValueKind::Image)
111 .Case("image1d_buffer_t", ValueKind::Image)
112 .Case("image2d_t", ValueKind::Image)
113 .Case("image2d_array_t", ValueKind::Image)
114 .Case("image2d_array_depth_t", ValueKind::Image)
115 .Case("image2d_array_msaa_t", ValueKind::Image)
116 .Case("image2d_array_msaa_depth_t", ValueKind::Image)
117 .Case("image2d_depth_t", ValueKind::Image)
118 .Case("image2d_msaa_t", ValueKind::Image)
119 .Case("image2d_msaa_depth_t", ValueKind::Image)
120 .Case("image3d_t", ValueKind::Image)
121 .Case("sampler_t", ValueKind::Sampler)
122 .Case("queue_t", ValueKind::Queue)
123 .Default(isa<PointerType>(Ty) ?
124 (Ty->getPointerAddressSpace() ==
125 AMDGPUAS::LOCAL_ADDRESS ?
126 ValueKind::DynamicSharedPointer :
127 ValueKind::GlobalBuffer) :
131 ValueType MetadataStreamerV2::getValueType(Type *Ty, StringRef TypeName) const {
132 switch (Ty->getTypeID()) {
133 case Type::IntegerTyID: {
134 auto Signed = !TypeName.startswith("u");
135 switch (Ty->getIntegerBitWidth()) {
137 return Signed ? ValueType::I8 : ValueType::U8;
139 return Signed ? ValueType::I16 : ValueType::U16;
141 return Signed ? ValueType::I32 : ValueType::U32;
143 return Signed ? ValueType::I64 : ValueType::U64;
145 return ValueType::Struct;
149 return ValueType::F16;
150 case Type::FloatTyID:
151 return ValueType::F32;
152 case Type::DoubleTyID:
153 return ValueType::F64;
154 case Type::PointerTyID:
155 return getValueType(Ty->getPointerElementType(), TypeName);
156 case Type::VectorTyID:
157 return getValueType(Ty->getVectorElementType(), TypeName);
159 return ValueType::Struct;
163 std::string MetadataStreamerV2::getTypeName(Type *Ty, bool Signed) const {
164 switch (Ty->getTypeID()) {
165 case Type::IntegerTyID: {
167 return (Twine('u') + getTypeName(Ty, true)).str();
169 auto BitWidth = Ty->getIntegerBitWidth();
180 return (Twine('i') + Twine(BitWidth)).str();
185 case Type::FloatTyID:
187 case Type::DoubleTyID:
189 case Type::VectorTyID: {
190 auto VecTy = cast<VectorType>(Ty);
191 auto ElTy = VecTy->getElementType();
192 auto NumElements = VecTy->getVectorNumElements();
193 return (Twine(getTypeName(ElTy, Signed)) + Twine(NumElements)).str();
200 std::vector<uint32_t>
201 MetadataStreamerV2::getWorkGroupDimensions(MDNode *Node) const {
202 std::vector<uint32_t> Dims;
203 if (Node->getNumOperands() != 3)
206 for (auto &Op : Node->operands())
207 Dims.push_back(mdconst::extract<ConstantInt>(Op)->getZExtValue());
211 Kernel::CodeProps::Metadata
212 MetadataStreamerV2::getHSACodeProps(const MachineFunction &MF,
213 const SIProgramInfo &ProgramInfo) const {
214 const GCNSubtarget &STM = MF.getSubtarget<GCNSubtarget>();
215 const SIMachineFunctionInfo &MFI = *MF.getInfo<SIMachineFunctionInfo>();
216 HSAMD::Kernel::CodeProps::Metadata HSACodeProps;
217 const Function &F = MF.getFunction();
219 assert(F.getCallingConv() == CallingConv::AMDGPU_KERNEL ||
220 F.getCallingConv() == CallingConv::SPIR_KERNEL);
222 unsigned MaxKernArgAlign;
223 HSACodeProps.mKernargSegmentSize = STM.getKernArgSegmentSize(F,
225 HSACodeProps.mGroupSegmentFixedSize = ProgramInfo.LDSSize;
226 HSACodeProps.mPrivateSegmentFixedSize = ProgramInfo.ScratchSize;
227 HSACodeProps.mKernargSegmentAlign = std::max(MaxKernArgAlign, 4u);
228 HSACodeProps.mWavefrontSize = STM.getWavefrontSize();
229 HSACodeProps.mNumSGPRs = ProgramInfo.NumSGPR;
230 HSACodeProps.mNumVGPRs = ProgramInfo.NumVGPR;
231 HSACodeProps.mMaxFlatWorkGroupSize = MFI.getMaxFlatWorkGroupSize();
232 HSACodeProps.mIsDynamicCallStack = ProgramInfo.DynamicCallStack;
233 HSACodeProps.mIsXNACKEnabled = STM.isXNACKEnabled();
234 HSACodeProps.mNumSpilledSGPRs = MFI.getNumSpilledSGPRs();
235 HSACodeProps.mNumSpilledVGPRs = MFI.getNumSpilledVGPRs();
240 Kernel::DebugProps::Metadata
241 MetadataStreamerV2::getHSADebugProps(const MachineFunction &MF,
242 const SIProgramInfo &ProgramInfo) const {
243 const GCNSubtarget &STM = MF.getSubtarget<GCNSubtarget>();
244 HSAMD::Kernel::DebugProps::Metadata HSADebugProps;
246 if (!STM.debuggerSupported())
247 return HSADebugProps;
249 HSADebugProps.mDebuggerABIVersion.push_back(1);
250 HSADebugProps.mDebuggerABIVersion.push_back(0);
252 if (STM.debuggerEmitPrologue()) {
253 HSADebugProps.mPrivateSegmentBufferSGPR =
254 ProgramInfo.DebuggerPrivateSegmentBufferSGPR;
255 HSADebugProps.mWavefrontPrivateSegmentOffsetSGPR =
256 ProgramInfo.DebuggerWavefrontPrivateSegmentOffsetSGPR;
259 return HSADebugProps;
262 void MetadataStreamerV2::emitVersion() {
263 auto &Version = HSAMetadata.mVersion;
265 Version.push_back(VersionMajor);
266 Version.push_back(VersionMinor);
269 void MetadataStreamerV2::emitPrintf(const Module &Mod) {
270 auto &Printf = HSAMetadata.mPrintf;
272 auto Node = Mod.getNamedMetadata("llvm.printf.fmts");
276 for (auto Op : Node->operands())
277 if (Op->getNumOperands())
278 Printf.push_back(cast<MDString>(Op->getOperand(0))->getString());
281 void MetadataStreamerV2::emitKernelLanguage(const Function &Func) {
282 auto &Kernel = HSAMetadata.mKernels.back();
284 // TODO: What about other languages?
285 auto Node = Func.getParent()->getNamedMetadata("opencl.ocl.version");
286 if (!Node || !Node->getNumOperands())
288 auto Op0 = Node->getOperand(0);
289 if (Op0->getNumOperands() <= 1)
292 Kernel.mLanguage = "OpenCL C";
293 Kernel.mLanguageVersion.push_back(
294 mdconst::extract<ConstantInt>(Op0->getOperand(0))->getZExtValue());
295 Kernel.mLanguageVersion.push_back(
296 mdconst::extract<ConstantInt>(Op0->getOperand(1))->getZExtValue());
299 void MetadataStreamerV2::emitKernelAttrs(const Function &Func) {
300 auto &Attrs = HSAMetadata.mKernels.back().mAttrs;
302 if (auto Node = Func.getMetadata("reqd_work_group_size"))
303 Attrs.mReqdWorkGroupSize = getWorkGroupDimensions(Node);
304 if (auto Node = Func.getMetadata("work_group_size_hint"))
305 Attrs.mWorkGroupSizeHint = getWorkGroupDimensions(Node);
306 if (auto Node = Func.getMetadata("vec_type_hint")) {
307 Attrs.mVecTypeHint = getTypeName(
308 cast<ValueAsMetadata>(Node->getOperand(0))->getType(),
309 mdconst::extract<ConstantInt>(Node->getOperand(1))->getZExtValue());
311 if (Func.hasFnAttribute("runtime-handle")) {
312 Attrs.mRuntimeHandle =
313 Func.getFnAttribute("runtime-handle").getValueAsString().str();
317 void MetadataStreamerV2::emitKernelArgs(const Function &Func) {
318 for (auto &Arg : Func.args())
321 emitHiddenKernelArgs(Func);
324 void MetadataStreamerV2::emitKernelArg(const Argument &Arg) {
325 auto Func = Arg.getParent();
326 auto ArgNo = Arg.getArgNo();
330 Node = Func->getMetadata("kernel_arg_name");
331 if (Node && ArgNo < Node->getNumOperands())
332 Name = cast<MDString>(Node->getOperand(ArgNo))->getString();
333 else if (Arg.hasName())
334 Name = Arg.getName();
337 Node = Func->getMetadata("kernel_arg_type");
338 if (Node && ArgNo < Node->getNumOperands())
339 TypeName = cast<MDString>(Node->getOperand(ArgNo))->getString();
341 StringRef BaseTypeName;
342 Node = Func->getMetadata("kernel_arg_base_type");
343 if (Node && ArgNo < Node->getNumOperands())
344 BaseTypeName = cast<MDString>(Node->getOperand(ArgNo))->getString();
347 if (Arg.getType()->isPointerTy() && Arg.onlyReadsMemory() &&
348 Arg.hasNoAliasAttr()) {
349 AccQual = "read_only";
351 Node = Func->getMetadata("kernel_arg_access_qual");
352 if (Node && ArgNo < Node->getNumOperands())
353 AccQual = cast<MDString>(Node->getOperand(ArgNo))->getString();
357 Node = Func->getMetadata("kernel_arg_type_qual");
358 if (Node && ArgNo < Node->getNumOperands())
359 TypeQual = cast<MDString>(Node->getOperand(ArgNo))->getString();
361 Type *Ty = Arg.getType();
362 const DataLayout &DL = Func->getParent()->getDataLayout();
364 unsigned PointeeAlign = 0;
365 if (auto PtrTy = dyn_cast<PointerType>(Ty)) {
366 if (PtrTy->getAddressSpace() == AMDGPUAS::LOCAL_ADDRESS) {
367 PointeeAlign = Arg.getParamAlignment();
368 if (PointeeAlign == 0)
369 PointeeAlign = DL.getABITypeAlignment(PtrTy->getElementType());
373 emitKernelArg(DL, Ty, getValueKind(Arg.getType(), TypeQual, BaseTypeName),
374 PointeeAlign, Name, TypeName, BaseTypeName, AccQual, TypeQual);
377 void MetadataStreamerV2::emitKernelArg(const DataLayout &DL, Type *Ty,
379 unsigned PointeeAlign, StringRef Name,
381 StringRef BaseTypeName,
382 StringRef AccQual, StringRef TypeQual) {
383 HSAMetadata.mKernels.back().mArgs.push_back(Kernel::Arg::Metadata());
384 auto &Arg = HSAMetadata.mKernels.back().mArgs.back();
387 Arg.mTypeName = TypeName;
388 Arg.mSize = DL.getTypeAllocSize(Ty);
389 Arg.mAlign = DL.getABITypeAlignment(Ty);
390 Arg.mValueKind = ValueKind;
391 Arg.mValueType = getValueType(Ty, BaseTypeName);
392 Arg.mPointeeAlign = PointeeAlign;
394 if (auto PtrTy = dyn_cast<PointerType>(Ty))
395 Arg.mAddrSpaceQual = getAddressSpaceQualifier(PtrTy->getAddressSpace());
397 Arg.mAccQual = getAccessQualifier(AccQual);
399 // TODO: Emit Arg.mActualAccQual.
401 SmallVector<StringRef, 1> SplitTypeQuals;
402 TypeQual.split(SplitTypeQuals, " ", -1, false);
403 for (StringRef Key : SplitTypeQuals) {
404 auto P = StringSwitch<bool*>(Key)
405 .Case("const", &Arg.mIsConst)
406 .Case("restrict", &Arg.mIsRestrict)
407 .Case("volatile", &Arg.mIsVolatile)
408 .Case("pipe", &Arg.mIsPipe)
415 void MetadataStreamerV2::emitHiddenKernelArgs(const Function &Func) {
416 int HiddenArgNumBytes =
417 getIntegerAttribute(Func, "amdgpu-implicitarg-num-bytes", 0);
419 if (!HiddenArgNumBytes)
422 auto &DL = Func.getParent()->getDataLayout();
423 auto Int64Ty = Type::getInt64Ty(Func.getContext());
425 if (HiddenArgNumBytes >= 8)
426 emitKernelArg(DL, Int64Ty, ValueKind::HiddenGlobalOffsetX);
427 if (HiddenArgNumBytes >= 16)
428 emitKernelArg(DL, Int64Ty, ValueKind::HiddenGlobalOffsetY);
429 if (HiddenArgNumBytes >= 24)
430 emitKernelArg(DL, Int64Ty, ValueKind::HiddenGlobalOffsetZ);
432 auto Int8PtrTy = Type::getInt8PtrTy(Func.getContext(),
433 AMDGPUAS::GLOBAL_ADDRESS);
435 // Emit "printf buffer" argument if printf is used, otherwise emit dummy
437 if (HiddenArgNumBytes >= 32) {
438 if (Func.getParent()->getNamedMetadata("llvm.printf.fmts"))
439 emitKernelArg(DL, Int8PtrTy, ValueKind::HiddenPrintfBuffer);
441 emitKernelArg(DL, Int8PtrTy, ValueKind::HiddenNone);
444 // Emit "default queue" and "completion action" arguments if enqueue kernel is
445 // used, otherwise emit dummy "none" arguments.
446 if (HiddenArgNumBytes >= 48) {
447 if (Func.hasFnAttribute("calls-enqueue-kernel")) {
448 emitKernelArg(DL, Int8PtrTy, ValueKind::HiddenDefaultQueue);
449 emitKernelArg(DL, Int8PtrTy, ValueKind::HiddenCompletionAction);
451 emitKernelArg(DL, Int8PtrTy, ValueKind::HiddenNone);
452 emitKernelArg(DL, Int8PtrTy, ValueKind::HiddenNone);
457 bool MetadataStreamerV2::emitTo(AMDGPUTargetStreamer &TargetStreamer) {
458 return TargetStreamer.EmitHSAMetadata(getHSAMetadata());
461 void MetadataStreamerV2::begin(const Module &Mod) {
466 void MetadataStreamerV2::end() {
467 std::string HSAMetadataString;
468 if (toString(HSAMetadata, HSAMetadataString))
472 dump(HSAMetadataString);
473 if (VerifyHSAMetadata)
474 verify(HSAMetadataString);
477 void MetadataStreamerV2::emitKernel(const MachineFunction &MF,
478 const SIProgramInfo &ProgramInfo) {
479 auto &Func = MF.getFunction();
480 if (Func.getCallingConv() != CallingConv::AMDGPU_KERNEL)
483 auto CodeProps = getHSACodeProps(MF, ProgramInfo);
484 auto DebugProps = getHSADebugProps(MF, ProgramInfo);
486 HSAMetadata.mKernels.push_back(Kernel::Metadata());
487 auto &Kernel = HSAMetadata.mKernels.back();
489 Kernel.mName = Func.getName();
490 Kernel.mSymbolName = (Twine(Func.getName()) + Twine("@kd")).str();
491 emitKernelLanguage(Func);
492 emitKernelAttrs(Func);
493 emitKernelArgs(Func);
494 HSAMetadata.mKernels.back().mCodeProps = CodeProps;
495 HSAMetadata.mKernels.back().mDebugProps = DebugProps;
498 //===----------------------------------------------------------------------===//
499 // HSAMetadataStreamerV3
500 //===----------------------------------------------------------------------===//
502 void MetadataStreamerV3::dump(StringRef HSAMetadataString) const {
503 errs() << "AMDGPU HSA Metadata:\n" << HSAMetadataString << '\n';
506 void MetadataStreamerV3::verify(StringRef HSAMetadataString) const {
507 errs() << "AMDGPU HSA Metadata Parser Test: ";
509 std::shared_ptr<msgpack::Node> FromHSAMetadataString =
510 std::make_shared<msgpack::MapNode>();
512 yaml::Input YIn(HSAMetadataString);
513 YIn >> FromHSAMetadataString;
519 std::string ToHSAMetadataString;
520 raw_string_ostream StrOS(ToHSAMetadataString);
521 yaml::Output YOut(StrOS);
522 YOut << FromHSAMetadataString;
524 errs() << (HSAMetadataString == StrOS.str() ? "PASS" : "FAIL") << '\n';
525 if (HSAMetadataString != ToHSAMetadataString) {
526 errs() << "Original input: " << HSAMetadataString << '\n'
527 << "Produced output: " << StrOS.str() << '\n';
532 MetadataStreamerV3::getAccessQualifier(StringRef AccQual) const {
533 return StringSwitch<Optional<StringRef>>(AccQual)
534 .Case("read_only", StringRef("read_only"))
535 .Case("write_only", StringRef("write_only"))
536 .Case("read_write", StringRef("read_write"))
541 MetadataStreamerV3::getAddressSpaceQualifier(unsigned AddressSpace) const {
542 switch (AddressSpace) {
543 case AMDGPUAS::PRIVATE_ADDRESS:
544 return StringRef("private");
545 case AMDGPUAS::GLOBAL_ADDRESS:
546 return StringRef("global");
547 case AMDGPUAS::CONSTANT_ADDRESS:
548 return StringRef("constant");
549 case AMDGPUAS::LOCAL_ADDRESS:
550 return StringRef("local");
551 case AMDGPUAS::FLAT_ADDRESS:
552 return StringRef("generic");
553 case AMDGPUAS::REGION_ADDRESS:
554 return StringRef("region");
560 StringRef MetadataStreamerV3::getValueKind(Type *Ty, StringRef TypeQual,
561 StringRef BaseTypeName) const {
562 if (TypeQual.find("pipe") != StringRef::npos)
565 return StringSwitch<StringRef>(BaseTypeName)
566 .Case("image1d_t", "image")
567 .Case("image1d_array_t", "image")
568 .Case("image1d_buffer_t", "image")
569 .Case("image2d_t", "image")
570 .Case("image2d_array_t", "image")
571 .Case("image2d_array_depth_t", "image")
572 .Case("image2d_array_msaa_t", "image")
573 .Case("image2d_array_msaa_depth_t", "image")
574 .Case("image2d_depth_t", "image")
575 .Case("image2d_msaa_t", "image")
576 .Case("image2d_msaa_depth_t", "image")
577 .Case("image3d_t", "image")
578 .Case("sampler_t", "sampler")
579 .Case("queue_t", "queue")
580 .Default(isa<PointerType>(Ty)
581 ? (Ty->getPointerAddressSpace() == AMDGPUAS::LOCAL_ADDRESS
582 ? "dynamic_shared_pointer"
587 StringRef MetadataStreamerV3::getValueType(Type *Ty, StringRef TypeName) const {
588 switch (Ty->getTypeID()) {
589 case Type::IntegerTyID: {
590 auto Signed = !TypeName.startswith("u");
591 switch (Ty->getIntegerBitWidth()) {
593 return Signed ? "i8" : "u8";
595 return Signed ? "i16" : "u16";
597 return Signed ? "i32" : "u32";
599 return Signed ? "i64" : "u64";
606 case Type::FloatTyID:
608 case Type::DoubleTyID:
610 case Type::PointerTyID:
611 return getValueType(Ty->getPointerElementType(), TypeName);
612 case Type::VectorTyID:
613 return getValueType(Ty->getVectorElementType(), TypeName);
619 std::string MetadataStreamerV3::getTypeName(Type *Ty, bool Signed) const {
620 switch (Ty->getTypeID()) {
621 case Type::IntegerTyID: {
623 return (Twine('u') + getTypeName(Ty, true)).str();
625 auto BitWidth = Ty->getIntegerBitWidth();
636 return (Twine('i') + Twine(BitWidth)).str();
641 case Type::FloatTyID:
643 case Type::DoubleTyID:
645 case Type::VectorTyID: {
646 auto VecTy = cast<VectorType>(Ty);
647 auto ElTy = VecTy->getElementType();
648 auto NumElements = VecTy->getVectorNumElements();
649 return (Twine(getTypeName(ElTy, Signed)) + Twine(NumElements)).str();
656 std::shared_ptr<msgpack::ArrayNode>
657 MetadataStreamerV3::getWorkGroupDimensions(MDNode *Node) const {
658 auto Dims = std::make_shared<msgpack::ArrayNode>();
659 if (Node->getNumOperands() != 3)
662 for (auto &Op : Node->operands())
663 Dims->push_back(std::make_shared<msgpack::ScalarNode>(
664 mdconst::extract<ConstantInt>(Op)->getZExtValue()));
668 void MetadataStreamerV3::emitVersion() {
669 auto Version = std::make_shared<msgpack::ArrayNode>();
670 Version->push_back(std::make_shared<msgpack::ScalarNode>(V3::VersionMajor));
671 Version->push_back(std::make_shared<msgpack::ScalarNode>(V3::VersionMinor));
672 getRootMetadata("amdhsa.version") = std::move(Version);
675 void MetadataStreamerV3::emitPrintf(const Module &Mod) {
676 auto Node = Mod.getNamedMetadata("llvm.printf.fmts");
680 auto Printf = std::make_shared<msgpack::ArrayNode>();
681 for (auto Op : Node->operands())
682 if (Op->getNumOperands())
683 Printf->push_back(std::make_shared<msgpack::ScalarNode>(
684 cast<MDString>(Op->getOperand(0))->getString()));
685 getRootMetadata("amdhsa.printf") = std::move(Printf);
688 void MetadataStreamerV3::emitKernelLanguage(const Function &Func,
689 msgpack::MapNode &Kern) {
690 // TODO: What about other languages?
691 auto Node = Func.getParent()->getNamedMetadata("opencl.ocl.version");
692 if (!Node || !Node->getNumOperands())
694 auto Op0 = Node->getOperand(0);
695 if (Op0->getNumOperands() <= 1)
698 Kern[".language"] = std::make_shared<msgpack::ScalarNode>("OpenCL C");
699 auto LanguageVersion = std::make_shared<msgpack::ArrayNode>();
700 LanguageVersion->push_back(std::make_shared<msgpack::ScalarNode>(
701 mdconst::extract<ConstantInt>(Op0->getOperand(0))->getZExtValue()));
702 LanguageVersion->push_back(std::make_shared<msgpack::ScalarNode>(
703 mdconst::extract<ConstantInt>(Op0->getOperand(1))->getZExtValue()));
704 Kern[".language_version"] = std::move(LanguageVersion);
707 void MetadataStreamerV3::emitKernelAttrs(const Function &Func,
708 msgpack::MapNode &Kern) {
710 if (auto Node = Func.getMetadata("reqd_work_group_size"))
711 Kern[".reqd_workgroup_size"] = getWorkGroupDimensions(Node);
712 if (auto Node = Func.getMetadata("work_group_size_hint"))
713 Kern[".workgroup_size_hint"] = getWorkGroupDimensions(Node);
714 if (auto Node = Func.getMetadata("vec_type_hint")) {
715 Kern[".vec_type_hint"] = std::make_shared<msgpack::ScalarNode>(getTypeName(
716 cast<ValueAsMetadata>(Node->getOperand(0))->getType(),
717 mdconst::extract<ConstantInt>(Node->getOperand(1))->getZExtValue()));
719 if (Func.hasFnAttribute("runtime-handle")) {
720 Kern[".device_enqueue_symbol"] = std::make_shared<msgpack::ScalarNode>(
721 Func.getFnAttribute("runtime-handle").getValueAsString().str());
725 void MetadataStreamerV3::emitKernelArgs(const Function &Func,
726 msgpack::MapNode &Kern) {
728 auto Args = std::make_shared<msgpack::ArrayNode>();
729 for (auto &Arg : Func.args())
730 emitKernelArg(Arg, Offset, *Args);
732 emitHiddenKernelArgs(Func, Offset, *Args);
734 // TODO: What about other languages?
735 if (Func.getParent()->getNamedMetadata("opencl.ocl.version")) {
736 auto &DL = Func.getParent()->getDataLayout();
737 auto Int64Ty = Type::getInt64Ty(Func.getContext());
739 emitKernelArg(DL, Int64Ty, "hidden_global_offset_x", Offset, *Args);
740 emitKernelArg(DL, Int64Ty, "hidden_global_offset_y", Offset, *Args);
741 emitKernelArg(DL, Int64Ty, "hidden_global_offset_z", Offset, *Args);
744 Type::getInt8PtrTy(Func.getContext(), AMDGPUAS::GLOBAL_ADDRESS);
746 // Emit "printf buffer" argument if printf is used, otherwise emit dummy
748 if (Func.getParent()->getNamedMetadata("llvm.printf.fmts"))
749 emitKernelArg(DL, Int8PtrTy, "hidden_printf_buffer", Offset, *Args);
751 emitKernelArg(DL, Int8PtrTy, "hidden_none", Offset, *Args);
753 // Emit "default queue" and "completion action" arguments if enqueue kernel
754 // is used, otherwise emit dummy "none" arguments.
755 if (Func.hasFnAttribute("calls-enqueue-kernel")) {
756 emitKernelArg(DL, Int8PtrTy, "hidden_default_queue", Offset, *Args);
757 emitKernelArg(DL, Int8PtrTy, "hidden_completion_action", Offset, *Args);
759 emitKernelArg(DL, Int8PtrTy, "hidden_none", Offset, *Args);
760 emitKernelArg(DL, Int8PtrTy, "hidden_none", Offset, *Args);
764 Kern[".args"] = std::move(Args);
767 void MetadataStreamerV3::emitKernelArg(const Argument &Arg, unsigned &Offset,
768 msgpack::ArrayNode &Args) {
769 auto Func = Arg.getParent();
770 auto ArgNo = Arg.getArgNo();
774 Node = Func->getMetadata("kernel_arg_name");
775 if (Node && ArgNo < Node->getNumOperands())
776 Name = cast<MDString>(Node->getOperand(ArgNo))->getString();
777 else if (Arg.hasName())
778 Name = Arg.getName();
781 Node = Func->getMetadata("kernel_arg_type");
782 if (Node && ArgNo < Node->getNumOperands())
783 TypeName = cast<MDString>(Node->getOperand(ArgNo))->getString();
785 StringRef BaseTypeName;
786 Node = Func->getMetadata("kernel_arg_base_type");
787 if (Node && ArgNo < Node->getNumOperands())
788 BaseTypeName = cast<MDString>(Node->getOperand(ArgNo))->getString();
791 if (Arg.getType()->isPointerTy() && Arg.onlyReadsMemory() &&
792 Arg.hasNoAliasAttr()) {
793 AccQual = "read_only";
795 Node = Func->getMetadata("kernel_arg_access_qual");
796 if (Node && ArgNo < Node->getNumOperands())
797 AccQual = cast<MDString>(Node->getOperand(ArgNo))->getString();
801 Node = Func->getMetadata("kernel_arg_type_qual");
802 if (Node && ArgNo < Node->getNumOperands())
803 TypeQual = cast<MDString>(Node->getOperand(ArgNo))->getString();
805 Type *Ty = Arg.getType();
806 const DataLayout &DL = Func->getParent()->getDataLayout();
808 unsigned PointeeAlign = 0;
809 if (auto PtrTy = dyn_cast<PointerType>(Ty)) {
810 if (PtrTy->getAddressSpace() == AMDGPUAS::LOCAL_ADDRESS) {
811 PointeeAlign = Arg.getParamAlignment();
812 if (PointeeAlign == 0)
813 PointeeAlign = DL.getABITypeAlignment(PtrTy->getElementType());
817 emitKernelArg(Func->getParent()->getDataLayout(), Arg.getType(),
818 getValueKind(Arg.getType(), TypeQual, BaseTypeName), Offset,
819 Args, PointeeAlign, Name, TypeName, BaseTypeName, AccQual,
823 void MetadataStreamerV3::emitKernelArg(const DataLayout &DL, Type *Ty,
824 StringRef ValueKind, unsigned &Offset,
825 msgpack::ArrayNode &Args,
826 unsigned PointeeAlign, StringRef Name,
828 StringRef BaseTypeName,
829 StringRef AccQual, StringRef TypeQual) {
830 auto ArgPtr = std::make_shared<msgpack::MapNode>();
834 Arg[".name"] = std::make_shared<msgpack::ScalarNode>(Name);
835 if (!TypeName.empty())
836 Arg[".type_name"] = std::make_shared<msgpack::ScalarNode>(TypeName);
837 auto Size = DL.getTypeAllocSize(Ty);
838 auto Align = DL.getABITypeAlignment(Ty);
839 Arg[".size"] = std::make_shared<msgpack::ScalarNode>(Size);
840 Offset = alignTo(Offset, Align);
841 Arg[".offset"] = std::make_shared<msgpack::ScalarNode>(Offset);
843 Arg[".value_kind"] = std::make_shared<msgpack::ScalarNode>(ValueKind);
845 std::make_shared<msgpack::ScalarNode>(getValueType(Ty, BaseTypeName));
847 Arg[".pointee_align"] = std::make_shared<msgpack::ScalarNode>(PointeeAlign);
849 if (auto PtrTy = dyn_cast<PointerType>(Ty))
850 if (auto Qualifier = getAddressSpaceQualifier(PtrTy->getAddressSpace()))
851 Arg[".address_space"] = std::make_shared<msgpack::ScalarNode>(*Qualifier);
853 if (auto AQ = getAccessQualifier(AccQual))
854 Arg[".access"] = std::make_shared<msgpack::ScalarNode>(*AQ);
856 // TODO: Emit Arg[".actual_access"].
858 SmallVector<StringRef, 1> SplitTypeQuals;
859 TypeQual.split(SplitTypeQuals, " ", -1, false);
860 for (StringRef Key : SplitTypeQuals) {
862 Arg[".is_const"] = std::make_shared<msgpack::ScalarNode>(true);
863 else if (Key == "restrict")
864 Arg[".is_restrict"] = std::make_shared<msgpack::ScalarNode>(true);
865 else if (Key == "volatile")
866 Arg[".is_volatile"] = std::make_shared<msgpack::ScalarNode>(true);
867 else if (Key == "pipe")
868 Arg[".is_pipe"] = std::make_shared<msgpack::ScalarNode>(true);
871 Args.push_back(std::move(ArgPtr));
874 void MetadataStreamerV3::emitHiddenKernelArgs(const Function &Func,
876 msgpack::ArrayNode &Args) {
877 int HiddenArgNumBytes =
878 getIntegerAttribute(Func, "amdgpu-implicitarg-num-bytes", 0);
880 if (!HiddenArgNumBytes)
883 auto &DL = Func.getParent()->getDataLayout();
884 auto Int64Ty = Type::getInt64Ty(Func.getContext());
886 if (HiddenArgNumBytes >= 8)
887 emitKernelArg(DL, Int64Ty, "hidden_global_offset_x", Offset, Args);
888 if (HiddenArgNumBytes >= 16)
889 emitKernelArg(DL, Int64Ty, "hidden_global_offset_y", Offset, Args);
890 if (HiddenArgNumBytes >= 24)
891 emitKernelArg(DL, Int64Ty, "hidden_global_offset_z", Offset, Args);
894 Type::getInt8PtrTy(Func.getContext(), AMDGPUAS::GLOBAL_ADDRESS);
896 // Emit "printf buffer" argument if printf is used, otherwise emit dummy
898 if (HiddenArgNumBytes >= 32) {
899 if (Func.getParent()->getNamedMetadata("llvm.printf.fmts"))
900 emitKernelArg(DL, Int8PtrTy, "hidden_printf_buffer", Offset, Args);
902 emitKernelArg(DL, Int8PtrTy, "hidden_none", Offset, Args);
905 // Emit "default queue" and "completion action" arguments if enqueue kernel is
906 // used, otherwise emit dummy "none" arguments.
907 if (HiddenArgNumBytes >= 48) {
908 if (Func.hasFnAttribute("calls-enqueue-kernel")) {
909 emitKernelArg(DL, Int8PtrTy, "hidden_default_queue", Offset, Args);
910 emitKernelArg(DL, Int8PtrTy, "hidden_completion_action", Offset, Args);
912 emitKernelArg(DL, Int8PtrTy, "hidden_none", Offset, Args);
913 emitKernelArg(DL, Int8PtrTy, "hidden_none", Offset, Args);
918 std::shared_ptr<msgpack::MapNode>
919 MetadataStreamerV3::getHSAKernelProps(const MachineFunction &MF,
920 const SIProgramInfo &ProgramInfo) const {
921 const GCNSubtarget &STM = MF.getSubtarget<GCNSubtarget>();
922 const SIMachineFunctionInfo &MFI = *MF.getInfo<SIMachineFunctionInfo>();
923 const Function &F = MF.getFunction();
925 auto HSAKernelProps = std::make_shared<msgpack::MapNode>();
926 auto &Kern = *HSAKernelProps;
928 unsigned MaxKernArgAlign;
929 Kern[".kernarg_segment_size"] = std::make_shared<msgpack::ScalarNode>(
930 STM.getKernArgSegmentSize(F, MaxKernArgAlign));
931 Kern[".group_segment_fixed_size"] =
932 std::make_shared<msgpack::ScalarNode>(ProgramInfo.LDSSize);
933 Kern[".private_segment_fixed_size"] =
934 std::make_shared<msgpack::ScalarNode>(ProgramInfo.ScratchSize);
935 Kern[".kernarg_segment_align"] =
936 std::make_shared<msgpack::ScalarNode>(std::max(uint32_t(4), MaxKernArgAlign));
937 Kern[".wavefront_size"] =
938 std::make_shared<msgpack::ScalarNode>(STM.getWavefrontSize());
939 Kern[".sgpr_count"] = std::make_shared<msgpack::ScalarNode>(ProgramInfo.NumSGPR);
940 Kern[".vgpr_count"] = std::make_shared<msgpack::ScalarNode>(ProgramInfo.NumVGPR);
941 Kern[".max_flat_workgroup_size"] =
942 std::make_shared<msgpack::ScalarNode>(MFI.getMaxFlatWorkGroupSize());
943 Kern[".sgpr_spill_count"] =
944 std::make_shared<msgpack::ScalarNode>(MFI.getNumSpilledSGPRs());
945 Kern[".vgpr_spill_count"] =
946 std::make_shared<msgpack::ScalarNode>(MFI.getNumSpilledVGPRs());
948 return HSAKernelProps;
951 bool MetadataStreamerV3::emitTo(AMDGPUTargetStreamer &TargetStreamer) {
952 return TargetStreamer.EmitHSAMetadata(getHSAMetadataRoot(), true);
955 void MetadataStreamerV3::begin(const Module &Mod) {
958 getRootMetadata("amdhsa.kernels").reset(new msgpack::ArrayNode());
961 void MetadataStreamerV3::end() {
962 std::string HSAMetadataString;
963 raw_string_ostream StrOS(HSAMetadataString);
964 yaml::Output YOut(StrOS);
965 YOut << HSAMetadataRoot;
969 if (VerifyHSAMetadata)
973 void MetadataStreamerV3::emitKernel(const MachineFunction &MF,
974 const SIProgramInfo &ProgramInfo) {
975 auto &Func = MF.getFunction();
976 auto KernelProps = getHSAKernelProps(MF, ProgramInfo);
978 assert(Func.getCallingConv() == CallingConv::AMDGPU_KERNEL ||
979 Func.getCallingConv() == CallingConv::SPIR_KERNEL);
981 auto &KernelsNode = getRootMetadata("amdhsa.kernels");
982 auto Kernels = cast<msgpack::ArrayNode>(KernelsNode.get());
985 auto &Kern = *KernelProps;
986 Kern[".name"] = std::make_shared<msgpack::ScalarNode>(Func.getName());
987 Kern[".symbol"] = std::make_shared<msgpack::ScalarNode>(
988 (Twine(Func.getName()) + Twine(".kd")).str());
989 emitKernelLanguage(Func, Kern);
990 emitKernelAttrs(Func, Kern);
991 emitKernelArgs(Func, Kern);
994 Kernels->push_back(std::move(KernelProps));
997 } // end namespace HSAMD
998 } // end namespace AMDGPU
999 } // end namespace llvm