1 //===-- AMDGPUTargetStreamer.cpp - AMDGPU Target Streamer Methods ---------===//
3 // The LLVM Compiler Infrastructure
5 // This file is distributed under the University of Illinois Open Source
6 // License. See LICENSE.TXT for details.
8 //===----------------------------------------------------------------------===//
10 // This file provides AMDGPU specific target streamer methods.
12 //===----------------------------------------------------------------------===//
14 #include "AMDGPUTargetStreamer.h"
16 #include "SIDefines.h"
17 #include "Utils/AMDGPUBaseInfo.h"
18 #include "Utils/AMDKernelCodeTUtils.h"
19 #include "llvm/ADT/Twine.h"
20 #include "llvm/BinaryFormat/ELF.h"
21 #include "llvm/IR/Constants.h"
22 #include "llvm/IR/Function.h"
23 #include "llvm/IR/Metadata.h"
24 #include "llvm/IR/Module.h"
25 #include "llvm/MC/MCContext.h"
26 #include "llvm/MC/MCELFStreamer.h"
27 #include "llvm/MC/MCObjectFileInfo.h"
28 #include "llvm/MC/MCSectionELF.h"
29 #include "llvm/Support/FormattedStream.h"
32 #include "AMDGPUPTNote.h"
36 using namespace llvm::AMDGPU;
38 //===----------------------------------------------------------------------===//
39 // AMDGPUTargetStreamer
40 //===----------------------------------------------------------------------===//
// Rows pairing a processor name with an ELF::EF_AMDGPU_MACH_* value. Several
// names may share one MACH value (e.g. "gfx600" and "tahiti").
// NOTE(review): the declaration that opens this table is not visible in this
// excerpt; presumably this is the body of MachTable, which getMACH() and
// getMachName() scan — confirm.
46 // Radeon HD 2000/3000 Series (R600).
47 { "r600", ELF::EF_AMDGPU_MACH_R600_R600 },
48 { "r630", ELF::EF_AMDGPU_MACH_R600_R630 },
49 { "rs880", ELF::EF_AMDGPU_MACH_R600_RS880 },
50 { "rv670", ELF::EF_AMDGPU_MACH_R600_RV670 },
51 // Radeon HD 4000 Series (R700).
52 { "rv710", ELF::EF_AMDGPU_MACH_R600_RV710 },
53 { "rv730", ELF::EF_AMDGPU_MACH_R600_RV730 },
54 { "rv770", ELF::EF_AMDGPU_MACH_R600_RV770 },
55 // Radeon HD 5000 Series (Evergreen).
56 { "cedar", ELF::EF_AMDGPU_MACH_R600_CEDAR },
57 { "cypress", ELF::EF_AMDGPU_MACH_R600_CYPRESS },
58 { "juniper", ELF::EF_AMDGPU_MACH_R600_JUNIPER },
59 { "redwood", ELF::EF_AMDGPU_MACH_R600_REDWOOD },
60 { "sumo", ELF::EF_AMDGPU_MACH_R600_SUMO },
61 // Radeon HD 6000 Series (Northern Islands).
62 { "barts", ELF::EF_AMDGPU_MACH_R600_BARTS },
63 { "caicos", ELF::EF_AMDGPU_MACH_R600_CAICOS },
64 { "cayman", ELF::EF_AMDGPU_MACH_R600_CAYMAN },
65 { "turks", ELF::EF_AMDGPU_MACH_R600_TURKS },
// Names that map to AMDGCN GFX6xx MACH values.
67 { "gfx600", ELF::EF_AMDGPU_MACH_AMDGCN_GFX600 },
68 { "tahiti", ELF::EF_AMDGPU_MACH_AMDGCN_GFX600 },
69 { "gfx601", ELF::EF_AMDGPU_MACH_AMDGCN_GFX601 },
70 { "hainan", ELF::EF_AMDGPU_MACH_AMDGCN_GFX601 },
71 { "oland", ELF::EF_AMDGPU_MACH_AMDGCN_GFX601 },
72 { "pitcairn", ELF::EF_AMDGPU_MACH_AMDGCN_GFX601 },
73 { "verde", ELF::EF_AMDGPU_MACH_AMDGCN_GFX601 },
// Names that map to AMDGCN GFX7xx MACH values.
75 { "gfx700", ELF::EF_AMDGPU_MACH_AMDGCN_GFX700 },
76 { "kaveri", ELF::EF_AMDGPU_MACH_AMDGCN_GFX700 },
77 { "gfx701", ELF::EF_AMDGPU_MACH_AMDGCN_GFX701 },
78 { "hawaii", ELF::EF_AMDGPU_MACH_AMDGCN_GFX701 },
79 { "gfx702", ELF::EF_AMDGPU_MACH_AMDGCN_GFX702 },
80 { "gfx703", ELF::EF_AMDGPU_MACH_AMDGCN_GFX703 },
81 { "kabini", ELF::EF_AMDGPU_MACH_AMDGCN_GFX703 },
82 { "mullins", ELF::EF_AMDGPU_MACH_AMDGCN_GFX703 },
83 { "gfx704", ELF::EF_AMDGPU_MACH_AMDGCN_GFX704 },
84 { "bonaire", ELF::EF_AMDGPU_MACH_AMDGCN_GFX704 },
// Names that map to AMDGCN GFX8xx MACH values.
86 { "gfx801", ELF::EF_AMDGPU_MACH_AMDGCN_GFX801 },
87 { "carrizo", ELF::EF_AMDGPU_MACH_AMDGCN_GFX801 },
88 { "gfx802", ELF::EF_AMDGPU_MACH_AMDGCN_GFX802 },
89 { "iceland", ELF::EF_AMDGPU_MACH_AMDGCN_GFX802 },
90 { "tonga", ELF::EF_AMDGPU_MACH_AMDGCN_GFX802 },
91 { "gfx803", ELF::EF_AMDGPU_MACH_AMDGCN_GFX803 },
92 { "fiji", ELF::EF_AMDGPU_MACH_AMDGCN_GFX803 },
93 { "polaris10", ELF::EF_AMDGPU_MACH_AMDGCN_GFX803 },
94 { "polaris11", ELF::EF_AMDGPU_MACH_AMDGCN_GFX803 },
95 { "gfx810", ELF::EF_AMDGPU_MACH_AMDGCN_GFX810 },
96 { "stoney", ELF::EF_AMDGPU_MACH_AMDGCN_GFX810 },
// Names that map to AMDGCN GFX9xx MACH values.
98 { "gfx900", ELF::EF_AMDGPU_MACH_AMDGCN_GFX900 },
99 { "gfx902", ELF::EF_AMDGPU_MACH_AMDGCN_GFX902 },
100 { "gfx904", ELF::EF_AMDGPU_MACH_AMDGCN_GFX904 },
101 { "gfx906", ELF::EF_AMDGPU_MACH_AMDGCN_GFX906 },
102 // Not specified processor.
// This row has a null name and must stay last: the lookup loops in getMACH()
// and getMachName() stop when they reach an entry whose Name is null.
103 { nullptr, ELF::EF_AMDGPU_MACH_NONE }
// Scans MachTable from its first entry for the row whose Name equals GPU.
// NOTE(review): the loop body and the return statement are not visible in this
// excerpt; presumably the Mach field of the entry the scan stops on is
// returned — confirm.
106 unsigned AMDGPUTargetStreamer::getMACH(StringRef GPU) const {
107 auto Entry = MachTable;
// Advance until the entry has a null Name (end of table) or its Name matches.
108 for (; Entry->Name && GPU != Entry->Name; ++Entry)
// Scans MachTable from its first entry for the first row whose Mach field
// equals the Mach argument. Because the scan stops at the first match, a MACH
// value shared by several names resolves to the earliest row in the table.
// NOTE(review): the loop body and the return statement are not visible in this
// excerpt; presumably the Name of the entry the scan stops on is returned
// (which would be null when nothing matches) — confirm.
113 const char *AMDGPUTargetStreamer::getMachName(unsigned Mach) {
114 auto Entry = MachTable;
// Advance until the entry has a null Name (end of table) or its Mach matches.
115 for (; Entry->Name && Mach != Entry->Mach; ++Entry)
// Converts HSAMetadataString into an HSAMD::Metadata object with
// HSAMD::fromString and passes that object to the EmitHSAMetadata overload
// that takes HSAMD::Metadata, returning that overload's result.
120 bool AMDGPUTargetStreamer::EmitHSAMetadata(StringRef HSAMetadataString) {
121 HSAMD::Metadata HSAMetadata;
// NOTE(review): the statement controlled by this `if` is not visible in this
// excerpt; presumably a truthy result means the conversion failed and the
// function returns early — confirm.
122 if (HSAMD::fromString(HSAMetadataString, HSAMetadata))
125 return EmitHSAMetadata(HSAMetadata);
128 //===----------------------------------------------------------------------===//
129 // AMDGPUTargetAsmStreamer
130 //===----------------------------------------------------------------------===//
// Constructs the textual-assembly target streamer: S is handed to the
// AMDGPUTargetStreamer base and OS is stored as the stream that the Emit*
// methods of this class write their directives to.
132 AMDGPUTargetAsmStreamer::AMDGPUTargetAsmStreamer(MCStreamer &S,
133 formatted_raw_ostream &OS)
134 : AMDGPUTargetStreamer(S), OS(OS) { }
// Writes one tab-indented line: the .amdgcn_target directive followed by
// Target enclosed in double quotes.
136 void AMDGPUTargetAsmStreamer::EmitDirectiveAMDGCNTarget(StringRef Target) {
137 OS << "\t.amdgcn_target \"" << Target << "\"\n";
// Writes one tab-indented line of the form
//   .hsa_code_object_version <Major>,<Minor>
140 void AMDGPUTargetAsmStreamer::EmitDirectiveHSACodeObjectVersion(
141 uint32_t Major, uint32_t Minor) {
142 OS << "\t.hsa_code_object_version " <<
143 Twine(Major) << "," << Twine(Minor) << '\n';
// Writes one tab-indented line of the form
//   .hsa_code_object_isa <Major>,<Minor>,<Stepping>,"<VendorName>","<ArchName>"
// NOTE(review): the return type and the declarations of the Minor and Stepping
// parameters are not visible in this excerpt.
147 AMDGPUTargetAsmStreamer::EmitDirectiveHSACodeObjectISA(uint32_t Major,
150 StringRef VendorName,
151 StringRef ArchName) {
152 OS << "\t.hsa_code_object_isa " <<
153 Twine(Major) << "," << Twine(Minor) << "," << Twine(Stepping) <<
154 ",\"" << VendorName << "\",\"" << ArchName << "\"\n";
// Writes Header as text between a .amd_kernel_code_t line and a
// .end_amd_kernel_code_t line. The contents are produced by
// dumpAmdKernelCode, which is given the header, the output stream and the
// string "\t\t" (presumably the per-line indent — confirm against the helper).
// NOTE(review): the return type line is not visible in this excerpt.
159 AMDGPUTargetAsmStreamer::EmitAMDKernelCodeT(const amd_kernel_code_t &Header) {
160 OS << "\t.amd_kernel_code_t\n";
161 dumpAmdKernelCode(&Header, OS, "\t\t");
162 OS << "\t.end_amd_kernel_code_t\n";
// Writes the assembler directive that corresponds to a symbol type. The only
// case visible here is ELF::STT_AMDGPU_HSA_KERNEL, which writes
// ".amdgpu_hsa_kernel <SymbolName>"; the default label is marked unreachable.
// NOTE(review): the remaining parameter(s) and the switch header are not
// visible in this excerpt.
165 void AMDGPUTargetAsmStreamer::EmitAMDGPUSymbolType(StringRef SymbolName,
168 default: llvm_unreachable("Invalid AMDGPU symbol type");
169 case ELF::STT_AMDGPU_HSA_KERNEL:
170 OS << "\t.amdgpu_hsa_kernel " << SymbolName << '\n' ;
// Writes one tab-indented line: the .amd_amdgpu_isa directive followed by
// IsaVersionString enclosed in double quotes.
// NOTE(review): the return statement is not visible in this excerpt.
175 bool AMDGPUTargetAsmStreamer::EmitISAVersion(StringRef IsaVersionString) {
176 OS << "\t.amd_amdgpu_isa \"" << IsaVersionString << "\"\n";
// Converts HSAMetadata to a string with HSAMD::toString and writes it on its
// own line, preceded by a tab-indented HSAMD::AssemblerDirectiveBegin line and
// followed by a tab-indented HSAMD::AssemblerDirectiveEnd line.
// NOTE(review): the return statements are not visible in this excerpt.
180 bool AMDGPUTargetAsmStreamer::EmitHSAMetadata(
181 const AMDGPU::HSAMD::Metadata &HSAMetadata) {
182 std::string HSAMetadataString;
// NOTE(review): the statement controlled by this `if` is not visible;
// presumably a truthy result signals a conversion error — confirm.
183 if (HSAMD::toString(HSAMetadata, HSAMetadataString))
186 OS << '\t' << HSAMD::AssemblerDirectiveBegin << '\n';
187 OS << HSAMetadataString << '\n';
188 OS << '\t' << HSAMD::AssemblerDirectiveEnd << '\n';
// Converts PALMetadata to a string with PALMD::toString and writes a single
// tab-indented line made of PALMD::AssemblerDirective immediately followed by
// that string (no separator is inserted between the two here).
// NOTE(review): the return statements are not visible in this excerpt.
192 bool AMDGPUTargetAsmStreamer::EmitPALMetadata(
193 const PALMD::Metadata &PALMetadata) {
194 std::string PALMetadataString;
// NOTE(review): the statement controlled by this `if` is not visible;
// presumably a truthy result signals a conversion error — confirm.
195 if (PALMD::toString(PALMetadata, PALMetadataString))
198 OS << '\t' << PALMD::AssemblerDirective << PALMetadataString << '\n';
// Writes the textual form of a kernel descriptor: a ".amdhsa_kernel
// <KernelName>" line, a series of two-tab-indented .amdhsa_* directives, and a
// closing ".end_amdhsa_kernel" line.
//
// Most fields are printed only when their value in KD differs from the value
// in the descriptor returned by getDefaultAmdhsaKernelDescriptor(). The
// next-free VGPR/SGPR directives are always printed. Some directives are
// additionally gated on the ISA major version derived from STI.
//
// NOTE(review): several lines of this function are not visible in this
// excerpt (the MEMBER_NAME argument of many macro invocations, the tail of one
// stream statement, and possibly a guard before .amdhsa_reserve_vcc).
202 void AMDGPUTargetAsmStreamer::EmitAmdhsaKernelDescriptor(
203 const MCSubtargetInfo &STI, StringRef KernelName,
204 const amdhsa::kernel_descriptor_t &KD, uint64_t NextVGPR, uint64_t NextSGPR,
205 bool ReserveVCC, bool ReserveFlatScr, bool ReserveXNACK) {
// Baseline descriptor that every field of KD is compared against.
206 amdhsa::kernel_descriptor_t DefaultKD = getDefaultAmdhsaKernelDescriptor();
208 IsaInfo::IsaVersion IVersion = IsaInfo::getIsaVersion(STI.getFeatureBits());
210 OS << "\t.amdhsa_kernel " << KernelName << '\n';
// PRINT_IF_NOT_DEFAULT writes "<DIRECTIVE> <value>" to STREAM only when the
// bit-field FIELD_NAME of member MEMBER_NAME differs between KERNEL_DESC and
// DEFAULT_KERNEL_DESC. It expands to a single if-statement, which is why it
// can be used as the body of the `if (IVersion.Major >= 9)` further down.
212 #define PRINT_IF_NOT_DEFAULT(STREAM, DIRECTIVE, KERNEL_DESC, \
213 DEFAULT_KERNEL_DESC, MEMBER_NAME, FIELD_NAME) \
214 if (AMDHSA_BITS_GET(KERNEL_DESC.MEMBER_NAME, FIELD_NAME) != \
215 AMDHSA_BITS_GET(DEFAULT_KERNEL_DESC.MEMBER_NAME, FIELD_NAME)) \
216 STREAM << "\t\t" << DIRECTIVE << " " \
217 << AMDHSA_BITS_GET(KERNEL_DESC.MEMBER_NAME, FIELD_NAME) << '\n';
// Whole-member fields: compared and printed directly rather than through the
// bit-field macro.
219 if (KD.group_segment_fixed_size != DefaultKD.group_segment_fixed_size)
220 OS << "\t\t.amdhsa_group_segment_fixed_size " << KD.group_segment_fixed_size
222 if (KD.private_segment_fixed_size != DefaultKD.private_segment_fixed_size)
223 OS << "\t\t.amdhsa_private_segment_fixed_size "
224 << KD.private_segment_fixed_size << '\n';
// Bit-fields of kernel_code_properties (the user SGPR enables).
226 PRINT_IF_NOT_DEFAULT(
227 OS, ".amdhsa_user_sgpr_private_segment_buffer", KD, DefaultKD,
228 kernel_code_properties,
229 amdhsa::KERNEL_CODE_PROPERTY_ENABLE_SGPR_PRIVATE_SEGMENT_BUFFER);
230 PRINT_IF_NOT_DEFAULT(OS, ".amdhsa_user_sgpr_dispatch_ptr", KD, DefaultKD,
231 kernel_code_properties,
232 amdhsa::KERNEL_CODE_PROPERTY_ENABLE_SGPR_DISPATCH_PTR);
233 PRINT_IF_NOT_DEFAULT(OS, ".amdhsa_user_sgpr_queue_ptr", KD, DefaultKD,
234 kernel_code_properties,
235 amdhsa::KERNEL_CODE_PROPERTY_ENABLE_SGPR_QUEUE_PTR);
236 PRINT_IF_NOT_DEFAULT(
237 OS, ".amdhsa_user_sgpr_kernarg_segment_ptr", KD, DefaultKD,
238 kernel_code_properties,
239 amdhsa::KERNEL_CODE_PROPERTY_ENABLE_SGPR_KERNARG_SEGMENT_PTR);
240 PRINT_IF_NOT_DEFAULT(OS, ".amdhsa_user_sgpr_dispatch_id", KD, DefaultKD,
241 kernel_code_properties,
242 amdhsa::KERNEL_CODE_PROPERTY_ENABLE_SGPR_DISPATCH_ID);
243 PRINT_IF_NOT_DEFAULT(
244 OS, ".amdhsa_user_sgpr_flat_scratch_init", KD, DefaultKD,
245 kernel_code_properties,
246 amdhsa::KERNEL_CODE_PROPERTY_ENABLE_SGPR_FLAT_SCRATCH_INIT);
247 PRINT_IF_NOT_DEFAULT(
248 OS, ".amdhsa_user_sgpr_private_segment_size", KD, DefaultKD,
249 kernel_code_properties,
250 amdhsa::KERNEL_CODE_PROPERTY_ENABLE_SGPR_PRIVATE_SEGMENT_SIZE);
// System SGPR / VGPR enables. The field constants are COMPUTE_PGM_RSRC2_*;
// NOTE(review): the MEMBER_NAME argument line of each invocation below is not
// visible in this excerpt (presumably compute_pgm_rsrc2 — confirm).
251 PRINT_IF_NOT_DEFAULT(
252 OS, ".amdhsa_system_sgpr_private_segment_wavefront_offset", KD, DefaultKD,
254 amdhsa::COMPUTE_PGM_RSRC2_ENABLE_SGPR_PRIVATE_SEGMENT_WAVEFRONT_OFFSET);
255 PRINT_IF_NOT_DEFAULT(OS, ".amdhsa_system_sgpr_workgroup_id_x", KD, DefaultKD,
257 amdhsa::COMPUTE_PGM_RSRC2_ENABLE_SGPR_WORKGROUP_ID_X);
258 PRINT_IF_NOT_DEFAULT(OS, ".amdhsa_system_sgpr_workgroup_id_y", KD, DefaultKD,
260 amdhsa::COMPUTE_PGM_RSRC2_ENABLE_SGPR_WORKGROUP_ID_Y);
261 PRINT_IF_NOT_DEFAULT(OS, ".amdhsa_system_sgpr_workgroup_id_z", KD, DefaultKD,
263 amdhsa::COMPUTE_PGM_RSRC2_ENABLE_SGPR_WORKGROUP_ID_Z);
264 PRINT_IF_NOT_DEFAULT(OS, ".amdhsa_system_sgpr_workgroup_info", KD, DefaultKD,
266 amdhsa::COMPUTE_PGM_RSRC2_ENABLE_SGPR_WORKGROUP_INFO);
267 PRINT_IF_NOT_DEFAULT(OS, ".amdhsa_system_vgpr_workitem_id", KD, DefaultKD,
269 amdhsa::COMPUTE_PGM_RSRC2_ENABLE_VGPR_WORKITEM_ID);
271 // These directives are required.
272 OS << "\t\t.amdhsa_next_free_vgpr " << NextVGPR << '\n';
273 OS << "\t\t.amdhsa_next_free_sgpr " << NextSGPR << '\n';
// NOTE(review): a line between the previous statement and this one is not
// visible; this output may be guarded by a condition — confirm.
276 OS << "\t\t.amdhsa_reserve_vcc " << ReserveVCC << '\n';
// Printed only for ISA major version >= 7 and only when ReserveFlatScr is
// false.
277 if (IVersion.Major >= 7 && !ReserveFlatScr)
278 OS << "\t\t.amdhsa_reserve_flat_scratch " << ReserveFlatScr << '\n';
// Printed only for ISA major version >= 8 and only when ReserveXNACK differs
// from what hasXNACK(STI) reports for the subtarget.
279 if (IVersion.Major >= 8 && ReserveXNACK != hasXNACK(STI))
280 OS << "\t\t.amdhsa_reserve_xnack_mask " << ReserveXNACK << '\n';
// Floating-point mode fields. The field constants are COMPUTE_PGM_RSRC1_*;
// NOTE(review): the MEMBER_NAME argument lines are not visible here either.
282 PRINT_IF_NOT_DEFAULT(OS, ".amdhsa_float_round_mode_32", KD, DefaultKD,
284 amdhsa::COMPUTE_PGM_RSRC1_FLOAT_ROUND_MODE_32);
285 PRINT_IF_NOT_DEFAULT(OS, ".amdhsa_float_round_mode_16_64", KD, DefaultKD,
287 amdhsa::COMPUTE_PGM_RSRC1_FLOAT_ROUND_MODE_16_64);
288 PRINT_IF_NOT_DEFAULT(OS, ".amdhsa_float_denorm_mode_32", KD, DefaultKD,
290 amdhsa::COMPUTE_PGM_RSRC1_FLOAT_DENORM_MODE_32);
291 PRINT_IF_NOT_DEFAULT(OS, ".amdhsa_float_denorm_mode_16_64", KD, DefaultKD,
293 amdhsa::COMPUTE_PGM_RSRC1_FLOAT_DENORM_MODE_16_64);
294 PRINT_IF_NOT_DEFAULT(OS, ".amdhsa_dx10_clamp", KD, DefaultKD,
296 amdhsa::COMPUTE_PGM_RSRC1_ENABLE_DX10_CLAMP);
297 PRINT_IF_NOT_DEFAULT(OS, ".amdhsa_ieee_mode", KD, DefaultKD,
299 amdhsa::COMPUTE_PGM_RSRC1_ENABLE_IEEE_MODE);
// The fp16 overflow field is only considered for ISA major version >= 9.
300 if (IVersion.Major >= 9)
301 PRINT_IF_NOT_DEFAULT(OS, ".amdhsa_fp16_overflow", KD, DefaultKD,
303 amdhsa::COMPUTE_PGM_RSRC1_FP16_OVFL);
// Exception-enable fields of compute_pgm_rsrc2.
304 PRINT_IF_NOT_DEFAULT(
305 OS, ".amdhsa_exception_fp_ieee_invalid_op", KD, DefaultKD,
307 amdhsa::COMPUTE_PGM_RSRC2_ENABLE_EXCEPTION_IEEE_754_FP_INVALID_OPERATION);
308 PRINT_IF_NOT_DEFAULT(
309 OS, ".amdhsa_exception_fp_denorm_src", KD, DefaultKD, compute_pgm_rsrc2,
310 amdhsa::COMPUTE_PGM_RSRC2_ENABLE_EXCEPTION_FP_DENORMAL_SOURCE);
311 PRINT_IF_NOT_DEFAULT(
312 OS, ".amdhsa_exception_fp_ieee_div_zero", KD, DefaultKD,
314 amdhsa::COMPUTE_PGM_RSRC2_ENABLE_EXCEPTION_IEEE_754_FP_DIVISION_BY_ZERO);
315 PRINT_IF_NOT_DEFAULT(
316 OS, ".amdhsa_exception_fp_ieee_overflow", KD, DefaultKD,
318 amdhsa::COMPUTE_PGM_RSRC2_ENABLE_EXCEPTION_IEEE_754_FP_OVERFLOW);
319 PRINT_IF_NOT_DEFAULT(
320 OS, ".amdhsa_exception_fp_ieee_underflow", KD, DefaultKD,
322 amdhsa::COMPUTE_PGM_RSRC2_ENABLE_EXCEPTION_IEEE_754_FP_UNDERFLOW);
323 PRINT_IF_NOT_DEFAULT(
324 OS, ".amdhsa_exception_fp_ieee_inexact", KD, DefaultKD, compute_pgm_rsrc2,
325 amdhsa::COMPUTE_PGM_RSRC2_ENABLE_EXCEPTION_IEEE_754_FP_INEXACT);
326 PRINT_IF_NOT_DEFAULT(
327 OS, ".amdhsa_exception_int_div_zero", KD, DefaultKD, compute_pgm_rsrc2,
328 amdhsa::COMPUTE_PGM_RSRC2_ENABLE_EXCEPTION_INT_DIVIDE_BY_ZERO);
// The helper macro is local to this function; remove it before leaving.
329 #undef PRINT_IF_NOT_DEFAULT
331 OS << "\t.end_amdhsa_kernel\n";
334 //===----------------------------------------------------------------------===//
335 // AMDGPUTargetELFStreamer
336 //===----------------------------------------------------------------------===//
// Constructs the ELF target streamer and updates the ELF header e_flags held
// by the assembler so they describe the subtarget in STI:
//  * the EF_AMDGPU_MACH bits are cleared and replaced with the value that
//    getMACH() returns for STI.getCPU();
//  * the EF_AMDGPU_XNACK bit is cleared, then set again only if
//    AMDGPU::hasXNACK(STI) is true.
// All other bits of the existing flags are left as they were.
338 AMDGPUTargetELFStreamer::AMDGPUTargetELFStreamer(
339 MCStreamer &S, const MCSubtargetInfo &STI)
340 : AMDGPUTargetStreamer(S), Streamer(S) {
341 MCAssembler &MCA = getStreamer().getAssembler();
342 unsigned EFlags = MCA.getELFHeaderEFlags();
344 EFlags &= ~ELF::EF_AMDGPU_MACH;
345 EFlags |= getMACH(STI.getCPU());
347 EFlags &= ~ELF::EF_AMDGPU_XNACK;
348 if (AMDGPU::hasXNACK(STI))
349 EFlags |= ELF::EF_AMDGPU_XNACK;
351 MCA.setELFHeaderEFlags(EFlags);
// Returns the stored Streamer viewed as an MCELFStreamer. The conversion is a
// static_cast, so no run-time type check is performed here.
354 MCELFStreamer &AMDGPUTargetELFStreamer::getStreamer() {
355 return static_cast<MCELFStreamer &>(Streamer);
// Emits one note record into the ELF section named ElfNote::SectionName
// (type SHT_NOTE, flag SHF_ALLOC).
//
// DescSZ   - expression giving the size of the descriptor, written as the
//            4-byte descsz field.
// NoteType - value written as the 4-byte type field.
// EmitDesc - callback receiving the ELF streamer; its signature suggests it
//            writes the descriptor bytes.
// NOTE(review): the line that invokes EmitDesc (between the two alignment
// calls) and any section push/pop lines are not visible in this excerpt.
358 void AMDGPUTargetELFStreamer::EmitAMDGPUNote(
359 const MCExpr *DescSZ, unsigned NoteType,
360 function_ref<void(MCELFStreamer &)> EmitDesc) {
361 auto &S = getStreamer();
362 auto &Context = S.getContext();
// sizeof of the NoteName object; presumably a character array so the count
// includes its terminating NUL — confirm against the ElfNote declaration.
364 auto NameSZ = sizeof(ElfNote::NoteName);
367 S.SwitchSection(Context.getELFSection(
368 ElfNote::SectionName, ELF::SHT_NOTE, ELF::SHF_ALLOC));
369 S.EmitIntValue(NameSZ, 4); // namesz
370 S.EmitValue(DescSZ, 4); // descsz
371 S.EmitIntValue(NoteType, 4); // type
372 S.EmitBytes(StringRef(ElfNote::NoteName, NameSZ)); // name
// Pad after the name field (alignment argument 4, fill value 0).
373 S.EmitValueToAlignment(4, 0, 1, 0); // padding 0
// Same padding again; presumably this follows the descriptor — confirm.
375 S.EmitValueToAlignment(4, 0, 1, 0); // padding 0
// No-op for ELF output: the body is empty and Target is not used.
379 void AMDGPUTargetELFStreamer::EmitDirectiveAMDGCNTarget(StringRef Target) {}
// Records the code object version as a note of type
// ElfNote::NT_AMDGPU_HSA_CODE_OBJECT_VERSION. The descriptor size is the
// constant 8, matching the two 4-byte integers (Major, then Minor) that the
// callback emits.
// NOTE(review): the line naming the function these arguments are passed to is
// not visible in this excerpt (presumably EmitAMDGPUNote — confirm).
381 void AMDGPUTargetELFStreamer::EmitDirectiveHSACodeObjectVersion(
382 uint32_t Major, uint32_t Minor) {
385 MCConstantExpr::create(8, getContext()),
386 ElfNote::NT_AMDGPU_HSA_CODE_OBJECT_VERSION,
387 [&](MCELFStreamer &OS){
388 OS.EmitIntValue(Major, 4);
389 OS.EmitIntValue(Minor, 4);
// Records the ISA description as a note of type ElfNote::NT_AMDGPU_HSA_ISA.
// Descriptor layout, in emission order:
//   2 bytes  VendorNameSize (length of VendorName + 1 for the NUL)
//   2 bytes  ArchNameSize   (length of ArchName + 1 for the NUL)
//   4 bytes  Major
//   4 bytes  Minor
//   4 bytes  Stepping
//   VendorName bytes followed by a 0 byte
//   ArchName bytes followed by a 0 byte
// DescSZ is the sum of those field sizes.
// NOTE(review): the return type, the Minor/Stepping parameter lines and the
// line naming the callee are not visible in this excerpt.
395 AMDGPUTargetELFStreamer::EmitDirectiveHSACodeObjectISA(uint32_t Major,
398 StringRef VendorName,
399 StringRef ArchName) {
// Sizes are stored in 16 bits; a name of 65535 bytes or more would not fit.
400 uint16_t VendorNameSize = VendorName.size() + 1;
401 uint16_t ArchNameSize = ArchName.size() + 1;
403 unsigned DescSZ = sizeof(VendorNameSize) + sizeof(ArchNameSize) +
404 sizeof(Major) + sizeof(Minor) + sizeof(Stepping) +
405 VendorNameSize + ArchNameSize;
408 MCConstantExpr::create(DescSZ, getContext()),
409 ElfNote::NT_AMDGPU_HSA_ISA,
410 [&](MCELFStreamer &OS) {
411 OS.EmitIntValue(VendorNameSize, 2);
412 OS.EmitIntValue(ArchNameSize, 2);
413 OS.EmitIntValue(Major, 4);
414 OS.EmitIntValue(Minor, 4);
415 OS.EmitIntValue(Stepping, 4);
416 OS.EmitBytes(VendorName);
417 OS.EmitIntValue(0, 1); // NULL terminate VendorName
418 OS.EmitBytes(ArchName);
419 OS.EmitIntValue(0, 1); // NULL terminate ArchName
// Emits the in-memory bytes of Header (sizeof(Header) of them) to the ELF
// streamer exactly as they are laid out in this process.
// NOTE(review): the return type and the lines surrounding the EmitBytes call
// are not visible in this excerpt.
425 AMDGPUTargetELFStreamer::EmitAMDKernelCodeT(const amd_kernel_code_t &Header) {
427 MCStreamer &OS = getStreamer();
429 OS.EmitBytes(StringRef((const char*)&Header, sizeof(Header)));
// Looks up (creating it if needed) the symbol called SymbolName in the
// streamer's context and sets its ELF symbol type to Type.
// NOTE(review): the declaration of the Type parameter is not visible in this
// excerpt.
433 void AMDGPUTargetELFStreamer::EmitAMDGPUSymbolType(StringRef SymbolName,
435 MCSymbolELF *Symbol = cast<MCSymbolELF>(
436 getStreamer().getContext().getOrCreateSymbol(SymbolName));
437 Symbol->setType(Type);
// Records IsaVersionString as the descriptor of a note of type
// ELF::NT_AMD_AMDGPU_ISA. The descriptor size is not a constant: it is the
// expression (DescEnd - DescBegin) over two temporary labels that the callback
// places immediately before and after the emitted bytes.
// NOTE(review): the line naming the callee, the DescSZ argument line and the
// return statement are not visible in this excerpt.
440 bool AMDGPUTargetELFStreamer::EmitISAVersion(StringRef IsaVersionString) {
441 // Create two labels to mark the beginning and end of the desc field
442 // and a MCExpr to calculate the size of the desc field.
443 auto &Context = getContext();
444 auto *DescBegin = Context.createTempSymbol();
445 auto *DescEnd = Context.createTempSymbol();
446 auto *DescSZ = MCBinaryExpr::createSub(
447 MCSymbolRefExpr::create(DescEnd, Context),
448 MCSymbolRefExpr::create(DescBegin, Context), Context);
452 ELF::NT_AMD_AMDGPU_ISA,
453 [&](MCELFStreamer &OS) {
454 OS.EmitLabel(DescBegin);
455 OS.EmitBytes(IsaVersionString);
456 OS.EmitLabel(DescEnd);
// Converts HSAMetadata to a string with HSAMD::toString and records that
// string as the descriptor of a note of type ELF::NT_AMD_AMDGPU_HSA_METADATA.
// The descriptor size is the expression (DescEnd - DescBegin) over two
// temporary labels placed around the emitted bytes.
// NOTE(review): the statement controlled by the toString check, the line
// naming the callee and the return statements are not visible in this
// excerpt.
462 bool AMDGPUTargetELFStreamer::EmitHSAMetadata(
463 const AMDGPU::HSAMD::Metadata &HSAMetadata) {
464 std::string HSAMetadataString;
465 if (HSAMD::toString(HSAMetadata, HSAMetadataString))
468 // Create two labels to mark the beginning and end of the desc field
469 // and a MCExpr to calculate the size of the desc field.
470 auto &Context = getContext();
471 auto *DescBegin = Context.createTempSymbol();
472 auto *DescEnd = Context.createTempSymbol();
473 auto *DescSZ = MCBinaryExpr::createSub(
474 MCSymbolRefExpr::create(DescEnd, Context),
475 MCSymbolRefExpr::create(DescBegin, Context), Context);
479 ELF::NT_AMD_AMDGPU_HSA_METADATA,
480 [&](MCELFStreamer &OS) {
481 OS.EmitLabel(DescBegin);
482 OS.EmitBytes(HSAMetadataString);
483 OS.EmitLabel(DescEnd);
// Records PALMetadata as the descriptor of a note of type
// ELF::NT_AMD_AMDGPU_PAL_METADATA. Each element is emitted as an integer of
// sizeof(uint32_t) bytes, so the descriptor size is
// PALMetadata.size() * sizeof(uint32_t).
// NOTE(review): the line naming the callee and the return statement are not
// visible in this excerpt.
489 bool AMDGPUTargetELFStreamer::EmitPALMetadata(
490 const PALMD::Metadata &PALMetadata) {
492 MCConstantExpr::create(PALMetadata.size() * sizeof(uint32_t), getContext()),
493 ELF::NT_AMD_AMDGPU_PAL_METADATA,
494 [&](MCELFStreamer &OS){
495 for (auto I : PALMetadata)
496 OS.EmitIntValue(I, sizeof(uint32_t));
// Emits the binary kernel descriptor for KernelName at the streamer's current
// position and defines the symbol "<KernelName>.kd" there.
//
// The descriptor is written in three pieces so that the
// kernel_code_entry_byte_offset field can be replaced with a symbolic
// expression instead of the value stored in KernelDescriptor:
//   1. the raw bytes that precede kernel_code_entry_byte_offset;
//   2. the expression (kernel code symbol - kernel descriptor symbol), with
//      the width of that field;
//   3. the raw bytes that follow the field, up to sizeof(KernelDescriptor).
//
// NextVGPR, NextSGPR and the Reserve* flags are not referenced in the lines
// visible here.
// NOTE(review): the last parameter line, one argument line of the EmitValue
// call and the end of the function are not visible in this excerpt.
502 void AMDGPUTargetELFStreamer::EmitAmdhsaKernelDescriptor(
503 const MCSubtargetInfo &STI, StringRef KernelName,
504 const amdhsa::kernel_descriptor_t &KernelDescriptor, uint64_t NextVGPR,
505 uint64_t NextSGPR, bool ReserveVCC, bool ReserveFlatScr,
507 auto &Streamer = getStreamer();
508 auto &Context = Streamer.getContext();
// Descriptor symbol: global binding, object type, size of the whole struct.
510 MCSymbolELF *KernelDescriptorSymbol = cast<MCSymbolELF>(
511 Context.getOrCreateSymbol(Twine(KernelName) + Twine(".kd")));
512 KernelDescriptorSymbol->setBinding(ELF::STB_GLOBAL);
513 KernelDescriptorSymbol->setType(ELF::STT_OBJECT);
514 KernelDescriptorSymbol->setSize(
515 MCConstantExpr::create(sizeof(KernelDescriptor), Context));
// The symbol named after the kernel itself is given local binding.
517 MCSymbolELF *KernelCodeSymbol = cast<MCSymbolELF>(
518 Context.getOrCreateSymbol(Twine(KernelName)));
519 KernelCodeSymbol->setBinding(ELF::STB_LOCAL);
521 Streamer.EmitLabel(KernelDescriptorSymbol);
// Piece 1: everything before kernel_code_entry_byte_offset.
522 Streamer.EmitBytes(StringRef(
523 (const char*)&(KernelDescriptor),
524 offsetof(amdhsa::kernel_descriptor_t, kernel_code_entry_byte_offset)));
525 // FIXME: Remove the use of VK_AMDGPU_REL64 in the expression below. The
526 // expression being created is:
527 // (start of kernel code) - (start of kernel descriptor)
528 // It implies R_AMDGPU_REL64, but ends up being R_AMDGPU_ABS64.
// Piece 2: the symbolic offset that stands in for the field's stored value.
529 Streamer.EmitValue(MCBinaryExpr::createSub(
530 MCSymbolRefExpr::create(
531 KernelCodeSymbol, MCSymbolRefExpr::VK_AMDGPU_REL64, Context),
532 MCSymbolRefExpr::create(
533 KernelDescriptorSymbol, MCSymbolRefExpr::VK_None, Context),
535 sizeof(KernelDescriptor.kernel_code_entry_byte_offset));
// Piece 3: everything after kernel_code_entry_byte_offset.
536 Streamer.EmitBytes(StringRef(
537 (const char*)&(KernelDescriptor) +
538 offsetof(amdhsa::kernel_descriptor_t, kernel_code_entry_byte_offset) +
539 sizeof(KernelDescriptor.kernel_code_entry_byte_offset),
540 sizeof(KernelDescriptor) -
541 offsetof(amdhsa::kernel_descriptor_t, kernel_code_entry_byte_offset) -
542 sizeof(KernelDescriptor.kernel_code_entry_byte_offset)));