1 //===- lib/MC/WasmObjectWriter.cpp - Wasm File Writer ---------------------===//
3 // The LLVM Compiler Infrastructure
5 // This file is distributed under the University of Illinois Open Source
6 // License. See LICENSE.TXT for details.
8 //===----------------------------------------------------------------------===//
10 // This file implements Wasm object file writer information.
12 //===----------------------------------------------------------------------===//
14 #include "llvm/ADT/STLExtras.h"
15 #include "llvm/ADT/SmallPtrSet.h"
16 #include "llvm/BinaryFormat/Wasm.h"
17 #include "llvm/MC/MCAsmBackend.h"
18 #include "llvm/MC/MCAsmInfo.h"
19 #include "llvm/MC/MCAsmLayout.h"
20 #include "llvm/MC/MCAssembler.h"
21 #include "llvm/MC/MCContext.h"
22 #include "llvm/MC/MCExpr.h"
23 #include "llvm/MC/MCFixupKindInfo.h"
24 #include "llvm/MC/MCObjectFileInfo.h"
25 #include "llvm/MC/MCObjectWriter.h"
26 #include "llvm/MC/MCSectionWasm.h"
27 #include "llvm/MC/MCSymbolWasm.h"
28 #include "llvm/MC/MCValue.h"
29 #include "llvm/MC/MCWasmObjectWriter.h"
30 #include "llvm/Support/Casting.h"
31 #include "llvm/Support/Debug.h"
32 #include "llvm/Support/ErrorHandling.h"
33 #include "llvm/Support/LEB128.h"
34 #include "llvm/Support/StringSaver.h"
39 #define DEBUG_TYPE "mc"
43 // For patching purposes, we need to remember where each section starts, both
44 // for patching up the section size field, and for patching up references to
45 // locations within the section.
46 struct SectionBookkeeping {
47 // Where the size of the section is written.
49 // Where the contents of the section starts (after the header).
50 uint64_t ContentsOffset;
53 // The signature of a wasm function, in a struct capable of being used as a
55 struct WasmFunctionType {
56 // Support empty and tombstone instances, needed by DenseMap.
57 enum { Plain, Empty, Tombstone } State;
59 // The return types of the function.
60 SmallVector<wasm::ValType, 1> Returns;
62 // The parameter types of the function.
63 SmallVector<wasm::ValType, 4> Params;
65 WasmFunctionType() : State(Plain) {}
67 bool operator==(const WasmFunctionType &Other) const {
68 return State == Other.State && Returns == Other.Returns &&
69 Params == Other.Params;
73 // Traits for using WasmFunctionType in a DenseMap.
74 struct WasmFunctionTypeDenseMapInfo {
75 static WasmFunctionType getEmptyKey() {
76 WasmFunctionType FuncTy;
77 FuncTy.State = WasmFunctionType::Empty;
80 static WasmFunctionType getTombstoneKey() {
81 WasmFunctionType FuncTy;
82 FuncTy.State = WasmFunctionType::Tombstone;
85 static unsigned getHashValue(const WasmFunctionType &FuncTy) {
86 uintptr_t Value = FuncTy.State;
87 for (wasm::ValType Ret : FuncTy.Returns)
88 Value += DenseMapInfo<int32_t>::getHashValue(int32_t(Ret));
89 for (wasm::ValType Param : FuncTy.Params)
90 Value += DenseMapInfo<int32_t>::getHashValue(int32_t(Param));
93 static bool isEqual(const WasmFunctionType &LHS,
94 const WasmFunctionType &RHS) {
99 // A wasm import to be written into the import section.
101 StringRef ModuleName;
107 // A wasm function to be written into the function section.
108 struct WasmFunction {
110 const MCSymbolWasm *Sym;
113 // A wasm export to be written into the export section.
120 // A wasm global to be written into the global section.
125 uint64_t InitialValue;
126 uint32_t ImportIndex;
129 // Information about a single relocation.
130 struct WasmRelocationEntry {
131 uint64_t Offset; // Where is the relocation.
132 const MCSymbolWasm *Symbol; // The symbol to relocate with.
133 int64_t Addend; // A value to add to the symbol.
134 unsigned Type; // The type of the relocation.
135 const MCSectionWasm *FixupSection;// The section the relocation is targeting.
137 WasmRelocationEntry(uint64_t Offset, const MCSymbolWasm *Symbol,
138 int64_t Addend, unsigned Type,
139 const MCSectionWasm *FixupSection)
140 : Offset(Offset), Symbol(Symbol), Addend(Addend), Type(Type),
141 FixupSection(FixupSection) {}
143 bool hasAddend() const {
145 case wasm::R_WEBASSEMBLY_GLOBAL_ADDR_LEB:
146 case wasm::R_WEBASSEMBLY_GLOBAL_ADDR_SLEB:
147 case wasm::R_WEBASSEMBLY_GLOBAL_ADDR_I32:
154 void print(raw_ostream &Out) const {
155 Out << "Off=" << Offset << ", Sym=" << *Symbol << ", Addend=" << Addend
156 << ", Type=" << Type << ", FixupSection=" << FixupSection;
159 #if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
160 LLVM_DUMP_METHOD void dump() const { print(dbgs()); }
165 raw_ostream &operator<<(raw_ostream &OS, const WasmRelocationEntry &Rel) {
171 class WasmObjectWriter : public MCObjectWriter {
172 /// Helper struct for containing some precomputed information on symbols.
173 struct WasmSymbolData {
174 const MCSymbolWasm *Symbol;
177 // Support lexicographic sorting.
178 bool operator<(const WasmSymbolData &RHS) const { return Name < RHS.Name; }
181 /// The target specific Wasm writer instance.
182 std::unique_ptr<MCWasmObjectTargetWriter> TargetObjectWriter;
184 // Relocations for fixing up references in the code section.
185 std::vector<WasmRelocationEntry> CodeRelocations;
187 // Relocations for fixing up references in the data section.
188 std::vector<WasmRelocationEntry> DataRelocations;
190 // Index values to use for fixing up call_indirect type indices.
191 // Maps function symbols to the index of the type of the function
192 DenseMap<const MCSymbolWasm *, uint32_t> TypeIndices;
193 // Maps function symbols to the table element index space. Used
194 // for TABLE_INDEX relocation types (i.e. address taken functions).
195 DenseMap<const MCSymbolWasm *, uint32_t> IndirectSymbolIndices;
196 // Maps function/global symbols to the function/global index space.
197 DenseMap<const MCSymbolWasm *, uint32_t> SymbolIndices;
199 DenseMap<WasmFunctionType, int32_t, WasmFunctionTypeDenseMapInfo>
201 SmallVector<WasmFunctionType, 4> FunctionTypes;
203 // TargetObjectWriter wrappers.
204 bool is64Bit() const { return TargetObjectWriter->is64Bit(); }
205 unsigned getRelocType(const MCValue &Target, const MCFixup &Fixup) const {
206 return TargetObjectWriter->getRelocType(Target, Fixup);
209 void startSection(SectionBookkeeping &Section, unsigned SectionId,
210 const char *Name = nullptr);
211 void endSection(SectionBookkeeping &Section);
214 WasmObjectWriter(MCWasmObjectTargetWriter *MOTW, raw_pwrite_stream &OS)
215 : MCObjectWriter(OS, /*IsLittleEndian=*/true), TargetObjectWriter(MOTW) {}
218 ~WasmObjectWriter() override;
220 void reset() override {
221 CodeRelocations.clear();
222 DataRelocations.clear();
224 SymbolIndices.clear();
225 IndirectSymbolIndices.clear();
226 FunctionTypeIndices.clear();
227 FunctionTypes.clear();
228 MCObjectWriter::reset();
231 void writeHeader(const MCAssembler &Asm);
233 void recordRelocation(MCAssembler &Asm, const MCAsmLayout &Layout,
234 const MCFragment *Fragment, const MCFixup &Fixup,
235 MCValue Target, uint64_t &FixedValue) override;
237 void executePostLayoutBinding(MCAssembler &Asm,
238 const MCAsmLayout &Layout) override;
240 void writeObject(MCAssembler &Asm, const MCAsmLayout &Layout) override;
242 void writeString(const StringRef Str) {
243 encodeULEB128(Str.size(), getStream());
247 void writeValueType(wasm::ValType Ty) {
248 encodeSLEB128(int32_t(Ty), getStream());
251 void writeTypeSection(const SmallVector<WasmFunctionType, 4> &FunctionTypes);
252 void writeImportSection(const SmallVector<WasmImport, 4> &Imports);
253 void writeFunctionSection(const SmallVector<WasmFunction, 4> &Functions);
254 void writeTableSection(uint32_t NumElements);
255 void writeMemorySection(const SmallVector<char, 0> &DataBytes);
256 void writeGlobalSection(const SmallVector<WasmGlobal, 4> &Globals);
257 void writeExportSection(const SmallVector<WasmExport, 4> &Exports);
258 void writeElemSection(const SmallVector<uint32_t, 4> &TableElems);
259 void writeCodeSection(const MCAssembler &Asm, const MCAsmLayout &Layout,
260 const SmallVector<WasmFunction, 4> &Functions);
262 writeDataSection(const SmallVector<char, 0> &DataBytes);
263 void writeNameSection(const SmallVector<WasmFunction, 4> &Functions,
264 const SmallVector<WasmImport, 4> &Imports,
265 uint32_t NumFuncImports);
266 void writeCodeRelocSection();
267 void writeDataRelocSection(uint64_t DataSectionHeaderSize);
268 void writeLinkingMetaDataSection(uint32_t DataSize, uint32_t DataAlignment,
269 ArrayRef<StringRef> WeakSymbols,
270 bool HasStackPointer,
271 uint32_t StackPointerGlobal);
273 void applyRelocations(ArrayRef<WasmRelocationEntry> Relocations,
274 uint64_t ContentsOffset);
276 void writeRelocations(ArrayRef<WasmRelocationEntry> Relocations,
277 uint64_t HeaderSize);
278 uint32_t getRelocationIndexValue(const WasmRelocationEntry &RelEntry);
279 uint32_t getFunctionType(const MCSymbolWasm& Symbol);
280 uint32_t registerFunctionType(const MCSymbolWasm& Symbol);
283 } // end anonymous namespace
285 WasmObjectWriter::~WasmObjectWriter() {}
287 // Return the padding size to write a 32-bit value into a 5-byte ULEB128.
288 static unsigned PaddingFor5ByteULEB128(uint32_t X) {
289 return X == 0 ? 4 : (4u - (31u - countLeadingZeros(X)) / 7u);
292 // Return the padding size to write a 32-bit value into a 5-byte SLEB128.
293 static unsigned PaddingFor5ByteSLEB128(int32_t X) {
294 return 5 - getSLEB128Size(X);
297 // Write out a section header and a patchable section size field.
298 void WasmObjectWriter::startSection(SectionBookkeeping &Section,
301 assert((Name != nullptr) == (SectionId == wasm::WASM_SEC_CUSTOM) &&
302 "Only custom sections can have names");
304 DEBUG(dbgs() << "startSection " << SectionId << ": " << Name << "\n");
305 encodeULEB128(SectionId, getStream());
307 Section.SizeOffset = getStream().tell();
309 // The section size. We don't know the size yet, so reserve enough space
310 // for any 32-bit value; we'll patch it later.
311 encodeULEB128(UINT32_MAX, getStream());
313 // The position where the section starts, for measuring its size.
314 Section.ContentsOffset = getStream().tell();
316 // Custom sections in wasm also have a string identifier.
317 if (SectionId == wasm::WASM_SEC_CUSTOM) {
319 writeString(StringRef(Name));
323 // Now that the section is complete and we know how big it is, patch up the
324 // section size field at the start of the section.
325 void WasmObjectWriter::endSection(SectionBookkeeping &Section) {
326 uint64_t Size = getStream().tell() - Section.ContentsOffset;
327 if (uint32_t(Size) != Size)
328 report_fatal_error("section size does not fit in a uint32_t");
330 DEBUG(dbgs() << "endSection size=" << Size << "\n");
331 unsigned Padding = PaddingFor5ByteULEB128(Size);
333 // Write the final section size to the payload_len field, which follows
334 // the section id byte.
336 unsigned SizeLen = encodeULEB128(Size, Buffer, Padding);
337 assert(SizeLen == 5);
338 getStream().pwrite((char *)Buffer, SizeLen, Section.SizeOffset);
341 // Emit the Wasm header.
342 void WasmObjectWriter::writeHeader(const MCAssembler &Asm) {
343 writeBytes(StringRef(wasm::WasmMagic, sizeof(wasm::WasmMagic)));
344 writeLE32(wasm::WasmVersion);
347 void WasmObjectWriter::executePostLayoutBinding(MCAssembler &Asm,
348 const MCAsmLayout &Layout) {
351 void WasmObjectWriter::recordRelocation(MCAssembler &Asm,
352 const MCAsmLayout &Layout,
353 const MCFragment *Fragment,
354 const MCFixup &Fixup, MCValue Target,
355 uint64_t &FixedValue) {
356 MCAsmBackend &Backend = Asm.getBackend();
357 bool IsPCRel = Backend.getFixupKindInfo(Fixup.getKind()).Flags &
358 MCFixupKindInfo::FKF_IsPCRel;
359 const auto &FixupSection = cast<MCSectionWasm>(*Fragment->getParent());
360 uint64_t C = Target.getConstant();
361 uint64_t FixupOffset = Layout.getFragmentOffset(Fragment) + Fixup.getOffset();
362 MCContext &Ctx = Asm.getContext();
364 if (const MCSymbolRefExpr *RefB = Target.getSymB()) {
365 assert(RefB->getKind() == MCSymbolRefExpr::VK_None &&
366 "Should not have constructed this");
368 // Let A, B and C be the components of Target and R be the location of
369 // the fixup. If the fixup is not pcrel, we want to compute (A - B + C).
370 // If it is pcrel, we want to compute (A - B + C - R).
372 // In general, Wasm has no relocations for -B. It can only represent (A + C)
373 // or (A + C - R). If B = R + K and the relocation is not pcrel, we can
374 // replace B to implement it: (A - R - K + C)
378 "No relocation available to represent this relative expression");
382 const auto &SymB = cast<MCSymbolWasm>(RefB->getSymbol());
384 if (SymB.isUndefined()) {
385 Ctx.reportError(Fixup.getLoc(),
386 Twine("symbol '") + SymB.getName() +
387 "' can not be undefined in a subtraction expression");
391 assert(!SymB.isAbsolute() && "Should have been folded");
392 const MCSection &SecB = SymB.getSection();
393 if (&SecB != &FixupSection) {
394 Ctx.reportError(Fixup.getLoc(),
395 "Cannot represent a difference across sections");
399 uint64_t SymBOffset = Layout.getSymbolOffset(SymB);
400 uint64_t K = SymBOffset - FixupOffset;
405 // We either rejected the fixup or folded B into C at this point.
406 const MCSymbolRefExpr *RefA = Target.getSymA();
407 const auto *SymA = RefA ? cast<MCSymbolWasm>(&RefA->getSymbol()) : nullptr;
409 if (SymA && SymA->isVariable()) {
410 const MCExpr *Expr = SymA->getVariableValue();
411 const auto *Inner = cast<MCSymbolRefExpr>(Expr);
412 if (Inner->getKind() == MCSymbolRefExpr::VK_WEAKREF)
413 llvm_unreachable("weakref used in reloc not yet implemented");
416 // Put any constant offset in an addend. Offsets can be negative, and
417 // LLVM expects wrapping, in contrast to wasm's immediates which can't
418 // be negative and don't wrap.
422 SymA->setUsedInReloc();
427 unsigned Type = getRelocType(Target, Fixup);
429 WasmRelocationEntry Rec(FixupOffset, SymA, C, Type, &FixupSection);
430 DEBUG(dbgs() << "WasmReloc: " << Rec << "\n");
432 if (FixupSection.hasInstructions())
433 CodeRelocations.push_back(Rec);
435 DataRelocations.push_back(Rec);
438 // Write X as an (unsigned) LEB value at offset Offset in Stream, padded
439 // to allow patching.
441 WritePatchableLEB(raw_pwrite_stream &Stream, uint32_t X, uint64_t Offset) {
443 unsigned Padding = PaddingFor5ByteULEB128(X);
444 unsigned SizeLen = encodeULEB128(X, Buffer, Padding);
445 assert(SizeLen == 5);
446 Stream.pwrite((char *)Buffer, SizeLen, Offset);
449 // Write X as a signed LEB value at offset Offset in Stream, padded
450 // to allow patching.
452 WritePatchableSLEB(raw_pwrite_stream &Stream, int32_t X, uint64_t Offset) {
454 unsigned Padding = PaddingFor5ByteSLEB128(X);
455 unsigned SizeLen = encodeSLEB128(X, Buffer, Padding);
456 assert(SizeLen == 5);
457 Stream.pwrite((char *)Buffer, SizeLen, Offset);
460 // Write X as a plain integer value at offset Offset in Stream.
461 static void WriteI32(raw_pwrite_stream &Stream, uint32_t X, uint64_t Offset) {
463 support::endian::write32le(Buffer, X);
464 Stream.pwrite((char *)Buffer, sizeof(Buffer), Offset);
467 // Compute a value to write into the code at the location covered
468 // by RelEntry. This value isn't used by the static linker, since
469 // we have addends; it just serves to make the code more readable
470 // and to make standalone wasm modules directly usable.
471 static uint32_t ProvisionalValue(const WasmRelocationEntry &RelEntry) {
472 const MCSymbolWasm *Sym = RelEntry.Symbol;
474 // For undefined symbols, use a hopefully invalid value.
475 if (!Sym->isDefined(/*SetUsed=*/false))
478 const auto &Section = cast<MCSectionWasm>(RelEntry.Symbol->getSection(false));
479 uint64_t Address = Section.getSectionOffset() + RelEntry.Addend;
481 // Ignore overflow. LLVM allows address arithmetic to silently wrap.
482 uint32_t Value = Address;
487 uint32_t WasmObjectWriter::getRelocationIndexValue(
488 const WasmRelocationEntry &RelEntry) {
489 switch (RelEntry.Type) {
490 case wasm::R_WEBASSEMBLY_TABLE_INDEX_SLEB:
491 case wasm::R_WEBASSEMBLY_TABLE_INDEX_I32:
492 if (!IndirectSymbolIndices.count(RelEntry.Symbol))
493 report_fatal_error("symbol not found table index space: " +
494 RelEntry.Symbol->getName());
495 return IndirectSymbolIndices[RelEntry.Symbol];
496 case wasm::R_WEBASSEMBLY_FUNCTION_INDEX_LEB:
497 case wasm::R_WEBASSEMBLY_GLOBAL_INDEX_LEB:
498 case wasm::R_WEBASSEMBLY_GLOBAL_ADDR_LEB:
499 case wasm::R_WEBASSEMBLY_GLOBAL_ADDR_SLEB:
500 case wasm::R_WEBASSEMBLY_GLOBAL_ADDR_I32:
501 if (!SymbolIndices.count(RelEntry.Symbol))
502 report_fatal_error("symbol not found function/global index space: " +
503 RelEntry.Symbol->getName());
504 return SymbolIndices[RelEntry.Symbol];
505 case wasm::R_WEBASSEMBLY_TYPE_INDEX_LEB:
506 if (!TypeIndices.count(RelEntry.Symbol))
507 report_fatal_error("symbol not found in type index space: " +
508 RelEntry.Symbol->getName());
509 return TypeIndices[RelEntry.Symbol];
511 llvm_unreachable("invalid relocation type");
515 // Apply the portions of the relocation records that we can handle ourselves
517 void WasmObjectWriter::applyRelocations(
518 ArrayRef<WasmRelocationEntry> Relocations, uint64_t ContentsOffset) {
519 raw_pwrite_stream &Stream = getStream();
520 for (const WasmRelocationEntry &RelEntry : Relocations) {
521 uint64_t Offset = ContentsOffset +
522 RelEntry.FixupSection->getSectionOffset() +
525 DEBUG(dbgs() << "applyRelocation: " << RelEntry << "\n");
526 switch (RelEntry.Type) {
527 case wasm::R_WEBASSEMBLY_TABLE_INDEX_SLEB:
528 case wasm::R_WEBASSEMBLY_FUNCTION_INDEX_LEB:
529 case wasm::R_WEBASSEMBLY_TYPE_INDEX_LEB:
530 case wasm::R_WEBASSEMBLY_GLOBAL_INDEX_LEB: {
531 uint32_t Index = getRelocationIndexValue(RelEntry);
532 WritePatchableSLEB(Stream, Index, Offset);
535 case wasm::R_WEBASSEMBLY_TABLE_INDEX_I32: {
536 uint32_t Index = getRelocationIndexValue(RelEntry);
537 WriteI32(Stream, Index, Offset);
540 case wasm::R_WEBASSEMBLY_GLOBAL_ADDR_SLEB: {
541 uint32_t Value = ProvisionalValue(RelEntry);
542 WritePatchableSLEB(Stream, Value, Offset);
545 case wasm::R_WEBASSEMBLY_GLOBAL_ADDR_LEB: {
546 uint32_t Value = ProvisionalValue(RelEntry);
547 WritePatchableLEB(Stream, Value, Offset);
550 case wasm::R_WEBASSEMBLY_GLOBAL_ADDR_I32: {
551 uint32_t Value = ProvisionalValue(RelEntry);
552 WriteI32(Stream, Value, Offset);
556 llvm_unreachable("invalid relocation type");
561 // Write out the portions of the relocation records that the linker will
563 void WasmObjectWriter::writeRelocations(
564 ArrayRef<WasmRelocationEntry> Relocations, uint64_t HeaderSize) {
565 raw_pwrite_stream &Stream = getStream();
566 for (const WasmRelocationEntry& RelEntry : Relocations) {
568 uint64_t Offset = RelEntry.Offset +
569 RelEntry.FixupSection->getSectionOffset() + HeaderSize;
570 uint32_t Index = getRelocationIndexValue(RelEntry);
572 encodeULEB128(RelEntry.Type, Stream);
573 encodeULEB128(Offset, Stream);
574 encodeULEB128(Index, Stream);
575 if (RelEntry.hasAddend())
576 encodeSLEB128(RelEntry.Addend, Stream);
580 void WasmObjectWriter::writeTypeSection(
581 const SmallVector<WasmFunctionType, 4> &FunctionTypes) {
582 if (FunctionTypes.empty())
585 SectionBookkeeping Section;
586 startSection(Section, wasm::WASM_SEC_TYPE);
588 encodeULEB128(FunctionTypes.size(), getStream());
590 for (const WasmFunctionType &FuncTy : FunctionTypes) {
591 encodeSLEB128(wasm::WASM_TYPE_FUNC, getStream());
592 encodeULEB128(FuncTy.Params.size(), getStream());
593 for (wasm::ValType Ty : FuncTy.Params)
595 encodeULEB128(FuncTy.Returns.size(), getStream());
596 for (wasm::ValType Ty : FuncTy.Returns)
604 void WasmObjectWriter::writeImportSection(
605 const SmallVector<WasmImport, 4> &Imports) {
609 SectionBookkeeping Section;
610 startSection(Section, wasm::WASM_SEC_IMPORT);
612 encodeULEB128(Imports.size(), getStream());
613 for (const WasmImport &Import : Imports) {
614 writeString(Import.ModuleName);
615 writeString(Import.FieldName);
617 encodeULEB128(Import.Kind, getStream());
619 switch (Import.Kind) {
620 case wasm::WASM_EXTERNAL_FUNCTION:
621 encodeULEB128(Import.Type, getStream());
623 case wasm::WASM_EXTERNAL_GLOBAL:
624 encodeSLEB128(int32_t(Import.Type), getStream());
625 encodeULEB128(0, getStream()); // mutability
628 llvm_unreachable("unsupported import kind");
635 void WasmObjectWriter::writeFunctionSection(
636 const SmallVector<WasmFunction, 4> &Functions) {
637 if (Functions.empty())
640 SectionBookkeeping Section;
641 startSection(Section, wasm::WASM_SEC_FUNCTION);
643 encodeULEB128(Functions.size(), getStream());
644 for (const WasmFunction &Func : Functions)
645 encodeULEB128(Func.Type, getStream());
650 void WasmObjectWriter::writeTableSection(uint32_t NumElements) {
651 // For now, always emit the table section, since indirect calls are not
652 // valid without it. In the future, we could perhaps be more clever and omit
653 // it if there are no indirect calls.
655 SectionBookkeeping Section;
656 startSection(Section, wasm::WASM_SEC_TABLE);
658 encodeULEB128(1, getStream()); // The number of tables.
659 // Fixed to 1 for now.
660 encodeSLEB128(wasm::WASM_TYPE_ANYFUNC, getStream()); // Type of table
661 encodeULEB128(0, getStream()); // flags
662 encodeULEB128(NumElements, getStream()); // initial
667 void WasmObjectWriter::writeMemorySection(
668 const SmallVector<char, 0> &DataBytes) {
669 // For now, always emit the memory section, since loads and stores are not
670 // valid without it. In the future, we could perhaps be more clever and omit
671 // it if there are no loads or stores.
672 SectionBookkeeping Section;
674 (DataBytes.size() + wasm::WasmPageSize - 1) / wasm::WasmPageSize;
676 startSection(Section, wasm::WASM_SEC_MEMORY);
677 encodeULEB128(1, getStream()); // number of memory spaces
679 encodeULEB128(0, getStream()); // flags
680 encodeULEB128(NumPages, getStream()); // initial
685 void WasmObjectWriter::writeGlobalSection(
686 const SmallVector<WasmGlobal, 4> &Globals) {
690 SectionBookkeeping Section;
691 startSection(Section, wasm::WASM_SEC_GLOBAL);
693 encodeULEB128(Globals.size(), getStream());
694 for (const WasmGlobal &Global : Globals) {
695 writeValueType(Global.Type);
696 write8(Global.IsMutable);
698 if (Global.HasImport) {
699 assert(Global.InitialValue == 0);
700 write8(wasm::WASM_OPCODE_GET_GLOBAL);
701 encodeULEB128(Global.ImportIndex, getStream());
703 assert(Global.ImportIndex == 0);
704 write8(wasm::WASM_OPCODE_I32_CONST);
705 encodeSLEB128(Global.InitialValue, getStream()); // offset
707 write8(wasm::WASM_OPCODE_END);
713 void WasmObjectWriter::writeExportSection(
714 const SmallVector<WasmExport, 4> &Exports) {
718 SectionBookkeeping Section;
719 startSection(Section, wasm::WASM_SEC_EXPORT);
721 encodeULEB128(Exports.size(), getStream());
722 for (const WasmExport &Export : Exports) {
723 writeString(Export.FieldName);
724 encodeSLEB128(Export.Kind, getStream());
725 encodeULEB128(Export.Index, getStream());
731 void WasmObjectWriter::writeElemSection(
732 const SmallVector<uint32_t, 4> &TableElems) {
733 if (TableElems.empty())
736 SectionBookkeeping Section;
737 startSection(Section, wasm::WASM_SEC_ELEM);
739 encodeULEB128(1, getStream()); // number of "segments"
740 encodeULEB128(0, getStream()); // the table index
742 // init expr for starting offset
743 write8(wasm::WASM_OPCODE_I32_CONST);
744 encodeSLEB128(0, getStream());
745 write8(wasm::WASM_OPCODE_END);
747 encodeULEB128(TableElems.size(), getStream());
748 for (uint32_t Elem : TableElems)
749 encodeULEB128(Elem, getStream());
754 void WasmObjectWriter::writeCodeSection(
755 const MCAssembler &Asm, const MCAsmLayout &Layout,
756 const SmallVector<WasmFunction, 4> &Functions) {
757 if (Functions.empty())
760 SectionBookkeeping Section;
761 startSection(Section, wasm::WASM_SEC_CODE);
763 encodeULEB128(Functions.size(), getStream());
765 for (const WasmFunction &Func : Functions) {
766 auto &FuncSection = static_cast<MCSectionWasm &>(Func.Sym->getSection());
769 if (!Func.Sym->getSize()->evaluateAsAbsolute(Size, Layout))
770 report_fatal_error(".size expression must be evaluatable");
772 encodeULEB128(Size, getStream());
774 FuncSection.setSectionOffset(getStream().tell() - Section.ContentsOffset);
776 Asm.writeSectionData(&FuncSection, Layout);
780 applyRelocations(CodeRelocations, Section.ContentsOffset);
785 uint64_t WasmObjectWriter::writeDataSection(
786 const SmallVector<char, 0> &DataBytes) {
787 if (DataBytes.empty())
790 SectionBookkeeping Section;
791 startSection(Section, wasm::WASM_SEC_DATA);
793 encodeULEB128(1, getStream()); // count
794 encodeULEB128(0, getStream()); // memory index
795 write8(wasm::WASM_OPCODE_I32_CONST);
796 encodeSLEB128(0, getStream()); // offset
797 write8(wasm::WASM_OPCODE_END);
798 encodeULEB128(DataBytes.size(), getStream()); // size
799 uint32_t HeaderSize = getStream().tell() - Section.ContentsOffset;
800 writeBytes(DataBytes); // data
803 applyRelocations(DataRelocations, Section.ContentsOffset + HeaderSize);
809 void WasmObjectWriter::writeNameSection(
810 const SmallVector<WasmFunction, 4> &Functions,
811 const SmallVector<WasmImport, 4> &Imports,
812 unsigned NumFuncImports) {
813 uint32_t TotalFunctions = NumFuncImports + Functions.size();
814 if (TotalFunctions == 0)
817 SectionBookkeeping Section;
818 startSection(Section, wasm::WASM_SEC_CUSTOM, "name");
819 SectionBookkeeping SubSection;
820 startSection(SubSection, wasm::WASM_NAMES_FUNCTION);
822 encodeULEB128(TotalFunctions, getStream());
824 for (const WasmImport &Import : Imports) {
825 if (Import.Kind == wasm::WASM_EXTERNAL_FUNCTION) {
826 encodeULEB128(Index, getStream());
827 writeString(Import.FieldName);
831 for (const WasmFunction &Func : Functions) {
832 encodeULEB128(Index, getStream());
833 writeString(Func.Sym->getName());
837 endSection(SubSection);
841 void WasmObjectWriter::writeCodeRelocSection() {
842 // See: https://github.com/WebAssembly/tool-conventions/blob/master/Linking.md
843 // for descriptions of the reloc sections.
845 if (CodeRelocations.empty())
848 SectionBookkeeping Section;
849 startSection(Section, wasm::WASM_SEC_CUSTOM, "reloc.CODE");
851 encodeULEB128(wasm::WASM_SEC_CODE, getStream());
852 encodeULEB128(CodeRelocations.size(), getStream());
854 writeRelocations(CodeRelocations, 0);
859 void WasmObjectWriter::writeDataRelocSection(uint64_t DataSectionHeaderSize) {
860 // See: https://github.com/WebAssembly/tool-conventions/blob/master/Linking.md
861 // for descriptions of the reloc sections.
863 if (DataRelocations.empty())
866 SectionBookkeeping Section;
867 startSection(Section, wasm::WASM_SEC_CUSTOM, "reloc.DATA");
869 encodeULEB128(wasm::WASM_SEC_DATA, getStream());
870 encodeULEB128(DataRelocations.size(), getStream());
872 writeRelocations(DataRelocations, DataSectionHeaderSize);
877 void WasmObjectWriter::writeLinkingMetaDataSection(
878 uint32_t DataSize, uint32_t DataAlignment, ArrayRef<StringRef> WeakSymbols,
879 bool HasStackPointer, uint32_t StackPointerGlobal) {
880 SectionBookkeeping Section;
881 startSection(Section, wasm::WASM_SEC_CUSTOM, "linking");
882 SectionBookkeeping SubSection;
884 if (HasStackPointer) {
885 startSection(SubSection, wasm::WASM_STACK_POINTER);
886 encodeULEB128(StackPointerGlobal, getStream()); // id
887 endSection(SubSection);
890 if (WeakSymbols.size() != 0) {
891 startSection(SubSection, wasm::WASM_SYMBOL_INFO);
892 encodeULEB128(WeakSymbols.size(), getStream());
893 for (const StringRef Export: WeakSymbols) {
895 encodeULEB128(wasm::WASM_SYMBOL_FLAG_WEAK, getStream());
897 endSection(SubSection);
901 startSection(SubSection, wasm::WASM_DATA_SIZE);
902 encodeULEB128(DataSize, getStream());
903 endSection(SubSection);
905 startSection(SubSection, wasm::WASM_DATA_ALIGNMENT);
906 encodeULEB128(DataAlignment, getStream());
907 endSection(SubSection);
913 uint32_t WasmObjectWriter::getFunctionType(const MCSymbolWasm& Symbol) {
914 assert(Symbol.isFunction());
915 assert(TypeIndices.count(&Symbol));
916 return TypeIndices[&Symbol];
919 uint32_t WasmObjectWriter::registerFunctionType(const MCSymbolWasm& Symbol) {
920 assert(Symbol.isFunction());
923 if (Symbol.isVariable()) {
924 const MCExpr *Expr = Symbol.getVariableValue();
925 auto *Inner = cast<MCSymbolRefExpr>(Expr);
926 const auto *ResolvedSym = cast<MCSymbolWasm>(&Inner->getSymbol());
927 F.Returns = ResolvedSym->getReturns();
928 F.Params = ResolvedSym->getParams();
930 F.Returns = Symbol.getReturns();
931 F.Params = Symbol.getParams();
935 FunctionTypeIndices.insert(std::make_pair(F, FunctionTypes.size()));
937 FunctionTypes.push_back(F);
938 TypeIndices[&Symbol] = Pair.first->second;
940 DEBUG(dbgs() << "registerFunctionType: " << Symbol << " new:" << Pair.second << "\n");
941 DEBUG(dbgs() << " -> type index: " << Pair.first->second << "\n");
942 return Pair.first->second;
945 void WasmObjectWriter::writeObject(MCAssembler &Asm,
946 const MCAsmLayout &Layout) {
947 DEBUG(dbgs() << "WasmObjectWriter::writeObject\n");
948 MCContext &Ctx = Asm.getContext();
949 wasm::ValType PtrType = is64Bit() ? wasm::ValType::I64 : wasm::ValType::I32;
951 // Collect information from the available symbols.
952 SmallVector<WasmFunction, 4> Functions;
953 SmallVector<uint32_t, 4> TableElems;
954 SmallVector<WasmGlobal, 4> Globals;
955 SmallVector<WasmImport, 4> Imports;
956 SmallVector<WasmExport, 4> Exports;
957 SmallVector<StringRef, 4> WeakSymbols;
958 SmallPtrSet<const MCSymbolWasm *, 4> IsAddressTaken;
959 unsigned NumFuncImports = 0;
960 unsigned NumGlobalImports = 0;
961 SmallVector<char, 0> DataBytes;
962 uint32_t DataAlignment = 1;
963 uint32_t StackPointerGlobal = 0;
964 bool HasStackPointer = false;
966 // Populate the IsAddressTaken set.
967 for (const WasmRelocationEntry &RelEntry : CodeRelocations) {
968 switch (RelEntry.Type) {
969 case wasm::R_WEBASSEMBLY_TABLE_INDEX_SLEB:
970 case wasm::R_WEBASSEMBLY_GLOBAL_ADDR_SLEB:
971 IsAddressTaken.insert(RelEntry.Symbol);
977 for (const WasmRelocationEntry &RelEntry : DataRelocations) {
978 switch (RelEntry.Type) {
979 case wasm::R_WEBASSEMBLY_TABLE_INDEX_I32:
980 case wasm::R_WEBASSEMBLY_GLOBAL_ADDR_I32:
981 IsAddressTaken.insert(RelEntry.Symbol);
988 // Populate the Imports set.
989 for (const MCSymbol &S : Asm.symbols()) {
990 const auto &WS = static_cast<const MCSymbolWasm &>(S);
992 if (WS.isTemporary())
996 registerFunctionType(WS);
998 // If the symbol is not defined in this translation unit, import it.
999 if (!WS.isDefined(/*SetUsed=*/false) || WS.isVariable()) {
1001 Import.ModuleName = WS.getModuleName();
1002 Import.FieldName = WS.getName();
1004 if (WS.isFunction()) {
1005 Import.Kind = wasm::WASM_EXTERNAL_FUNCTION;
1006 Import.Type = getFunctionType(WS);
1007 SymbolIndices[&WS] = NumFuncImports;
1010 Import.Kind = wasm::WASM_EXTERNAL_GLOBAL;
1011 Import.Type = int32_t(PtrType);
1012 SymbolIndices[&WS] = NumGlobalImports;
1016 Imports.push_back(Import);
1020 // In the special .global_variables section, we've encoded global
1021 // variables used by the function. Translate them into the Globals
1023 MCSectionWasm *GlobalVars = Ctx.getWasmSection(".global_variables", 0, 0);
1024 if (!GlobalVars->getFragmentList().empty()) {
1025 if (GlobalVars->getFragmentList().size() != 1)
1026 report_fatal_error("only one .global_variables fragment supported");
1027 const MCFragment &Frag = *GlobalVars->begin();
1028 if (Frag.hasInstructions() || Frag.getKind() != MCFragment::FT_Data)
1029 report_fatal_error("only data supported in .global_variables");
1030 const auto &DataFrag = cast<MCDataFragment>(Frag);
1031 if (!DataFrag.getFixups().empty())
1032 report_fatal_error("fixups not supported in .global_variables");
1033 const SmallVectorImpl<char> &Contents = DataFrag.getContents();
1034 for (const uint8_t *p = (const uint8_t *)Contents.data(),
1035 *end = (const uint8_t *)Contents.data() + Contents.size();
1039 report_fatal_error("truncated global variable encoding");
1040 G.Type = wasm::ValType(int8_t(*p++));
1041 G.IsMutable = bool(*p++);
1042 G.HasImport = bool(*p++);
1047 Import.ModuleName = (const char *)p;
1048 const uint8_t *nul = (const uint8_t *)memchr(p, '\0', end - p);
1050 report_fatal_error("global module name must be nul-terminated");
1052 nul = (const uint8_t *)memchr(p, '\0', end - p);
1054 report_fatal_error("global base name must be nul-terminated");
1055 Import.FieldName = (const char *)p;
1058 Import.Kind = wasm::WASM_EXTERNAL_GLOBAL;
1059 Import.Type = int32_t(G.Type);
1061 G.ImportIndex = NumGlobalImports;
1064 Imports.push_back(Import);
1067 G.InitialValue = decodeSLEB128(p, &n);
1069 if ((ptrdiff_t)n > end - p)
1070 report_fatal_error("global initial value must be valid SLEB128");
1073 Globals.push_back(G);
1077 // In the special .stack_pointer section, we've encoded the stack pointer index.
1079 MCSectionWasm *StackPtr = Ctx.getWasmSection(".stack_pointer", 0, 0);
1080 if (!StackPtr->getFragmentList().empty()) {
1081 if (StackPtr->getFragmentList().size() != 1)
1082 report_fatal_error("only one .stack_pointer fragment supported");
1083 const MCFragment &Frag = *StackPtr->begin();
1084 if (Frag.hasInstructions() || Frag.getKind() != MCFragment::FT_Data)
1085 report_fatal_error("only data supported in .stack_pointer");
1086 const auto &DataFrag = cast<MCDataFragment>(Frag);
1087 if (!DataFrag.getFixups().empty())
1088 report_fatal_error("fixups not supported in .stack_pointer");
1089 const SmallVectorImpl<char> &Contents = DataFrag.getContents();
1090 if (Contents.size() != 4)
1091 report_fatal_error("only one entry supported in .stack_pointer");
1092 HasStackPointer = true;
1093 StackPointerGlobal = NumGlobalImports + *(const int32_t *)Contents.data();
1096 // Handle regular defined and undefined symbols.
1097 for (const MCSymbol &S : Asm.symbols()) {
1098 // Ignore unnamed temporary symbols, which aren't ever exported, imported,
1099 // or used in relocations.
1100 if (S.isTemporary() && S.getName().empty())
1103 const auto &WS = static_cast<const MCSymbolWasm &>(S);
1104 DEBUG(dbgs() << "MCSymbol: '" << S << "'"
1105 << " isDefined=" << S.isDefined() << " isExternal="
1106 << S.isExternal() << " isTemporary=" << S.isTemporary()
1107 << " isFunction=" << WS.isFunction()
1108 << " isWeak=" << WS.isWeak()
1109 << " isVariable=" << WS.isVariable() << "\n");
1112 WeakSymbols.push_back(WS.getName());
1114 if (WS.isVariable())
1119 if (WS.isFunction()) {
1120 if (WS.isDefined(/*SetUsed=*/false)) {
1121 if (WS.getOffset() != 0)
1123 "function sections must contain one function each");
1125 if (WS.getSize() == 0)
1127 "function symbols must have a size set with .size");
1129 // A definition. Take the next available index.
1130 Index = NumFuncImports + Functions.size();
1132 // Prepare the function.
1134 Func.Type = getFunctionType(WS);
1136 SymbolIndices[&WS] = Index;
1137 Functions.push_back(Func);
1139 // An import; the index was assigned above.
1140 Index = SymbolIndices.find(&WS)->second;
1143 DEBUG(dbgs() << " -> function index: " << Index << "\n");
1145 // If needed, prepare the function to be called indirectly.
1146 if (IsAddressTaken.count(&WS) != 0) {
1147 IndirectSymbolIndices[&WS] = TableElems.size();
1148 DEBUG(dbgs() << " -> adding to table: " << TableElems.size() << "\n");
1149 TableElems.push_back(Index);
1152 if (WS.isTemporary() && !WS.getSize())
1155 if (!WS.isDefined(/*SetUsed=*/false))
1158 if (WS.getOffset() != 0)
1159 report_fatal_error("data sections must contain one variable each: " +
1162 report_fatal_error("data symbols must have a size set with .size: " +
1166 if (!WS.getSize()->evaluateAsAbsolute(Size, Layout))
1167 report_fatal_error(".size expression must be evaluatable");
1169 auto &DataSection = static_cast<MCSectionWasm &>(WS.getSection());
1171 if (uint64_t(Size) != Layout.getSectionFileSize(&DataSection))
1172 report_fatal_error("data sections must contain at most one variable");
1174 DataBytes.resize(alignTo(DataBytes.size(), DataSection.getAlignment()));
1175 DataAlignment = std::max(DataAlignment, DataSection.getAlignment());
1177 DataSection.setSectionOffset(DataBytes.size());
1179 for (const MCFragment &Frag : DataSection) {
1180 if (Frag.hasInstructions())
1181 report_fatal_error("only data supported in data sections");
1183 if (auto *Align = dyn_cast<MCAlignFragment>(&Frag)) {
1184 if (Align->getValueSize() != 1)
1185 report_fatal_error("only byte values supported for alignment");
1186 // If nops are requested, use zeros, as this is the data section.
1187 uint8_t Value = Align->hasEmitNops() ? 0 : Align->getValue();
1188 uint64_t Size = std::min<uint64_t>(alignTo(DataBytes.size(),
1189 Align->getAlignment()),
1191 Align->getMaxBytesToEmit());
1192 DataBytes.resize(Size, Value);
1193 } else if (auto *Fill = dyn_cast<MCFillFragment>(&Frag)) {
1194 DataBytes.insert(DataBytes.end(), Fill->getSize(), Fill->getValue());
1196 const auto &DataFrag = cast<MCDataFragment>(Frag);
1197 const SmallVectorImpl<char> &Contents = DataFrag.getContents();
1199 DataBytes.insert(DataBytes.end(), Contents.begin(), Contents.end());
1203 // For each global, prepare a corresponding wasm global holding its
1204 // address. For externals these will also be named exports.
1205 Index = NumGlobalImports + Globals.size();
1208 Global.Type = PtrType;
1209 Global.IsMutable = false;
1210 Global.HasImport = false;
1211 Global.InitialValue = DataSection.getSectionOffset();
1212 Global.ImportIndex = 0;
1213 SymbolIndices[&WS] = Index;
1214 DEBUG(dbgs() << " -> global index: " << Index << "\n");
1215 Globals.push_back(Global);
1218 // If the symbol is visible outside this translation unit, export it.
1219 if ((WS.isExternal() && WS.isDefined(/*SetUsed=*/false))) {
1221 Export.FieldName = WS.getName();
1222 Export.Index = Index;
1223 if (WS.isFunction())
1224 Export.Kind = wasm::WASM_EXTERNAL_FUNCTION;
1226 Export.Kind = wasm::WASM_EXTERNAL_GLOBAL;
1227 DEBUG(dbgs() << " -> export " << Exports.size() << "\n");
1228 Exports.push_back(Export);
1232 // Handle weak aliases. We need to process these in a separate pass because
1233 // we need to have processed the target of the alias before the alias itself
1234 // and the symbols are not necessarily ordered in this way.
1235 for (const MCSymbol &S : Asm.symbols()) {
1236 if (!S.isVariable())
1238 assert(S.isDefined(/*SetUsed=*/false));
1240 const auto &WS = static_cast<const MCSymbolWasm &>(S);
1241 // Find the target symbol of this weak alias and export that index
1242 const MCExpr *Expr = WS.getVariableValue();
1243 auto *Inner = cast<MCSymbolRefExpr>(Expr);
1244 const auto *ResolvedSym = cast<MCSymbolWasm>(&Inner->getSymbol());
1245 DEBUG(dbgs() << WS.getName() << ": weak alias of '" << *ResolvedSym << "'\n");
1246 assert(SymbolIndices.count(ResolvedSym) > 0);
1247 uint32_t Index = SymbolIndices.find(ResolvedSym)->second;
1248 DEBUG(dbgs() << " -> index:" << Index << "\n");
1251 Export.FieldName = WS.getName();
1252 Export.Index = Index;
1253 if (WS.isFunction())
1254 Export.Kind = wasm::WASM_EXTERNAL_FUNCTION;
1256 Export.Kind = wasm::WASM_EXTERNAL_GLOBAL;
1257 DEBUG(dbgs() << " -> export " << Exports.size() << "\n");
1258 Exports.push_back(Export);
1261 // Add types for indirect function calls.
1262 for (const WasmRelocationEntry &Fixup : CodeRelocations) {
1263 if (Fixup.Type != wasm::R_WEBASSEMBLY_TYPE_INDEX_LEB)
1266 registerFunctionType(*Fixup.Symbol);
1269 // Write out the Wasm header.
1272 writeTypeSection(FunctionTypes);
1273 writeImportSection(Imports);
1274 writeFunctionSection(Functions);
1275 writeTableSection(TableElems.size());
1276 writeMemorySection(DataBytes);
1277 writeGlobalSection(Globals);
1278 writeExportSection(Exports);
1279 // TODO: Start Section
1280 writeElemSection(TableElems);
1281 writeCodeSection(Asm, Layout, Functions);
1282 uint64_t DataSectionHeaderSize = writeDataSection(DataBytes);
1283 writeNameSection(Functions, Imports, NumFuncImports);
1284 writeCodeRelocSection();
1285 writeDataRelocSection(DataSectionHeaderSize);
1286 writeLinkingMetaDataSection(DataBytes.size(), DataAlignment, WeakSymbols, HasStackPointer, StackPointerGlobal);
1288 // TODO: Translate the .comment section to the output.
1289 // TODO: Translate debug sections to the output.
1292 MCObjectWriter *llvm::createWasmObjectWriter(MCWasmObjectTargetWriter *MOTW,
1293 raw_pwrite_stream &OS) {
1294 return new WasmObjectWriter(MOTW, OS);