1 //===- lib/ReaderWriter/MachO/MachONormalizedFile.h -----------------------===//
5 // This file is distributed under the University of Illinois Open Source
6 // License. See LICENSE.TXT for details.
8 //===----------------------------------------------------------------------===//
11 /// \file These data structures comprise the "normalized" view of
12 /// mach-o object files. The normalized view is an in-memory only data structure
13 /// which is always in native endianness and pointer size.
15 /// The normalized view easily converts to and from YAML using YAML I/O.
17 /// The normalized view converts to and from binary mach-o object files using
18 /// the writeBinary() and readBinary() functions.
20 /// The normalized view converts to and from lld::Atoms using the
21 /// normalizedToAtoms() and normalizedFromAtoms().
23 /// Overall, the conversion paths available look like:
31 /// +------------+ +------+
32 /// | normalized | <-> | yaml |
33 /// +------------+ +------+
42 #ifndef LLD_READER_WRITER_MACHO_NORMALIZE_FILE_H
43 #define LLD_READER_WRITER_MACHO_NORMALIZE_FILE_H
45 #include "lld/Core/Error.h"
46 #include "lld/Core/LLVM.h"
47 #include "lld/ReaderWriter/MachOLinkingContext.h"
48 #include "llvm/ADT/SmallString.h"
49 #include "llvm/ADT/StringRef.h"
50 #include "llvm/Support/Allocator.h"
51 #include "llvm/Support/Debug.h"
52 #include "llvm/Support/ErrorOr.h"
53 #include "llvm/Support/MachO.h"
54 #include "llvm/Support/YAMLTraits.h"
56 using llvm::BumpPtrAllocator;
57 using llvm::yaml::Hex64;
58 using llvm::yaml::Hex32;
59 using llvm::yaml::Hex16;
60 using llvm::yaml::Hex8;
61 using llvm::yaml::SequenceTraits;
62 using llvm::MachO::HeaderFileType;
63 using llvm::MachO::BindType;
64 using llvm::MachO::RebaseType;
65 using llvm::MachO::NListType;
66 using llvm::MachO::RelocationInfoType;
67 using llvm::MachO::SectionType;
68 using llvm::MachO::LoadCommandType;
69 using llvm::MachO::ExportSymbolKind;
70 using llvm::MachO::DataRegionType;
74 namespace normalized {
77 /// The real mach-o relocation record is 8-bytes on disk and is
78 /// encoded in one of two different bit-field patterns. This
79 /// normalized form has the union of all possible fields.
81 Relocation() : offset(0), scattered(false),
82 type(llvm::MachO::GENERIC_RELOC_VANILLA),
83 length(0), pcRel(false), isExtern(false), value(0),
88 RelocationInfoType type;
96 /// A typedef so that YAML I/O can treat this vector as a sequence.
97 typedef std::vector<Relocation> Relocations;
99 /// A typedef so that YAML I/O can process the raw bytes in a section.
100 typedef std::vector<Hex8> ContentBytes;
102 /// A typedef so that YAML I/O can treat indirect symbols as a flow sequence.
103 typedef std::vector<uint32_t> IndirectSymbols;
105 /// A typedef so that YAML I/O can encode/decode section attributes.
106 LLVM_YAML_STRONG_TYPEDEF(uint32_t, SectionAttr)
108 /// A typedef so that YAML I/O can encode/decode section alignment.
109 LLVM_YAML_STRONG_TYPEDEF(uint16_t, SectionAlignment)
111 /// Mach-O has a 32-bit and 64-bit section record. This normalized form
112 /// can support either kind.
114 Section() : type(llvm::MachO::S_REGULAR),
115 attributes(0), alignment(1), address(0) { }
117 StringRef segmentName;
118 StringRef sectionName;
120 SectionAttr attributes;
121 SectionAlignment alignment;
123 ArrayRef<uint8_t> content;
124 Relocations relocations;
125 IndirectSymbols indirectSymbols;
128 raw_ostream& operator<<(raw_ostream &OS) const {
133 void dump(raw_ostream &OS = llvm::dbgs()) const;
138 /// A typedef so that YAML I/O can encode/decode the scope bits of an nlist.
139 LLVM_YAML_STRONG_TYPEDEF(uint8_t, SymbolScope)
141 /// A typedef so that YAML I/O can encode/decode the desc bits of an nlist.
142 LLVM_YAML_STRONG_TYPEDEF(uint16_t, SymbolDesc)
144 /// Mach-O has a 32-bit and 64-bit symbol table entry (nlist), and the symbol
145 /// type and scope are mixed in the same n_type field. This normalized form
146 /// works for any pointer size and separates out the type and scope.
148 Symbol() : type(llvm::MachO::N_UNDF), scope(0), sect(0), desc(0), value(0) { }
158 /// Check whether the given section type indicates a zero-filled section.
/// Returns true only when \p T is S_ZEROFILL or S_THREAD_LOCAL_ZEROFILL.
159 // FIXME: Utility functions of this kind should probably be moved into
161 inline bool isZeroFillSection(SectionType T) {
162 return (T == llvm::MachO::S_ZEROFILL ||
163 T == llvm::MachO::S_THREAD_LOCAL_ZEROFILL);
166 /// A typedef so that YAML I/O can (de/en)code the protection bits of a segment.
167 LLVM_YAML_STRONG_TYPEDEF(uint32_t, VMProtect)
169 /// A typedef to hold versions X.Y.Z packed into 32-bit xxxx.yy.zz
170 LLVM_YAML_STRONG_TYPEDEF(uint32_t, PackedVersion)
172 /// Segments are only used in normalized final linked images (not in relocatable
173 /// object files). They specify how a range of the file is loaded.
178 VMProtect init_access;
179 VMProtect max_access;
182 /// Only used in normalized final linked images to specify on which dylibs
184 struct DependentDylib {
186 LoadCommandType kind;
187 PackedVersion compatVersion;
188 PackedVersion currentVersion;
191 /// A normalized rebasing entry. Only used in normalized final linked images.
192 struct RebaseLocation {
198 /// A normalized binding entry. Only used in normalized final linked images.
199 struct BindLocation {
205 StringRef symbolName;
209 /// A typedef so that YAML I/O can encode/decode export flags.
210 LLVM_YAML_STRONG_TYPEDEF(uint32_t, ExportFlags)
212 /// A normalized export entry. Only used in normalized final linked images.
216 ExportSymbolKind kind;
222 /// A normalized data-in-code entry.
230 /// A typedef so that YAML I/O can encode/decode mach_header.flags.
231 LLVM_YAML_STRONG_TYPEDEF(uint32_t, FileFlags)
234 struct NormalizedFile {
235 MachOLinkingContext::Arch arch = MachOLinkingContext::arch_unknown;
236 HeaderFileType fileType = llvm::MachO::MH_OBJECT;
238 std::vector<Segment> segments; // Not used in object files.
239 std::vector<Section> sections;
241 // Symbols sorted by kind.
242 std::vector<Symbol> localSymbols;
243 std::vector<Symbol> globalSymbols;
244 std::vector<Symbol> undefinedSymbols;
246 // Maps to load commands with no LINKEDIT content (final linked images only).
247 std::vector<DependentDylib> dependentDylibs;
248 StringRef installName; // dylibs only
249 PackedVersion compatVersion = 0; // dylibs only
250 PackedVersion currentVersion = 0; // dylibs only
251 bool hasUUID = false;
252 bool hasMinVersionLoadCommand = false;
253 bool generateDataInCodeLoadCommand = false;
254 std::vector<StringRef> rpaths;
255 Hex64 entryAddress = 0;
257 MachOLinkingContext::OS os = MachOLinkingContext::OS::unknown;
258 Hex64 sourceVersion = 0;
259 PackedVersion minOSverson = 0;
260 PackedVersion sdkVersion = 0;
261 LoadCommandType minOSVersionKind = (LoadCommandType)0;
263 // Maps to load commands with LINKEDIT content (final linked images only).
265 std::vector<RebaseLocation> rebasingInfo;
266 std::vector<BindLocation> bindingInfo;
267 std::vector<BindLocation> weakBindingInfo;
268 std::vector<BindLocation> lazyBindingInfo;
269 std::vector<Export> exportInfo;
270 std::vector<uint8_t> functionStarts;
271 std::vector<DataInCode> dataInCode;
278 // For any allocations in this struct which need to be owned by this struct.
279 BumpPtrAllocator ownedAllocations;
282 /// Tests if a file is a non-fat mach-o object file.
/// NOTE(review): 'arch' is taken by non-const reference, so it is presumably an
/// out-parameter that receives the file's architecture -- confirm against the
/// definition.
283 bool isThinObjectFile(StringRef path, MachOLinkingContext::Arch &arch);
285 /// If the buffer is a fat file with the requested arch, then this function
286 /// returns true with 'offset' and 'size' set to the location of the arch slice
287 /// within the buffer. Otherwise returns false.
288 bool sliceFromFatFile(MemoryBufferRef mb, MachOLinkingContext::Arch arch,
289 uint32_t &offset, uint32_t &size);
291 /// Reads a mach-o file and produces an in-memory normalized view.
/// The result is an llvm::Expected, so it holds either the NormalizedFile or an
/// error describing why the file could not be read.
/// NOTE(review): 'mb' is a non-const reference to a unique_ptr; whether this
/// function takes ownership of the buffer is not visible here -- confirm.
292 llvm::Expected<std::unique_ptr<NormalizedFile>>
293 readBinary(std::unique_ptr<MemoryBuffer> &mb,
294 const MachOLinkingContext::Arch arch);
296 /// Takes an in-memory normalized view and writes a mach-o object file.
/// Failure is reported through the returned llvm::Error.
/// NOTE(review): 'path' is presumably the output file location -- confirm.
297 llvm::Error writeBinary(const NormalizedFile &file, StringRef path);
/// NOTE(review): undocumented in this header. Judging by the name, this
/// presumably returns the combined size of the mach header and load commands
/// for \p file -- confirm against the definition.
299 size_t headerAndLoadCommandsSize(const NormalizedFile &file);
302 /// Parses a yaml encoded mach-o file to produce an in-memory normalized view.
/// The result is an llvm::Expected, so it holds either the NormalizedFile or an
/// error.
303 llvm::Expected<std::unique_ptr<NormalizedFile>>
304 readYaml(std::unique_ptr<MemoryBuffer> &mb);
306 /// Writes a yaml encoded mach-o file given an in-memory normalized view.
/// The yaml is written to \p out; the outcome is reported as a std::error_code.
307 std::error_code writeYaml(const NormalizedFile &file, raw_ostream &out);
310 normalizedObjectToAtoms(MachOFile *file,
311 const NormalizedFile &normalizedFile,
315 normalizedDylibToAtoms(MachODylibFile *file,
316 const NormalizedFile &normalizedFile,
319 /// Takes in-memory normalized dylib or object and parses it into lld::File
320 llvm::Expected<std::unique_ptr<lld::File>>
321 normalizedToAtoms(const NormalizedFile &normalizedFile, StringRef path,
324 /// Takes atoms and generates a normalized mach-o view.
/// The result is an llvm::Expected, so it holds either the NormalizedFile or an
/// error.
325 llvm::Expected<std::unique_ptr<NormalizedFile>>
326 normalizedFromAtoms(const lld::File &atomFile, const MachOLinkingContext &ctxt);
329 } // namespace normalized
331 /// Class for interfacing mach-o yaml files into generic yaml parsing
332 class MachOYamlIOTaggedDocumentHandler : public YamlIOTaggedDocumentHandler {
334 MachOYamlIOTaggedDocumentHandler(MachOLinkingContext::Arch arch)
336 bool handledDocTag(llvm::yaml::IO &io, const lld::File *&file) const override;
338 const MachOLinkingContext::Arch _arch;
341 } // namespace mach_o
344 #endif // LLD_READER_WRITER_MACHO_NORMALIZE_FILE_H