1 //===-- ObjectFile.h --------------------------------------------*- C++ -*-===//
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
7 //===----------------------------------------------------------------------===//
9 #ifndef liblldb_ObjectFile_h_
10 #define liblldb_ObjectFile_h_
12 #include "lldb/Core/FileSpecList.h"
13 #include "lldb/Core/ModuleChild.h"
14 #include "lldb/Core/PluginInterface.h"
15 #include "lldb/Symbol/Symtab.h"
16 #include "lldb/Symbol/UnwindTable.h"
17 #include "lldb/Utility/DataExtractor.h"
18 #include "lldb/Utility/Endian.h"
19 #include "lldb/Utility/FileSpec.h"
20 #include "lldb/Utility/UUID.h"
21 #include "lldb/lldb-private.h"
22 #include "llvm/Support/VersionTuple.h"
24 namespace lldb_private {
26 class ObjectFileJITDelegate {
28 ObjectFileJITDelegate() {}
30 virtual ~ObjectFileJITDelegate() {}
32 virtual lldb::ByteOrder GetByteOrder() const = 0;
34 virtual uint32_t GetAddressByteSize() const = 0;
36 virtual void PopulateSymtab(lldb_private::ObjectFile *obj_file,
37 lldb_private::Symtab &symtab) = 0;
/// Pure-virtual hook taking the object file and a SectionList by reference.
/// Presumably implementers fill \a section_list with the sections of the
/// JIT'd code -- confirm against the implementing classes.
39 virtual void PopulateSectionList(lldb_private::ObjectFile *obj_file,
40 lldb_private::SectionList &section_list) = 0;
42 virtual ArchSpec GetArchitecture() = 0;
45 /// \class ObjectFile ObjectFile.h "lldb/Symbol/ObjectFile.h"
46 /// A plug-in interface definition class for object file parsers.
48 /// Object files belong to Module objects and know how to extract information
49 /// from executable, shared library, and object (.o) files used by operating
50 /// system runtime. They expose the symbol table and section list of an object file.
52 /// Object files can be represented by the entire file, or by part of a file.
53 /// An example of a partial file ObjectFile is one that contains information
54 /// for one of multiple architectures in the same file.
56 /// Once an architecture is selected the object file information can be
57 /// extracted from this abstract class.
58 class ObjectFile : public std::enable_shared_from_this<ObjectFile>,
59 public PluginInterface,
61 friend class lldb_private::Module;
66 eTypeCoreFile, /// A core file that has a checkpoint of a program's
68 eTypeExecutable, /// A normal executable
69 eTypeDebugInfo, /// An object file that contains only debug information
70 eTypeDynamicLinker, /// The platform's dynamic linker executable
71 eTypeObjectFile, /// An intermediate object file
72 eTypeSharedLibrary, /// A shared library that can be used during execution
73 eTypeStubLibrary, /// A library that can be linked against but not used for
75 eTypeJIT, /// JIT code that has symbols, sections and possibly debug info
90 llvm::ArrayRef<uint8_t> Contents;
93 /// Construct with a parent module, offset, and header data.
95 /// Object files belong to modules and a valid module must be supplied upon
96 /// construction. The object may start at an offset within a file for files
97 /// that contain more than one architecture or object.
98 ObjectFile(const lldb::ModuleSP &module_sp, const FileSpec *file_spec_ptr,
99 lldb::offset_t file_offset, lldb::offset_t length,
100 const lldb::DataBufferSP &data_sp, lldb::offset_t data_offset);
102 ObjectFile(const lldb::ModuleSP &module_sp, const lldb::ProcessSP &process_sp,
103 lldb::addr_t header_addr, lldb::DataBufferSP &data_sp);
107 /// The destructor is virtual since this class is designed to be inherited
108 /// from by the plug-in instance.
109 ~ObjectFile() override;
111 /// Dump a description of this object to a Stream.
113 /// Dump a description of the current contents of this object to the
114 /// supplied stream \a s. The dumping should include the section list if it
115 /// has been parsed, and the symbol table if it has been parsed.
118 /// The stream to which to dump the object description.
119 virtual void Dump(Stream *s) = 0;
121 /// Find an ObjectFile plug-in that can parse \a file_spec.
123 /// Scans all loaded plug-in interfaces that implement versions of the
124 /// ObjectFile plug-in interface and returns the first instance that can
127 /// \param[in] module_sp
128 /// The parent module that owns this object file.
130 /// \param[in] file_spec
131 /// A file specification that indicates which file to use as the
134 /// \param[in] file_offset
135 /// The offset into the file at which to start parsing the
136 /// object. This is for files that contain multiple
137 /// architectures or objects.
139 /// \param[in] file_size
140 /// The size of the current object file if it can be determined
141 /// or if it is known. This can be zero.
143 /// \see ObjectFile::ParseHeader()
144 static lldb::ObjectFileSP
145 FindPlugin(const lldb::ModuleSP &module_sp, const FileSpec *file_spec,
146 lldb::offset_t file_offset, lldb::offset_t file_size,
147 lldb::DataBufferSP &data_sp, lldb::offset_t &data_offset);
149 /// Find an ObjectFile plug-in that can parse a file in memory.
151 /// Scans all loaded plug-in interfaces that implement versions of the
152 /// ObjectFile plug-in interface and returns the first instance that can
155 /// \param[in] module_sp
156 /// The parent module that owns this object file.
158 /// \param[in] process_sp
159 /// A shared pointer to the process whose memory space contains
160 /// an object file. This will be stored as a std::weak_ptr.
162 /// \param[in] header_addr
163 /// The address of the header for the object file in memory.
164 static lldb::ObjectFileSP FindPlugin(const lldb::ModuleSP &module_sp,
165 const lldb::ProcessSP &process_sp,
166 lldb::addr_t header_addr,
167 lldb::DataBufferSP &file_data_sp);
169 static size_t GetModuleSpecifications(const FileSpec &file,
170 lldb::offset_t file_offset,
171 lldb::offset_t file_size,
172 ModuleSpecList &specs);
174 static size_t GetModuleSpecifications(const lldb_private::FileSpec &file,
175 lldb::DataBufferSP &data_sp,
176 lldb::offset_t data_offset,
177 lldb::offset_t file_offset,
178 lldb::offset_t file_size,
179 lldb_private::ModuleSpecList &specs);
180 /// Split a path into a file path with object name.
182 /// For paths like "/tmp/foo.a(bar.o)" we often need to split a path up into
183 /// the actual path name and into the object name so we can make a valid
184 /// object file from it.
186 /// \param[in] path_with_object
187 /// A path that might contain an archive path with a .o file
188 /// specified in parens in the basename of the path.
190 /// \param[out] archive_file
191 /// If \b true is returned, \a archive_file will be filled in with
192 /// the path to the archive.
194 /// \param[out] archive_object
195 /// If \b true is returned, \a archive_object will be filled in with
196 /// the name of the object inside the archive.
199 /// \b true if the path matches the pattern of archive + object
200 /// and \a archive_file and \a archive_object are modified,
201 /// \b false otherwise and \a archive_file and \a archive_object
202 /// are guaranteed to remain unchanged.
203 static bool SplitArchivePathWithObject(
204 const char *path_with_object, lldb_private::FileSpec &archive_file,
205 lldb_private::ConstString &archive_object, bool must_exist);
207 /// Gets the address size in bytes for the current object file.
210 /// The size of an address in bytes for the currently selected
211 /// architecture (and object for archives). Returns zero if no
212 /// architecture or object has been selected.
213 virtual uint32_t GetAddressByteSize() const = 0;
215 /// Get the address type given a file address in an object file.
217 /// Many binary file formats know what kind of code or data lives at a
218 /// given address. This is primarily for ARM binaries, though it can be
219 /// applied to any executable file format that supports different opcode
220 /// types within the same binary. ARM binaries support having both ARM and
221 /// Thumb within the same executable container.
222 ///
223 /// \return
224 /// The address class (AddressClass) for \a file_addr.
225 virtual AddressClass GetAddressClass(lldb::addr_t file_addr);
227 /// Extract the dependent modules from an object file.
229 /// If an object file has information about which other images it depends on
230 /// (such as shared libraries), this function will provide the list. Since
231 /// many executables or shared libraries may depend on the same files,
232 /// FileSpecList::AppendIfUnique(const FileSpec &) should be used to make
233 /// sure any files that are added are not already in the list.
235 /// \param[out] file_list
236 /// A list of file specification objects that gets dependent
237 /// files appended to.
240 /// The number of new files that were appended to \a file_list.
242 /// \see FileSpecList::AppendIfUnique(const FileSpec &)
243 virtual uint32_t GetDependentModules(FileSpecList &file_list) = 0;
245 /// Tells whether this object file is capable of being the main executable
249 /// \b true if it is, \b false otherwise.
250 virtual bool IsExecutable() const = 0;
252 /// Returns the offset into a file at which this object resides.
254 /// Some files contain many object files, and this function allows access to
255 /// an object's offset within the file.
258 /// The offset in bytes into the file. Defaults to zero for
259 /// simple object files that are represented by an entire file.
260 virtual lldb::addr_t GetFileOffset() const { return m_file_offset; }
/// Returns the byte length recorded for this object file (the value of
/// m_length).
262 virtual lldb::addr_t GetByteSize() const { return m_length; }
264 /// Get accessor to the object file specification.
267 /// A reference to the file specification object (m_file); being a
268 /// reference, it is never NULL.
269 virtual FileSpec &GetFileSpec() { return m_file; }
271 /// Get const accessor to the object file specification.
274 /// A const reference to the file specification object (m_file);
275 /// being a reference, it is never NULL.
276 virtual const FileSpec &GetFileSpec() const { return m_file; }
278 /// Get the ArchSpec for this object file.
281 /// The ArchSpec of this object file. In case of error, an invalid
282 /// ArchSpec object is returned.
283 virtual ArchSpec GetArchitecture() = 0;
285 /// Gets the section list for the currently selected architecture (and
286 /// object for archives).
288 /// Section list parsing can be deferred by ObjectFile instances until this
289 /// accessor is called the first time.
292 /// The list of sections contained in this object file.
293 virtual SectionList *GetSectionList(bool update_module_section_list = true);
295 virtual void CreateSections(SectionList &unified_section_list) = 0;
297 /// Notify the ObjectFile that the file addresses in the Sections for this
298 /// module have been changed.
299 virtual void SectionFileAddressesChanged() {}
301 /// Gets the symbol table for the currently selected architecture (and
302 /// object for archives).
304 /// Symbol table parsing can be deferred by ObjectFile instances until this
305 /// accessor is called the first time.
308 /// The symbol table for this object file.
309 virtual Symtab *GetSymtab() = 0;
311 /// Perform relocations on the section if necessary.
313 virtual void RelocateSection(lldb_private::Section *section);
315 /// Appends a Symbol for the specified so_addr to the symbol table.
317 /// If verify_unique is false, the symbol table is not searched to determine
318 /// if a Symbol found at this address has already been added to the symbol
319 /// table. When verify_unique is true, this method resolves the Symbol as
320 /// the first match in the SymbolTable and appends a Symbol only if
324 /// The resolved symbol or nullptr. Returns nullptr if a
325 /// Symbol could not be found for the specified so_addr.
326 virtual Symbol *ResolveSymbolForAddress(const Address &so_addr,
327 bool verify_unique) {
328 // Typically overridden to lazily add stripped symbols recoverable from the
329 // exception handling unwind information (i.e. without parsing the entire
332 // The availability of LC_FUNCTION_STARTS allows ObjectFileMachO to
333 // efficiently add stripped symbols when the symbol table is first
334 // constructed. Poorer cousins are PECoff and ELF.
338 /// Detect if this object file has been stripped of local symbols.
339 ///
342 /// Return \b true if the object file has been stripped of local
344 virtual bool IsStripped() = 0;
346 /// Frees the symbol table.
348 /// This function should only be used when an object file is
351 /// eSymtabFromUnifiedSectionList: Whether to clear symbol table
352 /// for unified module section list, or object file.
355 /// The symbol table for this object file.
356 virtual void ClearSymtab();
358 /// Gets the UUID for this object file.
360 /// If the object file format contains a UUID, the value should be returned.
361 /// Else ObjectFile instances should return the MD5 checksum of all of the
362 /// bytes for the object file (or memory for memory based object files).
365 /// The object file's UUID. In case of an error, an empty UUID is
367 virtual UUID GetUUID() = 0;
369 /// Gets the symbol file spec list for this object file.
371 /// If the object file format contains a debug symbol file link, the values
372 /// will be returned in the FileSpecList.
375 /// Returns filespeclist.
376 virtual lldb_private::FileSpecList GetDebugSymbolFilePaths() {
377 return FileSpecList();
380 /// Gets the file spec list of libraries re-exported by this object file.
382 /// If the object file format has the notion of one library re-exporting the
383 /// symbols from another, the re-exported libraries will be returned in the
387 /// Returns filespeclist.
388 virtual lldb_private::FileSpecList GetReExportedLibraries() {
389 return FileSpecList();
392 /// Sets the load address for an entire module, assuming a rigid slide of
393 /// sections, if possible in the implementation.
396 /// Returns true iff any section's load address changed.
397 virtual bool SetLoadAddress(Target &target, lldb::addr_t value,
398 bool value_is_offset) {
402 /// Gets whether endian swapping should occur when extracting data from this
406 /// Returns \b true if endian swapping is needed, \b false
408 virtual lldb::ByteOrder GetByteOrder() const = 0;
410 /// Attempts to parse the object header.
412 /// This function is used as a test to see if a given plug-in instance can
413 /// parse the header data already contained in ObjectFile::m_data. If an
414 /// object file parser does not recognize the magic bytes in a header,
415 /// false should be returned and the next plug-in can attempt to parse an
419 /// Returns \b true if the header was parsed successfully, \b
421 virtual bool ParseHeader() = 0;
423 /// Returns if the function bounds for symbols in this symbol file are
426 /// The unwinder can emulate the instructions of functions to understand
427 /// prologue/epilogue code sequences, where registers are spilled on the
428 /// stack, etc. This feature relies on having the correct start addresses
429 /// of all functions. If the ObjectFile has a way to tell that symbols have
430 /// been stripped and there's no way to reconstruct start addresses (e.g.
431 /// LC_FUNCTION_STARTS on Mach-O, or eh_frame unwind info), the ObjectFile
432 /// should indicate that assembly emulation should not be used for this
435 /// It is uncommon for this to return false. An ObjectFile needs to be sure
436 /// that symbol start addresses are unavailable before false is returned.
437 /// If it is unclear, this should return true.
440 /// Returns true if assembly emulation should be used for this
442 /// Only returns false if the ObjectFile is sure that symbol
443 /// addresses are insufficient for accurate assembly emulation.
444 virtual bool AllowAssemblyEmulationUnwindPlans() { return true; }
446 /// Similar to Process::GetImageInfoAddress().
448 /// Some platforms embed auxiliary structures useful to debuggers in the
449 /// address space of the inferior process. This method returns the address
450 /// of such a structure if the information can be resolved via entries in
451 /// the object file. ELF, for example, provides a means to hook into the
452 /// runtime linker so that a debugger may monitor the loading and unloading
453 /// of shared libraries.
456 /// The address of any auxiliary tables, or an invalid address if this
457 /// object file format does not support or contain such information.
458 virtual lldb_private::Address GetImageInfoAddress(Target *target) {
462 /// Returns the address of the Entry Point in this object file - if the
463 /// object file doesn't have an entry point (because it is not an executable
464 /// file) then an invalid address is returned.
467 /// Returns the entry address for this module.
468 virtual lldb_private::Address GetEntryPointAddress() { return Address(); }
470 /// Returns base address of this object file.
472 /// This is also sometimes referred to as the "preferred load address" or the
473 /// "image base address". Addresses within object files are often expressed
474 /// relative to this base. If this address corresponds to a specific section
475 /// (usually the first byte of the first section) then the returned address
476 /// will have this section set. Otherwise, the address will just have the
477 /// offset member filled in, indicating that this represents a file address.
478 virtual lldb_private::Address GetBaseAddress() {
479 return Address(m_memory_addr);
/// Returns the number of thread contexts. This base implementation always
/// returns 0; the method is virtual, so subclasses can override it.
482 virtual uint32_t GetNumThreadContexts() { return 0; }
484 /// Some object files may have an identifier string embedded in them, e.g.
485 /// in a Mach-O core file using the LC_IDENT load command (which is
486 /// obsolete, but can still be found in some old files)
489 /// Returns the identifier string if one exists, else an empty
491 virtual std::string GetIdentifierString () {
492 return std::string();
495 /// When the ObjectFile is a core file, lldb needs to locate the "binary" in
496 /// the core file. lldb can iterate over the pages looking for a valid
497 /// binary, but some core files may have metadata describing where the main
498 /// binary is exactly which removes ambiguity when there are multiple
499 /// binaries present in the captured memory pages.
501 /// \param[out] address
502 /// If the address of the binary is specified, this will be set.
503 /// This is an address in the virtual address space of the core file
504 /// memory segments; it is not an offset into the object file.
505 /// If no address is available, will be set to LLDB_INVALID_ADDRESS.
508 /// If the uuid of the binary is specified, this will be set.
509 /// If no UUID is available, will be cleared.
512 /// Returns true if either address or uuid has been set.
513 virtual bool GetCorefileMainBinaryInfo (lldb::addr_t &address, UUID &uuid) {
514 address = LLDB_INVALID_ADDRESS;
519 virtual lldb::RegisterContextSP
520 GetThreadContextAtIndex(uint32_t idx, lldb_private::Thread &thread) {
521 return lldb::RegisterContextSP();
524 /// The object file should be able to calculate its type by looking at its
525 /// file header and possibly the sections or other data in the object file.
526 /// The file type is used in the debugger to help select the correct plug-
527 /// ins for the job at hand, so this is important to get right. If any
528 /// eTypeXXX definitions do not match up with the type of file you are
529 /// loading, please feel free to add a new enumeration value.
532 /// The calculated file type for the current object file.
533 virtual Type CalculateType() = 0;
535 /// In cases where the type can't be calculated (elf files), this routine
536 /// allows someone to explicitly set it. As an example, SymbolVendorELF uses
537 /// this routine to set eTypeDebugInfo when loading debug link files.
538 virtual void SetType(Type type) { m_type = type; }
540 /// The object file should be able to calculate the strata of the object
543 /// Many object files for platforms might be for either user space debugging
544 /// or for kernel debugging. If your object file subclass can figure this
545 /// out, it will help with debugger plug-in selection when it comes time to
549 /// The calculated object file strata for the current object
551 virtual Strata CalculateStrata() = 0;
553 /// Get the object file version numbers.
555 /// Many object files have a set of version numbers that describe the
556 /// version of the executable or shared library. Typically there are major,
557 /// minor and build, but there may be more. This function will extract the
558 /// versions from object files if they are available.
561 /// This function returns extracted version numbers as a
562 /// llvm::VersionTuple. In case of error an empty VersionTuple is
564 virtual llvm::VersionTuple GetVersion() { return llvm::VersionTuple(); }
566 /// Get the minimum OS version this object file can run on.
568 /// Some object files have information that specifies the minimum OS version
569 /// that they can be used on.
572 /// This function returns extracted version numbers as a
573 /// llvm::VersionTuple. In case of error an empty VersionTuple is
575 virtual llvm::VersionTuple GetMinimumOSVersion() {
576 return llvm::VersionTuple();
579 /// Get the SDK OS version this object file was built with.
582 /// This function returns extracted version numbers as a
583 /// llvm::VersionTuple. In case of error an empty VersionTuple is
585 virtual llvm::VersionTuple GetSDKVersion() { return llvm::VersionTuple(); }
587 /// Return true if this file is a dynamic link editor (dyld)
589 /// Often times dyld has symbols that mirror symbols in libc and other
590 /// shared libraries (like "malloc" and "free") and the user does _not_ want
591 /// to stop in these shared libraries by default. We can ask the ObjectFile
592 /// if it is such a file and should be avoided for things like setting
593 /// breakpoints and doing function lookups for expressions.
594 virtual bool GetIsDynamicLinkEditor() { return false; }
598 if (m_type == eTypeInvalid)
599 m_type = CalculateType();
604 if (m_strata == eStrataInvalid)
605 m_strata = CalculateStrata();
609 // When an object file is in memory, subclasses should try and lock the
610 // process weak pointer. If the process weak pointer produces a valid
611 // ProcessSP, then subclasses can call this function to read memory.
612 static lldb::DataBufferSP ReadMemory(const lldb::ProcessSP &process_sp,
613 lldb::addr_t addr, size_t byte_size);
615 // This function returns raw file contents. Do not use it if you want
616 // transparent decompression of section contents.
617 size_t GetData(lldb::offset_t offset, size_t length,
618 DataExtractor &data) const;
620 // This function returns raw file contents. Do not use it if you want
621 // transparent decompression of section contents.
622 size_t CopyData(lldb::offset_t offset, size_t length, void *dst) const;
624 // This function will transparently decompress section data if the section is
626 virtual size_t ReadSectionData(Section *section,
627 lldb::offset_t section_offset, void *dst,
630 // This function will transparently decompress section data if the section is
631 // compressed. Note that for a compressed section the resulting data size may
632 // be larger than what Section::GetFileSize reports.
633 virtual size_t ReadSectionData(Section *section,
634 DataExtractor &section_data);
/// Returns \b true when m_memory_addr holds something other than
/// LLDB_INVALID_ADDRESS.
636 bool IsInMemory() const { return m_memory_addr != LLDB_INVALID_ADDRESS; }
638 // Strip linker annotations (such as @@VERSION) from symbol names.
639 virtual llvm::StringRef
640 StripLinkerSymbolAnnotations(llvm::StringRef symbol_name) const {
644 static lldb::SymbolType GetSymbolTypeFromName(
645 llvm::StringRef name,
646 lldb::SymbolType symbol_type_hint = lldb::eSymbolTypeUndefined);
648 /// Loads this objfile to memory.
650 /// Loads the bits needed to create an executable image to the memory. It is
651 /// useful with bare-metal targets where target does not have the ability to
652 /// start a process itself.
654 /// \param[in] target
655 /// Target where to load.
658 virtual std::vector<LoadableData> GetLoadableData(Target &target);
665 lldb::addr_t m_file_offset; ///< The offset in bytes into the file, or the
667 lldb::addr_t m_length; ///< The length of this object file if it is known (can
668 ///be zero if length is unknown or can't be
671 m_data; ///< The data for this object file so things can be parsed lazily.
672 lldb::ProcessWP m_process_wp;
673 const lldb::addr_t m_memory_addr;
674 std::unique_ptr<lldb_private::SectionList> m_sections_up;
675 std::unique_ptr<lldb_private::Symtab> m_symtab_up;
676 uint32_t m_synthetic_symbol_idx;
678 /// Sets the architecture for a module. At present the architecture can
679 /// only be set if it is invalid. It is not allowed to switch from one
680 /// concrete architecture to another.
682 /// \param[in] new_arch
683 /// The architecture this module will be set to.
686 /// Returns \b true if the architecture was changed, \b
688 bool SetModulesArchitecture(const ArchSpec &new_arch);
690 ConstString GetNextSyntheticSymbolName();
692 static lldb::DataBufferSP MapFileData(const FileSpec &file, uint64_t Size,
696 DISALLOW_COPY_AND_ASSIGN(ObjectFile);
699 } // namespace lldb_private
702 template <> struct format_provider<lldb_private::ObjectFile::Type> {
703 static void format(const lldb_private::ObjectFile::Type &type,
704 raw_ostream &OS, StringRef Style);
707 template <> struct format_provider<lldb_private::ObjectFile::Strata> {
708 static void format(const lldb_private::ObjectFile::Strata &strata,
709 raw_ostream &OS, StringRef Style);
713 #endif // liblldb_ObjectFile_h_