1 //===-- Disassembler.h ------------------------------------------*- C++ -*-===//
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
7 //===----------------------------------------------------------------------===//
9 #ifndef liblldb_Disassembler_h_
10 #define liblldb_Disassembler_h_
12 #include "lldb/Core/Address.h"
13 #include "lldb/Core/EmulateInstruction.h"
14 #include "lldb/Core/FormatEntity.h"
15 #include "lldb/Core/Opcode.h"
16 #include "lldb/Core/PluginInterface.h"
17 #include "lldb/Interpreter/OptionValue.h"
18 #include "lldb/Symbol/LineEntry.h"
19 #include "lldb/Target/ExecutionContext.h"
20 #include "lldb/Utility/ArchSpec.h"
21 #include "lldb/Utility/ConstString.h"
22 #include "lldb/Utility/FileSpec.h"
23 #include "lldb/lldb-defines.h"
24 #include "lldb/lldb-forward.h"
25 #include "lldb/lldb-private-enumerations.h"
26 #include "lldb/lldb-types.h"
28 #include "llvm/ADT/StringRef.h"
42 template <typename T> class SmallVectorImpl;
45 namespace lldb_private {
53 class SymbolContextList;
// Construct an instruction for the given address. When the caller omits
// addr_class it defaults to AddressClass::eInvalid.
59 Instruction(const Address &address,
60 AddressClass addr_class = AddressClass::eInvalid);
62 virtual ~Instruction();
// Read-only access to m_address, the address stored for this instruction.
64 const Address &GetAddress() const { return m_address; }
// Returns the C string held in m_opcode_name, after first giving
// CalculateMnemonicOperandsAndCommentIfNeeded() a chance to fill in the
// cached strings. The returned pointer refers to storage owned by
// m_opcode_name.
66 const char *GetMnemonic(const ExecutionContext *exe_ctx) {
67 CalculateMnemonicOperandsAndCommentIfNeeded(exe_ctx);
68 return m_opcode_name.c_str();
// Returns the operand text after making sure the cached strings have been
// computed. Note the naming: the operands are read from m_mnemonics, while
// the mnemonic itself is kept in m_opcode_name. The returned pointer refers
// to storage owned by m_mnemonics.
71 const char *GetOperands(const ExecutionContext *exe_ctx) {
72 CalculateMnemonicOperandsAndCommentIfNeeded(exe_ctx);
73 return m_mnemonics.c_str();
// Returns the C string held in m_comment after making sure the cached
// strings have been computed. The returned pointer refers to storage owned
// by m_comment.
76 const char *GetComment(const ExecutionContext *exe_ctx) {
77 CalculateMnemonicOperandsAndCommentIfNeeded(exe_ctx);
78 return m_comment.c_str();
82 CalculateMnemonicOperandsAndComment(const ExecutionContext *exe_ctx) = 0;
84 AddressClass GetAddressClass();
86 void SetAddress(const Address &addr) {
87 // Invalidate the address class to lazily discover it if we need to.
88 m_address_class = AddressClass::eInvalid;
92 /// Dump the text representation of this Instruction to a Stream.
///
94 /// Print the (optional) address, (optional) bytes, opcode,
95 /// operands, and instruction comments to a stream.
///
/// \param[in] s
98 /// The Stream to add the text to.
///
100 /// \param[in] show_address
101 /// Whether the address (using disassembly_addr_format_spec formatting)
102 /// should be printed.
///
104 /// \param[in] show_bytes
105 /// Whether the bytes of the assembly instruction should be printed.
///
107 /// \param[in] max_opcode_byte_size
108 /// The size (in bytes) of the largest instruction in the list that
109 /// we are printing (for text justification/alignment purposes).
110 /// Only needed if show_bytes is true.
///
112 /// \param[in] exe_ctx
113 /// The current execution context, if available. May be used in
114 /// the assembling of the operands+comments for this instruction.
115 /// Pass NULL if not applicable.
///
117 /// \param[in] sym_ctx
118 /// The SymbolContext for this instruction.
119 /// Pass NULL if not available/computed.
120 /// Only needed if show_address is true.
///
122 /// \param[in] prev_sym_ctx
123 /// The SymbolContext for the previous instruction. Depending on
124 /// the disassembly address format specification, a change in
125 /// Symbol / Function may mean that a line is printed with the new
126 /// symbol/function name.
127 /// Pass NULL if unavailable, or if this is the first instruction of
128 /// the InstructionList.
129 /// Only needed if show_address is true.
///
131 /// \param[in] disassembly_addr_format
132 /// The format specification for how addresses are printed.
133 /// Only needed if show_address is true.
///
135 /// \param[in] max_address_text_size
136 /// The length of the longest address string at the start of the
137 /// disassembly line that will be printed (the
138 /// Debugger::FormatDisassemblerAddress() string)
139 /// so this method can properly align the instruction opcodes.
140 /// May be 0 to indicate no indentation/alignment of the opcodes.
141 virtual void Dump(Stream *s, uint32_t max_opcode_byte_size, bool show_address,
142 bool show_bytes, const ExecutionContext *exe_ctx,
143 const SymbolContext *sym_ctx,
144 const SymbolContext *prev_sym_ctx,
145 const FormatEntity::Entry *disassembly_addr_format,
146 size_t max_address_text_size);
148 virtual bool DoesBranch() = 0;
150 virtual bool HasDelaySlot();
152 bool CanSetBreakpoint ();
// Pure virtual: every concrete Instruction type must provide the decoder.
// It receives a Disassembler, the DataExtractor to read from and an offset.
// NOTE(review): data_offset is presumably the starting offset within `data`
// and the size_t result the number of bytes consumed -- confirm against the
// implementations.
154 virtual size_t Decode(const Disassembler &disassembler,
155 const DataExtractor &data,
156 lldb::offset_t data_offset) = 0;
// Default implementation: the argument is unnamed and ignored, and the body
// is empty. PseudoInstruction declares an override of this method.
158 virtual void SetDescription(llvm::StringRef) {
159 } // May be overridden in sub-classes that have descriptions.
161 lldb::OptionValueSP ReadArray(FILE *in_file, Stream *out_stream,
162 OptionValue::Type data_type);
164 lldb::OptionValueSP ReadDictionary(FILE *in_file, Stream *out_stream);
166 bool DumpEmulation(const ArchSpec &arch);
168 virtual bool TestEmulation(Stream *stream, const char *test_file_name);
// Takes the architecture, a uint32_t of evaluation options, an opaque baton
// pointer and four EmulateInstruction callbacks (memory read/write and
// register read/write).
// NOTE(review): presumably the baton is handed to the callbacks unchanged and
// the bool result reports success -- confirm in the implementation.
// NOTE(review): `write_mem_calback` is a misspelling of "callback"; it is only
// a parameter name in this declaration.
170 bool Emulate(const ArchSpec &arch, uint32_t evaluate_options, void *baton,
171 EmulateInstruction::ReadMemoryCallback read_mem_callback,
172 EmulateInstruction::WriteMemoryCallback write_mem_calback,
173 EmulateInstruction::ReadRegisterCallback read_reg_callback,
174 EmulateInstruction::WriteRegisterCallback write_reg_callback);
// Read-only access to m_opcode, the opcode stored for this instruction.
176 const Opcode &GetOpcode() const { return m_opcode; }
178 uint32_t GetData(DataExtractor &data);
188 } m_type = Type::Invalid;
189 std::vector<Operand> m_children;
190 lldb::addr_t m_immediate = 0;
191 ConstString m_register;
192 bool m_negative = false;
193 bool m_clobbered = false;
// An operand is valid unless its type is still Type::Invalid, which is the
// value m_type is initialized with. Const-qualified so it can also be called
// on const operands, e.g. through the `const Instruction::Operand &` that the
// OperandMatchers predicates receive.
195 bool IsValid() const { return m_type != Type::Invalid; }
197 static Operand BuildRegister(ConstString &r);
198 static Operand BuildImmediate(lldb::addr_t imm, bool neg);
199 static Operand BuildImmediate(int64_t imm);
200 static Operand BuildDereference(const Operand &ref);
201 static Operand BuildSum(const Operand &lhs, const Operand &rhs);
202 static Operand BuildProduct(const Operand &lhs, const Operand &rhs);
205 virtual bool ParseOperands(llvm::SmallVectorImpl<Operand> &operands) {
// Default answer is false; virtual so that subclasses can override it.
209 virtual bool IsCall() { return false; }
212 Address m_address; // The section offset address of this instruction
213 // We include an address class in the Instruction class to
214 // allow the instruction to specify the
215 // AddressClass::eCodeAlternateISA (currently used for
216 // thumb), and also to specify data (AddressClass::eData).
217 // The usual value will be AddressClass::eCode, but often
218 // when disassembling memory, you might run into data.
219 // This can help us to disassemble appropriately.
221 AddressClass m_address_class; // Use GetAddressClass () accessor function!
224 Opcode m_opcode; // The opcode for this instruction
225 std::string m_opcode_name;
226 std::string m_mnemonics;
227 std::string m_comment;
228 bool m_calculated_strings;
231 CalculateMnemonicOperandsAndCommentIfNeeded(const ExecutionContext *exe_ctx) {
232 if (!m_calculated_strings) {
233 m_calculated_strings = true;
234 CalculateMnemonicOperandsAndComment(exe_ctx);
239 namespace OperandMatchers {
240 std::function<bool(const Instruction::Operand &)>
241 MatchBinaryOp(std::function<bool(const Instruction::Operand &)> base,
242 std::function<bool(const Instruction::Operand &)> left,
243 std::function<bool(const Instruction::Operand &)> right);
245 std::function<bool(const Instruction::Operand &)>
246 MatchUnaryOp(std::function<bool(const Instruction::Operand &)> base,
247 std::function<bool(const Instruction::Operand &)> child);
249 std::function<bool(const Instruction::Operand &)>
250 MatchRegOp(const RegisterInfo &info);
// Builds an operand predicate that takes a ConstString by non-const
// reference.
// NOTE(review): presumably the returned matcher stores the matched register
// name into `reg` -- confirm against the definition.
252 std::function<bool(const Instruction::Operand &)> FetchRegOp(ConstString &reg);
254 std::function<bool(const Instruction::Operand &)> MatchImmOp(int64_t imm);
256 std::function<bool(const Instruction::Operand &)> FetchImmOp(int64_t &imm);
258 std::function<bool(const Instruction::Operand &)>
259 MatchOpType(Instruction::Operand::Type type);
262 class InstructionList {
267 size_t GetSize() const;
// NOTE: "Opcocde" is a misspelling of "Opcode"; the identifier is left as
// declared because it is the name callers use.
269 uint32_t GetMaxOpcocdeByteSize() const;
271 lldb::InstructionSP GetInstructionAtIndex(size_t idx) const;
273 //------------------------------------------------------------------
274 /// Get the index of the next branch instruction.
///
276 /// Given a list of instructions, find the next branch instruction
277 /// in the list by returning an index.
///
/// @param[in] start
280 /// The instruction index of the first instruction to check.
///
282 /// @param[in] target
283 /// An LLDB target object that is used to resolve addresses.
///
285 /// @param[in] ignore_calls
286 /// If true, then find the first branch instruction that isn't
287 /// a function call (a branch that calls and returns to the next
288 /// instruction). If false, find the instruction index of any
289 /// branch in the list.
///
/// @return
292 /// The instruction index of the first branch that is at or past
293 /// \a start. Returns UINT32_MAX if no matching branches are
/// found.
295 //------------------------------------------------------------------
296 uint32_t GetIndexOfNextBranchInstruction(uint32_t start,
298 bool ignore_calls) const;
300 uint32_t GetIndexOfInstructionAtLoadAddress(lldb::addr_t load_addr,
303 uint32_t GetIndexOfInstructionAtAddress(const Address &addr);
307 void Append(lldb::InstructionSP &inst_sp);
309 void Dump(Stream *s, bool show_address, bool show_bytes,
310 const ExecutionContext *exe_ctx);
313 typedef std::vector<lldb::InstructionSP> collection;
314 typedef collection::iterator iterator;
315 typedef collection::const_iterator const_iterator;
317 collection m_instructions;
320 class PseudoInstruction : public Instruction {
324 ~PseudoInstruction() override;
326 bool DoesBranch() override;
328 bool HasDelaySlot() override;
330 void CalculateMnemonicOperandsAndComment(
331 const ExecutionContext *exe_ctx) override {
332 // TODO: fill this in and put opcode name into Instruction::m_opcode_name,
333 // mnemonic into Instruction::m_mnemonics, and any comment into
334 // Instruction::m_comment
337 size_t Decode(const Disassembler &disassembler, const DataExtractor &data,
338 lldb::offset_t data_offset) override;
340 void SetOpcode(size_t opcode_size, void *opcode_data);
342 void SetDescription(llvm::StringRef description) override;
345 std::string m_description;
347 DISALLOW_COPY_AND_ASSIGN(PseudoInstruction);
350 class Disassembler : public std::enable_shared_from_this<Disassembler>,
351 public PluginInterface {
// Option bit flags: each enumerator occupies its own distinct bit.
355 eOptionShowBytes = (1u << 0),
356 eOptionRawOuput = (1u << 1),
357 eOptionMarkPCSourceLine = (1u << 2), // Mark the source line that contains
358 // the current PC (mixed mode only)
359 eOptionMarkPCAddress =
360 (1u << 3) // Mark the disassembly line that contains the PC
363 enum HexImmediateStyle {
368 // FindPlugin should be lax about the flavor string (it is too annoying to
369 // have various internal uses of the disassembler fail because the global
370 // flavor string gets set wrong). Instead, if you get a flavor string you
371 // don't understand, use the default. Folks who care to check can use the
372 // FlavorValidForArchSpec method on the disassembler they got back.
373 static lldb::DisassemblerSP
374 FindPlugin(const ArchSpec &arch, const char *flavor, const char *plugin_name);
376 // This version will use the value in the Target settings if flavor is NULL.
377 static lldb::DisassemblerSP
378 FindPluginForTarget(const lldb::TargetSP target_sp, const ArchSpec &arch,
379 const char *flavor, const char *plugin_name);
381 static lldb::DisassemblerSP
382 DisassembleRange(const ArchSpec &arch, const char *plugin_name,
383 const char *flavor, const ExecutionContext &exe_ctx,
384 const AddressRange &disasm_range, bool prefer_file_cache);
386 static lldb::DisassemblerSP
387 DisassembleBytes(const ArchSpec &arch, const char *plugin_name,
388 const char *flavor, const Address &start, const void *bytes,
389 size_t length, uint32_t max_num_instructions,
390 bool data_from_file);
392 static bool Disassemble(Debugger &debugger, const ArchSpec &arch,
393 const char *plugin_name, const char *flavor,
394 const ExecutionContext &exe_ctx,
395 const AddressRange &range, uint32_t num_instructions,
396 bool mixed_source_and_assembly,
397 uint32_t num_mixed_context_lines, uint32_t options,
400 static bool Disassemble(Debugger &debugger, const ArchSpec &arch,
401 const char *plugin_name, const char *flavor,
402 const ExecutionContext &exe_ctx, const Address &start,
403 uint32_t num_instructions,
404 bool mixed_source_and_assembly,
405 uint32_t num_mixed_context_lines, uint32_t options,
409 Disassemble(Debugger &debugger, const ArchSpec &arch, const char *plugin_name,
410 const char *flavor, const ExecutionContext &exe_ctx,
411 SymbolContextList &sc_list, uint32_t num_instructions,
412 bool mixed_source_and_assembly, uint32_t num_mixed_context_lines,
413 uint32_t options, Stream &strm);
416 Disassemble(Debugger &debugger, const ArchSpec &arch, const char *plugin_name,
417 const char *flavor, const ExecutionContext &exe_ctx,
418 ConstString name, Module *module,
419 uint32_t num_instructions, bool mixed_source_and_assembly,
420 uint32_t num_mixed_context_lines, uint32_t options, Stream &strm);
423 Disassemble(Debugger &debugger, const ArchSpec &arch, const char *plugin_name,
424 const char *flavor, const ExecutionContext &exe_ctx,
425 uint32_t num_instructions, bool mixed_source_and_assembly,
426 uint32_t num_mixed_context_lines, uint32_t options, Stream &strm);
428 // Constructors and Destructors
429 Disassembler(const ArchSpec &arch, const char *flavor);
430 ~Disassembler() override;
432 typedef const char *(*SummaryCallback)(const Instruction &inst,
433 ExecutionContext *exe_context,
436 static bool PrintInstructions(Disassembler *disasm_ptr, Debugger &debugger,
437 const ArchSpec &arch,
438 const ExecutionContext &exe_ctx,
439 uint32_t num_instructions,
440 bool mixed_source_and_assembly,
441 uint32_t num_mixed_context_lines,
442 uint32_t options, Stream &strm);
444 size_t ParseInstructions(const ExecutionContext *exe_ctx,
445 const AddressRange &range, Stream *error_strm_ptr,
446 bool prefer_file_cache);
448 size_t ParseInstructions(const ExecutionContext *exe_ctx,
449 const Address &range, uint32_t num_instructions,
450 bool prefer_file_cache);
452 virtual size_t DecodeInstructions(const Address &base_addr,
453 const DataExtractor &data,
454 lldb::offset_t data_offset,
455 size_t num_instructions, bool append,
456 bool data_from_file) = 0;
458 InstructionList &GetInstructionList();
460 const InstructionList &GetInstructionList() const;
// Read-only access to m_arch, the architecture stored in this disassembler.
462 const ArchSpec &GetArchitecture() const { return m_arch; }
// Returns the flavor as a C string. The pointer refers to storage owned by
// the m_flavor std::string member.
464 const char *GetFlavor() const { return m_flavor.c_str(); }
466 virtual bool FlavorValidForArchSpec(const lldb_private::ArchSpec &arch,
467 const char *flavor) = 0;
470 // SourceLine and SourceLinesToDisplay structures are only used in the mixed
471 // source and assembly display methods internal to this class.
478 SourceLine() : file(), line(LLDB_INVALID_LINE_NUMBER), column(0) {}
480 bool operator==(const SourceLine &rhs) const {
481 return file == rhs.file && line == rhs.line && rhs.column == column;
484 bool operator!=(const SourceLine &rhs) const {
485 return file != rhs.file || line != rhs.line || column != rhs.column;
// A SourceLine is valid once `line` holds something other than
// LLDB_INVALID_LINE_NUMBER, which is the value the default constructor
// assigns to it.
488 bool IsValid() const { return line != LLDB_INVALID_LINE_NUMBER; }
// A set of source lines to show, together with which one of them is the
// "current" line and whether a trailing blank line is wanted.
491 struct SourceLinesToDisplay {
492 std::vector<SourceLine> lines;
494 // Index of the "current" source line, if we want to highlight it when
495 // displaying the source lines (as opposed to the surrounding source
496 // lines provided to give context).
497 size_t current_source_line;
499 // Whether to print a blank line at the end of the source lines.
500 bool print_source_context_end_eol;
// Starts with an empty line list and print_source_context_end_eol set to
// true. current_source_line is initialized from -1, which converts to the
// maximum size_t value.
// NOTE(review): presumably that value acts as a "no current line" sentinel --
// confirm in the code that consumes this struct.
502 SourceLinesToDisplay()
503 : lines(), current_source_line(-1), print_source_context_end_eol(true) {
507 // Get the function's declaration line number, hopefully a line number
508 // earlier than the opening curly brace at the start of the function body.
509 static SourceLine GetFunctionDeclLineEntry(const SymbolContext &sc);
511 // Add the provided SourceLine to the map of filenames-to-source-lines-seen.
512 static void AddLineToSourceLineTables(
514 std::map<FileSpec, std::set<uint32_t>> &source_lines_seen);
516 // Given a source line, determine if we should print it when we're doing
517 // mixed source & assembly output. We're currently using the
518 // target.process.thread.step-avoid-regexp setting (which is used for
519 // stepping over inlined STL functions by default) to determine what source
520 // lines to avoid showing.
522 // Returns true if this source line should be elided (if the source line
523 // should not be displayed).
525 ElideMixedSourceAndDisassemblyLine(const ExecutionContext &exe_ctx,
526 const SymbolContext &sc, SourceLine &line);
529 ElideMixedSourceAndDisassemblyLine(const ExecutionContext &exe_ctx,
530 const SymbolContext &sc, LineEntry &line) {
534 sl.column = line.column;
535 return ElideMixedSourceAndDisassemblyLine(exe_ctx, sc, sl);
538 // Classes that inherit from Disassembler can see and modify these
540 InstructionList m_instruction_list;
541 lldb::addr_t m_base_addr;
542 std::string m_flavor;
545 // For Disassembler only
546 DISALLOW_COPY_AND_ASSIGN(Disassembler);
549 } // namespace lldb_private
551 #endif // liblldb_Disassembler_h_