1 //===-- Disassembler.cpp ----------------------------------------*- C++ -*-===//
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
7 //===----------------------------------------------------------------------===//
9 #include "lldb/Core/Disassembler.h"
11 #include "lldb/Core/AddressRange.h"
12 #include "lldb/Core/Debugger.h"
13 #include "lldb/Core/EmulateInstruction.h"
14 #include "lldb/Core/Mangled.h"
15 #include "lldb/Core/Module.h"
16 #include "lldb/Core/ModuleList.h"
17 #include "lldb/Core/PluginManager.h"
18 #include "lldb/Core/SourceManager.h"
19 #include "lldb/Host/FileSystem.h"
20 #include "lldb/Interpreter/OptionValue.h"
21 #include "lldb/Interpreter/OptionValueArray.h"
22 #include "lldb/Interpreter/OptionValueDictionary.h"
23 #include "lldb/Interpreter/OptionValueRegex.h"
24 #include "lldb/Interpreter/OptionValueString.h"
25 #include "lldb/Interpreter/OptionValueUInt64.h"
26 #include "lldb/Symbol/Function.h"
27 #include "lldb/Symbol/Symbol.h"
28 #include "lldb/Symbol/SymbolContext.h"
29 #include "lldb/Target/ExecutionContext.h"
30 #include "lldb/Target/SectionLoadList.h"
31 #include "lldb/Target/StackFrame.h"
32 #include "lldb/Target/Target.h"
33 #include "lldb/Target/Thread.h"
34 #include "lldb/Utility/DataBufferHeap.h"
35 #include "lldb/Utility/DataExtractor.h"
36 #include "lldb/Utility/RegularExpression.h"
37 #include "lldb/Utility/Status.h"
38 #include "lldb/Utility/Stream.h"
39 #include "lldb/Utility/StreamString.h"
40 #include "lldb/Utility/Timer.h"
41 #include "lldb/lldb-private-enumerations.h"
42 #include "lldb/lldb-private-interfaces.h"
43 #include "lldb/lldb-private-types.h"
44 #include "llvm/ADT/Triple.h"
45 #include "llvm/Support/Compiler.h"
// Number of bytes to disassemble when a range has a valid base address but a
// byte size of zero (used by the frame-based Disassemble overload below).
53 #define DEFAULT_DISASM_BYTE_SIZE 32
56 using namespace lldb_private;
// Finds a disassembler plugin able to handle `arch`.
//
// A create-callback is looked up by name through
// PluginManager::GetDisassemblerCreateCallbackForPluginName, and the
// registered callbacks are also enumerated by index through
// PluginManager::GetDisassemblerCreateCallbackAtIndex. A callback is invoked
// with (arch, flavor) to build the DisassemblerSP that gets returned.
// The final statement returns an empty DisassemblerSP, i.e. the result when no
// earlier path returned a disassembler.
58 DisassemblerSP Disassembler::FindPlugin(const ArchSpec &arch,
60 const char *plugin_name) {
// Time spent in this function is reported under its own Timer category.
61 static Timer::Category func_cat(LLVM_PRETTY_FUNCTION);
62 Timer scoped_timer(func_cat,
63 "Disassembler::FindPlugin (arch = %s, plugin_name = %s)",
64 arch.GetArchitectureName(), plugin_name);
66 DisassemblerCreateInstance create_callback = nullptr;
// Name-based lookup: the plugin name is interned as a ConstString first.
69 ConstString const_plugin_name(plugin_name);
70 create_callback = PluginManager::GetDisassemblerCreateCallbackForPluginName(
72 if (create_callback) {
73 DisassemblerSP disassembler_sp(create_callback(arch, flavor));
76 return disassembler_sp;
// Index-based scan over the registered create-callbacks.
79 for (uint32_t idx = 0;
80 (create_callback = PluginManager::GetDisassemblerCreateCallbackAtIndex(
83 DisassemblerSP disassembler_sp(create_callback(arch, flavor));
86 return disassembler_sp;
89 return DisassemblerSP();
// Target-aware wrapper around FindPlugin.
//
// When a target is supplied and no flavor was given (`flavor == nullptr`), the
// flavor is taken from target_sp->GetDisassemblyFlavor(), but only if the
// architecture of `arch` is x86 or x86_64. The lookup itself is delegated to
// FindPlugin(arch, flavor, plugin_name).
92 DisassemblerSP Disassembler::FindPluginForTarget(const TargetSP target_sp,
95 const char *plugin_name) {
96 if (target_sp && flavor == nullptr) {
97 // FIXME - we don't have the mechanism in place to do per-architecture
98 // settings. But since we know that for now we only support flavors on x86
100 if (arch.GetTriple().getArch() == llvm::Triple::x86 ||
101 arch.GetTriple().getArch() == llvm::Triple::x86_64)
102 flavor = target_sp->GetDisassemblyFlavor();
104 return FindPlugin(arch, flavor, plugin_name);
// Writes into `resolved_addr` the address that should be used for `addr`.
//
// If `addr` is not already section-offset, its raw offset is resolved through
// the target of `exe_ctx`: as a file address via the target's image list when
// the section load list is empty, otherwise as a load address via the section
// load list. The last statement copies `addr` unchanged into `resolved_addr`;
// presumably this is the fallback when resolution did not yield a valid
// address (see the `is_resolved && resolved_addr.IsValid()` check).
107 static void ResolveAddress(const ExecutionContext &exe_ctx, const Address &addr,
108 Address &resolved_addr) {
109 if (!addr.IsSectionOffset()) {
110 // If we weren't passed in a section offset address range, try and resolve
112 Target *target = exe_ctx.GetTargetPtr();
115 target->GetSectionLoadList().IsEmpty() ?
116 target->GetImages().ResolveFileAddress(addr.GetOffset(),
118 target->GetSectionLoadList().ResolveLoadAddress(addr.GetOffset(),
121 // We weren't able to resolve the address, just treat it as a raw address
122 if (is_resolved && resolved_addr.IsValid())
126 resolved_addr = addr;
// Disassembles the address ranges of every symbol context in `sc_list`.
//
// For each context that can be fetched from the list, sc.GetAddressRange is
// queried with block | function | symbol scope (inline block ranges enabled)
// for successive range indexes, and each range obtained is handed to the
// AddressRange overload of Disassemble together with the remaining arguments.
//
// Returns `success_count`, which starts at zero; presumably it counts the
// ranges for which the AddressRange overload reported success.
129 size_t Disassembler::Disassemble(Debugger &debugger, const ArchSpec &arch,
130 const char *plugin_name, const char *flavor,
131 const ExecutionContext &exe_ctx,
132 SymbolContextList &sc_list,
133 uint32_t num_instructions,
134 bool mixed_source_and_assembly,
135 uint32_t num_mixed_context_lines,
136 uint32_t options, Stream &strm) {
137 size_t success_count = 0;
138 const size_t count = sc_list.GetSize();
141 const uint32_t scope =
142 eSymbolContextBlock | eSymbolContextFunction | eSymbolContextSymbol;
143 const bool use_inline_block_range = true;
144 for (size_t i = 0; i < count; ++i) {
145 if (!sc_list.GetContextAtIndex(i, sc))
// A single symbol context may cover several address ranges.
147 for (uint32_t range_idx = 0;
148 sc.GetAddressRange(scope, range_idx, use_inline_block_range, range);
150 if (Disassemble(debugger, arch, plugin_name, flavor, exe_ctx, range,
151 num_instructions, mixed_source_and_assembly,
152 num_mixed_context_lines, options, strm)) {
158 return success_count;
// Disassembles every function whose name matches `name`.
//
// Matches are gathered into a SymbolContextList either from `module` via
// Module::FindFunctions or, in the else-branch, from all images of the target
// of `exe_ctx` when that target exists. Both lookups use
// eFunctionNameTypeAuto and include symbols and inlined functions.
// A non-empty list is forwarded to the SymbolContextList overload of
// Disassemble and its result is returned.
161 bool Disassembler::Disassemble(Debugger &debugger, const ArchSpec &arch,
162 const char *plugin_name, const char *flavor,
163 const ExecutionContext &exe_ctx,
164 ConstString name, Module *module,
165 uint32_t num_instructions,
166 bool mixed_source_and_assembly,
167 uint32_t num_mixed_context_lines,
168 uint32_t options, Stream &strm) {
169 SymbolContextList sc_list;
171 const bool include_symbols = true;
172 const bool include_inlines = true;
174 module->FindFunctions(name, nullptr, eFunctionNameTypeAuto,
175 include_symbols, include_inlines, true, sc_list);
176 } else if (exe_ctx.GetTargetPtr()) {
177 exe_ctx.GetTargetPtr()->GetImages().FindFunctions(
178 name, eFunctionNameTypeAuto, include_symbols, include_inlines, false,
183 if (sc_list.GetSize()) {
184 return Disassemble(debugger, arch, plugin_name, flavor, exe_ctx, sc_list,
185 num_instructions, mixed_source_and_assembly,
186 num_mixed_context_lines, options, strm);
// Builds a disassembler for `range` and parses the instructions it contains.
//
// Only ranges with a non-zero byte size and a valid base address are
// processed. The plugin is obtained through FindPluginForTarget using the
// target of `exe_ctx`, and the range is parsed with ParseInstructions; no
// error stream is supplied (nullptr). The number of bytes disassembled is then
// compared against zero, presumably to discard the disassembler on failure.
191 lldb::DisassemblerSP Disassembler::DisassembleRange(
192 const ArchSpec &arch, const char *plugin_name, const char *flavor,
193 const ExecutionContext &exe_ctx, const AddressRange &range,
194 bool prefer_file_cache) {
195 lldb::DisassemblerSP disasm_sp;
196 if (range.GetByteSize() > 0 && range.GetBaseAddress().IsValid()) {
197 disasm_sp = Disassembler::FindPluginForTarget(exe_ctx.GetTargetSP(), arch,
198 flavor, plugin_name);
201 size_t bytes_disassembled = disasm_sp->ParseInstructions(
202 &exe_ctx, range, nullptr, prefer_file_cache);
203 if (bytes_disassembled == 0)
// Decodes instructions directly from the `src_len` bytes at `src`.
//
// The plugin comes from FindPlugin(arch, flavor, plugin_name), so no target is
// involved in the lookup. The bytes are wrapped in a DataExtractor that uses
// the byte order and address byte size of `arch`, then decoded with
// DecodeInstructions starting at address `start` and data offset 0, passing
// `num_instructions` and `data_from_file` through. The return value of
// DecodeInstructions is deliberately ignored.
211 Disassembler::DisassembleBytes(const ArchSpec &arch, const char *plugin_name,
212 const char *flavor, const Address &start,
213 const void *src, size_t src_len,
214 uint32_t num_instructions, bool data_from_file) {
215 lldb::DisassemblerSP disasm_sp;
218 disasm_sp = Disassembler::FindPlugin(arch, flavor, plugin_name);
221 DataExtractor data(src, src_len, arch.GetByteOrder(),
222 arch.GetAddressByteSize());
224 (void)disasm_sp->DecodeInstructions(start, data, 0, num_instructions,
225 false, data_from_file);
// Disassembles `disasm_range` and prints the result to `strm`.
//
// Nothing is attempted for a range whose byte size is zero. Otherwise a plugin
// is obtained through FindPluginForTarget, the base address of the range is
// passed through ResolveAddress, and a working range with the same byte size
// is parsed with ParseInstructions (reading with prefer_file_cache = false and
// passing `strm` as the stream argument). The parsed instructions are printed
// by PrintInstructions, whose result is returned.
232 bool Disassembler::Disassemble(Debugger &debugger, const ArchSpec &arch,
233 const char *plugin_name, const char *flavor,
234 const ExecutionContext &exe_ctx,
235 const AddressRange &disasm_range,
236 uint32_t num_instructions,
237 bool mixed_source_and_assembly,
238 uint32_t num_mixed_context_lines,
239 uint32_t options, Stream &strm) {
240 if (disasm_range.GetByteSize()) {
241 lldb::DisassemblerSP disasm_sp(Disassembler::FindPluginForTarget(
242 exe_ctx.GetTargetSP(), arch, flavor, plugin_name));
246 ResolveAddress(exe_ctx, disasm_range.GetBaseAddress(),
247 range.GetBaseAddress());
248 range.SetByteSize(disasm_range.GetByteSize());
249 const bool prefer_file_cache = false;
250 size_t bytes_disassembled = disasm_sp->ParseInstructions(
251 &exe_ctx, range, &strm, prefer_file_cache);
252 if (bytes_disassembled == 0)
255 return PrintInstructions(disasm_sp.get(), debugger, arch, exe_ctx,
256 num_instructions, mixed_source_and_assembly,
257 num_mixed_context_lines, options, strm);
// Disassembles `num_instructions` instructions beginning at `start_address`
// and prints them to `strm`.
//
// Nothing is attempted when `num_instructions` is zero. Otherwise a plugin is
// obtained through FindPluginForTarget, `start_address` is passed through
// ResolveAddress, and the count-based ParseInstructions overload is used
// (prefer_file_cache = false). The parsed instructions are printed by
// PrintInstructions, whose result is returned.
263 bool Disassembler::Disassemble(Debugger &debugger, const ArchSpec &arch,
264 const char *plugin_name, const char *flavor,
265 const ExecutionContext &exe_ctx,
266 const Address &start_address,
267 uint32_t num_instructions,
268 bool mixed_source_and_assembly,
269 uint32_t num_mixed_context_lines,
270 uint32_t options, Stream &strm) {
271 if (num_instructions > 0) {
272 lldb::DisassemblerSP disasm_sp(Disassembler::FindPluginForTarget(
273 exe_ctx.GetTargetSP(), arch, flavor, plugin_name));
276 ResolveAddress(exe_ctx, start_address, addr);
277 const bool prefer_file_cache = false;
278 size_t bytes_disassembled = disasm_sp->ParseInstructions(
279 &exe_ctx, addr, num_instructions, prefer_file_cache);
280 if (bytes_disassembled == 0)
282 return PrintInstructions(disasm_sp.get(), debugger, arch, exe_ctx,
283 num_instructions, mixed_source_and_assembly,
284 num_mixed_context_lines, options, strm);
// Computes the source line on which the function of `sc` is declared.
//
// Requires `sc.function` and a valid `sc.line_entry`. The declaration file and
// line come from Function::GetStartLineSourceInfo; they are copied into the
// result only when the declaration file equals the line entry's `file` or
// `original_file`. The column of the result is always set to 0 in that case.
290 Disassembler::SourceLine
291 Disassembler::GetFunctionDeclLineEntry(const SymbolContext &sc) {
292 SourceLine decl_line;
293 if (sc.function && sc.line_entry.IsValid()) {
294 LineEntry prologue_end_line = sc.line_entry;
295 FileSpec func_decl_file;
296 uint32_t func_decl_line;
297 sc.function->GetStartLineSourceInfo(func_decl_file, func_decl_line);
298 if (func_decl_file == prologue_end_line.file ||
299 func_decl_file == prologue_end_line.original_file) {
300 decl_line.file = func_decl_file;
301 decl_line.line = func_decl_line;
302 // TODO do we care about column on these entries? If so, we need to
303 // plumb that through GetStartLineSourceInfo.
304 decl_line.column = 0;
// Records `line` in `source_lines_seen`, a map from file to the set of line
// numbers already noted for that file. Invalid lines are ignored.
310 void Disassembler::AddLineToSourceLineTables(
312 std::map<FileSpec, std::set<uint32_t>> &source_lines_seen) {
313 if (line.IsValid()) {
314 auto source_lines_seen_pos = source_lines_seen.find(line.file);
// First line recorded for this file: create its set with this one entry.
315 if (source_lines_seen_pos == source_lines_seen.end()) {
316 std::set<uint32_t> lines;
317 lines.insert(line.line);
318 source_lines_seen.emplace(line.file, lines);
// File already known: add the line number to its existing set.
320 source_lines_seen_pos->second.insert(line.line);
// Decides whether a source line should be left out of mixed source/assembly
// output.
//
// A regular expression of symbols to avoid is taken from the thread of
// `exe_ctx` (GetSymbolsToAvoidRegexp) or, via the target's debugger, from the
// "target.process.thread.step-avoid-regexp" property when that property is of
// regex type. When such an expression exists and `sc` has a symbol, the
// function name of `sc` (demangled, without arguments) is matched against it;
// a match means the line is skipped.
325 bool Disassembler::ElideMixedSourceAndDisassemblyLine(
326 const ExecutionContext &exe_ctx, const SymbolContext &sc,
329 // TODO: should we also check target.process.thread.step-avoid-libraries ?
331 const RegularExpression *avoid_regex = nullptr;
333 // Skip any line #0 entries - they are implementation details
337 ThreadSP thread_sp = exe_ctx.GetThreadSP();
339 avoid_regex = thread_sp->GetSymbolsToAvoidRegexp();
341 TargetSP target_sp = exe_ctx.GetTargetSP();
344 OptionValueSP value_sp = target_sp->GetDebugger().GetPropertyValue(
345 &exe_ctx, "target.process.thread.step-avoid-regexp", false, error);
346 if (value_sp && value_sp->GetType() == OptionValue::eTypeRegex) {
347 OptionValueRegex *re = value_sp->GetAsRegex();
349 avoid_regex = re->GetCurrentValue();
354 if (avoid_regex && sc.symbol != nullptr) {
355 const char *function_name =
356 sc.GetFunctionName(Mangled::ePreferDemangledWithoutArguments)
// The match object is passed by address so Execute can fill it in.
359 RegularExpression::Match regex_match(1);
360 if (avoid_regex->Execute(function_name, &regex_match)) {
361 // skip this source line
366 // don't skip this source line
// Prints the instructions held by `disasm_ptr` to `strm`, optionally
// interleaved with source lines.
//
// At most `num_instructions` instructions are printed; a value of 0 means all
// instructions in the list. The work is done in two passes:
//   1. measure the widest formatted address so the opcodes can be aligned,
//      and, in mixed mode, record in `source_lines_seen` the source lines that
//      have line-table entries of their own;
//   2. for each instruction, print any source lines selected for display and
//      then the instruction itself through Instruction::Dump.
//
// `options` is tested for eOptionMarkPCSourceLine (marks source lines with
// "->" when the instruction is at the frame's code address) and for
// eOptionShowBytes (passed to Instruction::Dump as `show_bytes`).
370 bool Disassembler::PrintInstructions(Disassembler *disasm_ptr,
371 Debugger &debugger, const ArchSpec &arch,
372 const ExecutionContext &exe_ctx,
373 uint32_t num_instructions,
374 bool mixed_source_and_assembly,
375 uint32_t num_mixed_context_lines,
376 uint32_t options, Stream &strm) {
377 // We got some things disassembled...
378 size_t num_instructions_found = disasm_ptr->GetInstructionList().GetSize();
// Cap the number of instructions printed at the caller's request, if any.
380 if (num_instructions > 0 && num_instructions < num_instructions_found)
381 num_instructions_found = num_instructions;
383 const uint32_t max_opcode_byte_size =
384 disasm_ptr->GetInstructionList().GetMaxOpcocdeByteSize();
386 SymbolContext prev_sc;
387 AddressRange current_source_line_range;
388 const Address *pc_addr_ptr = nullptr;
389 StackFrame *frame = exe_ctx.GetFramePtr();
// Source text comes from the target's source manager when a target exists,
// otherwise from the debugger's.
391 TargetSP target_sp(exe_ctx.GetTargetSP());
392 SourceManager &source_manager =
393 target_sp ? target_sp->GetSourceManager() : debugger.GetSourceManager();
396 pc_addr_ptr = &frame->GetFrameCodeAddress();
398 const uint32_t scope =
399 eSymbolContextLineEntry | eSymbolContextFunction | eSymbolContextSymbol;
400 const bool use_inline_block_range = false;
// Address format: the debugger's disassembly format when there is a target
// scope; "${addr}: " is parsed into a local entry as the alternative.
402 const FormatEntity::Entry *disassembly_format = nullptr;
403 FormatEntity::Entry format;
404 if (exe_ctx.HasTargetScope()) {
406 exe_ctx.GetTargetRef().GetDebugger().GetDisassemblyFormat();
408 FormatEntity::Parse("${addr}: ", format);
409 disassembly_format = &format;
412 // First pass: step through the list of instructions, find how long the
413 // initial addresses strings are, insert padding in the second pass so the
414 // opcodes all line up nicely.
416 // Also build up the source line mapping if this is mixed source & assembly
417 // mode. Calculate the source line for each assembly instruction (eliding
418 // inlined functions which the user wants to skip).
420 std::map<FileSpec, std::set<uint32_t>> source_lines_seen;
421 Symbol *previous_symbol = nullptr;
423 size_t address_text_size = 0;
424 for (size_t i = 0; i < num_instructions_found; ++i) {
426 disasm_ptr->GetInstructionList().GetInstructionAtIndex(i).get();
428 const Address &addr = inst->GetAddress();
429 ModuleSP module_sp(addr.GetModule());
431 const SymbolContextItem resolve_mask = eSymbolContextFunction |
432 eSymbolContextSymbol |
433 eSymbolContextLineEntry;
434 uint32_t resolved_mask =
435 module_sp->ResolveSymbolContextForAddress(addr, resolve_mask, sc);
// Format the address into a scratch stream only to measure its width.
437 StreamString strmstr;
438 Debugger::FormatDisassemblerAddress(disassembly_format, &sc, nullptr,
439 &exe_ctx, &addr, strmstr);
440 size_t cur_line = strmstr.GetSizeOfLastLine();
441 if (cur_line > address_text_size)
442 address_text_size = cur_line;
444 // Add entries to our "source_lines_seen" map+set which list which
445 // sources lines occur in this disassembly session. We will print
446 // lines of context around a source line, but we don't want to print
447 // a source line that has a line table entry of its own - we'll leave
448 // that source line to be printed when it actually occurs in the
451 if (mixed_source_and_assembly && sc.line_entry.IsValid()) {
452 if (sc.symbol != previous_symbol) {
453 SourceLine decl_line = GetFunctionDeclLineEntry(sc);
454 if (!ElideMixedSourceAndDisassemblyLine(exe_ctx, sc, decl_line))
455 AddLineToSourceLineTables(decl_line, source_lines_seen);
457 if (sc.line_entry.IsValid()) {
458 SourceLine this_line;
459 this_line.file = sc.line_entry.file;
460 this_line.line = sc.line_entry.line;
461 this_line.column = sc.line_entry.column;
462 if (!ElideMixedSourceAndDisassemblyLine(exe_ctx, sc, this_line))
463 AddLineToSourceLineTables(this_line, source_lines_seen);
// Second pass: reset the per-function tracking state and emit the output.
472 previous_symbol = nullptr;
473 SourceLine previous_line;
474 for (size_t i = 0; i < num_instructions_found; ++i) {
476 disasm_ptr->GetInstructionList().GetInstructionAtIndex(i).get();
479 const Address &addr = inst->GetAddress();
480 const bool inst_is_at_pc = pc_addr_ptr && addr == *pc_addr_ptr;
481 SourceLinesToDisplay source_lines_to_display;
485 ModuleSP module_sp(addr.GetModule());
487 uint32_t resolved_mask = module_sp->ResolveSymbolContextForAddress(
488 addr, eSymbolContextEverything, sc);
490 if (mixed_source_and_assembly) {
492 // If we've started a new function (non-inlined), print all of the
493 // source lines from the function declaration until the first line
494 // table entry - typically the opening curly brace of the function.
495 if (previous_symbol != sc.symbol) {
496 // The default disassembly format puts an extra blank line
497 // between functions - so when we're displaying the source
498 // context for a function, we don't want to add a blank line
499 // after the source context or we'll end up with two of them.
500 if (previous_symbol != nullptr)
501 source_lines_to_display.print_source_context_end_eol = false;
503 previous_symbol = sc.symbol;
504 if (sc.function && sc.line_entry.IsValid()) {
505 LineEntry prologue_end_line = sc.line_entry;
506 if (!ElideMixedSourceAndDisassemblyLine(exe_ctx, sc,
507 prologue_end_line)) {
508 FileSpec func_decl_file;
509 uint32_t func_decl_line;
510 sc.function->GetStartLineSourceInfo(func_decl_file,
512 if (func_decl_file == prologue_end_line.file ||
513 func_decl_file == prologue_end_line.original_file) {
514 // Add all the lines between the function declaration and
515 // the first non-prologue source line to the list of lines
517 for (uint32_t lineno = func_decl_line;
518 lineno <= prologue_end_line.line; lineno++) {
519 SourceLine this_line;
520 this_line.file = func_decl_file;
521 this_line.line = lineno;
522 source_lines_to_display.lines.push_back(this_line);
524 // Mark the last line as the "current" one. Usually this
525 // is the open curly brace.
526 if (source_lines_to_display.lines.size() > 0)
527 source_lines_to_display.current_source_line =
528 source_lines_to_display.lines.size() - 1;
532 sc.GetAddressRange(scope, 0, use_inline_block_range,
533 current_source_line_range);
536 // If we've left a previous source line's address range, print a
538 if (!current_source_line_range.ContainsFileAddress(addr)) {
539 sc.GetAddressRange(scope, 0, use_inline_block_range,
540 current_source_line_range);
542 if (sc != prev_sc && sc.comp_unit && sc.line_entry.IsValid()) {
543 SourceLine this_line;
544 this_line.file = sc.line_entry.file;
545 this_line.line = sc.line_entry.line;
547 if (!ElideMixedSourceAndDisassemblyLine(exe_ctx, sc,
549 // Only print this source line if it is different from the
550 // last source line we printed. There may have been inlined
551 // functions between these lines that we elided, resulting in
552 // the same line being printed twice in a row for a
553 // contiguous block of assembly instructions.
554 if (this_line != previous_line) {
// Context lines before the current line. Reaching a line that already has
// its own entry in source_lines_seen clears what was collected so far.
// NOTE(review): num_mixed_context_lines is uint32_t; if this_line.line is
// unsigned as well, `(this_line.line - num_mixed_context_lines) > 0` wraps
// around instead of going negative when the line number is smaller than
// the context size, so it would not guard against that case -- confirm.
556 std::vector<uint32_t> previous_lines;
558 i < num_mixed_context_lines &&
559 (this_line.line - num_mixed_context_lines) > 0;
562 this_line.line - num_mixed_context_lines + i;
563 auto pos = source_lines_seen.find(this_line.file);
564 if (pos != source_lines_seen.end()) {
565 if (pos->second.count(line) == 1) {
566 previous_lines.clear();
568 previous_lines.push_back(line);
// Queue the surviving context lines and mark them as seen.
572 for (size_t i = 0; i < previous_lines.size(); i++) {
573 SourceLine previous_line;
574 previous_line.file = this_line.file;
575 previous_line.line = previous_lines[i];
576 auto pos = source_lines_seen.find(previous_line.file);
577 if (pos != source_lines_seen.end()) {
578 pos->second.insert(previous_line.line);
580 source_lines_to_display.lines.push_back(previous_line);
// The line for this instruction is the "current" (highlighted) one.
583 source_lines_to_display.lines.push_back(this_line);
584 source_lines_to_display.current_source_line =
585 source_lines_to_display.lines.size() - 1;
// Context lines after the current line, checked against lines already seen.
587 for (uint32_t i = 0; i < num_mixed_context_lines; i++) {
588 SourceLine next_line;
589 next_line.file = this_line.file;
590 next_line.line = this_line.line + i + 1;
591 auto pos = source_lines_seen.find(next_line.file);
592 if (pos != source_lines_seen.end()) {
593 if (pos->second.count(next_line.line) == 1)
595 pos->second.insert(next_line.line);
597 source_lines_to_display.lines.push_back(next_line);
600 previous_line = this_line;
// Emit the queued source lines: "->" marks lines shown for the instruction at
// the PC (when eOptionMarkPCSourceLine is set), "**" marks the current line.
610 if (source_lines_to_display.lines.size() > 0) {
612 for (size_t idx = 0; idx < source_lines_to_display.lines.size();
614 SourceLine ln = source_lines_to_display.lines[idx];
615 const char *line_highlight = "";
616 if (inst_is_at_pc && (options & eOptionMarkPCSourceLine)) {
617 line_highlight = "->";
618 } else if (idx == source_lines_to_display.current_source_line) {
619 line_highlight = "**";
621 source_manager.DisplaySourceLinesWithLineNumbers(
622 ln.file, ln.line, ln.column, 0, 0, line_highlight, &strm);
624 if (source_lines_to_display.print_source_context_end_eol)
// Finally print the instruction, padded to the address width from pass one.
628 const bool show_bytes = (options & eOptionShowBytes) != 0;
629 inst->Dump(&strm, max_opcode_byte_size, true, show_bytes, &exe_ctx, &sc,
630 &prev_sc, nullptr, address_text_size);
// Disassembles the code around the current stack frame of `exe_ctx`.
//
// The range is chosen from the frame's symbol context: the function's address
// range, or else the symbol's address and byte size when the symbol's value is
// an address, or else the frame's code address. A range with a valid base
// address but zero size is given DEFAULT_DISASM_BYTE_SIZE bytes. The range is
// then passed to the AddressRange overload of Disassemble.
640 bool Disassembler::Disassemble(Debugger &debugger, const ArchSpec &arch,
641 const char *plugin_name, const char *flavor,
642 const ExecutionContext &exe_ctx,
643 uint32_t num_instructions,
644 bool mixed_source_and_assembly,
645 uint32_t num_mixed_context_lines,
646 uint32_t options, Stream &strm) {
648 StackFrame *frame = exe_ctx.GetFramePtr();
651 frame->GetSymbolContext(eSymbolContextFunction | eSymbolContextSymbol));
653 range = sc.function->GetAddressRange();
654 } else if (sc.symbol && sc.symbol->ValueIsAddress()) {
655 range.GetBaseAddress() = sc.symbol->GetAddressRef();
656 range.SetByteSize(sc.symbol->GetByteSize());
658 range.GetBaseAddress() = frame->GetFrameCodeAddress();
// Make sure there is something to disassemble when only an address is known.
661 if (range.GetBaseAddress().IsValid() && range.GetByteSize() == 0)
662 range.SetByteSize(DEFAULT_DISASM_BYTE_SIZE);
665 return Disassemble(debugger, arch, plugin_name, flavor, exe_ctx, range,
666 num_instructions, mixed_source_and_assembly,
667 num_mixed_context_lines, options, strm);
// Constructs an instruction located at `address` with the given address
// class. The opcode starts out default-constructed and the mnemonic/operand
// strings are marked as not yet calculated.
670 Instruction::Instruction(const Address &address, AddressClass addr_class)
671 : m_address(address), m_address_class(addr_class), m_opcode(),
672 m_calculated_strings(false) {}
// Defaulted destructor, defined out of line in this file.
674 Instruction::~Instruction() = default;
// Returns the address class of this instruction. When the stored class is
// AddressClass::eInvalid it is first looked up from the instruction's address
// and cached in m_address_class.
676 AddressClass Instruction::GetAddressClass() {
677 if (m_address_class == AddressClass::eInvalid)
678 m_address_class = m_address.GetAddressClass();
679 return m_address_class;
// Writes a one-line textual form of this instruction to `s`.
//
// The line is assembled in a local stream `ss` and consists of, in order: the
// formatted address padded to `max_address_text_size` columns, the opcode
// dump, the opcode name padded to a column, the operands (m_mnemonics) and,
// when present, the comment introduced by " ; ".
//
// `max_opcode_byte_size`, when non-zero, sets the opcode dump width to
// `max_opcode_byte_size * 3 + 1`; otherwise the width is 15 * 3 + 1 for
// byte-typed opcodes and 12 for the other opcode types.
682 void Instruction::Dump(lldb_private::Stream *s, uint32_t max_opcode_byte_size,
683 bool show_address, bool show_bytes,
684 const ExecutionContext *exe_ctx,
685 const SymbolContext *sym_ctx,
686 const SymbolContext *prev_sym_ctx,
687 const FormatEntity::Entry *disassembly_addr_format,
688 size_t max_address_text_size) {
// Column widths, in characters, for the opcode name and the operands.
689 size_t opcode_column_width = 7;
690 const size_t operand_column_width = 25;
692 CalculateMnemonicOperandsAndCommentIfNeeded(exe_ctx);
697 Debugger::FormatDisassemblerAddress(disassembly_addr_format, sym_ctx,
698 prev_sym_ctx, exe_ctx, &m_address, ss);
699 ss.FillLastLineToColumn(max_address_text_size, ' ');
703 if (m_opcode.GetType() == Opcode::eTypeBytes) {
704 // x86_64 and i386 are the only ones that use bytes right now so pad out
705 // the byte dump to be able to always show 15 bytes (3 chars each) plus a
707 if (max_opcode_byte_size > 0)
708 m_opcode.Dump(&ss, max_opcode_byte_size * 3 + 1);
710 m_opcode.Dump(&ss, 15 * 3 + 1);
712 // Else, we have ARM or MIPS which can show up to a uint32_t 0x00000000
713 // (10 spaces) plus two for padding...
714 if (max_opcode_byte_size > 0)
715 m_opcode.Dump(&ss, max_opcode_byte_size * 3 + 1);
717 m_opcode.Dump(&ss, 12);
// Remember where the opcode name starts so later columns are relative to it.
721 const size_t opcode_pos = ss.GetSizeOfLastLine();
723 // The default opcode size of 7 characters is plenty for most architectures
724 // but some like arm can pull out the occasional vqrshrun.s16. We won't get
725 // consistent column spacing in these cases, unfortunately.
726 if (m_opcode_name.length() >= opcode_column_width) {
727 opcode_column_width = m_opcode_name.length() + 1;
730 ss.PutCString(m_opcode_name);
731 ss.FillLastLineToColumn(opcode_pos + opcode_column_width, ' ');
732 ss.PutCString(m_mnemonics);
734 if (!m_comment.empty()) {
735 ss.FillLastLineToColumn(
736 opcode_pos + opcode_column_width + operand_column_width, ' ');
737 ss.PutCString(" ; ");
738 ss.PutCString(m_comment);
// Hand the fully assembled line to the caller's stream in one write.
740 s->PutCString(ss.GetString());
// Runs this instruction through an instruction emulator for `arch`.
//
// An EmulateInstruction plugin is looked up for any instruction type; when
// one is found it is given this instruction's opcode and address and the
// result of EvaluateInstruction(0) is returned.
743 bool Instruction::DumpEmulation(const ArchSpec &arch) {
744 std::unique_ptr<EmulateInstruction> insn_emulator_up(
745 EmulateInstruction::FindPlugin(arch, eInstructionTypeAny, nullptr));
746 if (insn_emulator_up) {
747 insn_emulator_up->SetInstruction(GetOpcode(), GetAddress(), nullptr);
748 return insn_emulator_up->EvaluateInstruction(0);
// A breakpoint can be set on this instruction unless it has a delay slot.
754 bool Instruction::CanSetBreakpoint () {
755 return !HasDelaySlot();
// Reports whether this instruction has a delay slot; CanSetBreakpoint relies
// on the answer.
758 bool Instruction::HasDelaySlot() {
// Reads array elements, one per line, from `in_file` into an
// OptionValueArray.
//
// Each line is read with fgets, has a trailing newline removed, and is
// compared against the single character "]". Other lines are matched against
// a pattern that captures one whitespace-delimited token; the captured text
// becomes an OptionValueUInt64 when `data_type` is OptionValue::eTypeUInt64,
// and an OptionValueString otherwise, and is inserted into the array.
//
// Returns the array, or a reset (null) pointer when fgets fails; the failure
// message is reported with the text shown below.
763 OptionValueSP Instruction::ReadArray(FILE *in_file, Stream *out_stream,
764 OptionValue::Type data_type) {
768 auto option_value_sp = std::make_shared<OptionValueArray>(1u << data_type);
772 if (!fgets(buffer, 1023, in_file)) {
774 "Instruction::ReadArray: Error reading file (fgets).\n");
775 option_value_sp.reset();
776 return option_value_sp;
779 std::string line(buffer);
// Strip the newline that fgets keeps at the end of the line.
781 size_t len = line.size();
782 if (line[len - 1] == '\n') {
783 line[len - 1] = '\0';
784 line.resize(len - 1);
787 if ((line.size() == 1) && line[0] == ']') {
// Capture group 1 is the element text with surrounding blanks removed.
794 static RegularExpression g_reg_exp(
795 llvm::StringRef("^[ \t]*([^ \t]+)[ \t]*$"));
796 RegularExpression::Match regex_match(1);
797 bool reg_exp_success = g_reg_exp.Execute(line, &regex_match);
799 regex_match.GetMatchAtIndex(line.c_str(), 1, value);
803 OptionValueSP data_value_sp;
805 case OptionValue::eTypeUInt64:
806 data_value_sp = std::make_shared<OptionValueUInt64>(0, 0);
807 data_value_sp->SetValueFromString(value);
809 // Other types can be added later as needed.
811 data_value_sp = std::make_shared<OptionValueString>(value.c_str(), "");
815 option_value_sp->GetAsArray()->InsertValue(idx, data_value_sp);
820 return option_value_sp;
// Reads "key = value" lines from `in_file` into an OptionValueDictionary.
//
// Each line is read with fgets, has a trailing newline removed, and is
// compared against the single character "}" (the end-of-dictionary marker).
// Other lines must match `identifier = value`; a line that does not match is
// reported and a reset (null) pointer is returned. The value decides what is
// stored under the key:
//   - "{"  : a nested dictionary read by a recursive ReadDictionary call;
//   - "["  : an array read by ReadArray using the current `data_type`;
//   - "0x…": an OptionValueUInt64 parsed from the text;
//   - other: an OptionValueString, with one pair of enclosing double quotes
//            removed when present.
// The key "data_encoding" is meta-data rather than a normal entry: the value
// "uint32_t" selects OptionValue::eTypeUInt64 as the type for an upcoming
// array, and the type is reset to eTypeInvalid after an array has been read.
//
// Returns the dictionary, or a reset (null) pointer on a read or parse error.
823 OptionValueSP Instruction::ReadDictionary(FILE *in_file, Stream *out_stream) {
827 auto option_value_sp = std::make_shared<OptionValueDictionary>();
828 static ConstString encoding_key("data_encoding");
829 OptionValue::Type data_type = OptionValue::eTypeInvalid;
832 // Read the next line in the file
833 if (!fgets(buffer, 1023, in_file)) {
835 "Instruction::ReadDictionary: Error reading file (fgets).\n");
836 option_value_sp.reset();
837 return option_value_sp;
840 // Check to see if the line contains the end-of-dictionary marker ("}")
841 std::string line(buffer);
843 size_t len = line.size();
844 if (line[len - 1] == '\n') {
845 line[len - 1] = '\0';
846 line.resize(len - 1);
849 if ((line.size() == 1) && (line[0] == '}')) {
854 // Try to find a key-value pair in the current line and add it to the
857 static RegularExpression g_reg_exp(llvm::StringRef(
858 "^[ \t]*([a-zA-Z_][a-zA-Z0-9_]*)[ \t]*=[ \t]*(.*)[ \t]*$"));
859 RegularExpression::Match regex_match(2);
861 bool reg_exp_success = g_reg_exp.Execute(line, &regex_match);
864 if (reg_exp_success) {
865 regex_match.GetMatchAtIndex(line.c_str(), 1, key);
866 regex_match.GetMatchAtIndex(line.c_str(), 2, value);
868 out_stream->Printf("Instruction::ReadDictionary: Failure executing "
869 "regular expression.\n");
870 option_value_sp.reset();
871 return option_value_sp;
874 ConstString const_key(key.c_str());
875 // Check value to see if it's the start of an array or dictionary.
877 lldb::OptionValueSP value_sp;
878 assert(value.empty() == false);
879 assert(key.empty() == false);
881 if (value[0] == '{') {
882 assert(value.size() == 1);
883 // value is a dictionary
884 value_sp = ReadDictionary(in_file, out_stream);
886 option_value_sp.reset();
887 return option_value_sp;
889 } else if (value[0] == '[') {
890 assert(value.size() == 1);
892 value_sp = ReadArray(in_file, out_stream, data_type);
894 option_value_sp.reset();
895 return option_value_sp;
897 // We've used the data_type to read an array; re-set the type to
899 data_type = OptionValue::eTypeInvalid;
900 } else if ((value[0] == '0') && (value[1] == 'x')) {
901 value_sp = std::make_shared<OptionValueUInt64>(0, 0);
902 value_sp->SetValueFromString(value);
904 size_t len = value.size();
905 if ((value[0] == '"') && (value[len - 1] == '"'))
906 value = value.substr(1, len - 2);
907 value_sp = std::make_shared<OptionValueString>(value.c_str(), "");
910 if (const_key == encoding_key) {
911 // A 'data_encoding=..." is NOT a normal key-value pair; it is meta-data
913 // data type of an upcoming array (usually the next bit of data to be
915 if (strcmp(value.c_str(), "uint32_t") == 0)
916 data_type = OptionValue::eTypeUInt64;
918 option_value_sp->GetAsDictionary()->SetValueForKey(const_key, value_sp,
923 return option_value_sp;
// Runs an instruction-emulation test described by the file `file_name`.
//
// The file is opened for reading through FileSystem. Its first line must
// begin with "InstructionEmulationState={"; the rest is parsed by
// ReadDictionary into an OptionValueDictionary. The dictionary must provide
// the keys "assembly_string" (used as this instruction's description) and
// "triple" (used to set up the architecture). An EmulateInstruction plugin
// for that architecture then runs the test against the dictionary.
//
// Every failure is reported on `out_stream` with one of the messages below,
// and the final outcome is reported as "Emulation test succeeded." or
// "Emulation test failed.".
926 bool Instruction::TestEmulation(Stream *out_stream, const char *file_name) {
931 out_stream->Printf("Instruction::TestEmulation: Missing file_name.");
934 FILE *test_file = FileSystem::Instance().Fopen(file_name, "r");
937 "Instruction::TestEmulation: Attempt to open test file failed.");
942 if (!fgets(buffer, 255, test_file)) {
944 "Instruction::TestEmulation: Error reading first line of test file.\n");
// 27 is the length of the expected prefix "InstructionEmulationState={".
949 if (strncmp(buffer, "InstructionEmulationState={", 27) != 0) {
950 out_stream->Printf("Instruction::TestEmulation: Test file does not contain "
951 "emulation state dictionary\n");
956 // Read all the test information from the test file into an
957 // OptionValueDictionary.
959 OptionValueSP data_dictionary_sp(ReadDictionary(test_file, out_stream));
960 if (!data_dictionary_sp) {
962 "Instruction::TestEmulation: Error reading Dictionary Object.\n");
969 OptionValueDictionary *data_dictionary =
970 data_dictionary_sp->GetAsDictionary();
971 static ConstString description_key("assembly_string");
972 static ConstString triple_key("triple");
974 OptionValueSP value_sp = data_dictionary->GetValueForKey(description_key);
977 out_stream->Printf("Instruction::TestEmulation: Test file does not "
978 "contain description string.\n");
982 SetDescription(value_sp->GetStringValue());
984 value_sp = data_dictionary->GetValueForKey(triple_key);
987 "Instruction::TestEmulation: Test file does not contain triple.\n");
992 arch.SetTriple(llvm::Triple(value_sp->GetStringValue()));
// The emulator plugin performs the actual test; without one, `success`
// keeps its initial value of false.
994 bool success = false;
995 std::unique_ptr<EmulateInstruction> insn_emulator_up(
996 EmulateInstruction::FindPlugin(arch, eInstructionTypeAny, nullptr));
997 if (insn_emulator_up)
999 insn_emulator_up->TestEmulation(out_stream, arch, data_dictionary);
1002 out_stream->Printf("Emulation test succeeded.");
1004 out_stream->Printf("Emulation test failed.");
// Emulates this instruction using caller-supplied memory/register callbacks.
//
// An EmulateInstruction plugin is looked up for `arch` (any instruction
// type). When one is found it receives `baton`, the four read/write
// callbacks, and this instruction's opcode and address; the result of
// EvaluateInstruction(evaluate_options) is returned.
1009 bool Instruction::Emulate(
1010 const ArchSpec &arch, uint32_t evaluate_options, void *baton,
1011 EmulateInstruction::ReadMemoryCallback read_mem_callback,
1012 EmulateInstruction::WriteMemoryCallback write_mem_callback,
1013 EmulateInstruction::ReadRegisterCallback read_reg_callback,
1014 EmulateInstruction::WriteRegisterCallback write_reg_callback) {
1015 std::unique_ptr<EmulateInstruction> insn_emulator_up(
1016 EmulateInstruction::FindPlugin(arch, eInstructionTypeAny, nullptr));
1017 if (insn_emulator_up) {
1018 insn_emulator_up->SetBaton(baton);
1019 insn_emulator_up->SetCallbacks(read_mem_callback, write_mem_callback,
1020 read_reg_callback, write_reg_callback);
1021 insn_emulator_up->SetInstruction(GetOpcode(), GetAddress(), nullptr);
1022 return insn_emulator_up->EvaluateInstruction(evaluate_options);
// Forwards to Opcode::GetData for this instruction's opcode, passing `data`
// through and returning that call's result.
1028 uint32_t Instruction::GetData(DataExtractor &data) {
1029 return m_opcode.GetData(data);
// Constructs an instruction list with an empty instruction collection.
1032 InstructionList::InstructionList() : m_instructions() {}
// Defaulted destructor, defined out of line in this file.
1034 InstructionList::~InstructionList() = default;
// Returns the number of instructions currently held in the list.
1036 size_t InstructionList::GetSize() const { return m_instructions.size(); }
// Returns the largest opcode byte size among the instructions in the list,
// or 0 when the list is empty.
1038 uint32_t InstructionList::GetMaxOpcocdeByteSize() const {
1039 uint32_t max_inst_size = 0;
1040 collection::const_iterator pos, end;
1041 for (pos = m_instructions.begin(), end = m_instructions.end(); pos != end;
1043 uint32_t inst_size = (*pos)->GetOpcode().GetByteSize();
1044 if (max_inst_size < inst_size)
1045 max_inst_size = inst_size;
1047 return max_inst_size;
// Fetches the instruction at position `idx`. The shared pointer is only
// assigned when `idx` is within bounds, so an out-of-range index leaves it
// empty.
1050 InstructionSP InstructionList::GetInstructionAtIndex(size_t idx) const {
1051 InstructionSP inst_sp;
1052 if (idx < m_instructions.size())
1053 inst_sp = m_instructions[idx];
// Dumps every instruction in the list to `s`.
//
// The opcode column width is based on the largest opcode in the list. The
// address format is the debugger's disassembly format when `exe_ctx` is
// non-null and has target scope; "${addr}: " is parsed into a local entry as
// the alternative. Each instruction is printed through Instruction::Dump with
// no symbol contexts and a maximum address text size of 0.
1057 void InstructionList::Dump(Stream *s, bool show_address, bool show_bytes,
1058 const ExecutionContext *exe_ctx) {
1059 const uint32_t max_opcode_byte_size = GetMaxOpcocdeByteSize();
1060 collection::const_iterator pos, begin, end;
1062 const FormatEntity::Entry *disassembly_format = nullptr;
1063 FormatEntity::Entry format;
1064 if (exe_ctx && exe_ctx->HasTargetScope()) {
1065 disassembly_format =
1066 exe_ctx->GetTargetRef().GetDebugger().GetDisassemblyFormat();
1068 FormatEntity::Parse("${addr}: ", format);
1069 disassembly_format = &format;
1072 for (begin = m_instructions.begin(), end = m_instructions.end(), pos = begin;
1073 pos != end; ++pos) {
1076 (*pos)->Dump(s, max_opcode_byte_size, show_address, show_bytes, exe_ctx,
1077 nullptr, nullptr, disassembly_format, 0);
// Removes all instructions from the list.
1081 void InstructionList::Clear() { m_instructions.clear(); }
// Appends `inst_sp` to the end of the instruction collection.
1083 void InstructionList::Append(lldb::InstructionSP &inst_sp) {
1085 m_instructions.push_back(inst_sp);
1089 InstructionList::GetIndexOfNextBranchInstruction(uint32_t start,
1091 bool ignore_calls) const {
1092 size_t num_instructions = m_instructions.size();
1094 uint32_t next_branch = UINT32_MAX;
1096 for (i = start; i < num_instructions; i++) {
1097 if (m_instructions[i]->DoesBranch()) {
1098 if (ignore_calls && m_instructions[i]->IsCall())
1105 // Hexagon needs the first instruction of the packet with the branch. Go
1106 // backwards until we find an instruction marked end-of-packet, or until we
1108 if (target.GetArchitecture().GetTriple().getArch() == llvm::Triple::hexagon) {
1109 // If we didn't find a branch, find the last packet start.
1110 if (next_branch == UINT32_MAX) {
1111 i = num_instructions - 1;
1118 uint32_t inst_bytes;
1119 bool prefer_file_cache = false; // Read from process if process is running
1120 lldb::addr_t load_addr = LLDB_INVALID_ADDRESS;
1121 target.ReadMemory(m_instructions[i]->GetAddress(), prefer_file_cache,
1122 &inst_bytes, sizeof(inst_bytes), error, &load_addr);
1123 // If we have an error reading memory, return start
1124 if (!error.Success())
1126 // check if this is the last instruction in a packet bits 15:14 will be
1127 // 11b or 00b for a duplex
1128 if (((inst_bytes & 0xC000) == 0xC000) ||
1129 ((inst_bytes & 0xC000) == 0x0000)) {
1130 // instruction after this should be the start of next packet
1131 next_branch = i + 1;
1136 if (next_branch == UINT32_MAX) {
1137 // We couldn't find the previous packet, so return start
1138 next_branch = start;
1145 InstructionList::GetIndexOfInstructionAtAddress(const Address &address) {
1146 size_t num_instructions = m_instructions.size();
1147 uint32_t index = UINT32_MAX;
1148 for (size_t i = 0; i < num_instructions; i++) {
1149 if (m_instructions[i]->GetAddress() == address) {
1158 InstructionList::GetIndexOfInstructionAtLoadAddress(lldb::addr_t load_addr,
1161 address.SetLoadAddress(load_addr, &target);
1162 return GetIndexOfInstructionAtAddress(address);
1165 size_t Disassembler::ParseInstructions(const ExecutionContext *exe_ctx,
1166 const AddressRange &range,
1167 Stream *error_strm_ptr,
1168 bool prefer_file_cache) {
1170 Target *target = exe_ctx->GetTargetPtr();
1171 const addr_t byte_size = range.GetByteSize();
1172 if (target == nullptr || byte_size == 0 ||
1173 !range.GetBaseAddress().IsValid())
1176 auto data_sp = std::make_shared<DataBufferHeap>(byte_size, '\0');
1179 lldb::addr_t load_addr = LLDB_INVALID_ADDRESS;
1180 const size_t bytes_read = target->ReadMemory(
1181 range.GetBaseAddress(), prefer_file_cache, data_sp->GetBytes(),
1182 data_sp->GetByteSize(), error, &load_addr);
1184 if (bytes_read > 0) {
1185 if (bytes_read != data_sp->GetByteSize())
1186 data_sp->SetByteSize(bytes_read);
1187 DataExtractor data(data_sp, m_arch.GetByteOrder(),
1188 m_arch.GetAddressByteSize());
1189 const bool data_from_file = load_addr == LLDB_INVALID_ADDRESS;
1190 return DecodeInstructions(range.GetBaseAddress(), data, 0, UINT32_MAX,
1191 false, data_from_file);
1192 } else if (error_strm_ptr) {
1193 const char *error_cstr = error.AsCString();
1195 error_strm_ptr->Printf("error: %s\n", error_cstr);
1198 } else if (error_strm_ptr) {
1199 error_strm_ptr->PutCString("error: invalid execution context\n");
1204 size_t Disassembler::ParseInstructions(const ExecutionContext *exe_ctx,
1205 const Address &start,
1206 uint32_t num_instructions,
1207 bool prefer_file_cache) {
1208 m_instruction_list.Clear();
1210 if (exe_ctx == nullptr || num_instructions == 0 || !start.IsValid())
1213 Target *target = exe_ctx->GetTargetPtr();
1214 // Calculate the max buffer size we will need in order to disassemble
1215 const addr_t byte_size = num_instructions * m_arch.GetMaximumOpcodeByteSize();
1217 if (target == nullptr || byte_size == 0)
1220 DataBufferHeap *heap_buffer = new DataBufferHeap(byte_size, '\0');
1221 DataBufferSP data_sp(heap_buffer);
1224 lldb::addr_t load_addr = LLDB_INVALID_ADDRESS;
1225 const size_t bytes_read =
1226 target->ReadMemory(start, prefer_file_cache, heap_buffer->GetBytes(),
1227 byte_size, error, &load_addr);
1229 const bool data_from_file = load_addr == LLDB_INVALID_ADDRESS;
1231 if (bytes_read == 0)
1233 DataExtractor data(data_sp, m_arch.GetByteOrder(),
1234 m_arch.GetAddressByteSize());
1236 const bool append_instructions = true;
1237 DecodeInstructions(start, data, 0, num_instructions, append_instructions,
1240 return m_instruction_list.GetSize();
1243 // Disassembler copy constructor
1244 Disassembler::Disassembler(const ArchSpec &arch, const char *flavor)
1245 : m_arch(arch), m_instruction_list(), m_base_addr(LLDB_INVALID_ADDRESS),
1247 if (flavor == nullptr)
1248 m_flavor.assign("default");
1250 m_flavor.assign(flavor);
1252 // If this is an arm variant that can only include thumb (T16, T32)
1253 // instructions, force the arch triple to be "thumbv.." instead of "armv..."
1254 if (arch.IsAlwaysThumbInstructions()) {
1255 std::string thumb_arch_name(arch.GetTriple().getArchName().str());
1256 // Replace "arm" with "thumb" so we get all thumb variants correct
1257 if (thumb_arch_name.size() > 3) {
1258 thumb_arch_name.erase(0, 3);
1259 thumb_arch_name.insert(0, "thumb");
1261 m_arch.SetTriple(thumb_arch_name.c_str());
1265 Disassembler::~Disassembler() = default;
1267 InstructionList &Disassembler::GetInstructionList() {
1268 return m_instruction_list;
1271 const InstructionList &Disassembler::GetInstructionList() const {
1272 return m_instruction_list;
1275 // Class PseudoInstruction
1277 PseudoInstruction::PseudoInstruction()
1278 : Instruction(Address(), AddressClass::eUnknown), m_description() {}
1280 PseudoInstruction::~PseudoInstruction() = default;
1282 bool PseudoInstruction::DoesBranch() {
1283 // This is NOT a valid question for a pseudo instruction.
1287 bool PseudoInstruction::HasDelaySlot() {
1288 // This is NOT a valid question for a pseudo instruction.
1292 size_t PseudoInstruction::Decode(const lldb_private::Disassembler &disassembler,
1293 const lldb_private::DataExtractor &data,
1294 lldb::offset_t data_offset) {
1295 return m_opcode.GetByteSize();
1298 void PseudoInstruction::SetOpcode(size_t opcode_size, void *opcode_data) {
1302 switch (opcode_size) {
1304 uint8_t value8 = *((uint8_t *)opcode_data);
1305 m_opcode.SetOpcode8(value8, eByteOrderInvalid);
1309 uint16_t value16 = *((uint16_t *)opcode_data);
1310 m_opcode.SetOpcode16(value16, eByteOrderInvalid);
1314 uint32_t value32 = *((uint32_t *)opcode_data);
1315 m_opcode.SetOpcode32(value32, eByteOrderInvalid);
1319 uint64_t value64 = *((uint64_t *)opcode_data);
1320 m_opcode.SetOpcode64(value64, eByteOrderInvalid);
1328 void PseudoInstruction::SetDescription(llvm::StringRef description) {
1329 m_description = description;
1332 Instruction::Operand Instruction::Operand::BuildRegister(ConstString &r) {
1334 ret.m_type = Type::Register;
1339 Instruction::Operand Instruction::Operand::BuildImmediate(lldb::addr_t imm,
1342 ret.m_type = Type::Immediate;
1343 ret.m_immediate = imm;
1344 ret.m_negative = neg;
1348 Instruction::Operand Instruction::Operand::BuildImmediate(int64_t imm) {
1350 ret.m_type = Type::Immediate;
1352 ret.m_immediate = -imm;
1353 ret.m_negative = true;
1355 ret.m_immediate = imm;
1356 ret.m_negative = false;
1361 Instruction::Operand
1362 Instruction::Operand::BuildDereference(const Operand &ref) {
1364 ret.m_type = Type::Dereference;
1365 ret.m_children = {ref};
1369 Instruction::Operand Instruction::Operand::BuildSum(const Operand &lhs,
1370 const Operand &rhs) {
1372 ret.m_type = Type::Sum;
1373 ret.m_children = {lhs, rhs};
1377 Instruction::Operand Instruction::Operand::BuildProduct(const Operand &lhs,
1378 const Operand &rhs) {
1380 ret.m_type = Type::Product;
1381 ret.m_children = {lhs, rhs};
1385 std::function<bool(const Instruction::Operand &)>
1386 lldb_private::OperandMatchers::MatchBinaryOp(
1387 std::function<bool(const Instruction::Operand &)> base,
1388 std::function<bool(const Instruction::Operand &)> left,
1389 std::function<bool(const Instruction::Operand &)> right) {
1390 return [base, left, right](const Instruction::Operand &op) -> bool {
1391 return (base(op) && op.m_children.size() == 2 &&
1392 ((left(op.m_children[0]) && right(op.m_children[1])) ||
1393 (left(op.m_children[1]) && right(op.m_children[0]))));
1397 std::function<bool(const Instruction::Operand &)>
1398 lldb_private::OperandMatchers::MatchUnaryOp(
1399 std::function<bool(const Instruction::Operand &)> base,
1400 std::function<bool(const Instruction::Operand &)> child) {
1401 return [base, child](const Instruction::Operand &op) -> bool {
1402 return (base(op) && op.m_children.size() == 1 && child(op.m_children[0]));
1406 std::function<bool(const Instruction::Operand &)>
1407 lldb_private::OperandMatchers::MatchRegOp(const RegisterInfo &info) {
1408 return [&info](const Instruction::Operand &op) {
1409 return (op.m_type == Instruction::Operand::Type::Register &&
1410 (op.m_register == ConstString(info.name) ||
1411 op.m_register == ConstString(info.alt_name)));
1415 std::function<bool(const Instruction::Operand &)>
1416 lldb_private::OperandMatchers::FetchRegOp(ConstString ®) {
1417 return [®](const Instruction::Operand &op) {
1418 if (op.m_type != Instruction::Operand::Type::Register) {
1421 reg = op.m_register;
1426 std::function<bool(const Instruction::Operand &)>
1427 lldb_private::OperandMatchers::MatchImmOp(int64_t imm) {
1428 return [imm](const Instruction::Operand &op) {
1429 return (op.m_type == Instruction::Operand::Type::Immediate &&
1430 ((op.m_negative && op.m_immediate == (uint64_t)-imm) ||
1431 (!op.m_negative && op.m_immediate == (uint64_t)imm)));
1435 std::function<bool(const Instruction::Operand &)>
1436 lldb_private::OperandMatchers::FetchImmOp(int64_t &imm) {
1437 return [&imm](const Instruction::Operand &op) {
1438 if (op.m_type != Instruction::Operand::Type::Immediate) {
1441 if (op.m_negative) {
1442 imm = -((int64_t)op.m_immediate);
1444 imm = ((int64_t)op.m_immediate);
1450 std::function<bool(const Instruction::Operand &)>
1451 lldb_private::OperandMatchers::MatchOpType(Instruction::Operand::Type type) {
1452 return [type](const Instruction::Operand &op) { return op.m_type == type; };