2 * Copyright (c) 2016 Cavium
5 * This software was developed by Semihalf.
7 * Redistribution and use in source and binary forms, with or without
8 * modification, are permitted provided that the following conditions
10 * 1. Redistributions of source code must retain the above copyright
11 * notice, this list of conditions and the following disclaimer.
12 * 2. Redistributions in binary form must reproduce the above copyright
13 * notice, this list of conditions and the following disclaimer in the
14 * documentation and/or other materials provided with the distribution.
16 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
17 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
18 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
19 * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
20 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
21 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
22 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
23 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
24 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
25 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
29 #include <sys/cdefs.h>
30 __FBSDID("$FreeBSD$");
31 #include <sys/param.h>
33 #include <sys/systm.h>
34 #include <machine/disassem.h>
35 #include <machine/armreg.h>
/*
 * Size of the name[] buffer in struct arm64_insn_token. The format-string
 * parser reports "token too long" when its copy index reaches this value.
 */
38 #define ARM64_MAX_TOKEN_LEN 8
/* Number of slots in the tokens[] array carried by each struct arm64_insn. */
39 #define ARM64_MAX_TOKEN_CNT 10
/*
 * Name strings "w0" through "w30" followed by "wSP" (32 entries).
 * NOTE(review): presumably indexed by register number through arm64_reg()
 * for 32-bit accesses - confirm against the arm64_reg() body.
 */
41 static const char *w_reg[] = {
42 "w0", "w1", "w2", "w3", "w4", "w5", "w6", "w7",
43 "w8", "w9", "w10", "w11", "w12", "w13", "w14", "w15",
44 "w16", "w17", "w18", "w19", "w20", "w21", "w22", "w23",
45 "w24", "w25", "w26", "w27", "w28", "w29", "w30", "wSP",
/*
 * Name strings "x0" through "x29", then "LR" at index 30 and "SP" at
 * index 31 (32 entries).
 * NOTE(review): presumably indexed by register number through arm64_reg()
 * for 64-bit accesses - confirm against the arm64_reg() body.
 */
48 static const char *x_reg[] = {
49 "x0", "x1", "x2", "x3", "x4", "x5", "x6", "x7",
50 "x8", "x9", "x10", "x11", "x12", "x13", "x14", "x15",
51 "x16", "x17", "x18", "x19", "x20", "x21", "x22", "x23",
52 "x24", "x25", "x26", "x27", "x28", "x29", "LR", "SP",
/*
 * Mnemonics for a 2-bit shift-type field; disasm() indexes this table with
 * the decoded SHIFT operand. Four entries cover every 2-bit value.
 * NOTE(review): "RSV" presumably marks the reserved encoding - confirm.
 */
55 static const char *shift_2[] = {
56 "LSL", "LSR", "ASR", "RSV"
60 * Structure representing a single token (operand) inside an instruction.
61 * name - name of operand
62 * pos - position within the instruction (in bits)
63 * len - operand length (in bits)
65 struct arm64_insn_token {
/*
 * Zero-filled and then written by arm64_disasm_generate_masks(); compared
 * with strcmp() in arm64_disasm_read_token().
 */
66 char name[ARM64_MAX_TOKEN_LEN];
72 * Define generic types for instruction printing.
/*
 * disasm() switches on this value to choose how a matched instruction's
 * operands are read and printed.
 */
74 enum arm64_format_type {
75 TYPE_01, /* OP <RD>, <RN>, <RM>{, <shift [LSL, LSR, ASR]> #<imm>} SF32/64
76 OP <RD>, <RN>, #<imm>{, <shift [0, 12]>} SF32/64 */
80 * Structure representing a single parsed instruction format.
82 * format - opcode format in a human-readable way
83 * type - syntax type for printing
84 * special_ops - special options passed to a printer (if any)
85 * mask - bitmask for instruction matching
86 * pattern - pattern to look for
87 * tokens - array of tokens (operands) inside instruction
/* Selects the printing branch taken by disasm() for this entry. */
92 enum arm64_format_type type;
/*
 * Operand descriptors; name, pos and len of each slot are filled in by
 * arm64_disasm_generate_masks() from the entry's format string.
 */
96 struct arm64_insn_token tokens[ARM64_MAX_TOKEN_CNT];
100 * Specify the instruction opcode format in a human-readable way. Use the
101 * notation from the ARM Architecture Reference Manual for ARMv8-A.
103 * Format string description:
104 * Groups must be separated by "|". A group made only of 0/1 digits is used
105 * to generate the mask and pattern for instruction matching. A group
106 * containing an operand token (in the format NAME(length_bits)) is used to
107 * retrieve that operand's data from the instruction. Names here must be
108 * meaningful and match the ones used in the Manual.
111 * SF - "0" represents 32-bit access, "1" represents 64-bit access
112 * SHIFT - type of shift (instruction dependent)
113 * IMM - immediate value
114 * Rx - register number
/*
 * Table of known instruction encodings. Each initializer supplies the
 * mnemonic, the format string and the printing type; the trailing 0 is
 * presumably the special_ops value - confirm against the struct layout.
 * Both arm64_disasm_generate_masks() and disasm() walk the table until they
 * reach an entry whose name is NULL.
 *
 * Entry order matters: the fixed bits of the "mov" entry are the
 * add-immediate encoding with SHIFT and IMM all zero, so "mov" has to be
 * listed before the general add-immediate entry for it to be chosen
 * (disasm() appears to stop at the first matching entry).
 */
116 static struct arm64_insn arm64_i[] = {
117 { "add", "SF(1)|0001011|SHIFT(2)|0|RM(5)|IMM(6)|RN(5)|RD(5)", TYPE_01, 0 },
118 { "mov", "SF(1)|001000100000000000000|RN(5)|RD(5)", TYPE_01, 0 },
119 { "add", "SF(1)|0010001|SHIFT(2)|IMM(12)|RN(5)|RD(5)", TYPE_01, 0 },
/*
 * Parse the format string of every entry in "tab" (the walk ends at the
 * first entry whose name is NULL) and derive the data disasm() needs:
 *  - literal 0/1 characters contribute to the entry's mask and pattern;
 *  - NAME(len) groups are recorded in tab->tokens[] as name, pos and len.
 * The bit cursor "a" starts at the most significant bit,
 * (INSN_SIZE * NBBY) - 1, and must end at -1 once the string is consumed.
 * Entries that fail this check, or that raised a parse error, get the
 * pattern 0xffffffff and an error message so they are not matched.
 *
 * tab - instruction table to process; its entries are modified in place.
 */
124 arm64_disasm_generate_masks(struct arm64_insn *tab)
133 while (tab->name != NULL) {
136 format = tab->format;
141 * For each entry analyze format strings from the
142 * left (i.e. from the MSB).
144 a = (INSN_SIZE * NBBY) - 1;
145 while (*format != '\0' && (a >= 0)) {
148 /* Bit is 0, add to mask and pattern */
154 /* Bit is 1, add to mask and pattern */
165 /* Token found, copy the name */
/* Clearing the whole buffer first keeps a shorter name NUL-terminated. */
166 memset(tab->tokens[token].name, 0,
167 sizeof(tab->tokens[token].name));
169 while (*format != '(') {
170 tab->tokens[token].name[i] = *format;
173 if (i >= ARM64_MAX_TOKEN_LEN) {
174 printf("ERROR: token too long in op %s\n",
183 /* Read the length value */
184 ret = sscanf(format, "(%d)", &len);
/*
 * NOTE(review): in the visible statements tokens[token].name has already
 * been written above before this bound check runs - confirm that token can
 * never equal ARM64_MAX_TOKEN_CNT when the name copy starts.
 * NOTE(review): the message below reads "to many"; "too many" is
 * presumably intended.
 */
186 if (token >= ARM64_MAX_TOKEN_CNT) {
187 printf("ERROR: to many tokens in op %s\n",
/* pos is later used as the right-shift count in arm64_disasm_read_token(). */
194 tab->tokens[token].pos = a + 1;
195 tab->tokens[token].len = len;
199 /* Skip to the end of the token */
200 while (*format != 0 && *format != '|')
205 /* Write mask and pattern to the instruction array */
210 * If we got here, format string must be parsed and "a"
211 * should point to -1. If it's not, wrong number of bits
212 * in format string. Mark this as invalid and prevent
213 * from being matched.
215 if (*format != 0 || (a != -1) || (error != 0)) {
217 tab->pattern = 0xffffffff;
218 printf("ERROR: skipping instruction op %s\n",
/*
 * Extract the value of the operand named "token" from "opcode".
 *
 * insn   - instruction descriptor whose tokens[] array is searched
 * opcode - raw instruction word
 * token  - operand name, compared with strcmp() against each slot's name
 * val    - receives the extracted field value when a slot matches
 *
 * NOTE(review): callers treat a zero return as "token present" and a
 * non-zero return as "token absent"; confirm the exact return values.
 */
227 arm64_disasm_read_token(struct arm64_insn *insn, u_int opcode,
228 const char *token, int *val)
232 for (i = 0; i < ARM64_MAX_TOKEN_CNT; i++) {
233 if (strcmp(insn->tokens[i].name, token) == 0) {
/* Shift the field down to bit 0, then keep only its "len" low bits. */
234 *val = (opcode >> insn->tokens[i].pos &
235 ((1 << insn->tokens[i].len) - 1));
/*
 * Return the printable name of register "num"; disasm() prints the result
 * with "%s".
 * NOTE(review): presumably selects x_reg[num] when b64 is non-zero and
 * w_reg[num] otherwise - TODO confirm against the function body.
 */
244 arm64_reg(int b64, int num)
/*
 * Disassemble the instruction word at "loc".
 *
 * The word is fetched with di->di_readword() and compared against the
 * entries of arm64_i: an entry matches when its mask is non-zero and
 * (insn & mask) == pattern. A matching TYPE_01 entry is printed through
 * di->di_printf() as "op rd, rn" followed by either the RM register form or
 * the immediate form. When no entry matches, "undefined" and the raw word
 * are printed.
 *
 * di     - callbacks used to read memory and to print output
 * loc    - address of the instruction to decode
 * altfmt - not referenced in the visible statements
 *
 * Both visible return statements yield loc + INSN_SIZE.
 */
254 disasm(const struct disasm_interface *di, vm_offset_t loc, int altfmt)
256 struct arm64_insn *i_ptr = arm64_i;
260 int shift, rm, rd, rn, imm, sf;
263 /* Initialize defaults, all are 0 except SF indicating 64bit access */
264 shift = rd = rm = rn = imm = 0;
268 insn = di->di_readword(loc);
269 while (i_ptr->name) {
270 /* If mask is 0 then the parser was not initialized yet */
271 if ((i_ptr->mask != 0) &&
272 ((insn & i_ptr->mask) == i_ptr->pattern)) {
281 switch (i_ptr->type) {
283 /* OP <RD>, <RN>, <RM>{, <shift [LSL, LSR, ASR]> #<imm>} SF32/64
284 OP <RD>, <RN>, #<imm>{, <shift [0, 12]>} SF32/64 */
286 /* Mandatory tokens */
/* The results are OR-ed together so a single check covers all three reads. */
287 ret = arm64_disasm_read_token(i_ptr, insn, "SF", &sf);
288 ret |= arm64_disasm_read_token(i_ptr, insn, "RD", &rd);
289 ret |= arm64_disasm_read_token(i_ptr, insn, "RN", &rn);
291 printf("ERROR: Missing mandatory token for op %s type %d\n",
292 i_ptr->name, i_ptr->type);
296 /* Optional tokens */
/* IMM and SHIFT keep their zero defaults when the entry lacks the token. */
297 arm64_disasm_read_token(i_ptr, insn, "IMM", &imm);
298 arm64_disasm_read_token(i_ptr, insn, "SHIFT", &shift);
299 rm_absent = arm64_disasm_read_token(i_ptr, insn, "RM", &rm);
301 di->di_printf("%s\t%s, %s", i_ptr->name, arm64_reg(sf, rd),
304 /* If RM is present use it, otherwise use immediate notation */
305 if (rm_absent == 0) {
306 di->di_printf(", %s", arm64_reg(sf, rm));
/* SHIFT is declared as a 2-bit field, so it stays within shift_2[]. */
308 di->di_printf(", %s #%d", shift_2[shift], imm);
310 if (imm != 0 || shift != 0)
311 di->di_printf(", #0x%x", imm);
313 di->di_printf(" LSL #12");
321 return(loc + INSN_SIZE);
324 di->di_printf("undefined\t%08x\n", insn);
325 return(loc + INSN_SIZE);
328 /* Parse format strings at the very beginning */
/*
 * Registers arm64_disasm_generate_masks() with arm64_i as its argument, at
 * subsystem SI_SUB_DDB_SERVICES and order SI_ORDER_FIRST. disasm() skips
 * entries whose mask is still 0, i.e. entries this hook has not processed.
 */
329 SYSINIT(arm64_disasm_generate_masks, SI_SUB_DDB_SERVICES,
330 SI_ORDER_FIRST, arm64_disasm_generate_masks, arm64_i);