/*-
 * Copyright (c) 2016 Cavium
 *
 * This software was developed by Semihalf.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 *
 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
 * ARE DISCLAIMED.  IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
 * SUCH DAMAGE.
 */
29 #include <sys/cdefs.h>
30 __FBSDID("$FreeBSD$");
32 #include <sys/param.h>
33 #include <sys/systm.h>
35 #include <machine/armreg.h>
36 #include <machine/disassem.h>
/* Size of the buffer holding one token (operand) name */
#define	ARM64_MAX_TOKEN_LEN	8
/* Number of token slots available per instruction format */
#define	ARM64_MAX_TOKEN_CNT	10

/* Location of the two-bit size field, op(31:30), inside an opcode */
#define	ARM_INSN_SIZE_OFFSET	30
#define	ARM_INSN_SIZE_MASK	0x3

/* Special options for instruction printing */
#define	OP_SIGN_EXT	(1UL << 0)	/* Sign-extend immediate value */
#define	OP_LITERAL	(1UL << 1)	/* Use literal (memory offset) */
#define	OP_MULT_4	(1UL << 2)	/* Multiply immediate by 4 */
#define	OP_SF32		(1UL << 3)	/* Force 32-bit access */
#define	OP_SF_INV	(1UL << 6)	/* SF is inverted (1 means 32 bit access) */
/*
 * Names of the 32-bit general purpose registers, indexed by the
 * 5-bit register number taken from the opcode.
 */
static const char * const w_reg[] = {
	"w0", "w1", "w2", "w3", "w4", "w5", "w6", "w7",
	"w8", "w9", "w10", "w11", "w12", "w13", "w14", "w15",
	"w16", "w17", "w18", "w19", "w20", "w21", "w22", "w23",
	"w24", "w25", "w26", "w27", "w28", "w29", "w30", "wSP",
};
/*
 * Names of the 64-bit general purpose registers, indexed by the
 * 5-bit register number taken from the opcode.
 */
static const char * const x_reg[] = {
	"x0", "x1", "x2", "x3", "x4", "x5", "x6", "x7",
	"x8", "x9", "x10", "x11", "x12", "x13", "x14", "x15",
	"x16", "x17", "x18", "x19", "x20", "x21", "x22", "x23",
	"x24", "x25", "x26", "x27", "x28", "x29", "LR", "SP",
};
/* Shift type names, indexed by the two-bit SHIFT field of an opcode */
static const char * const shift_2[] = {
	"LSL", "LSR", "ASR", "RSV"
};
72 * Structure representing single token (operand) inside instruction.
73 * name - name of operand
74 * pos - position within the instruction (in bits)
75 * len - operand length (in bits)
77 struct arm64_insn_token {
78 char name[ARM64_MAX_TOKEN_LEN];
/*
 * Define generic types for instruction printing.
 */
enum arm64_format_type {
	/*
	 * OP <RD>, <RN>, <RM>{, <shift [LSL, LSR, ASR]> #imm} SF32/64
	 * OP <RD>, <RN>, #<imm>{, <shift [0, 12]>} SF32/64
	 */
	TYPE_01,

	/*
	 * OP <RT>, [<RN>, #<imm>]{!} SF32/64
	 * OP <RT>, [<RN>], #<imm>{!} SF32/64
	 * OP <RT>, <RN>, <RM> {, EXTEND AMOUNT }
	 */
	TYPE_02,

	/* OP <RT>, #imm SF32/64 */
	TYPE_03,
};
105 * Structure representing single parsed instruction format.
107 * format - opcode format in a human-readable way
108 * type - syntax type for printing
109 * special_ops - special options passed to a printer (if any)
110 * mask - bitmask for instruction matching
111 * pattern - pattern to look for
112 * tokens - array of tokens (operands) inside instruction
117 enum arm64_format_type type;
118 uint64_t special_ops;
121 struct arm64_insn_token tokens[ARM64_MAX_TOKEN_CNT];
125 * Specify instruction opcode format in a human-readable way. Use notation
126 * obtained from ARM Architecture Reference Manual for ARMv8-A.
128 * Format string description:
129 * Each group must be separated by "|". Group made of 0/1 is used to
130 * generate mask and pattern for instruction matching. Groups containing
131 * an operand token (in format NAME(length_bits)) are used to retrieve any
132 * operand data from the instruction. Names here must be meaningful
133 * and match the one described in the Manual.
136 * SF - "0" represents 32-bit access, "1" represents 64-bit access
137 * SHIFT - type of shift (instruction dependent)
138 * IMM - immediate value
139 * Rx - register number
140 * OPTION - command specific options
141 * SCALE - scaling of immediate value
143 static struct arm64_insn arm64_i[] = {
144 { "add", "SF(1)|0001011|SHIFT(2)|0|RM(5)|IMM(6)|RN(5)|RD(5)",
146 { "mov", "SF(1)|001000100000000000000|RN(5)|RD(5)",
148 { "add", "SF(1)|0010001|SHIFT(2)|IMM(12)|RN(5)|RD(5)",
150 { "ldr", "1|SF(1)|111000010|IMM(9)|OPTION(2)|RN(5)|RT(5)",
151 TYPE_02, OP_SIGN_EXT }, /* ldr immediate post/pre index */
152 { "ldr", "1|SF(1)|11100101|IMM(12)|RN(5)|RT(5)",
153 TYPE_02, 0 }, /* ldr immediate unsigned */
154 { "ldr", "1|SF(1)|111000011|RM(5)|OPTION(3)|SCALE(1)|10|RN(5)|RT(5)",
155 TYPE_02, 0 }, /* ldr register */
156 { "ldr", "0|SF(1)|011000|IMM(19)|RT(5)",
157 TYPE_03, OP_SIGN_EXT | OP_LITERAL | OP_MULT_4 }, /* ldr literal */
158 { "ldrb", "00|111000010|IMM(9)|OPTION(2)|RN(5)|RT(5)",
159 TYPE_02, OP_SIGN_EXT | OP_SF32 }, /* ldrb immediate post/pre index */
160 { "ldrb", "00|11100101|IMM(12)|RN(5)|RT(5)",
161 TYPE_02, OP_SF32 }, /* ldrb immediate unsigned */
162 { "ldrb", "00|111000011|RM(5)|OPTION(3)|SCALE(1)|10|RN(5)|RT(5)",
163 TYPE_02, OP_SF32 }, /* ldrb register */
164 { "ldrh", "01|111000010|IMM(9)|OPTION(2)|RN(5)|RT(5)", TYPE_02,
165 OP_SIGN_EXT | OP_SF32 }, /* ldrh immediate post/pre index */
166 { "ldrh", "01|11100101|IMM(12)|RN(5)|RT(5)",
167 TYPE_02, OP_SF32 }, /* ldrh immediate unsigned */
168 { "ldrh", "01|111000011|RM(5)|OPTION(3)|SCALE(1)|10|RN(5)|RT(5)",
169 TYPE_02, OP_SF32 }, /* ldrh register */
170 { "ldrsb", "001110001|SF(1)|0|IMM(9)|OPTION(2)|RN(5)|RT(5)",
171 TYPE_02, OP_SIGN_EXT | OP_SF_INV }, /* ldrsb immediate post/pre index */
172 { "ldrsb", "001110011|SF(1)|IMM(12)|RN(5)|RT(5)",\
173 TYPE_02, OP_SF_INV}, /* ldrsb immediate unsigned */
174 { "ldrsb", "001110001|SF(1)|1|RM(5)|OPTION(3)|SCALE(1)|10|RN(5)|RT(5)",
175 TYPE_02, OP_SF_INV }, /* ldrsb register */
176 { "ldrsh", "011110001|SF(1)|0|IMM(9)|OPTION(2)|RN(5)|RT(5)",
177 TYPE_02, OP_SIGN_EXT | OP_SF_INV }, /* ldrsh immediate post/pre index */
178 { "ldrsh", "011110011|SF(1)|IMM(12)|RN(5)|RT(5)",
179 TYPE_02, OP_SF_INV}, /* ldrsh immediate unsigned */
180 { "ldrsh", "011110001|SF(1)|1|RM(5)|OPTION(3)|SCALE(1)|10|RN(5)|RT(5)",
181 TYPE_02, OP_SF_INV }, /* ldrsh register */
182 { "ldrsw", "10111000100|IMM(9)|OPTION(2)|RN(5)|RT(5)",
183 TYPE_02, OP_SIGN_EXT }, /* ldrsw immediate post/pre index */
184 { "ldrsw", "1011100110|IMM(12)|RN(5)|RT(5)",
185 TYPE_02, 0 }, /* ldrsw immediate unsigned */
186 { "ldrsw", "10111000101|RM(5)|OPTION(3)|SCALE(1)|10|RN(5)|RT(5)",
187 TYPE_02, 0 }, /* ldrsw register */
188 { "ldrsw", "10011000|IMM(19)|RT(5)",
189 TYPE_03, OP_SIGN_EXT | OP_LITERAL | OP_MULT_4 }, /* ldr literal */
190 { "str", "1|SF(1)|111000000|IMM(9)|OPTION(2)|RN(5)|RT(5)",
191 TYPE_02, OP_SIGN_EXT }, /* str immediate post/pre index */
192 { "str", "1|SF(1)|11100100|IMM(12)|RN(5)|RT(5)",
193 TYPE_02, 0 }, /* str immediate unsigned */
194 { "str", "1|SF(1)|111000001|RM(5)|OPTION(3)|SCALE(1)|10|RN(5)|RT(5)",
195 TYPE_02, 0 }, /* str register */
196 { "strb", "00111000000|IMM(9)|OPTION(2)|RN(5)|RT(5)",
197 TYPE_02, OP_SIGN_EXT | OP_SF32 }, /* strb immediate post/pre index */
198 { "strb", "0011100100|IMM(12)|RN(5)|RT(5)",
199 TYPE_02, OP_SF32 }, /* strb immediate unsigned */
200 { "strb", "00111000001|RM(5)|OPTION(3)|SCALE(1)|10|RN(5)|RT(5)",
201 TYPE_02, OP_SF32 }, /* strb register */
202 { "strh", "01111000000|IMM(9)|OPTION(2)|RN(5)|RT(5)",
203 TYPE_02, OP_SF32 | OP_SIGN_EXT }, /* strh immediate post/pre index */
204 { "strh", "0111100100|IMM(12)|RN(5)|RT(5)",
205 TYPE_02, OP_SF32 }, /* immediate unsigned */
206 { "strh", "01111000001|RM(5)|OPTION(3)|SCALE(1)|10|RN(5)|RT(5)",
207 TYPE_02, OP_SF32 }, /* strh register */
212 arm64_disasm_generate_masks(struct arm64_insn *tab)
221 while (tab->name != NULL) {
224 format = tab->format;
229 * For each entry analyze format strings from the
230 * left (i.e. from the MSB).
232 a = (INSN_SIZE * NBBY) - 1;
233 while (*format != '\0' && (a >= 0)) {
236 /* Bit is 0, add to mask and pattern */
242 /* Bit is 1, add to mask and pattern */
253 /* Token found, copy the name */
254 memset(tab->tokens[token].name, 0,
255 sizeof(tab->tokens[token].name));
257 while (*format != '(') {
258 tab->tokens[token].name[i] = *format;
261 if (i >= ARM64_MAX_TOKEN_LEN) {
263 "token too long in op %s\n",
272 /* Read the length value */
273 ret = sscanf(format, "(%d)", &len);
275 if (token >= ARM64_MAX_TOKEN_CNT) {
277 "too many tokens in op %s\n",
284 tab->tokens[token].pos = a + 1;
285 tab->tokens[token].len = len;
289 /* Skip to the end of the token */
290 while (*format != 0 && *format != '|')
295 /* Write mask and pattern to the instruction array */
300 * If we got here, format string must be parsed and "a"
301 * should point to -1. If it's not, wrong number of bits
302 * in format string. Mark this as invalid and prevent
303 * from being matched.
305 if (*format != 0 || (a != -1) || (error != 0)) {
307 tab->pattern = 0xffffffff;
308 printf("ERROR: skipping instruction op %s\n",
317 arm64_disasm_read_token(struct arm64_insn *insn, u_int opcode,
318 const char *token, int *val)
322 for (i = 0; i < ARM64_MAX_TOKEN_CNT; i++) {
323 if (strcmp(insn->tokens[i].name, token) == 0) {
324 *val = (opcode >> insn->tokens[i].pos &
325 ((1 << insn->tokens[i].len) - 1));
334 arm64_disasm_read_token_sign_ext(struct arm64_insn *insn, u_int opcode,
335 const char *token, int *val)
340 for (i = 0; i < ARM64_MAX_TOKEN_CNT; i++) {
341 if (strcmp(insn->tokens[i].name, token) == 0) {
342 msk = (1 << insn->tokens[i].len) - 1;
343 *val = ((opcode >> insn->tokens[i].pos) & msk);
345 /* If last bit is 1, sign-extend the value */
346 if (*val & (1 << (insn->tokens[i].len - 1)))
357 arm64_reg(int b64, int num)
367 disasm(const struct disasm_interface *di, vm_offset_t loc, int altfmt)
369 struct arm64_insn *i_ptr = arm64_i;
373 int shift, rm, rt, rd, rn, imm, sf, idx, option, scale, amount;
376 /* Indicate if immediate should be outside or inside brackets */
378 /* Print exclamation mark if pre-incremented */
381 /* Initialize defaults, all are 0 except SF indicating 64bit access */
382 shift = rd = rm = rn = imm = idx = option = amount = scale = 0;
387 insn = di->di_readword(loc);
388 while (i_ptr->name) {
389 /* If mask is 0 then the parser was not initialized yet */
390 if ((i_ptr->mask != 0) &&
391 ((insn & i_ptr->mask) == i_ptr->pattern)) {
401 if (i_ptr->special_ops & OP_SF32)
404 /* Global optional tokens */
405 arm64_disasm_read_token(i_ptr, insn, "SF", &sf);
406 if (i_ptr->special_ops & OP_SF_INV)
408 if (arm64_disasm_read_token(i_ptr, insn, "SIGN", &sign_ext) == 0)
409 sign_ext = 1 - sign_ext;
410 if (i_ptr->special_ops & OP_SIGN_EXT)
413 arm64_disasm_read_token_sign_ext(i_ptr, insn, "IMM", &imm);
415 arm64_disasm_read_token(i_ptr, insn, "IMM", &imm);
416 if (i_ptr->special_ops & OP_MULT_4)
419 /* Print opcode by type */
420 switch (i_ptr->type) {
423 * OP <RD>, <RN>, <RM>{, <shift [LSL, LSR, ASR]> #<imm>} SF32/64
424 * OP <RD>, <RN>, #<imm>{, <shift [0, 12]>} SF32/64
427 /* Mandatory tokens */
428 ret = arm64_disasm_read_token(i_ptr, insn, "RD", &rd);
429 ret |= arm64_disasm_read_token(i_ptr, insn, "RN", &rn);
432 "Missing mandatory token for op %s type %d\n",
433 i_ptr->name, i_ptr->type);
437 /* Optional tokens */
438 arm64_disasm_read_token(i_ptr, insn, "SHIFT", &shift);
439 rm_absent = arm64_disasm_read_token(i_ptr, insn, "RM", &rm);
441 di->di_printf("%s\t%s, %s", i_ptr->name, arm64_reg(sf, rd),
444 /* If RM is present use it, otherwise use immediate notation */
445 if (rm_absent == 0) {
446 di->di_printf(", %s", arm64_reg(sf, rm));
448 di->di_printf(", %s #%d", shift_2[shift], imm);
450 if (imm != 0 || shift != 0)
451 di->di_printf(", #0x%x", imm);
453 di->di_printf(" LSL #12");
458 * OP <RT>, [<RN>, #<imm>]{!}] SF32/64
459 * OP <RT>, [<RN>], #<imm>{!} SF32/64
460 * OP <RT>, <RN>, <RM> {, EXTEND AMOUNT }
463 /* Mandatory tokens */
464 ret = arm64_disasm_read_token(i_ptr, insn, "RT", &rt);
465 ret |= arm64_disasm_read_token(i_ptr, insn, "RN", &rn);
468 "Missing mandatory token for op %s type %d\n",
469 i_ptr->name, i_ptr->type);
473 /* Optional tokens */
474 arm64_disasm_read_token(i_ptr, insn, "OPTION", &option);
475 arm64_disasm_read_token(i_ptr, insn, "SCALE", &scale);
476 rm_absent = arm64_disasm_read_token(i_ptr, insn, "RM", &rm);
480 * In unsigned operation, shift immediate value
481 * and reset options to default.
484 imm = imm << ((insn >> ARM_INSN_SIZE_OFFSET) &
504 di->di_printf("%s\t%s, ", i_ptr->name,
507 di->di_printf("[%s", arm64_reg(1, rn));
509 di->di_printf(", #%d", imm);
512 di->di_printf("[%s]", arm64_reg(1, rn));
514 di->di_printf(", #%d", imm);
519 /* Last bit of option field determines 32/64 bit offset */
520 di->di_printf("%s\t%s, [%s, %s", i_ptr->name,
521 arm64_reg(sf, rt), arm64_reg(1, rn),
522 arm64_reg(option & 1, rm));
527 /* Calculate amount, it's op(31:30) */
528 amount = (insn >> ARM_INSN_SIZE_OFFSET) &
534 di->di_printf(", uxtw #%d", amount);
538 di->di_printf(", lsl #%d", amount);
541 di->di_printf(", sxtw #%d", amount);
544 di->di_printf(", sxtx #%d", amount);
547 di->di_printf(", RSVD");
556 /* OP <RT>, #imm SF32/64 */
558 /* Mandatory tokens */
559 ret = arm64_disasm_read_token(i_ptr, insn, "RT", &rt);
562 "Missing mandatory token for op %s type %d\n",
563 i_ptr->name, i_ptr->type);
567 di->di_printf("%s\t%s, ", i_ptr->name, arm64_reg(sf, rt));
568 if (i_ptr->special_ops & OP_LITERAL)
569 di->di_printf("0x%lx", loc + imm);
571 di->di_printf("#%d", imm);
579 return (loc + INSN_SIZE);
582 di->di_printf("undefined\t%08x\n", insn);
583 return (loc + INSN_SIZE);
/* Parse format strings at the very beginning */
SYSINIT(arm64_disasm_generate_masks, SI_SUB_DDB_SERVICES, SI_ORDER_FIRST,
    arm64_disasm_generate_masks, arm64_i);