2 * Copyright (c) 2016 Cavium
5 * This software was developed by Semihalf.
7 * Redistribution and use in source and binary forms, with or without
8 * modification, are permitted provided that the following conditions
10 * 1. Redistributions of source code must retain the above copyright
11 * notice, this list of conditions and the following disclaimer.
12 * 2. Redistributions in binary form must reproduce the above copyright
13 * notice, this list of conditions and the following disclaimer in the
14 * documentation and/or other materials provided with the distribution.
16 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
17 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
18 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
19 * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
20 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
21 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
22 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
23 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
24 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
25 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
29 #include <sys/cdefs.h>
30 __FBSDID("$FreeBSD$");
32 #include <sys/param.h>
33 #include <sys/systm.h>
35 #include <machine/armreg.h>
36 #include <machine/disassem.h>
40 #define ARM64_MAX_TOKEN_LEN 8
41 #define ARM64_MAX_TOKEN_CNT 10
43 #define ARM_INSN_SIZE_OFFSET 30
44 #define ARM_INSN_SIZE_MASK 0x3
46 /* Special options for instruction printing */
47 #define OP_SIGN_EXT (1UL << 0) /* Sign-extend immediate value */
48 #define OP_LITERAL (1UL << 1) /* Use literal (memory offset) */
49 #define OP_MULT_4 (1UL << 2) /* Multiply immediate by 4 */
50 #define OP_SF32 (1UL << 3) /* Force 32-bit access */
51 #define OP_SF_INV (1UL << 6) /* SF is inverted (1 means 32 bit access) */
52 #define OP_RD_SP (1UL << 7) /* Use sp for RD otherwise xzr */
53 #define OP_RT_SP (1UL << 8) /* Use sp for RT otherwise xzr */
54 #define OP_RN_SP (1UL << 9) /* Use sp for RN otherwise xzr */
55 #define OP_RM_SP (1UL << 10) /* Use sp for RM otherwise xzr */
57 static const char *w_reg[] = {
58 "w0", "w1", "w2", "w3", "w4", "w5", "w6", "w7",
59 "w8", "w9", "w10", "w11", "w12", "w13", "w14", "w15",
60 "w16", "w17", "w18", "w19", "w20", "w21", "w22", "w23",
61 "w24", "w25", "w26", "w27", "w28", "w29", "w30"
64 static const char *x_reg[] = {
65 "x0", "x1", "x2", "x3", "x4", "x5", "x6", "x7",
66 "x8", "x9", "x10", "x11", "x12", "x13", "x14", "x15",
67 "x16", "x17", "x18", "x19", "x20", "x21", "x22", "x23",
68 "x24", "x25", "x26", "x27", "x28", "x29", "lr"
71 static const char *shift_2[] = {
72 "lsl", "lsr", "asr", "rsv"
76 * Structure representing single token (operand) inside instruction.
77 * name - name of operand
78 * pos - position within the instruction (in bits)
79 * len - operand length (in bits)
81 struct arm64_insn_token {
82 char name[ARM64_MAX_TOKEN_LEN];
88 * Define generic types for instruction printing.
90 enum arm64_format_type {
92 * OP <RD>, <RN>, <RM>{, <shift [LSL, LSR, ASR]> #imm} SF32/64
93 * OP <RD>, <RN>, #<imm>{, <shift [0, 12]>} SF32/64
94 * OP <RD>, <RM> {, <shift> #<imm> }
95 * OP <RN>, <RM> {, <shift> #<imm> }
100 * OP <RT>, [<RN>, #<imm>]{!} SF32/64
101 * OP <RT>, [<RN>], #<imm>{!} SF32/64
102 * OP <RT>, <RN>, <RM> {, EXTEND AMOUNT }
106 /* OP <RT>, #imm SF32/64 */
111 * Structure representing single parsed instruction format.
113 * format - opcode format in a human-readable way
114 * type - syntax type for printing
115 * special_ops - special options passed to a printer (if any)
116 * mask - bitmask for instruction matching
117 * pattern - pattern to look for
118 * tokens - array of tokens (operands) inside instruction
123 enum arm64_format_type type;
124 uint64_t special_ops;
127 struct arm64_insn_token tokens[ARM64_MAX_TOKEN_CNT];
131 * Specify instruction opcode format in a human-readable way. Use notation
132 * obtained from ARM Architecture Reference Manual for ARMv8-A.
134 * Format string description:
135 * Groups must be separated by "|". A group made of 0/1 digits is used to
136 * generate the mask and pattern for instruction matching. A group containing
137 * an operand token (in the format NAME(length_bits)) is used to retrieve the
138 * operand data from the instruction. Names here must be meaningful
139 * and match the ones described in the Manual.
142 * SF - "0" represents 32-bit access, "1" represents 64-bit access
143 * SHIFT - type of shift (instruction dependent)
144 * IMM - immediate value
145 * Rx - register number
146 * OPTION - command specific options
147 * SCALE - scaling of immediate value
149 static struct arm64_insn arm64_i[] = {
150 { "add", "SF(1)|0001011|SHIFT(2)|0|RM(5)|IMM(6)|RN(5)|RD(5)",
151 TYPE_01, 0 }, /* add shifted register */
152 { "mov", "SF(1)|001000100000000000000|RN(5)|RD(5)",
153 TYPE_01, OP_RD_SP | OP_RN_SP }, /* mov (to/from sp) */
154 { "add", "SF(1)|0010001|SHIFT(2)|IMM(12)|RN(5)|RD(5)",
155 TYPE_01, OP_RD_SP | OP_RN_SP }, /* add immediate */
156 { "cmn", "SF(1)|0101011|SHIFT(2)|0|RM(5)|IMM(6)|RN(5)|11111",
157 TYPE_01, 0 }, /* cmn shifted register */
158 { "adds", "SF(1)|0101011|SHIFT(2)|0|RM(5)|IMM(6)|RN(5)|RD(5)",
159 TYPE_01, 0 }, /* adds shifted register */
160 { "ldr", "1|SF(1)|111000010|IMM(9)|OPTION(2)|RN(5)|RT(5)",
161 TYPE_02, OP_SIGN_EXT | OP_RN_SP }, /* ldr immediate post/pre index */
162 { "ldr", "1|SF(1)|11100101|IMM(12)|RN(5)|RT(5)",
163 TYPE_02, OP_RN_SP }, /* ldr immediate unsigned */
164 { "ldr", "1|SF(1)|111000011|RM(5)|OPTION(3)|SCALE(1)|10|RN(5)|RT(5)",
165 TYPE_02, OP_RN_SP }, /* ldr register */
166 { "ldr", "0|SF(1)|011000|IMM(19)|RT(5)",
167 TYPE_03, OP_SIGN_EXT | OP_LITERAL | OP_MULT_4 }, /* ldr literal */
168 { "ldrb", "00|111000010|IMM(9)|OPTION(2)|RN(5)|RT(5)",
169 TYPE_02, OP_SIGN_EXT | OP_SF32 | OP_RN_SP },
170 /* ldrb immediate post/pre index */
171 { "ldrb", "00|11100101|IMM(12)|RN(5)|RT(5)",
172 TYPE_02, OP_SF32 | OP_RN_SP }, /* ldrb immediate unsigned */
173 { "ldrb", "00|111000011|RM(5)|OPTION(3)|SCALE(1)|10|RN(5)|RT(5)",
174 TYPE_02, OP_SF32 | OP_RN_SP }, /* ldrb register */
175 { "ldrh", "01|111000010|IMM(9)|OPTION(2)|RN(5)|RT(5)", TYPE_02,
176 OP_SIGN_EXT | OP_SF32 | OP_RN_SP }, /* ldrh immediate post/pre index */
177 { "ldrh", "01|11100101|IMM(12)|RN(5)|RT(5)",
178 TYPE_02, OP_SF32 | OP_RN_SP }, /* ldrh immediate unsigned */
179 { "ldrh", "01|111000011|RM(5)|OPTION(3)|SCALE(1)|10|RN(5)|RT(5)",
180 TYPE_02, OP_SF32 | OP_RN_SP }, /* ldrh register */
181 { "ldrsb", "001110001|SF(1)|0|IMM(9)|OPTION(2)|RN(5)|RT(5)",
182 TYPE_02, OP_SIGN_EXT | OP_SF_INV | OP_RN_SP },
183 /* ldrsb immediate post/pre index */
184 { "ldrsb", "001110011|SF(1)|IMM(12)|RN(5)|RT(5)",\
185 TYPE_02, OP_SF_INV | OP_RN_SP }, /* ldrsb immediate unsigned */
186 { "ldrsb", "001110001|SF(1)|1|RM(5)|OPTION(3)|SCALE(1)|10|RN(5)|RT(5)",
187 TYPE_02, OP_SF_INV | OP_RN_SP }, /* ldrsb register */
188 { "ldrsh", "011110001|SF(1)|0|IMM(9)|OPTION(2)|RN(5)|RT(5)",
189 TYPE_02, OP_SIGN_EXT | OP_SF_INV | OP_RN_SP },
190 /* ldrsh immediate post/pre index */
191 { "ldrsh", "011110011|SF(1)|IMM(12)|RN(5)|RT(5)",
192 TYPE_02, OP_SF_INV | OP_RN_SP }, /* ldrsh immediate unsigned */
193 { "ldrsh", "011110001|SF(1)|1|RM(5)|OPTION(3)|SCALE(1)|10|RN(5)|RT(5)",
194 TYPE_02, OP_SF_INV | OP_RN_SP }, /* ldrsh register */
195 { "ldrsw", "10111000100|IMM(9)|OPTION(2)|RN(5)|RT(5)",
196 TYPE_02, OP_SIGN_EXT | OP_RN_SP }, /* ldrsw immediate post/pre index */
197 { "ldrsw", "1011100110|IMM(12)|RN(5)|RT(5)",
198 TYPE_02, OP_RN_SP }, /* ldrsw immediate unsigned */
199 { "ldrsw", "10111000101|RM(5)|OPTION(3)|SCALE(1)|10|RN(5)|RT(5)",
200 TYPE_02, OP_RN_SP }, /* ldrsw register */
201 { "ldrsw", "10011000|IMM(19)|RT(5)",
202 TYPE_03, OP_SIGN_EXT | OP_LITERAL | OP_MULT_4 }, /* ldrsw literal */
203 { "str", "1|SF(1)|111000000|IMM(9)|OPTION(2)|RN(5)|RT(5)",
204 TYPE_02, OP_SIGN_EXT | OP_RN_SP }, /* str immediate post/pre index */
205 { "str", "1|SF(1)|11100100|IMM(12)|RN(5)|RT(5)",
206 TYPE_02, OP_RN_SP }, /* str immediate unsigned */
207 { "str", "1|SF(1)|111000001|RM(5)|OPTION(3)|SCALE(1)|10|RN(5)|RT(5)",
208 TYPE_02, OP_RN_SP }, /* str register */
209 { "strb", "00111000000|IMM(9)|OPTION(2)|RN(5)|RT(5)",
210 TYPE_02, OP_SIGN_EXT | OP_SF32 | OP_RN_SP },
211 /* strb immediate post/pre index */
212 { "strb", "0011100100|IMM(12)|RN(5)|RT(5)",
213 TYPE_02, OP_SF32 | OP_RN_SP }, /* strb immediate unsigned */
214 { "strb", "00111000001|RM(5)|OPTION(3)|SCALE(1)|10|RN(5)|RT(5)",
215 TYPE_02, OP_SF32 | OP_RN_SP }, /* strb register */
216 { "strh", "01111000000|IMM(9)|OPTION(2)|RN(5)|RT(5)",
217 TYPE_02, OP_SF32 | OP_SIGN_EXT | OP_RN_SP },
218 /* strh immediate post/pre index */
219 { "strh", "0111100100|IMM(12)|RN(5)|RT(5)",
220 TYPE_02, OP_SF32 | OP_RN_SP },
221 /* strh immediate unsigned */
222 { "strh", "01111000001|RM(5)|OPTION(3)|SCALE(1)|10|RN(5)|RT(5)",
223 TYPE_02, OP_SF32 | OP_RN_SP },
225 { "neg", "SF(1)|1001011|SHIFT(2)|0|RM(5)|IMM(6)|11111|RD(5)",
226 TYPE_01, 0 }, /* neg shifted register */
227 { "sub", "SF(1)|1001011|SHIFT(2)|0|RM(5)|IMM(6)|RN(5)|RD(5)",
228 TYPE_01, 0 }, /* sub shifted register */
229 { "cmp", "SF(1)|1101011|SHIFT(2)|0|RM(5)|IMM(6)|RN(5)|11111",
230 TYPE_01, 0 }, /* cmp shifted register */
231 { "negs", "SF(1)|1101011|SHIFT(2)|0|RM(5)|IMM(6)|11111|RD(5)",
232 TYPE_01, 0 }, /* negs shifted register */
233 { "subs", "SF(1)|1101011|SHIFT(2)|0|RM(5)|IMM(6)|RN(5)|RD(5)",
234 TYPE_01, 0 }, /* subs shifted register */
239 arm64_disasm_generate_masks(struct arm64_insn *tab)
248 while (tab->name != NULL) {
251 format = tab->format;
256 * For each entry analyze format strings from the
257 * left (i.e. from the MSB).
259 a = (INSN_SIZE * NBBY) - 1;
260 while (*format != '\0' && (a >= 0)) {
263 /* Bit is 0, add to mask and pattern */
269 /* Bit is 1, add to mask and pattern */
280 /* Token found, copy the name */
281 memset(tab->tokens[token].name, 0,
282 sizeof(tab->tokens[token].name));
284 while (*format != '(') {
285 tab->tokens[token].name[i] = *format;
288 if (i >= ARM64_MAX_TOKEN_LEN) {
290 "token too long in op %s\n",
299 /* Read the length value */
300 ret = sscanf(format, "(%d)", &len);
302 if (token >= ARM64_MAX_TOKEN_CNT) {
304 "too many tokens in op %s\n",
311 tab->tokens[token].pos = a + 1;
312 tab->tokens[token].len = len;
316 /* Skip to the end of the token */
317 while (*format != 0 && *format != '|')
322 /* Write mask and pattern to the instruction array */
327 * If we got here, the format string must have been fully parsed and
328 * "a" should be equal to -1. If it is not, the format string describes
329 * the wrong number of bits. Mark the entry as invalid to prevent it
330 * from being matched.
332 if (*format != 0 || (a != -1) || (error != 0)) {
334 tab->pattern = 0xffffffff;
335 printf("ERROR: skipping instruction op %s\n",
344 arm64_disasm_read_token(struct arm64_insn *insn, u_int opcode,
345 const char *token, int *val)
349 for (i = 0; i < ARM64_MAX_TOKEN_CNT; i++) {
350 if (strcmp(insn->tokens[i].name, token) == 0) {
351 *val = (opcode >> insn->tokens[i].pos &
352 ((1 << insn->tokens[i].len) - 1));
361 arm64_disasm_read_token_sign_ext(struct arm64_insn *insn, u_int opcode,
362 const char *token, int *val)
367 for (i = 0; i < ARM64_MAX_TOKEN_CNT; i++) {
368 if (strcmp(insn->tokens[i].name, token) == 0) {
369 msk = (1 << insn->tokens[i].len) - 1;
370 *val = ((opcode >> insn->tokens[i].pos) & msk);
372 /* If the field's most significant (sign) bit is 1, sign-extend the value */
373 if (*val & (1 << (insn->tokens[i].len - 1)))
384 arm64_w_reg(int num, int wsp)
387 return (wsp != 0 ? "wsp" : "wzr");
392 arm64_x_reg(int num, int sp)
395 return (sp != 0 ? "sp" : "xzr");
400 arm64_reg(int b64, int num, int sp)
403 return (arm64_x_reg(num, sp));
404 return (arm64_w_reg(num, sp));
408 disasm(const struct disasm_interface *di, vm_offset_t loc, int altfmt)
410 struct arm64_insn *i_ptr = arm64_i;
414 int shift, rm, rt, rd, rn, imm, sf, idx, option, scale, amount;
416 bool rm_absent, rd_absent, rn_absent;
417 /* Indicate if immediate should be outside or inside brackets */
419 /* Print exclamation mark if pre-incremented */
421 /* Indicate if x31 register should be printed as sp or xzr */
422 int rm_sp, rt_sp, rd_sp, rn_sp;
424 /* Initialize defaults, all are 0 except SF indicating 64bit access */
425 shift = rd = rm = rn = imm = idx = option = amount = scale = 0;
430 insn = di->di_readword(loc);
431 while (i_ptr->name) {
432 /* If mask is 0 then the parser was not initialized yet */
433 if ((i_ptr->mask != 0) &&
434 ((insn & i_ptr->mask) == i_ptr->pattern)) {
444 if (i_ptr->special_ops & OP_SF32)
447 /* Global optional tokens */
448 arm64_disasm_read_token(i_ptr, insn, "SF", &sf);
449 if (i_ptr->special_ops & OP_SF_INV)
451 if (arm64_disasm_read_token(i_ptr, insn, "SIGN", &sign_ext) == 0)
452 sign_ext = 1 - sign_ext;
453 if (i_ptr->special_ops & OP_SIGN_EXT)
456 arm64_disasm_read_token_sign_ext(i_ptr, insn, "IMM", &imm);
458 arm64_disasm_read_token(i_ptr, insn, "IMM", &imm);
459 if (i_ptr->special_ops & OP_MULT_4)
462 rm_sp = i_ptr->special_ops & OP_RM_SP;
463 rt_sp = i_ptr->special_ops & OP_RT_SP;
464 rd_sp = i_ptr->special_ops & OP_RD_SP;
465 rn_sp = i_ptr->special_ops & OP_RN_SP;
467 /* Print opcode by type */
468 switch (i_ptr->type) {
471 * OP <RD>, <RN>, <RM>{, <shift [LSL, LSR, ASR]> #<imm>} SF32/64
472 * OP <RD>, <RN>, #<imm>{, <shift [0, 12]>} SF32/64
473 * OP <RD>, <RM> {, <shift> #<imm> }
474 * OP <RN>, <RM> {, <shift> #<imm> }
477 rd_absent = arm64_disasm_read_token(i_ptr, insn, "RD", &rd);
478 rn_absent = arm64_disasm_read_token(i_ptr, insn, "RN", &rn);
479 rm_absent = arm64_disasm_read_token(i_ptr, insn, "RM", &rm);
480 arm64_disasm_read_token(i_ptr, insn, "SHIFT", &shift);
482 di->di_printf("%s\t", i_ptr->name);
485 * If RD and RN are present, we will display the following
487 * - OP <RD>, <RN>, <RM>{, <shift [LSL, LSR, ASR]> #<imm>} SF32/64
488 * - OP <RD>, <RN>, #<imm>{, <shift [0, 12]>} SF32/64
489 * Otherwise if only RD is present:
490 * - OP <RD>, <RM> {, <shift> #<imm> }
491 * Otherwise if only RN is present:
492 * - OP <RN>, <RM> {, <shift> #<imm> }
494 if (!rd_absent && !rn_absent)
495 di->di_printf("%s, %s", arm64_reg(sf, rd, rd_sp),
496 arm64_reg(sf, rn, rn_sp));
498 di->di_printf("%s", arm64_reg(sf, rd, rd_sp));
500 di->di_printf("%s", arm64_reg(sf, rn, rn_sp));
502 /* If RM is present use it, otherwise use immediate notation */
504 di->di_printf(", %s", arm64_reg(sf, rm, rm_sp));
506 di->di_printf(", %s #%d", shift_2[shift], imm);
508 if (imm != 0 || shift != 0)
509 di->di_printf(", #0x%x", imm);
511 di->di_printf(" lsl #12");
516 * OP <RT>, [<RN>, #<imm>]{!} SF32/64
517 * OP <RT>, [<RN>], #<imm>{!} SF32/64
518 * OP <RT>, <RN>, <RM> {, EXTEND AMOUNT }
521 /* Mandatory tokens */
522 ret = arm64_disasm_read_token(i_ptr, insn, "RT", &rt);
523 ret |= arm64_disasm_read_token(i_ptr, insn, "RN", &rn);
526 "Missing mandatory token for op %s type %d\n",
527 i_ptr->name, i_ptr->type);
531 /* Optional tokens */
532 arm64_disasm_read_token(i_ptr, insn, "OPTION", &option);
533 arm64_disasm_read_token(i_ptr, insn, "SCALE", &scale);
534 rm_absent = arm64_disasm_read_token(i_ptr, insn, "RM", &rm);
538 * In unsigned operation, shift immediate value
539 * and reset options to default.
542 imm = imm << ((insn >> ARM_INSN_SIZE_OFFSET) &
562 di->di_printf("%s\t%s, ", i_ptr->name,
563 arm64_reg(sf, rt, rt_sp));
565 di->di_printf("[%s", arm64_reg(1, rn, rn_sp));
567 di->di_printf(", #%d", imm);
570 di->di_printf("[%s]", arm64_reg(1, rn, rn_sp));
572 di->di_printf(", #%d", imm);
577 /* Last bit of option field determines 32/64 bit offset */
578 di->di_printf("%s\t%s, [%s, %s", i_ptr->name,
579 arm64_reg(sf, rt, rt_sp), arm64_reg(1, rn, rn_sp),
580 arm64_reg(option & 1, rm, rm_sp));
585 /* Calculate amount, it's op(31:30) */
586 amount = (insn >> ARM_INSN_SIZE_OFFSET) &
592 di->di_printf(", uxtw #%d", amount);
596 di->di_printf(", lsl #%d", amount);
599 di->di_printf(", sxtw #%d", amount);
602 di->di_printf(", sxtx #%d", amount);
605 di->di_printf(", rsv");
614 /* OP <RT>, #imm SF32/64 */
616 /* Mandatory tokens */
617 ret = arm64_disasm_read_token(i_ptr, insn, "RT", &rt);
620 "Missing mandatory token for op %s type %d\n",
621 i_ptr->name, i_ptr->type);
625 di->di_printf("%s\t%s, ", i_ptr->name, arm64_reg(sf, rt, rt_sp));
626 if (i_ptr->special_ops & OP_LITERAL)
627 di->di_printf("0x%lx", loc + imm);
629 di->di_printf("#%d", imm);
637 return (loc + INSN_SIZE);
640 di->di_printf("undefined\t%08x\n", insn);
641 return (loc + INSN_SIZE);
644 /* Parse format strings at the very beginning */
645 SYSINIT(arm64_disasm_generate_masks, SI_SUB_DDB_SERVICES, SI_ORDER_FIRST,
646 arm64_disasm_generate_masks, arm64_i);