]> CyberLeo.Net >> Repos - FreeBSD/FreeBSD.git/blob - contrib/llvm/tools/lldb/tools/compact-unwind/compact-unwind-dumper.c
Update LLDB snapshot to upstream r225923 (git 2b588ecd)
[FreeBSD/FreeBSD.git] / contrib / llvm / tools / lldb / tools / compact-unwind / compact-unwind-dumper.c
1 #include <stdint.h>
2 #include <mach-o/loader.h>
3 #include <mach-o/compact_unwind_encoding.h>
4 #include <mach/machine.h>
5 #include <stdlib.h>
6 #include <stdbool.h>
7 #include <string.h>
8 #include <fcntl.h>
9 #include <sys/types.h>
10 #include <sys/mman.h>
11 #include <sys/errno.h>
12 #include <sys/stat.h>
13 #include <inttypes.h>
14 #include <stdio.h>
15 #include <mach-o/nlist.h>
16
// Extract the bit-field selected by `mask` from `value`: shift `value` right
// by the number of trailing zero bits in `mask`, then keep as many low bits
// as `mask` has bits set.  `mask` must be non-zero (__builtin_ctz(0) is
// undefined).  Both arguments are parenthesized so that expressions such as
// `a | b` can be passed safely, and the low-bit mask is built in 64 bits so
// that 31- and 32-bit wide fields do not overflow a signed int shift.
#define EXTRACT_BITS(value, mask) \
        ( ((value) >> __builtin_ctz(mask)) & \
          ((uint32_t) ((UINT64_C(1) << __builtin_popcount(mask)) - 1)) )
19
20
21 // A quick sketch of a program which can parse the compact unwind info
22 // used on Darwin systems for exception handling.  The output of
23 // unwinddump will be more authoritative/reliable but this program
24 // can dump at least the UNWIND_X86_64_MODE_RBP_FRAME format entries
25 // correctly.
26
// One symbol record: an address and the name to show for it.
struct symbol
{
    uint64_t file_address;
    const char *name;
};

// Comparator for qsort()/bsearch() over arrays of struct symbol, ordering
// entries by ascending file_address.
//
// Returns a negative value, zero, or a positive value when a's address is
// respectively below, equal to, or above b's.  The addresses are compared
// directly instead of being subtracted: a 64-bit difference does not fit in
// the int return value, so subtraction would report wrong signs (or a false
// "equal") for addresses that are far apart.
int
symbol_compare (const void *a, const void *b)
{
    const struct symbol *lhs = (const struct symbol *) a;
    const struct symbol *rhs = (const struct symbol *) b;

    if (lhs->file_address < rhs->file_address)
        return -1;
    if (lhs->file_address > rhs->file_address)
        return 1;
    return 0;
}
38
39 struct baton
40 {
41     cpu_type_t cputype;
42
43     uint8_t *mach_header_start;      // pointer into this program's address space
44     uint8_t *compact_unwind_start;   // pointer into this program's address space
45
46     int addr_size;                   // 4 or 8 bytes, the size of addresses in this file
47
48     uint64_t text_segment_vmaddr;    // __TEXT segment vmaddr
49     uint64_t text_segment_file_offset;
50
51     uint64_t text_section_vmaddr;    // __TEXT,__text section vmaddr
52     uint64_t text_section_file_offset;
53
54     uint64_t eh_section_file_address; // the file address of the __TEXT,__eh_frame section
55
56     uint8_t *lsda_array_start;       // for the currently-being-processed first-level index
57     uint8_t *lsda_array_end;         // the lsda_array_start for the NEXT first-level index
58
59     struct symbol *symbols;
60     int    symbols_count;
61
62     uint64_t *function_start_addresses;
63     int function_start_addresses_count;
64
65     int current_index_table_number;
66
67     struct unwind_info_section_header unwind_header;
68     struct unwind_info_section_header_index_entry first_level_index_entry;
69     struct unwind_info_compressed_second_level_page_header compressed_second_level_page_header;
70     struct unwind_info_regular_second_level_page_header regular_second_level_page_header;
71 };
72
73
// Decode one unsigned LEB128 value.
//
// Reads bytes starting at *offset until a byte with the high bit clear is
// seen, and advances *offset past every byte consumed.  Each byte contributes
// its low 7 bits, least-significant group first.
//
// Returns the decoded value.  Bits that would land beyond bit 63 are
// discarded.  No bounds checking is done: the caller must guarantee that the
// encoding is terminated inside readable memory.
uint64_t
read_leb128 (uint8_t **offset)
{
    uint64_t result = 0;
    int shift = 0;
    while (1)
    {
        uint8_t byte = **offset;
        *offset = *offset + 1;
        // Widen to 64 bits before shifting; shifting the promoted int would
        // lose (or invoke undefined behavior for) anything past bit 31.
        // Shifting a 64-bit value by 64 or more is undefined too, so skip it.
        if (shift < 64)
            result |= ((uint64_t) (byte & 0x7f)) << shift;
        if ((byte & 0x80) == 0)
            break;
        shift += 7;
    }

    return result;
}
91
92 // step through the load commands in a thin mach-o binary,
93 // find the cputype and the start of the __TEXT,__unwind_info
94 // section, return a pointer to that section or NULL if not found.
95
96 static void
97 scan_macho_load_commands (struct baton *baton)
98 {
99     struct symtab_command symtab_cmd;
100     uint64_t linkedit_segment_vmaddr;
101     uint64_t linkedit_segment_file_offset;
102
103     baton->compact_unwind_start = 0;
104
105     uint32_t *magic = (uint32_t *) baton->mach_header_start;
106
107     if (*magic != MH_MAGIC && *magic != MH_MAGIC_64)
108     {
109         printf ("Unexpected magic number 0x%x in header, exiting.", *magic);
110         exit (1);
111     }
112
113     bool is_64bit = false;
114     if (*magic == MH_MAGIC_64)
115         is_64bit = true;
116
117     uint8_t *offset = baton->mach_header_start;
118
119     struct mach_header mh;
120     memcpy (&mh, offset, sizeof (struct mach_header));
121     if (is_64bit)
122         offset += sizeof (struct mach_header_64);
123     else
124         offset += sizeof (struct mach_header);
125
126     if (is_64bit)
127         baton->addr_size = 8;
128     else
129         baton->addr_size = 4;
130
131     baton->cputype = mh.cputype;
132
133     uint8_t *start_of_load_commands = offset;
134
135     uint32_t cur_cmd = 0;
136     while (cur_cmd < mh.ncmds && (offset - start_of_load_commands) < mh.sizeofcmds)
137     {
138         struct load_command lc;
139         uint32_t *lc_cmd = (uint32_t *) offset;
140         uint32_t *lc_cmdsize = (uint32_t *) offset + 1;
141         uint8_t *start_of_this_load_cmd = offset;
142
143         if (*lc_cmd == LC_SEGMENT || *lc_cmd == LC_SEGMENT_64)
144         {
145             char segment_name[17];
146             segment_name[0] = '\0';
147             uint32_t nsects = 0;
148             uint64_t segment_offset = 0;
149             uint64_t segment_vmaddr = 0;
150
151             if (*lc_cmd == LC_SEGMENT_64)
152             {
153                 struct segment_command_64 seg;
154                 memcpy (&seg, offset, sizeof (struct segment_command_64));
155                 memcpy (&segment_name, &seg.segname, 16);
156                 segment_name[16] = '\0';
157                 nsects = seg.nsects;
158                 segment_offset = seg.fileoff;
159                 segment_vmaddr = seg.vmaddr;
160                 offset += sizeof (struct segment_command_64);
161                 if ((seg.flags & SG_PROTECTED_VERSION_1) == SG_PROTECTED_VERSION_1)
162                 {
163                     printf ("Segment '%s' is encrypted.\n", segment_name);
164                 }
165             }
166
167             if (*lc_cmd == LC_SEGMENT)
168             {
169                 struct segment_command seg;
170                 memcpy (&seg, offset, sizeof (struct segment_command));
171                 memcpy (&segment_name, &seg.segname, 16);
172                 segment_name[16] = '\0';
173                 nsects = seg.nsects;
174                 segment_offset = seg.fileoff;
175                 segment_vmaddr = seg.vmaddr;
176                 offset += sizeof (struct segment_command);
177                 if ((seg.flags & SG_PROTECTED_VERSION_1) == SG_PROTECTED_VERSION_1)
178                 {
179                     printf ("Segment '%s' is encrypted.\n", segment_name);
180                 }
181             }
182
183             if (nsects != 0 && strcmp (segment_name, "__TEXT") == 0)
184             {
185                 baton->text_segment_vmaddr = segment_vmaddr;
186                 baton->text_segment_file_offset = segment_offset;
187
188                 uint32_t current_sect = 0;
189                 while (current_sect < nsects && (offset - start_of_this_load_cmd) < *lc_cmdsize)
190                 {
191                     char sect_name[17];
192                     memcpy (&sect_name, offset, 16);
193                     sect_name[16] = '\0';
194                     if (strcmp (sect_name, "__unwind_info") == 0)
195                     {
196                         if (is_64bit)
197                         {
198                             struct section_64 sect;
199                             memcpy (&sect, offset, sizeof (struct section_64));
200                             baton->compact_unwind_start = baton->mach_header_start + sect.offset;
201                         }
202                         else
203                         {
204                             struct section sect;
205                             memcpy (&sect, offset, sizeof (struct section));
206                             baton->compact_unwind_start = baton->mach_header_start + sect.offset;
207                         }
208                     }
209                     if (strcmp (sect_name, "__eh_frame") == 0)
210                     {
211                         if (is_64bit)
212                         {
213                             struct section_64 sect;
214                             memcpy (&sect, offset, sizeof (struct section_64));
215                             baton->eh_section_file_address = sect.addr;
216                         }
217                         else
218                         {
219                             struct section sect;
220                             memcpy (&sect, offset, sizeof (struct section));
221                             baton->eh_section_file_address = sect.addr;
222                         }
223                     }
224                     if (strcmp (sect_name, "__text") == 0)
225                     {
226                         if (is_64bit)
227                         {
228                             struct section_64 sect;
229                             memcpy (&sect, offset, sizeof (struct section_64));
230                             baton->text_section_vmaddr = sect.addr;
231                             baton->text_section_file_offset = sect.offset;
232                         }
233                         else
234                         {
235                             struct section sect;
236                             memcpy (&sect, offset, sizeof (struct section));
237                             baton->text_section_vmaddr = sect.addr;
238                         }
239                     }
240                     if (is_64bit)
241                     {
242                         offset += sizeof (struct section_64);
243                     }
244                     else
245                     {
246                         offset += sizeof (struct section);
247                     }
248                 }
249             }
250
251             if (strcmp (segment_name, "__LINKEDIT") == 0)
252             {
253                 linkedit_segment_vmaddr = segment_vmaddr;
254                 linkedit_segment_file_offset = segment_offset;
255             }
256         }
257
258         if (*lc_cmd == LC_SYMTAB)
259         {
260             memcpy (&symtab_cmd, offset, sizeof (struct symtab_command));
261         }
262
263         if (*lc_cmd == LC_DYSYMTAB)
264         {
265             struct dysymtab_command dysymtab_cmd;
266             memcpy (&dysymtab_cmd, offset, sizeof (struct dysymtab_command));
267
268             int nlist_size = 12;
269             if (is_64bit)
270                 nlist_size = 16;
271
272             char *string_table = (char *) (baton->mach_header_start + symtab_cmd.stroff);
273             uint8_t *local_syms = baton->mach_header_start + symtab_cmd.symoff + (dysymtab_cmd.ilocalsym * nlist_size);
274             int local_syms_count = dysymtab_cmd.nlocalsym;
275             uint8_t *exported_syms = baton->mach_header_start + symtab_cmd.symoff + (dysymtab_cmd.iextdefsym * nlist_size);
276             int exported_syms_count = dysymtab_cmd.nextdefsym;
277
278             // We're only going to create records for a small number of these symbols but to 
279             // simplify the memory management I'll allocate enough space to store all of them.
280             baton->symbols = (struct symbol *) malloc (sizeof (struct symbol) * (local_syms_count + exported_syms_count));
281             baton->symbols_count = 0;
282
283             for (int i = 0; i < local_syms_count; i++)
284             {
285                 struct nlist_64 nlist;
286                 if (is_64bit)
287                 {
288                     memcpy (&nlist, local_syms + (i * nlist_size), sizeof (struct nlist_64));
289                 }
290                 else
291                 {
292                     struct nlist nlist_32;
293                     memcpy (&nlist_32, local_syms + (i * nlist_size), sizeof (struct nlist));
294                     nlist.n_un.n_strx = nlist_32.n_un.n_strx;
295                     nlist.n_type = nlist_32.n_type;
296                     nlist.n_sect = nlist_32.n_sect;
297                     nlist.n_desc = nlist_32.n_desc;
298                     nlist.n_value = nlist_32.n_value;
299                 }
300                 if ((nlist.n_type & N_STAB) == 0
301                     && ((nlist.n_type & N_EXT) == 1 || 
302                         ((nlist.n_type & N_TYPE) == N_TYPE && nlist.n_sect != NO_SECT))
303                     && nlist.n_value != 0
304                     && nlist.n_value != baton->text_segment_vmaddr)
305                 {
306                     baton->symbols[baton->symbols_count].file_address = nlist.n_value;
307                     baton->symbols[baton->symbols_count].name = string_table + nlist.n_un.n_strx;
308                     baton->symbols_count++;
309                 }
310             }
311
312             for (int i = 0; i < exported_syms_count; i++)
313             {
314                 struct nlist_64 nlist;
315                 if (is_64bit)
316                 {
317                     memcpy (&nlist, exported_syms + (i * nlist_size), sizeof (struct nlist_64));
318                 }
319                 else
320                 {
321                     struct nlist nlist_32;
322                     memcpy (&nlist_32, exported_syms + (i * nlist_size), sizeof (struct nlist));
323                     nlist.n_un.n_strx = nlist_32.n_un.n_strx;
324                     nlist.n_type = nlist_32.n_type;
325                     nlist.n_sect = nlist_32.n_sect;
326                     nlist.n_desc = nlist_32.n_desc;
327                     nlist.n_value = nlist_32.n_value;
328                 }
329                 if ((nlist.n_type & N_STAB) == 0
330                     && ((nlist.n_type & N_EXT) == 1 || 
331                         ((nlist.n_type & N_TYPE) == N_TYPE && nlist.n_sect != NO_SECT))
332                     && nlist.n_value != 0
333                     && nlist.n_value != baton->text_segment_vmaddr)
334                 {
335                     baton->symbols[baton->symbols_count].file_address = nlist.n_value;
336                     baton->symbols[baton->symbols_count].name = string_table + nlist.n_un.n_strx;
337                     baton->symbols_count++;
338                 }
339             }
340
341             qsort (baton->symbols, baton->symbols_count, sizeof (struct symbol), symbol_compare);
342         }
343
344         if (*lc_cmd == LC_FUNCTION_STARTS)
345         {
346             struct linkedit_data_command function_starts_cmd;
347             memcpy (&function_starts_cmd, offset, sizeof (struct linkedit_data_command));
348
349             uint8_t *funcstarts_offset = baton->mach_header_start + function_starts_cmd.dataoff;
350             uint8_t *function_end = funcstarts_offset + function_starts_cmd.datasize;
351             int count = 0;
352
353             while (funcstarts_offset < function_end)
354             {
355                 if (read_leb128 (&funcstarts_offset) != 0)
356                 {
357                     count++;
358                 }
359             }
360
361             baton->function_start_addresses = (uint64_t *) malloc (sizeof (uint64_t) * count);
362             baton->function_start_addresses_count = count;
363
364             funcstarts_offset = baton->mach_header_start + function_starts_cmd.dataoff;
365             uint64_t current_pc = baton->text_segment_vmaddr;
366             int i = 0;
367             while (funcstarts_offset < function_end)
368             {
369                 uint64_t func_start = read_leb128 (&funcstarts_offset);
370                 if (func_start != 0)
371                 {
372                     current_pc += func_start;
373                     baton->function_start_addresses[i++] = current_pc;
374                 }
375             }
376         }
377
378         offset = start_of_this_load_cmd + *lc_cmdsize;
379         cur_cmd++;
380     }
381
382
383     // Augment the symbol table with the function starts table -- adding symbol entries
384     // for functions that were stripped.
385
386     int unnamed_functions_to_add = 0;
387     for (int i = 0; i < baton->function_start_addresses_count; i++)
388     {
389         struct symbol search_key;
390         search_key.file_address = baton->function_start_addresses[i];
391         struct symbol *sym = bsearch (&search_key, baton->symbols, baton->symbols_count, sizeof (struct symbol), symbol_compare);
392         if (sym == NULL)
393             unnamed_functions_to_add++;
394     }
395
396     baton->symbols = (struct symbol *) realloc (baton->symbols, sizeof (struct symbol) * (baton->symbols_count + unnamed_functions_to_add));
397
398     int current_unnamed_symbol = 1;
399     int number_symbols_added = 0;
400     for (int i = 0; i < baton->function_start_addresses_count; i++)
401     {
402         struct symbol search_key;
403         search_key.file_address = baton->function_start_addresses[i];
404         struct symbol *sym = bsearch (&search_key, baton->symbols, baton->symbols_count, sizeof (struct symbol), symbol_compare);
405         if (sym == NULL)
406         {
407             char *name;
408             asprintf (&name, "unnamed function #%d", current_unnamed_symbol++);
409             baton->symbols[baton->symbols_count + number_symbols_added].file_address = baton->function_start_addresses[i];
410             baton->symbols[baton->symbols_count + number_symbols_added].name = name;
411             number_symbols_added++;
412         }
413     }
414     baton->symbols_count += number_symbols_added;
415     qsort (baton->symbols, baton->symbols_count, sizeof (struct symbol), symbol_compare);
416
417
418 //    printf ("function start addresses\n");
419 //    for (int i = 0; i < baton->function_start_addresses_count; i++)
420 //    {
421 //        printf ("0x%012llx\n", baton->function_start_addresses[i]);
422 //    }
423
424 //    printf ("symbol table names & addresses\n");
425 //    for (int i = 0; i < baton->symbols_count; i++)
426 //    {
427 //        printf ("0x%012llx %s\n", baton->symbols[i].file_address, baton->symbols[i].name);
428 //    }
429
430 }
431
432 void
433 print_encoding_x86_64 (struct baton baton, uint8_t *function_start, uint32_t encoding)
434 {
435     int mode = encoding & UNWIND_X86_64_MODE_MASK;
436     switch (mode)
437     {
438         case UNWIND_X86_64_MODE_RBP_FRAME:
439         {
440             printf ("frame func: CFA is rbp+%d ", 16);
441             printf (" rip=[CFA-8] rbp=[CFA-16]");
442             uint32_t saved_registers_offset = EXTRACT_BITS (encoding, UNWIND_X86_64_RBP_FRAME_OFFSET);
443
444             uint32_t saved_registers_locations = EXTRACT_BITS (encoding, UNWIND_X86_64_RBP_FRAME_REGISTERS);
445
446
447             saved_registers_offset += 2;
448
449             for (int i = 0; i < 5; i++)
450             {
451                 switch (saved_registers_locations & 0x7)
452                 {
453                     case UNWIND_X86_64_REG_NONE:
454                         break;
455                     case UNWIND_X86_64_REG_RBX:
456                         printf (" rbx=[CFA-%d]", saved_registers_offset * 8);
457                         break;
458                     case UNWIND_X86_64_REG_R12:
459                         printf (" r12=[CFA-%d]", saved_registers_offset * 8);
460                         break;
461                     case UNWIND_X86_64_REG_R13:
462                         printf (" r13=[CFA-%d]", saved_registers_offset * 8);
463                         break;
464                     case UNWIND_X86_64_REG_R14:
465                         printf (" r14=[CFA-%d]", saved_registers_offset * 8);
466                         break;
467                     case UNWIND_X86_64_REG_R15:
468                         printf (" r15=[CFA-%d]", saved_registers_offset * 8);
469                         break;
470                 }
471                 saved_registers_offset--;
472                 saved_registers_locations >>= 3;
473             }
474         }
475         break;
476
477         case UNWIND_X86_64_MODE_STACK_IND:
478         case UNWIND_X86_64_MODE_STACK_IMMD:
479         {
480             uint32_t stack_size = EXTRACT_BITS (encoding, UNWIND_X86_64_FRAMELESS_STACK_SIZE);
481             uint32_t register_count = EXTRACT_BITS (encoding, UNWIND_X86_64_FRAMELESS_STACK_REG_COUNT);
482             uint32_t permutation = EXTRACT_BITS (encoding, UNWIND_X86_64_FRAMELESS_STACK_REG_PERMUTATION);
483
484             if (mode == UNWIND_X86_64_MODE_STACK_IND && function_start)
485             {
486                 uint32_t stack_adjust = EXTRACT_BITS (encoding, UNWIND_X86_64_FRAMELESS_STACK_ADJUST);
487
488                 // offset into the function instructions; 0 == beginning of first instruction
489                 uint32_t offset_to_subl_insn = EXTRACT_BITS (encoding, UNWIND_X86_64_FRAMELESS_STACK_SIZE);
490
491                 stack_size = *((uint32_t*) (function_start + offset_to_subl_insn));
492
493                 stack_size += stack_adjust * 8;
494
495                 printf ("large stack ");
496             }
497             
498             printf ("frameless function: stack size %d, register count %d ", stack_size * 8, register_count);
499
500             if (register_count == 0)
501             {
502                 printf (" no registers saved");
503             }
504             else
505             {
506
507                 // We need to include (up to) 6 registers in 10 bits.
508                 // That would be 18 bits if we just used 3 bits per reg to indicate
509                 // the order they're saved on the stack. 
510                 //
511                 // This is done with Lehmer code permutation, e.g. see
512                 // http://stackoverflow.com/questions/1506078/fast-permutation-number-permutation-mapping-algorithms
513                 int permunreg[6];
514
515                 // This decodes the variable-base number in the 10 bits
516                 // and gives us the Lehmer code sequence which can then
517                 // be decoded.
518
519                 switch (register_count) 
520                 {
521                     case 6:
522                         permunreg[0] = permutation/120;    // 120 == 5!
523                         permutation -= (permunreg[0]*120);
524                         permunreg[1] = permutation/24;     // 24 == 4!
525                         permutation -= (permunreg[1]*24);
526                         permunreg[2] = permutation/6;      // 6 == 3!
527                         permutation -= (permunreg[2]*6);
528                         permunreg[3] = permutation/2;      // 2 == 2!
529                         permutation -= (permunreg[3]*2);
530                         permunreg[4] = permutation;        // 1 == 1!
531                         permunreg[5] = 0;
532                         break;
533                     case 5:
534                         permunreg[0] = permutation/120;
535                         permutation -= (permunreg[0]*120);
536                         permunreg[1] = permutation/24;
537                         permutation -= (permunreg[1]*24);
538                         permunreg[2] = permutation/6;
539                         permutation -= (permunreg[2]*6);
540                         permunreg[3] = permutation/2;
541                         permutation -= (permunreg[3]*2);
542                         permunreg[4] = permutation;
543                         break;
544                     case 4:
545                         permunreg[0] = permutation/60;
546                         permutation -= (permunreg[0]*60);
547                         permunreg[1] = permutation/12;
548                         permutation -= (permunreg[1]*12);
549                         permunreg[2] = permutation/3;
550                         permutation -= (permunreg[2]*3);
551                         permunreg[3] = permutation;
552                         break;
553                     case 3:
554                         permunreg[0] = permutation/20;
555                         permutation -= (permunreg[0]*20);
556                         permunreg[1] = permutation/4;
557                         permutation -= (permunreg[1]*4);
558                         permunreg[2] = permutation;
559                         break;
560                     case 2:
561                         permunreg[0] = permutation/5;
562                         permutation -= (permunreg[0]*5);
563                         permunreg[1] = permutation;
564                         break;
565                     case 1:
566                         permunreg[0] = permutation;
567                         break;
568                 }
569                 
570                 // Decode the Lehmer code for this permutation of
571                 // the registers v. http://en.wikipedia.org/wiki/Lehmer_code
572
573                 int registers[6];
574                 bool used[7] = { false, false, false, false, false, false, false };
575                 for (int i = 0; i < register_count; i++)
576                 {
577                     int renum = 0;
578                     for (int j = 1; j < 7; j++)
579                     {
580                         if (used[j] == false)
581                         {
582                             if (renum == permunreg[i])
583                             {
584                                 registers[i] = j;
585                                 used[j] = true;
586                                 break;
587                             }
588                             renum++;
589                         }
590                     }
591                 }
592
593
594                 printf (" CFA is rsp+%d ", stack_size * 8);
595
596                 uint32_t saved_registers_offset = 1;
597                 printf (" rip=[CFA-%d]", saved_registers_offset * 8);
598                 saved_registers_offset++;
599
600                 for (int i = (sizeof (registers) / sizeof (int)) - 1; i >= 0; i--)
601                 {
602                     switch (registers[i])
603                     {
604                         case UNWIND_X86_64_REG_NONE:
605                             break;
606                         case UNWIND_X86_64_REG_RBX:
607                             printf (" rbx=[CFA-%d]", saved_registers_offset * 8);
608                             break;
609                         case UNWIND_X86_64_REG_R12:
610                             printf (" r12=[CFA-%d]", saved_registers_offset * 8);
611                             break;
612                         case UNWIND_X86_64_REG_R13:
613                             printf (" r13=[CFA-%d]", saved_registers_offset * 8);
614                             break;
615                         case UNWIND_X86_64_REG_R14:
616                             printf (" r14=[CFA-%d]", saved_registers_offset * 8);
617                             break;
618                         case UNWIND_X86_64_REG_R15:
619                             printf (" r15=[CFA-%d]", saved_registers_offset * 8);
620                             break;
621                         case UNWIND_X86_64_REG_RBP:
622                             printf (" rbp=[CFA-%d]", saved_registers_offset * 8);
623                             break;
624                     }
625                     saved_registers_offset++;
626                 }
627
628             }
629
630         }
631         break;
632
633         case UNWIND_X86_64_MODE_DWARF:
634         {
635             uint32_t dwarf_offset = encoding & UNWIND_X86_DWARF_SECTION_OFFSET;
636             printf ("DWARF unwind instructions: FDE at offset %d (file address 0x%" PRIx64 ")",
637                     dwarf_offset, dwarf_offset + baton.eh_section_file_address);
638         }
639         break;
640
641         case 0:
642         {
643             printf (" no unwind information");
644         }
645         break;
646     }
647 }
648
649 void
650 print_encoding_i386 (struct baton baton, uint8_t *function_start, uint32_t encoding)
651 {
652     int mode = encoding & UNWIND_X86_MODE_MASK;
653     switch (mode)
654     {
655         case UNWIND_X86_MODE_EBP_FRAME:
656         {
657             printf ("frame func: CFA is ebp+%d ", 8);
658             printf (" eip=[CFA-4] ebp=[CFA-8]");
659             uint32_t saved_registers_offset = EXTRACT_BITS (encoding, UNWIND_X86_EBP_FRAME_OFFSET);
660
661             uint32_t saved_registers_locations = EXTRACT_BITS (encoding, UNWIND_X86_EBP_FRAME_REGISTERS);
662
663
664             saved_registers_offset += 2;
665
666             for (int i = 0; i < 5; i++)
667             {
668                 switch (saved_registers_locations & 0x7)
669                 {
670                     case UNWIND_X86_REG_NONE:
671                         break;
672                     case UNWIND_X86_REG_EBX:
673                         printf (" ebx=[CFA-%d]", saved_registers_offset * 4);
674                         break;
675                     case UNWIND_X86_REG_ECX:
676                         printf (" ecx=[CFA-%d]", saved_registers_offset * 4);
677                         break;
678                     case UNWIND_X86_REG_EDX:
679                         printf (" edx=[CFA-%d]", saved_registers_offset * 4);
680                         break;
681                     case UNWIND_X86_REG_EDI:
682                         printf (" edi=[CFA-%d]", saved_registers_offset * 4);
683                         break;
684                     case UNWIND_X86_REG_ESI:
685                         printf (" esi=[CFA-%d]", saved_registers_offset * 4);
686                         break;
687                 }
688                 saved_registers_offset--;
689                 saved_registers_locations >>= 3;
690             }
691         }
692         break;
693
694         case UNWIND_X86_MODE_STACK_IND:
695         case UNWIND_X86_MODE_STACK_IMMD:
696         {
697             uint32_t stack_size = EXTRACT_BITS (encoding, UNWIND_X86_FRAMELESS_STACK_SIZE);
698             uint32_t register_count = EXTRACT_BITS (encoding, UNWIND_X86_FRAMELESS_STACK_REG_COUNT);
699             uint32_t permutation = EXTRACT_BITS (encoding, UNWIND_X86_FRAMELESS_STACK_REG_PERMUTATION);
700
701             if (mode == UNWIND_X86_MODE_STACK_IND && function_start)
702             {
703                 uint32_t stack_adjust = EXTRACT_BITS (encoding, UNWIND_X86_FRAMELESS_STACK_ADJUST);
704
705                 // offset into the function instructions; 0 == beginning of first instruction
706                 uint32_t offset_to_subl_insn = EXTRACT_BITS (encoding, UNWIND_X86_FRAMELESS_STACK_SIZE);
707
708                 stack_size = *((uint32_t*) (function_start + offset_to_subl_insn));
709
710                 stack_size += stack_adjust * 4;
711
712                 printf ("large stack ");
713             }
714             
715             printf ("frameless function: stack size %d, register count %d ", stack_size * 4, register_count);
716
717             if (register_count == 0)
718             {
719                 printf (" no registers saved");
720             }
721             else
722             {
723
724                 // We need to include (up to) 6 registers in 10 bits.
725                 // That would be 18 bits if we just used 3 bits per reg to indicate
726                 // the order they're saved on the stack. 
727                 //
728                 // This is done with Lehmer code permutation, e.g. see
729                 // http://stackoverflow.com/questions/1506078/fast-permutation-number-permutation-mapping-algorithms
730                 int permunreg[6];
731
732                 // This decodes the variable-base number in the 10 bits
733                 // and gives us the Lehmer code sequence which can then
734                 // be decoded.
735
736                 switch (register_count) 
737                 {
738                     case 6:
739                         permunreg[0] = permutation/120;    // 120 == 5!
740                         permutation -= (permunreg[0]*120);
741                         permunreg[1] = permutation/24;     // 24 == 4!
742                         permutation -= (permunreg[1]*24);
743                         permunreg[2] = permutation/6;      // 6 == 3!
744                         permutation -= (permunreg[2]*6);
745                         permunreg[3] = permutation/2;      // 2 == 2!
746                         permutation -= (permunreg[3]*2);
747                         permunreg[4] = permutation;        // 1 == 1!
748                         permunreg[5] = 0;
749                         break;
750                     case 5:
751                         permunreg[0] = permutation/120;
752                         permutation -= (permunreg[0]*120);
753                         permunreg[1] = permutation/24;
754                         permutation -= (permunreg[1]*24);
755                         permunreg[2] = permutation/6;
756                         permutation -= (permunreg[2]*6);
757                         permunreg[3] = permutation/2;
758                         permutation -= (permunreg[3]*2);
759                         permunreg[4] = permutation;
760                         break;
761                     case 4:
762                         permunreg[0] = permutation/60;
763                         permutation -= (permunreg[0]*60);
764                         permunreg[1] = permutation/12;
765                         permutation -= (permunreg[1]*12);
766                         permunreg[2] = permutation/3;
767                         permutation -= (permunreg[2]*3);
768                         permunreg[3] = permutation;
769                         break;
770                     case 3:
771                         permunreg[0] = permutation/20;
772                         permutation -= (permunreg[0]*20);
773                         permunreg[1] = permutation/4;
774                         permutation -= (permunreg[1]*4);
775                         permunreg[2] = permutation;
776                         break;
777                     case 2:
778                         permunreg[0] = permutation/5;
779                         permutation -= (permunreg[0]*5);
780                         permunreg[1] = permutation;
781                         break;
782                     case 1:
783                         permunreg[0] = permutation;
784                         break;
785                 }
786                 
787                 // Decode the Lehmer code for this permutation of
788                 // the registers v. http://en.wikipedia.org/wiki/Lehmer_code
789
790                 int registers[6];
791                 bool used[7] = { false, false, false, false, false, false, false };
792                 for (int i = 0; i < register_count; i++)
793                 {
794                     int renum = 0;
795                     for (int j = 1; j < 7; j++)
796                     {
797                         if (used[j] == false)
798                         {
799                             if (renum == permunreg[i])
800                             {
801                                 registers[i] = j;
802                                 used[j] = true;
803                                 break;
804                             }
805                             renum++;
806                         }
807                     }
808                 }
809
810
811                 printf (" CFA is esp+%d ", stack_size * 4);
812
813                 uint32_t saved_registers_offset = 1;
814                 printf (" eip=[CFA-%d]", saved_registers_offset * 4);
815                 saved_registers_offset++;
816
817                 for (int i = (sizeof (registers) / sizeof (int)) - 1; i >= 0; i--)
818                 {
819                     switch (registers[i])
820                     {
821                         case UNWIND_X86_REG_NONE:
822                             break;
823                         case UNWIND_X86_REG_EBX:
824                             printf (" ebx=[CFA-%d]", saved_registers_offset * 4);
825                             break;
826                         case UNWIND_X86_REG_ECX:
827                             printf (" ecx=[CFA-%d]", saved_registers_offset * 4);
828                             break;
829                         case UNWIND_X86_REG_EDX:
830                             printf (" edx=[CFA-%d]", saved_registers_offset * 4);
831                             break;
832                         case UNWIND_X86_REG_EDI:
833                             printf (" edi=[CFA-%d]", saved_registers_offset * 4);
834                             break;
835                         case UNWIND_X86_REG_ESI:
836                             printf (" esi=[CFA-%d]", saved_registers_offset * 4);
837                             break;
838                         case UNWIND_X86_REG_EBP:
839                             printf (" ebp=[CFA-%d]", saved_registers_offset * 4);
840                             break;
841                     }
842                     saved_registers_offset++;
843                 }
844
845             }
846
847         }
848         break;
849
850         case UNWIND_X86_MODE_DWARF:
851         {
852             uint32_t dwarf_offset = encoding & UNWIND_X86_DWARF_SECTION_OFFSET;
853             printf ("DWARF unwind instructions: FDE at offset %d (file address 0x%" PRIx64 ")",
854                     dwarf_offset, dwarf_offset + baton.eh_section_file_address);
855         }
856         break;
857
858         case 0:
859         {
860             printf (" no unwind information");
861         }
862         break;
863     }
864 }
865
866
867 void print_encoding (struct baton baton, uint8_t *function_start, uint32_t encoding)
868 {
869
870     if (baton.cputype == CPU_TYPE_X86_64)
871     {
872         print_encoding_x86_64 (baton, function_start, encoding);
873     }
874     else if (baton.cputype == CPU_TYPE_I386)
875     {
876         print_encoding_i386 (baton, function_start, encoding);
877     }
878     else
879     {
880         printf (" -- unsupported encoding arch -- ");
881     }
882 }
883
884 void
885 print_function_encoding (struct baton baton, uint32_t idx, uint32_t encoding, uint32_t entry_encoding_index, uint32_t entry_func_offset)
886 {
887
888     char *entry_encoding_index_str = "";
889     if (entry_encoding_index != (uint32_t) -1)
890     {
891         asprintf (&entry_encoding_index_str, ", encoding #%d", entry_encoding_index);
892     }
893     else
894     {
895         asprintf (&entry_encoding_index_str, "");
896     }
897
898     uint64_t file_address = baton.first_level_index_entry.functionOffset + entry_func_offset + baton.text_segment_vmaddr;
899
900     printf ("    func [%d] offset %d (file addr 0x%" PRIx64 ")%s, encoding is 0x%x", 
901             idx, entry_func_offset, 
902             file_address,
903             entry_encoding_index_str, 
904             encoding);
905
906     struct symbol *symbol = NULL;
907     for (int i = 0; i < baton.symbols_count; i++)
908     {
909         if (i == baton.symbols_count - 1 && baton.symbols[i].file_address <= file_address)
910         {
911             symbol = &(baton.symbols[i]);
912             break;
913         }
914         else
915         {
916             if (baton.symbols[i].file_address <= file_address && baton.symbols[i + 1].file_address > file_address)
917             {
918                 symbol = &(baton.symbols[i]);
919                 break;
920             }
921         }
922     }
923
924     printf ("\n         ");
925     if (symbol)
926     {
927         int offset = file_address - symbol->file_address;
928
929         // FIXME this is a poor heuristic - if we're greater than 16 bytes past the
930         // start of the function, this is the unwind info for a stripped function.
931         // In reality the compact unwind entry may not line up exactly with the 
932         // function bounds.
933         if (offset >= 0)
934         {
935             printf ("name: %s", symbol->name);
936             if (offset > 0)
937             {
938                 printf (" + %d", offset);
939             }
940         }
941         printf ("\n         ");
942     }
943
944     print_encoding (baton, baton.mach_header_start + baton.first_level_index_entry.functionOffset + baton.text_section_file_offset + entry_func_offset, encoding);
945
946     bool has_lsda = encoding & UNWIND_HAS_LSDA;
947
948     if (has_lsda)
949     {
950         uint32_t func_offset = entry_func_offset + baton.first_level_index_entry.functionOffset;
951
952         int lsda_entry_number = -1;
953
954         uint32_t low = 0;
955         uint32_t high = (baton.lsda_array_end - baton.lsda_array_start) / sizeof (struct unwind_info_section_header_lsda_index_entry);
956
957         while (low < high)
958         {
959             uint32_t mid = (low + high) / 2;
960
961             uint8_t *mid_lsda_entry_addr = (baton.lsda_array_start + (mid * sizeof (struct unwind_info_section_header_lsda_index_entry)));
962             struct unwind_info_section_header_lsda_index_entry mid_lsda_entry;
963             memcpy (&mid_lsda_entry, mid_lsda_entry_addr, sizeof (struct unwind_info_section_header_lsda_index_entry));
964             if (mid_lsda_entry.functionOffset == func_offset)
965             {
966                 lsda_entry_number = (mid_lsda_entry_addr - baton.lsda_array_start) / sizeof (struct unwind_info_section_header_lsda_index_entry);
967                 break;
968             }
969             else if (mid_lsda_entry.functionOffset < func_offset)
970             {
971                 low = mid + 1;
972             }
973             else
974             {
975                 high = mid;
976             }
977         }
978
979         if (lsda_entry_number != -1)
980         {
981             printf (", LSDA entry #%d", lsda_entry_number);
982         }
983         else
984         {
985             printf (", LSDA entry not found");
986         }
987     }
988
989     uint32_t pers_idx = EXTRACT_BITS (encoding, UNWIND_PERSONALITY_MASK);
990     if (pers_idx != 0)
991     {
992         pers_idx--;  // Change 1-based to 0-based index
993         printf (", personality entry #%d", pers_idx);
994     }
995
996     printf ("\n");
997 }
998
999 void
1000 print_second_level_index_regular (struct baton baton)
1001 {
1002     uint8_t *page_entries = baton.compact_unwind_start + baton.first_level_index_entry.secondLevelPagesSectionOffset + baton.regular_second_level_page_header.entryPageOffset;
1003     uint32_t entries_count =  baton.regular_second_level_page_header.entryCount;
1004
1005     uint8_t *offset = page_entries;
1006
1007     uint32_t idx = 0;
1008     while (idx < entries_count)
1009     {
1010         uint32_t func_offset = *((uint32_t *) (offset));
1011         uint32_t encoding = *((uint32_t *) (offset + 4)); 
1012
1013         // UNWIND_SECOND_LEVEL_REGULAR entries have a funcOffset which includes the 
1014         // functionOffset from the containing index table already.  UNWIND_SECOND_LEVEL_COMPRESSED
1015         // entries only have the offset from the containing index table functionOffset.
1016         // So strip off the contianing index table functionOffset value here so they can
1017         // be treated the same at the lower layers.
1018
1019         print_function_encoding (baton, idx, encoding, (uint32_t) -1, func_offset - baton.first_level_index_entry.functionOffset);
1020         idx++;
1021         offset += 8;
1022     }
1023 }
1024
1025 void
1026 print_second_level_index_compressed (struct baton baton)
1027 {
1028     uint8_t *this_index = baton.compact_unwind_start + baton.first_level_index_entry.secondLevelPagesSectionOffset;
1029     uint8_t *start_of_entries = this_index + baton.compressed_second_level_page_header.entryPageOffset;
1030     uint8_t *offset = start_of_entries;
1031     for (uint16_t idx = 0; idx < baton.compressed_second_level_page_header.entryCount; idx++)
1032     {
1033         uint32_t entry = *((uint32_t*) offset);
1034         offset += 4;
1035         uint32_t encoding;
1036
1037         uint32_t entry_encoding_index = UNWIND_INFO_COMPRESSED_ENTRY_ENCODING_INDEX (entry);
1038         uint32_t entry_func_offset = UNWIND_INFO_COMPRESSED_ENTRY_FUNC_OFFSET (entry);
1039
1040         if (entry_encoding_index < baton.unwind_header.commonEncodingsArrayCount)
1041         {
1042             // encoding is in common table in section header
1043             encoding = *((uint32_t*) (baton.compact_unwind_start + baton.unwind_header.commonEncodingsArraySectionOffset + (entry_encoding_index * sizeof (uint32_t))));
1044         }
1045         else
1046         {
1047             // encoding is in page specific table
1048             uint32_t page_encoding_index = entry_encoding_index - baton.unwind_header.commonEncodingsArrayCount;
1049             encoding = *((uint32_t*) (this_index + baton.compressed_second_level_page_header.encodingsPageOffset + (page_encoding_index * sizeof (uint32_t))));
1050         }
1051
1052
1053         print_function_encoding (baton, idx, encoding, entry_encoding_index, entry_func_offset);
1054     }
1055 }
1056
1057 void
1058 print_second_level_index (struct baton baton)
1059 {
1060     uint8_t *index_start = baton.compact_unwind_start + baton.first_level_index_entry.secondLevelPagesSectionOffset;
1061
1062     if ((*(uint32_t*) index_start) == UNWIND_SECOND_LEVEL_REGULAR)
1063     {
1064         struct unwind_info_regular_second_level_page_header header;
1065         memcpy (&header, index_start, sizeof (struct unwind_info_regular_second_level_page_header));
1066         printf ("  UNWIND_SECOND_LEVEL_REGULAR #%d entryPageOffset %d, entryCount %d\n", baton.current_index_table_number, header.entryPageOffset, header.entryCount);
1067         baton.regular_second_level_page_header = header;
1068         print_second_level_index_regular (baton);
1069     }
1070
1071     if ((*(uint32_t*) index_start) == UNWIND_SECOND_LEVEL_COMPRESSED)
1072     {
1073         struct unwind_info_compressed_second_level_page_header header;
1074         memcpy (&header, index_start, sizeof (struct unwind_info_compressed_second_level_page_header));
1075         printf ("  UNWIND_SECOND_LEVEL_COMPRESSED #%d entryPageOffset %d, entryCount %d, encodingsPageOffset %d, encodingsCount %d\n", baton.current_index_table_number, header.entryPageOffset, header.entryCount, header.encodingsPageOffset, header.encodingsCount);
1076         baton.compressed_second_level_page_header = header;
1077         print_second_level_index_compressed (baton);
1078     }
1079 }
1080
1081
1082 void
1083 print_index_sections (struct baton baton)
1084 {    
1085     uint8_t *index_section_offset = baton.compact_unwind_start + baton.unwind_header.indexSectionOffset;
1086     uint32_t index_count = baton.unwind_header.indexCount;
1087
1088     uint32_t cur_idx = 0;
1089
1090     uint8_t *offset = index_section_offset;
1091     while (cur_idx < index_count)
1092     {
1093         baton.current_index_table_number = cur_idx;
1094         struct unwind_info_section_header_index_entry index_entry;
1095         memcpy (&index_entry, offset, sizeof (struct unwind_info_section_header_index_entry));
1096         printf ("index section #%d: functionOffset %d, secondLevelPagesSectionOffset %d, lsdaIndexArraySectionOffset %d\n", cur_idx, index_entry.functionOffset, index_entry.secondLevelPagesSectionOffset, index_entry.lsdaIndexArraySectionOffset);
1097
1098         // secondLevelPagesSectionOffset == 0 means this is a sentinel entry
1099         if (index_entry.secondLevelPagesSectionOffset != 0)
1100         {
1101             struct unwind_info_section_header_index_entry next_index_entry;
1102             memcpy (&next_index_entry, offset + sizeof (struct unwind_info_section_header_index_entry), sizeof (struct unwind_info_section_header_index_entry));
1103
1104             baton.lsda_array_start = baton.compact_unwind_start + index_entry.lsdaIndexArraySectionOffset;
1105             baton.lsda_array_end = baton.compact_unwind_start + next_index_entry.lsdaIndexArraySectionOffset;
1106
1107             uint8_t *lsda_entry_offset = baton.lsda_array_start;
1108             uint32_t lsda_count = 0;
1109             while (lsda_entry_offset < baton.lsda_array_end)
1110             {
1111                 struct unwind_info_section_header_lsda_index_entry lsda_entry;
1112                 memcpy (&lsda_entry, lsda_entry_offset, sizeof (struct unwind_info_section_header_lsda_index_entry));
1113                 uint64_t function_file_address = baton.first_level_index_entry.functionOffset + lsda_entry.functionOffset + baton.text_segment_vmaddr;
1114                 uint64_t lsda_file_address = lsda_entry.lsdaOffset + baton.text_segment_vmaddr;
1115                 printf ("    LSDA [%d] functionOffset %d (%d) (file address 0x%" PRIx64 "), lsdaOffset %d (file address 0x%" PRIx64 ")\n", 
1116                         lsda_count, lsda_entry.functionOffset, 
1117                         lsda_entry.functionOffset - index_entry.functionOffset, 
1118                         function_file_address,
1119                         lsda_entry.lsdaOffset, lsda_file_address);
1120                 lsda_count++;
1121                 lsda_entry_offset += sizeof (struct unwind_info_section_header_lsda_index_entry);
1122             }
1123
1124             printf ("\n");
1125
1126             baton.first_level_index_entry = index_entry;
1127             print_second_level_index (baton);
1128         }
1129
1130         printf ("\n");
1131
1132         cur_idx++;
1133         offset += sizeof (struct unwind_info_section_header_index_entry);
1134     }
1135 }
1136
1137 int main (int argc, char **argv)
1138 {
1139     struct stat st;
1140     char *file = argv[0];
1141     if (argc > 1)
1142         file = argv[1];
1143     int fd = open (file, O_RDONLY);
1144     if (fd == -1)
1145     {
1146         printf ("Failed to open '%s'\n", file);
1147         exit (1);
1148     }
1149     fstat (fd, &st);
1150     uint8_t *file_mem = (uint8_t*) mmap (0, st.st_size, PROT_READ, MAP_PRIVATE | MAP_FILE, fd, 0);
1151     if (file_mem == MAP_FAILED)
1152     {
1153         printf ("Failed to mmap() '%s'\n", file);
1154     }
1155
1156     FILE *f = fopen ("a.out", "r");
1157
1158     struct baton baton;
1159     baton.mach_header_start = file_mem;
1160     baton.symbols = NULL;
1161     baton.symbols_count = 0;
1162     baton.function_start_addresses = NULL;
1163     baton.function_start_addresses_count = 0;
1164
1165     scan_macho_load_commands (&baton);
1166
1167     if (baton.compact_unwind_start == NULL)
1168     {
1169         printf ("could not find __TEXT,__unwind_info section\n");
1170         exit (1);
1171     }
1172
1173
1174     struct unwind_info_section_header header;
1175     memcpy (&header, baton.compact_unwind_start, sizeof (struct unwind_info_section_header));
1176     printf ("Header:\n");
1177     printf ("  version %u\n", header.version);
1178     printf ("  commonEncodingsArraySectionOffset is %d\n", header.commonEncodingsArraySectionOffset);
1179     printf ("  commonEncodingsArrayCount is %d\n", header.commonEncodingsArrayCount);
1180     printf ("  personalityArraySectionOffset is %d\n", header.personalityArraySectionOffset);
1181     printf ("  personalityArrayCount is %d\n", header.personalityArrayCount);
1182     printf ("  indexSectionOffset is %d\n", header.indexSectionOffset);
1183     printf ("  indexCount is %d\n", header.indexCount);
1184
1185     uint8_t *common_encodings = baton.compact_unwind_start + header.commonEncodingsArraySectionOffset;
1186     uint32_t encoding_idx = 0;
1187     while (encoding_idx < header.commonEncodingsArrayCount)
1188     {
1189         uint32_t encoding = *((uint32_t*) common_encodings);
1190         printf ("    Common Encoding [%d]: 0x%x ", encoding_idx, encoding);
1191         print_encoding (baton, NULL, encoding);
1192         printf ("\n");
1193         common_encodings += sizeof (uint32_t);
1194         encoding_idx++;
1195     }
1196
1197     uint8_t *pers_arr = baton.compact_unwind_start + header.personalityArraySectionOffset;
1198     uint32_t pers_idx = 0;
1199     while (pers_idx < header.personalityArrayCount)
1200     {
1201         int32_t pers_delta = *((int32_t*) (baton.compact_unwind_start + header.personalityArraySectionOffset + (pers_idx * sizeof (uint32_t))));
1202         printf ("    Personality [%d]: personality function ptr @ offset %d (file address 0x%" PRIx64 ")\n", pers_idx, pers_delta, baton.text_segment_vmaddr + pers_delta);
1203         pers_idx++;
1204         pers_arr += sizeof (uint32_t);
1205     }
1206
1207     printf ("\n");
1208
1209     baton.unwind_header = header;
1210
1211     print_index_sections (baton);
1212
1213
1214     return 0;
1215 }