1 /*-
2  * SPDX-License-Identifier: BSD-2-Clause-FreeBSD
3  *
4  * Copyright (c) 2017-2018 John H. Baldwin <jhb@FreeBSD.org>
5  *
6  * Redistribution and use in source and binary forms, with or without
7  * modification, are permitted provided that the following conditions
8  * are met:
9  * 1. Redistributions of source code must retain the above copyright
10  *    notice, this list of conditions and the following disclaimer.
11  * 2. Redistributions in binary form must reproduce the above copyright
12  *    notice, this list of conditions and the following disclaimer in the
13  *    documentation and/or other materials provided with the distribution.
14  *
15  * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
16  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
17  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
18  * ARE DISCLAIMED.  IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
19  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
20  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
21  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
22  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
23  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
24  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
25  * SUCH DAMAGE.
26  */
27
28 #include <sys/cdefs.h>
29 __FBSDID("$FreeBSD$");
30
31 #include <sys/param.h>
32 #ifndef WITHOUT_CAPSICUM
33 #include <sys/capsicum.h>
34 #endif
35 #include <sys/endian.h>
36 #include <sys/ioctl.h>
37 #include <sys/mman.h>
38 #include <sys/queue.h>
39 #include <sys/socket.h>
40 #include <machine/atomic.h>
41 #include <machine/specialreg.h>
42 #include <machine/vmm.h>
43 #include <netinet/in.h>
44 #include <assert.h>
45 #ifndef WITHOUT_CAPSICUM
46 #include <capsicum_helpers.h>
47 #endif
48 #include <err.h>
49 #include <errno.h>
50 #include <fcntl.h>
51 #include <pthread.h>
52 #include <pthread_np.h>
53 #include <stdbool.h>
54 #include <stdio.h>
55 #include <stdlib.h>
56 #include <string.h>
57 #include <sysexits.h>
58 #include <unistd.h>
59 #include <vmmapi.h>
60
61 #include "bhyverun.h"
62 #include "gdb.h"
63 #include "mem.h"
64 #include "mevent.h"
65
66 /*
67  * GDB_SIGNAL_* numbers are part of the GDB remote protocol.  Most stops
68  * use SIGTRAP.
69  */
70 #define GDB_SIGNAL_TRAP         5
71
72 static void gdb_resume_vcpus(void);
73 static void check_command(int fd);
74
75 static struct mevent *read_event, *write_event;
76
77 static cpuset_t vcpus_active, vcpus_suspended, vcpus_waiting;
78 static pthread_mutex_t gdb_lock;
79 static pthread_cond_t idle_vcpus;
80 static bool first_stop, report_next_stop, swbreak_enabled;
81
82 /*
83  * An I/O buffer contains 'capacity' bytes of room at 'data'.  For a
84  * read buffer, 'start' is unused and 'len' contains the number of
85  * valid bytes in the buffer.  For a write buffer, 'start' is set to
86  * the index of the next byte in 'data' to send, and 'len' contains
87  * the remaining number of valid bytes to send.
88  */
89 struct io_buffer {
90         uint8_t *data;
91         size_t capacity;
92         size_t start;
93         size_t len;
94 };
95
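/*
 * A software breakpoint replaces one byte of guest memory at 'gpa'
 * with an INT3 (0xCC) instruction; 'shadow_inst' holds the original
 * byte so it can be restored when the breakpoint is removed.
 */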
96 struct breakpoint {
97         uint64_t gpa;
98         uint8_t shadow_inst;
99         TAILQ_ENTRY(breakpoint) link;
100 };
101
102 /*
103  * When a vCPU stops due to an event that should be reported to the
104  * debugger, information about the event is stored in this structure.
105  * The vCPU thread then sets 'stopped_vcpu' if it is not already set
106  * and stops other vCPUs so the event can be reported.  The
107  * report_stop() function reports the event for the 'stopped_vcpu'
108  * vCPU.  When the debugger resumes execution via continue or step,
109  * the event for 'stopped_vcpu' is cleared.  vCPUs will loop in their
110  * event handlers until the associated event is reported or disabled.
111  *
112  * An idle vCPU will have all of the boolean fields set to false.
113  *
114  * When a vCPU is stepped, 'stepping' is set to true when the vCPU is
115  * released to execute the stepped instruction.  When the vCPU reports
116  * the stepping trap, 'stepped' is set.
117  *
118  * When a vCPU hits a breakpoint set by the debug server,
119  * 'hit_swbreak' is set to true.
120  */
121 struct vcpu_state {
122         bool stepping;
123         bool stepped;
124         bool hit_swbreak;
125 };
126
127 static struct io_buffer cur_comm, cur_resp;
128 static uint8_t cur_csum;
129 static struct vmctx *ctx;
130 static int cur_fd = -1;
131 static TAILQ_HEAD(, breakpoint) breakpoints;
132 static struct vcpu_state *vcpu_state;
133 static int cur_vcpu, stopped_vcpu;
134 static bool gdb_active = false;
135
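/*
 * The registers reported in 'g' packet replies, in the order and with
 * the sizes (gdb_regsize below, in bytes) that GDB expects for amd64.
 */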
136 const int gdb_regset[] = {
137         VM_REG_GUEST_RAX,
138         VM_REG_GUEST_RBX,
139         VM_REG_GUEST_RCX,
140         VM_REG_GUEST_RDX,
141         VM_REG_GUEST_RSI,
142         VM_REG_GUEST_RDI,
143         VM_REG_GUEST_RBP,
144         VM_REG_GUEST_RSP,
145         VM_REG_GUEST_R8,
146         VM_REG_GUEST_R9,
147         VM_REG_GUEST_R10,
148         VM_REG_GUEST_R11,
149         VM_REG_GUEST_R12,
150         VM_REG_GUEST_R13,
151         VM_REG_GUEST_R14,
152         VM_REG_GUEST_R15,
153         VM_REG_GUEST_RIP,
154         VM_REG_GUEST_RFLAGS,
155         VM_REG_GUEST_CS,
156         VM_REG_GUEST_SS,
157         VM_REG_GUEST_DS,
158         VM_REG_GUEST_ES,
159         VM_REG_GUEST_FS,
160         VM_REG_GUEST_GS
161 };
162
163 const int gdb_regsize[] = {
164         8,
165         8,
166         8,
167         8,
168         8,
169         8,
170         8,
171         8,
172         8,
173         8,
174         8,
175         8,
176         8,
177         8,
178         8,
179         8,
180         8,
181         4,
182         4,
183         4,
184         4,
185         4,
186         4,
187         4
188 };
189
190 #ifdef GDB_LOG
191 #include <stdarg.h>
192 #include <stdio.h>
193
194 static void __printflike(1, 2)
195 debug(const char *fmt, ...)
196 {
197         static FILE *logfile;
198         va_list ap;
199
200         if (logfile == NULL) {
201                 logfile = fopen("/tmp/bhyve_gdb.log", "w");
202                 if (logfile == NULL)
203                         return;
204 #ifndef WITHOUT_CAPSICUM
205                 if (caph_limit_stream(fileno(logfile), CAPH_WRITE) == -1) {
206                         fclose(logfile);
207                         logfile = NULL;
208                         return;
209                 }
210 #endif
211                 setlinebuf(logfile);
212         }
213         va_start(ap, fmt);
214         vfprintf(logfile, fmt, ap);
215         va_end(ap);
216 }
217 #else
218 #define debug(...)
219 #endif
220
221 static void     remove_all_sw_breakpoints(void);
222
223 static int
224 guest_paging_info(int vcpu, struct vm_guest_paging *paging)
225 {
226         uint64_t regs[4];
227         const int regset[4] = {
228                 VM_REG_GUEST_CR0,
229                 VM_REG_GUEST_CR3,
230                 VM_REG_GUEST_CR4,
231                 VM_REG_GUEST_EFER
232         };
233
234         if (vm_get_register_set(ctx, vcpu, nitems(regset), regset, regs) == -1)
235                 return (-1);
236
237         /*
238          * For the debugger, always pretend to be the kernel (CPL 0),
239          * and if long-mode is enabled, always parse addresses as if
240          * in 64-bit mode.
241          */
242         paging->cr3 = regs[1];
243         paging->cpl = 0;
244         if (regs[3] & EFER_LMA)
245                 paging->cpu_mode = CPU_MODE_64BIT;
246         else if (regs[0] & CR0_PE)
247                 paging->cpu_mode = CPU_MODE_PROTECTED;
248         else
249                 paging->cpu_mode = CPU_MODE_REAL;
250         if (!(regs[0] & CR0_PG))
251                 paging->paging_mode = PAGING_MODE_FLAT;
252         else if (!(regs[2] & CR4_PAE))
253                 paging->paging_mode = PAGING_MODE_32;
254         else if (regs[3] & EFER_LME)
255                 paging->paging_mode = (regs[2] & CR4_LA57) ?
256                     PAGING_MODE_64_LA57 :  PAGING_MODE_64;
257         else
258                 paging->paging_mode = PAGING_MODE_PAE;
259         return (0);
260 }
261
262 /*
263  * Map a guest virtual address to a physical address (for a given vcpu).
264  * If a guest virtual address is valid, return 1.  If the address is
265  * not valid, return 0.  If an error occurs obtaining the mapping,
266  * return -1.
267  */
268 static int
269 guest_vaddr2paddr(int vcpu, uint64_t vaddr, uint64_t *paddr)
270 {
271         struct vm_guest_paging paging;
272         int fault;
273
274         if (guest_paging_info(vcpu, &paging) == -1)
275                 return (-1);
276
277         /*
278          * Always use PROT_READ.  We really care if the VA is
279          * accessible, not if the current vCPU can write.
280          */
281         if (vm_gla2gpa_nofault(ctx, vcpu, &paging, vaddr, PROT_READ, paddr,
282             &fault) == -1)
283                 return (-1);
284         if (fault)
285                 return (0);
286         return (1);
287 }
288
289 static void
290 io_buffer_reset(struct io_buffer *io)
291 {
292
293         io->start = 0;
294         io->len = 0;
295 }
296
297 /* Available room for adding data. */
298 static size_t
299 io_buffer_avail(struct io_buffer *io)
300 {
301
302         return (io->capacity - (io->start + io->len));
303 }
304
305 static uint8_t *
306 io_buffer_head(struct io_buffer *io)
307 {
308
309         return (io->data + io->start);
310 }
311
312 static uint8_t *
313 io_buffer_tail(struct io_buffer *io)
314 {
315
316         return (io->data + io->start + io->len);
317 }
318
319 static void
320 io_buffer_advance(struct io_buffer *io, size_t amount)
321 {
322
323         assert(amount <= io->len);
324         io->start += amount;
325         io->len -= amount;
326 }
327
328 static void
329 io_buffer_consume(struct io_buffer *io, size_t amount)
330 {
331
332         io_buffer_advance(io, amount);
333         if (io->len == 0) {
334                 io->start = 0;
335                 return;
336         }
337
338         /*
339          * XXX: Consider making this move optional and compacting on a
340          * future read() before realloc().
341          */
342         memmove(io->data, io_buffer_head(io), io->len);
343         io->start = 0;
344 }
345
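/*
 * Ensure that at least 'newsize' additional bytes can be appended to
 * the buffer, reallocating the backing store if necessary.
 */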
346 static void
347 io_buffer_grow(struct io_buffer *io, size_t newsize)
348 {
349         uint8_t *new_data;
350         size_t avail, new_cap;
351
352         avail = io_buffer_avail(io);
353         if (newsize <= avail)
354                 return;
355
356         new_cap = io->capacity + (newsize - avail);
357         new_data = realloc(io->data, new_cap);
358         if (new_data == NULL)
359                 err(1, "Failed to grow GDB I/O buffer");
360         io->data = new_data;
361         io->capacity = new_cap;
362 }
363
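/*
 * Returns true if anything other than a bare '+' ack is queued for
 * transmission to the debugger.
 */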
364 static bool
365 response_pending(void)
366 {
367
368         if (cur_resp.start == 0 && cur_resp.len == 0)
369                 return (false);
370         if (cur_resp.start + cur_resp.len == 1 && cur_resp.data[0] == '+')
371                 return (false);
372         return (true);
373 }
374
375 static void
376 close_connection(void)
377 {
378
379         /*
380          * XXX: This triggers a warning because mevent does the close
381          * before the EV_DELETE.
382          */
383         pthread_mutex_lock(&gdb_lock);
384         mevent_delete(write_event);
385         mevent_delete_close(read_event);
386         write_event = NULL;
387         read_event = NULL;
388         io_buffer_reset(&cur_comm);
389         io_buffer_reset(&cur_resp);
390         cur_fd = -1;
391
392         remove_all_sw_breakpoints();
393
394         /* Clear any pending events. */
395         memset(vcpu_state, 0, guest_ncpus * sizeof(*vcpu_state));
396
397         /* Resume any stopped vCPUs. */
398         gdb_resume_vcpus();
399         pthread_mutex_unlock(&gdb_lock);
400 }
401
402 static uint8_t
403 hex_digit(uint8_t nibble)
404 {
405
406         if (nibble <= 9)
407                 return (nibble + '0');
408         else
409                 return (nibble + 'a' - 10);
410 }
411
412 static uint8_t
413 parse_digit(uint8_t v)
414 {
415
416         if (v >= '0' && v <= '9')
417                 return (v - '0');
418         if (v >= 'a' && v <= 'f')
419                 return (v - 'a' + 10);
420         if (v >= 'A' && v <= 'F')
421                 return (v - 'A' + 10);
422         return (0xF);
423 }
424
425 /* Parses big-endian hexadecimal. */
426 static uintmax_t
427 parse_integer(const uint8_t *p, size_t len)
428 {
429         uintmax_t v;
430
431         v = 0;
432         while (len > 0) {
433                 v <<= 4;
434                 v |= parse_digit(*p);
435                 p++;
436                 len--;
437         }
438         return (v);
439 }
440
441 static uint8_t
442 parse_byte(const uint8_t *p)
443 {
444
445         return (parse_digit(p[0]) << 4 | parse_digit(p[1]));
446 }
447
448 static void
449 send_pending_data(int fd)
450 {
451         ssize_t nwritten;
452
453         if (cur_resp.len == 0) {
454                 mevent_disable(write_event);
455                 return;
456         }
457         nwritten = write(fd, io_buffer_head(&cur_resp), cur_resp.len);
458         if (nwritten == -1) {
459                 warn("Write to GDB socket failed");
460                 close_connection();
461         } else {
462                 io_buffer_advance(&cur_resp, nwritten);
463                 if (cur_resp.len == 0)
464                         mevent_disable(write_event);
465                 else
466                         mevent_enable(write_event);
467         }
468 }
469
470 /* Append a single character to the output buffer. */
471 static void
472 send_char(uint8_t data)
473 {
474         io_buffer_grow(&cur_resp, 1);
475         *io_buffer_tail(&cur_resp) = data;
476         cur_resp.len++;
477 }
478
479 /* Append an array of bytes to the output buffer. */
480 static void
481 send_data(const uint8_t *data, size_t len)
482 {
483
484         io_buffer_grow(&cur_resp, len);
485         memcpy(io_buffer_tail(&cur_resp), data, len);
486         cur_resp.len += len;
487 }
488
489 static void
490 format_byte(uint8_t v, uint8_t *buf)
491 {
492
493         buf[0] = hex_digit(v >> 4);
494         buf[1] = hex_digit(v & 0xf);
495 }
496
497 /*
498  * Append a single byte (formatted as two hex characters) to the
499  * output buffer.
500  */
501 static void
502 send_byte(uint8_t v)
503 {
504         uint8_t buf[2];
505
506         format_byte(v, buf);
507         send_data(buf, sizeof(buf));
508 }
509
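/*
 * Packets sent to the debugger are framed as '$'<payload>'#'<checksum>,
 * where <checksum> is two hex digits encoding the modulo-256 sum of the
 * payload bytes.  start_packet()/finish_packet() emit the framing while
 * the append_*() helpers accumulate the payload and running checksum.
 */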
510 static void
511 start_packet(void)
512 {
513
514         send_char('$');
515         cur_csum = 0;
516 }
517
518 static void
519 finish_packet(void)
520 {
521
522         send_char('#');
523         send_byte(cur_csum);
524         debug("-> %.*s\n", (int)cur_resp.len, io_buffer_head(&cur_resp));
525 }
526
527 /*
528  * Append a single character (for the packet payload) and update the
529  * checksum.
530  */
531 static void
532 append_char(uint8_t v)
533 {
534
535         send_char(v);
536         cur_csum += v;
537 }
538
539 /*
540  * Append an array of bytes (for the packet payload) and update the
541  * checksum.
542  */
543 static void
544 append_packet_data(const uint8_t *data, size_t len)
545 {
546
547         send_data(data, len);
548         while (len > 0) {
549                 cur_csum += *data;
550                 data++;
551                 len--;
552         }
553 }
554
555 static void
556 append_string(const char *str)
557 {
558
559         append_packet_data(str, strlen(str));
560 }
561
562 static void
563 append_byte(uint8_t v)
564 {
565         uint8_t buf[2];
566
567         format_byte(v, buf);
568         append_packet_data(buf, sizeof(buf));
569 }
570
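/*
 * Append an integer least-significant byte first (the guest's native
 * x86 byte order), as used for register values.
 */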
571 static void
572 append_unsigned_native(uintmax_t value, size_t len)
573 {
574         size_t i;
575
576         for (i = 0; i < len; i++) {
577                 append_byte(value);
578                 value >>= 8;
579         }
580 }
581
582 static void
583 append_unsigned_be(uintmax_t value, size_t len)
584 {
585         char buf[len * 2];
586         size_t i;
587
588         for (i = 0; i < len; i++) {
589                 format_byte(value, buf + (len - i - 1) * 2);
590                 value >>= 8;
591         }
592         append_packet_data(buf, sizeof(buf));
593 }
594
595 static void
596 append_integer(unsigned int value)
597 {
598
599         if (value == 0)
600                 append_char('0');
601         else
602                 append_unsigned_be(value, (fls(value) + 7) / 8);
603 }
604
605 static void
606 append_asciihex(const char *str)
607 {
608
609         while (*str != '\0') {
610                 append_byte(*str);
611                 str++;
612         }
613 }
614
615 static void
616 send_empty_response(void)
617 {
618
619         start_packet();
620         finish_packet();
621 }
622
623 static void
624 send_error(int error)
625 {
626
627         start_packet();
628         append_char('E');
629         append_byte(error);
630         finish_packet();
631 }
632
633 static void
634 send_ok(void)
635 {
636
637         start_packet();
638         append_string("OK");
639         finish_packet();
640 }
641
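/*
 * Parse a thread ID from the remote protocol, where 0 means "any
 * thread" and -1 means "all threads"; the debug server numbers vCPU N
 * as thread N + 1.  Returns -2 for an empty (malformed) ID.
 */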
642 static int
643 parse_threadid(const uint8_t *data, size_t len)
644 {
645
646         if (len == 1 && *data == '0')
647                 return (0);
648         if (len == 2 && memcmp(data, "-1", 2) == 0)
649                 return (-1);
650         if (len == 0)
651                 return (-2);
652         return (parse_integer(data, len));
653 }
654
655 /*
656  * Report the current stop event to the debugger.  If the stop is due
657  * to an event triggered on a specific vCPU such as a breakpoint or
658  * stepping trap, stopped_vcpu will be set to the vCPU triggering the
659  * stop.  If 'set_cur_vcpu' is true, then cur_vcpu will be updated to
660  * the reporting vCPU for vCPU events.
661  */
662 static void
663 report_stop(bool set_cur_vcpu)
664 {
665         struct vcpu_state *vs;
666
667         start_packet();
668         if (stopped_vcpu == -1) {
669                 append_char('S');
670                 append_byte(GDB_SIGNAL_TRAP);
671         } else {
672                 vs = &vcpu_state[stopped_vcpu];
673                 if (set_cur_vcpu)
674                         cur_vcpu = stopped_vcpu;
675                 append_char('T');
676                 append_byte(GDB_SIGNAL_TRAP);
677                 append_string("thread:");
678                 append_integer(stopped_vcpu + 1);
679                 append_char(';');
680                 if (vs->hit_swbreak) {
681                         debug("$vCPU %d reporting swbreak\n", stopped_vcpu);
682                         if (swbreak_enabled)
683                                 append_string("swbreak:;");
684                 } else if (vs->stepped)
685                         debug("$vCPU %d reporting step\n", stopped_vcpu);
686                 else
687                         debug("$vCPU %d reporting ???\n", stopped_vcpu);
688         }
689         finish_packet();
690         report_next_stop = false;
691 }
692
693 /*
694  * If this stop is due to a vCPU event, clear that event to mark it as
695  * acknowledged.
696  */
697 static void
698 discard_stop(void)
699 {
700         struct vcpu_state *vs;
701
702         if (stopped_vcpu != -1) {
703                 vs = &vcpu_state[stopped_vcpu];
704                 vs->hit_swbreak = false;
705                 vs->stepped = false;
706                 stopped_vcpu = -1;
707         }
708         report_next_stop = true;
709 }
710
711 static void
712 gdb_finish_suspend_vcpus(void)
713 {
714
715         if (first_stop) {
716                 first_stop = false;
717                 stopped_vcpu = -1;
718         } else if (report_next_stop) {
719                 assert(!response_pending());
720                 report_stop(true);
721                 send_pending_data(cur_fd);
722         }
723 }
724
725 /*
726  * vCPU threads invoke this function whenever the vCPU enters the
727  * debug server to pause or report an event.  vCPU threads wait here
728  * as long as the debug server keeps them suspended.
729  */
730 static void
731 _gdb_cpu_suspend(int vcpu, bool report_stop)
732 {
733
734         if (!gdb_active)
735                 return;
736         debug("$vCPU %d suspending\n", vcpu);
737         CPU_SET(vcpu, &vcpus_waiting);
738         if (report_stop && CPU_CMP(&vcpus_waiting, &vcpus_suspended) == 0)
739                 gdb_finish_suspend_vcpus();
740         while (CPU_ISSET(vcpu, &vcpus_suspended))
741                 pthread_cond_wait(&idle_vcpus, &gdb_lock);
742         CPU_CLR(vcpu, &vcpus_waiting);
743         debug("$vCPU %d resuming\n", vcpu);
744 }
745
746 /*
747  * Invoked at the start of a vCPU thread's execution to inform the
748  * debug server about the new thread.
749  */
750 void
751 gdb_cpu_add(int vcpu)
752 {
753
754         if (!gdb_active)
755                 return;
756         debug("$vCPU %d starting\n", vcpu);
757         pthread_mutex_lock(&gdb_lock);
758         assert(vcpu < guest_ncpus);
759         CPU_SET(vcpu, &vcpus_active);
760         if (!TAILQ_EMPTY(&breakpoints)) {
761                 vm_set_capability(ctx, vcpu, VM_CAP_BPT_EXIT, 1);
762                 debug("$vCPU %d enabled breakpoint exits\n", vcpu);
763         }
764
765         /*
766          * If a vcpu is added while vcpus are stopped, suspend the new
767          * vcpu so that it will pop back out with a debug exit before
768          * executing the first instruction.
769          */
770         if (!CPU_EMPTY(&vcpus_suspended)) {
771                 CPU_SET(vcpu, &vcpus_suspended);
772                 _gdb_cpu_suspend(vcpu, false);
773         }
774         pthread_mutex_unlock(&gdb_lock);
775 }
776
777 /*
778  * Invoked by a vCPU thread before resuming execution.  This enables stepping
779  * if the vCPU is marked as stepping.
780  */
781 static void
782 gdb_cpu_resume(int vcpu)
783 {
784         struct vcpu_state *vs;
785         int error;
786
787         vs = &vcpu_state[vcpu];
788
789         /*
790          * Any pending event should already be reported before
791          * resuming.
792          */
793         assert(vs->hit_swbreak == false);
794         assert(vs->stepped == false);
795         if (vs->stepping) {
796                 error = vm_set_capability(ctx, vcpu, VM_CAP_MTRAP_EXIT, 1);
797                 assert(error == 0);
798         }
799 }
800
801 /*
802  * Handler for VM_EXITCODE_DEBUG used to suspend a vCPU when the guest
803  * has been suspended due to an event on a different vCPU or in response
804  * to a guest-wide suspend such as Ctrl-C or the stop on attach.
805  */
806 void
807 gdb_cpu_suspend(int vcpu)
808 {
809
810         pthread_mutex_lock(&gdb_lock);
811         _gdb_cpu_suspend(vcpu, true);
812         gdb_cpu_resume(vcpu);
813         pthread_mutex_unlock(&gdb_lock);
814 }
815
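/*
 * Force all active vCPUs out of the guest so that they park in
 * _gdb_cpu_suspend().  Once every active vCPU is waiting, the pending
 * stop is reported to the debugger.
 */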
816 static void
817 gdb_suspend_vcpus(void)
818 {
819
820         assert(pthread_mutex_isowned_np(&gdb_lock));
821         debug("suspending all CPUs\n");
822         vcpus_suspended = vcpus_active;
823         vm_suspend_cpu(ctx, -1);
824         if (CPU_CMP(&vcpus_waiting, &vcpus_suspended) == 0)
825                 gdb_finish_suspend_vcpus();
826 }
827
828 /*
829  * Handler for VM_EXITCODE_MTRAP reported when a vCPU single-steps via
830  * the VT-x-specific MTRAP exit.
831  */
832 void
833 gdb_cpu_mtrap(int vcpu)
834 {
835         struct vcpu_state *vs;
836
837         if (!gdb_active)
838                 return;
839         debug("$vCPU %d MTRAP\n", vcpu);
840         pthread_mutex_lock(&gdb_lock);
841         vs = &vcpu_state[vcpu];
842         if (vs->stepping) {
843                 vs->stepping = false;
844                 vs->stepped = true;
845                 vm_set_capability(ctx, vcpu, VM_CAP_MTRAP_EXIT, 0);
846                 while (vs->stepped) {
847                         if (stopped_vcpu == -1) {
848                                 debug("$vCPU %d reporting step\n", vcpu);
849                                 stopped_vcpu = vcpu;
850                                 gdb_suspend_vcpus();
851                         }
852                         _gdb_cpu_suspend(vcpu, true);
853                 }
854                 gdb_cpu_resume(vcpu);
855         }
856         pthread_mutex_unlock(&gdb_lock);
857 }
858
859 static struct breakpoint *
860 find_breakpoint(uint64_t gpa)
861 {
862         struct breakpoint *bp;
863
864         TAILQ_FOREACH(bp, &breakpoints, link) {
865                 if (bp->gpa == gpa)
866                         return (bp);
867         }
868         return (NULL);
869 }
870
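/*
 * Handler for VM_EXITCODE_BPT reported when a vCPU executes a
 * breakpoint instruction.  If the breakpoint was planted by the debug
 * server, the stop is reported to the debugger; otherwise the #BP
 * exception is re-injected into the guest.
 */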
871 void
872 gdb_cpu_breakpoint(int vcpu, struct vm_exit *vmexit)
873 {
874         struct breakpoint *bp;
875         struct vcpu_state *vs;
876         uint64_t gpa;
877         int error;
878
879         if (!gdb_active) {
880                 fprintf(stderr, "vm_loop: unexpected VMEXIT_DEBUG\n");
881                 exit(4);
882         }
883         pthread_mutex_lock(&gdb_lock);
884         error = guest_vaddr2paddr(vcpu, vmexit->rip, &gpa);
885         assert(error == 1);
886         bp = find_breakpoint(gpa);
887         if (bp != NULL) {
888                 vs = &vcpu_state[vcpu];
889                 assert(vs->stepping == false);
890                 assert(vs->stepped == false);
891                 assert(vs->hit_swbreak == false);
892                 vs->hit_swbreak = true;
893                 vm_set_register(ctx, vcpu, VM_REG_GUEST_RIP, vmexit->rip);
894                 for (;;) {
895                         if (stopped_vcpu == -1) {
896                                 debug("$vCPU %d reporting breakpoint at rip %#lx\n", vcpu,
897                                     vmexit->rip);
898                                 stopped_vcpu = vcpu;
899                                 gdb_suspend_vcpus();
900                         }
901                         _gdb_cpu_suspend(vcpu, true);
902                         if (!vs->hit_swbreak) {
903                                 /* Breakpoint reported. */
904                                 break;
905                         }
906                         bp = find_breakpoint(gpa);
907                         if (bp == NULL) {
908                                 /* Breakpoint was removed. */
909                                 vs->hit_swbreak = false;
910                                 break;
911                         }
912                 }
913                 gdb_cpu_resume(vcpu);
914         } else {
915                 debug("$vCPU %d injecting breakpoint at rip %#lx\n", vcpu,
916                     vmexit->rip);
917                 error = vm_set_register(ctx, vcpu,
918                     VM_REG_GUEST_ENTRY_INST_LENGTH, vmexit->u.bpt.inst_length);
919                 assert(error == 0);
920                 error = vm_inject_exception(ctx, vcpu, IDT_BP, 0, 0, 0);
921                 assert(error == 0);
922         }
923         pthread_mutex_unlock(&gdb_lock);
924 }
925
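/*
 * Release a single vCPU to execute one instruction using the MTRAP
 * exit capability.  Returns false if single stepping is not supported.
 */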
926 static bool
927 gdb_step_vcpu(int vcpu)
928 {
929         int error, val;
930
931         debug("$vCPU %d step\n", vcpu);
932         error = vm_get_capability(ctx, vcpu, VM_CAP_MTRAP_EXIT, &val);
933         if (error < 0)
934                 return (false);
935
936         discard_stop();
937         vcpu_state[vcpu].stepping = true;
938         vm_resume_cpu(ctx, vcpu);
939         CPU_CLR(vcpu, &vcpus_suspended);
940         pthread_cond_broadcast(&idle_vcpus);
941         return (true);
942 }
943
944 static void
945 gdb_resume_vcpus(void)
946 {
947
948         assert(pthread_mutex_isowned_np(&gdb_lock));
949         vm_resume_cpu(ctx, -1);
950         debug("resuming all CPUs\n");
951         CPU_ZERO(&vcpus_suspended);
952         pthread_cond_broadcast(&idle_vcpus);
953 }
954
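/*
 * Handle the 'g' packet: reply with the value of every register in
 * gdb_regset, concatenated in order and hex-encoded in target byte
 * order.
 */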
955 static void
956 gdb_read_regs(void)
957 {
958         uint64_t regvals[nitems(gdb_regset)];
959         int i;
960
961         if (vm_get_register_set(ctx, cur_vcpu, nitems(gdb_regset),
962             gdb_regset, regvals) == -1) {
963                 send_error(errno);
964                 return;
965         }
966         start_packet();
967         for (i = 0; i < nitems(regvals); i++)
968                 append_unsigned_native(regvals[i], gdb_regsize[i]);
969         finish_packet();
970 }
971
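/*
 * Handle an 'm <addr>,<length>' packet: read guest memory at the given
 * virtual address and reply with the data hex-encoded.  If only a
 * prefix of the range is readable, a partial reply is sent.
 */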
972 static void
973 gdb_read_mem(const uint8_t *data, size_t len)
974 {
975         uint64_t gpa, gva, val;
976         uint8_t *cp;
977         size_t resid, todo, bytes;
978         bool started;
979         int error;
980
981         /* Skip 'm' */
982         data += 1;
983         len -= 1;
984
985         /* Parse and consume address. */
986         cp = memchr(data, ',', len);
987         if (cp == NULL || cp == data) {
988                 send_error(EINVAL);
989                 return;
990         }
991         gva = parse_integer(data, cp - data);
992         len -= (cp - data) + 1;
993         data += (cp - data) + 1;
994
995         /* Parse length. */
996         resid = parse_integer(data, len);
997
998         started = false;
999         while (resid > 0) {
1000                 error = guest_vaddr2paddr(cur_vcpu, gva, &gpa);
1001                 if (error == -1) {
1002                         if (started)
1003                                 finish_packet();
1004                         else
1005                                 send_error(errno);
1006                         return;
1007                 }
1008                 if (error == 0) {
1009                         if (started)
1010                                 finish_packet();
1011                         else
1012                                 send_error(EFAULT);
1013                         return;
1014                 }
1015
1016                 /* Read bytes from current page. */
1017                 todo = getpagesize() - gpa % getpagesize();
1018                 if (todo > resid)
1019                         todo = resid;
1020
1021                 cp = paddr_guest2host(ctx, gpa, todo);
1022                 if (cp != NULL) {
1023                         /*
1024                          * If this page is guest RAM, read it a byte
1025                          * at a time.
1026                          */
1027                         if (!started) {
1028                                 start_packet();
1029                                 started = true;
1030                         }
1031                         while (todo > 0) {
1032                                 append_byte(*cp);
1033                                 cp++;
1034                                 gpa++;
1035                                 gva++;
1036                                 resid--;
1037                                 todo--;
1038                         }
1039                 } else {
1040                         /*
1041                          * If this page isn't guest RAM, try to handle
1042                          * it via MMIO.  For MMIO requests, use
1043                          * aligned reads of words when possible.
1044                          */
1045                         while (todo > 0) {
1046                                 if (gpa & 1 || todo == 1)
1047                                         bytes = 1;
1048                                 else if (gpa & 2 || todo == 2)
1049                                         bytes = 2;
1050                                 else
1051                                         bytes = 4;
1052                                 error = read_mem(ctx, cur_vcpu, gpa, &val,
1053                                     bytes);
1054                                 if (error == 0) {
1055                                         if (!started) {
1056                                                 start_packet();
1057                                                 started = true;
1058                                         }
1059                                         gpa += bytes;
1060                                         gva += bytes;
1061                                         resid -= bytes;
1062                                         todo -= bytes;
1063                                         while (bytes > 0) {
1064                                                 append_byte(val);
1065                                                 val >>= 8;
1066                                                 bytes--;
1067                                         }
1068                                 } else {
1069                                         if (started)
1070                                                 finish_packet();
1071                                         else
1072                                                 send_error(EFAULT);
1073                                         return;
1074                                 }
1075                         }
1076                 }
1077                 assert(resid == 0 || gpa % getpagesize() == 0);
1078         }
1079         if (!started)
1080                 start_packet();
1081         finish_packet();
1082 }
1083
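/*
 * Handle an 'M <addr>,<length>:<bytes>' packet: write the hex-encoded
 * bytes to guest memory at the given virtual address and reply "OK".
 */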
1084 static void
1085 gdb_write_mem(const uint8_t *data, size_t len)
1086 {
1087         uint64_t gpa, gva, val;
1088         uint8_t *cp;
1089         size_t resid, todo, bytes;
1090         int error;
1091
1092         /* Skip 'M' */
1093         data += 1;
1094         len -= 1;
1095
1096         /* Parse and consume address. */
1097         cp = memchr(data, ',', len);
1098         if (cp == NULL || cp == data) {
1099                 send_error(EINVAL);
1100                 return;
1101         }
1102         gva = parse_integer(data, cp - data);
1103         len -= (cp - data) + 1;
1104         data += (cp - data) + 1;
1105
1106         /* Parse and consume length. */
1107         cp = memchr(data, ':', len);
1108         if (cp == NULL || cp == data) {
1109                 send_error(EINVAL);
1110                 return;
1111         }
1112         resid = parse_integer(data, cp - data);
1113         len -= (cp - data) + 1;
1114         data += (cp - data) + 1;
1115
1116         /* Verify the available bytes match the length. */
1117         if (len != resid * 2) {
1118                 send_error(EINVAL);
1119                 return;
1120         }
1121
1122         while (resid > 0) {
1123                 error = guest_vaddr2paddr(cur_vcpu, gva, &gpa);
1124                 if (error == -1) {
1125                         send_error(errno);
1126                         return;
1127                 }
1128                 if (error == 0) {
1129                         send_error(EFAULT);
1130                         return;
1131                 }
1132
1133                 /* Write bytes to current page. */
1134                 todo = getpagesize() - gpa % getpagesize();
1135                 if (todo > resid)
1136                         todo = resid;
1137
1138                 cp = paddr_guest2host(ctx, gpa, todo);
1139                 if (cp != NULL) {
1140                         /*
1141                          * If this page is guest RAM, write it a byte
1142                          * at a time.
1143                          */
1144                         while (todo > 0) {
1145                                 assert(len >= 2);
1146                                 *cp = parse_byte(data);
1147                                 data += 2;
1148                                 len -= 2;
1149                                 cp++;
1150                                 gpa++;
1151                                 gva++;
1152                                 resid--;
1153                                 todo--;
1154                         }
1155                 } else {
1156                         /*
1157                          * If this page isn't guest RAM, try to handle
1158                          * it via MMIO.  For MMIO requests, use
1159                          * aligned writes of words when possible.
1160                          */
1161                         while (todo > 0) {
1162                                 if (gpa & 1 || todo == 1) {
1163                                         bytes = 1;
1164                                         val = parse_byte(data);
1165                                 } else if (gpa & 2 || todo == 2) {
1166                                         bytes = 2;
1167                                         val = be16toh(parse_integer(data, 4));
1168                                 } else {
1169                                         bytes = 4;
1170                                         val = be32toh(parse_integer(data, 8));
1171                                 }
1172                                 error = write_mem(ctx, cur_vcpu, gpa, val,
1173                                     bytes);
1174                                 if (error == 0) {
1175                                         gpa += bytes;
1176                                         gva += bytes;
1177                                         resid -= bytes;
1178                                         todo -= bytes;
1179                                         data += 2 * bytes;
1180                                         len -= 2 * bytes;
1181                                 } else {
1182                                         send_error(EFAULT);
1183                                         return;
1184                                 }
1185                         }
1186                 }
1187                 assert(resid == 0 || gpa % getpagesize() == 0);
1188         }
1189         assert(len == 0);
1190         send_ok();
1191 }
1192
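/*
 * Toggle VM_CAP_BPT_EXIT on every active vCPU so that guest breakpoint
 * exceptions cause an exit to the debug server.
 */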
1193 static bool
1194 set_breakpoint_caps(bool enable)
1195 {
1196         cpuset_t mask;
1197         int vcpu;
1198
1199         mask = vcpus_active;
1200         while (!CPU_EMPTY(&mask)) {
1201                 vcpu = CPU_FFS(&mask) - 1;
1202                 CPU_CLR(vcpu, &mask);
1203                 if (vm_set_capability(ctx, vcpu, VM_CAP_BPT_EXIT,
1204                     enable ? 1 : 0) < 0)
1205                         return (false);
1206                 debug("$vCPU %d %sabled breakpoint exits\n", vcpu,
1207                     enable ? "en" : "dis");
1208         }
1209         return (true);
1210 }
1211
1212 static void
1213 remove_all_sw_breakpoints(void)
1214 {
1215         struct breakpoint *bp, *nbp;
1216         uint8_t *cp;
1217
1218         if (TAILQ_EMPTY(&breakpoints))
1219                 return;
1220
1221         TAILQ_FOREACH_SAFE(bp, &breakpoints, link, nbp) {
1222                 debug("remove breakpoint at %#lx\n", bp->gpa);
1223                 cp = paddr_guest2host(ctx, bp->gpa, 1);
1224                 *cp = bp->shadow_inst;
1225                 TAILQ_REMOVE(&breakpoints, bp, link);
1226                 free(bp);
1227         }
1228         TAILQ_INIT(&breakpoints);
1229         set_breakpoint_caps(false);
1230 }
1231
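/*
 * Insert or remove a software breakpoint at guest virtual address
 * 'gva'.  'kind' is the breakpoint length in bytes; only 1 (a one-byte
 * INT3) is supported, and breakpoints may only be placed in guest RAM.
 */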
1232 static void
1233 update_sw_breakpoint(uint64_t gva, int kind, bool insert)
1234 {
1235         struct breakpoint *bp;
1236         uint64_t gpa;
1237         uint8_t *cp;
1238         int error;
1239
1240         if (kind != 1) {
1241                 send_error(EINVAL);
1242                 return;
1243         }
1244
1245         error = guest_vaddr2paddr(cur_vcpu, gva, &gpa);
1246         if (error == -1) {
1247                 send_error(errno);
1248                 return;
1249         }
1250         if (error == 0) {
1251                 send_error(EFAULT);
1252                 return;
1253         }
1254
1255         cp = paddr_guest2host(ctx, gpa, 1);
1256
1257         /* Only permit breakpoints in guest RAM. */
1258         if (cp == NULL) {
1259                 send_error(EFAULT);
1260                 return;
1261         }
1262
1263         /* Find any existing breakpoint. */
1264         bp = find_breakpoint(gpa);
1265
1266         /*
1267          * Silently ignore duplicate commands since the protocol
1268          * requires these packets to be idempotent.
1269          */
1270         if (insert) {
1271                 if (bp == NULL) {
1272                         if (TAILQ_EMPTY(&breakpoints) &&
1273                             !set_breakpoint_caps(true)) {
1274                                 send_empty_response();
1275                                 return;
1276                         }
1277                         bp = malloc(sizeof(*bp));
1278                         bp->gpa = gpa;
1279                         bp->shadow_inst = *cp;
1280                         *cp = 0xcc;     /* INT 3 */
1281                         TAILQ_INSERT_TAIL(&breakpoints, bp, link);
1282                         debug("new breakpoint at %#lx\n", gpa);
1283                 }
1284         } else {
1285                 if (bp != NULL) {
1286                         debug("remove breakpoint at %#lx\n", gpa);
1287                         *cp = bp->shadow_inst;
1288                         TAILQ_REMOVE(&breakpoints, bp, link);
1289                         free(bp);
1290                         if (TAILQ_EMPTY(&breakpoints))
1291                                 set_breakpoint_caps(false);
1292                 }
1293         }
1294         send_ok();
1295 }
1296
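/*
 * Handle 'Z<type>,<addr>,<kind>' (insert) and 'z<type>,<addr>,<kind>'
 * (remove) packets.  Only type 0 (software breakpoint) is supported;
 * other breakpoint types get an empty response.
 */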
1297 static void
1298 parse_breakpoint(const uint8_t *data, size_t len)
1299 {
1300         uint64_t gva;
1301         uint8_t *cp;
1302         bool insert;
1303         int kind, type;
1304
1305         insert = data[0] == 'Z';
1306
1307         /* Skip 'Z/z' */
1308         data += 1;
1309         len -= 1;
1310
1311         /* Parse and consume type. */
1312         cp = memchr(data, ',', len);
1313         if (cp == NULL || cp == data) {
1314                 send_error(EINVAL);
1315                 return;
1316         }
1317         type = parse_integer(data, cp - data);
1318         len -= (cp - data) + 1;
1319         data += (cp - data) + 1;
1320
1321         /* Parse and consume address. */
1322         cp = memchr(data, ',', len);
1323         if (cp == NULL || cp == data) {
1324                 send_error(EINVAL);
1325                 return;
1326         }
1327         gva = parse_integer(data, cp - data);
1328         len -= (cp - data) + 1;
1329         data += (cp - data) + 1;
1330
1331         /* Parse and consume kind. */
1332         cp = memchr(data, ';', len);
1333         if (cp == data) {
1334                 send_error(EINVAL);
1335                 return;
1336         }
1337         if (cp != NULL) {
1338                 /*
1339                  * We do not advertise support for either the
1340                  * ConditionalBreakpoints or BreakpointCommands
1341                  * features, so we should not be getting conditions or
1342                  * commands from the remote end.
1343                  */
1344                 send_empty_response();
1345                 return;
1346         }
1347         kind = parse_integer(data, len);
1348         data += len;
1349         len = 0;
1350
1351         switch (type) {
1352         case 0:
1353                 update_sw_breakpoint(gva, kind, insert);
1354                 break;
1355         default:
1356                 send_empty_response();
1357                 break;
1358         }
1359 }
1360
1361 static bool
1362 command_equals(const uint8_t *data, size_t len, const char *cmd)
1363 {
1364
1365         if (strlen(cmd) > len)
1366                 return (false);
1367         return (memcmp(data, cmd, strlen(cmd)) == 0);
1368 }
1369
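/*
 * Parse the feature list of a 'qSupported' packet (a ';'-separated
 * list of names suffixed with '+', '-', or '=value') and reply with
 * the features the debug server supports.
 */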
1370 static void
1371 check_features(const uint8_t *data, size_t len)
1372 {
1373         char *feature, *next_feature, *str, *value;
1374         bool supported;
1375
1376         str = malloc(len + 1);
1377         memcpy(str, data, len);
1378         str[len] = '\0';
1379         next_feature = str;
1380
1381         while ((feature = strsep(&next_feature, ";")) != NULL) {
1382                 /*
1383                  * Null features shouldn't exist, but skip if they
1384                  * do.
1385                  */
1386                 if (strcmp(feature, "") == 0)
1387                         continue;
1388
1389                 /*
1390                  * Look for the value or supported / not supported
1391                  * flag.
1392                  */
1393                 value = strchr(feature, '=');
1394                 if (value != NULL) {
1395                         *value = '\0';
1396                         value++;
1397                         supported = true;
1398                 } else {
1399                         value = feature + strlen(feature) - 1;
1400                         switch (*value) {
1401                         case '+':
1402                                 supported = true;
1403                                 break;
1404                         case '-':
1405                                 supported = false;
1406                                 break;
1407                         default:
1408                                 /*
1409                                  * This is really a protocol error,
1410                                  * but we just ignore malformed
1411                                  * features for ease of
1412                                  * implementation.
1413                                  */
1414                                 continue;
1415                         }
1416                         value = NULL;
1417                 }
1418
1419                 if (strcmp(feature, "swbreak") == 0)
1420                         swbreak_enabled = supported;
1421         }
1422         free(str);
1423
1424         start_packet();
1425
1426         /* This is an arbitrary limit. */
1427         append_string("PacketSize=4096");
1428         append_string(";swbreak+");
1429         finish_packet();
1430 }
1431
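/*
 * Handle 'q' (general query) packets.  Supported queries are
 * qAttached, qC, qfThreadInfo, qsThreadInfo, qSupported, and
 * qThreadExtraInfo; anything else gets an empty response.
 */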
1432 static void
1433 gdb_query(const uint8_t *data, size_t len)
1434 {
1435
1436         /*
1437          * TODO:
1438          * - qSearch
1439          */
1440         if (command_equals(data, len, "qAttached")) {
1441                 start_packet();
1442                 append_char('1');
1443                 finish_packet();
1444         } else if (command_equals(data, len, "qC")) {
1445                 start_packet();
1446                 append_string("QC");
1447                 append_integer(cur_vcpu + 1);
1448                 finish_packet();
1449         } else if (command_equals(data, len, "qfThreadInfo")) {
1450                 cpuset_t mask;
1451                 bool first;
1452                 int vcpu;
1453
1454                 if (CPU_EMPTY(&vcpus_active)) {
1455                         send_error(EINVAL);
1456                         return;
1457                 }
1458                 mask = vcpus_active;
1459                 start_packet();
1460                 append_char('m');
1461                 first = true;
1462                 while (!CPU_EMPTY(&mask)) {
1463                         vcpu = CPU_FFS(&mask) - 1;
1464                         CPU_CLR(vcpu, &mask);
1465                         if (first)
1466                                 first = false;
1467                         else
1468                                 append_char(',');
1469                         append_integer(vcpu + 1);
1470                 }
1471                 finish_packet();
1472         } else if (command_equals(data, len, "qsThreadInfo")) {
1473                 start_packet();
1474                 append_char('l');
1475                 finish_packet();
1476         } else if (command_equals(data, len, "qSupported")) {
1477                 data += strlen("qSupported");
1478                 len -= strlen("qSupported");
1479                 check_features(data, len);
1480         } else if (command_equals(data, len, "qThreadExtraInfo")) {
1481                 char buf[16];
1482                 int tid;
1483
1484                 data += strlen("qThreadExtraInfo");
1485                 len -= strlen("qThreadExtraInfo");
1486                 if (*data != ',') {
1487                         send_error(EINVAL);
1488                         return;
1489                 }
1490                 tid = parse_threadid(data + 1, len - 1);
1491                 if (tid <= 0 || !CPU_ISSET(tid - 1, &vcpus_active)) {
1492                         send_error(EINVAL);
1493                         return;
1494                 }
1495
1496                 snprintf(buf, sizeof(buf), "vCPU %d", tid - 1);
1497                 start_packet();
1498                 append_asciihex(buf);
1499                 finish_packet();
1500         } else
1501                 send_empty_response();
1502 }
1503
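/*
 * Dispatch a single checksum-verified packet.  'data' points at the
 * first payload byte (the character after '$') and 'len' is the
 * payload length, excluding the trailing '#' and checksum.
 */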
1504 static void
1505 handle_command(const uint8_t *data, size_t len)
1506 {
1507
1508         /* Reject packets with a sequence-id. */
1509         if (len >= 3 && data[0] >= '0' && data[0] <= '9' &&
1510             data[1] >= '0' && data[1] <= '9' && data[2] == ':') {
1511                 send_empty_response();
1512                 return;
1513         }
1514
1515         switch (*data) {
1516         case 'c':
1517                 if (len != 1) {
1518                         send_error(EINVAL);
1519                         break;
1520                 }
1521
1522                 discard_stop();
1523                 gdb_resume_vcpus();
1524                 break;
1525         case 'D':
1526                 send_ok();
1527
1528                 /* TODO: Resume any stopped CPUs. */
1529                 break;
1530         case 'g': {
1531                 gdb_read_regs();
1532                 break;
1533         }
1534         case 'H': {
1535                 int tid;
1536
1537                 if (data[1] != 'g' && data[1] != 'c') {
1538                         send_error(EINVAL);
1539                         break;
1540                 }
1541                 tid = parse_threadid(data + 2, len - 2);
1542                 if (tid == -2) {
1543                         send_error(EINVAL);
1544                         break;
1545                 }
1546
1547                 if (CPU_EMPTY(&vcpus_active)) {
1548                         send_error(EINVAL);
1549                         break;
1550                 }
1551                 if (tid == -1 || tid == 0)
1552                         cur_vcpu = CPU_FFS(&vcpus_active) - 1;
1553                 else if (CPU_ISSET(tid - 1, &vcpus_active))
1554                         cur_vcpu = tid - 1;
1555                 else {
1556                         send_error(EINVAL);
1557                         break;
1558                 }
1559                 send_ok();
1560                 break;
1561         }
1562         case 'm':
1563                 gdb_read_mem(data, len);
1564                 break;
1565         case 'M':
1566                 gdb_write_mem(data, len);
1567                 break;
1568         case 'T': {
1569                 int tid;
1570
1571                 tid = parse_threadid(data + 1, len - 1);
1572                 if (tid <= 0 || !CPU_ISSET(tid - 1, &vcpus_active)) {
1573                         send_error(EINVAL);
1574                         return;
1575                 }
1576                 send_ok();
1577                 break;
1578         }
1579         case 'q':
1580                 gdb_query(data, len);
1581                 break;
1582         case 's':
1583                 if (len != 1) {
1584                         send_error(EINVAL);
1585                         break;
1586                 }
1587
1588                 /* Don't send a reply until a stop occurs. */
1589                 if (!gdb_step_vcpu(cur_vcpu)) {
1590                         send_error(EOPNOTSUPP);
1591                         break;
1592                 }
1593                 break;
1594         case 'z':
1595         case 'Z':
1596                 parse_breakpoint(data, len);
1597                 break;
1598         case '?':
1599                 report_stop(false);
1600                 break;
1601         case 'G': /* TODO */
1602         case 'v':
1603                 /* Handle 'vCont' */
1604                 /* 'vCtrlC' */
1605         case 'p': /* TODO */
1606         case 'P': /* TODO */
1607         case 'Q': /* TODO */
1608         case 't': /* TODO */
1609         case 'X': /* TODO */
1610         default:
1611                 send_empty_response();
1612         }
1613 }
1614
1615 /* Check for a valid packet in the command buffer. */
1616 static void
1617 check_command(int fd)
1618 {
1619         uint8_t *head, *hash, *p, sum;
1620         size_t avail, plen;
1621
1622         for (;;) {
1623                 avail = cur_comm.len;
1624                 if (avail == 0)
1625                         return;
1626                 head = io_buffer_head(&cur_comm);
1627                 switch (*head) {
1628                 case 0x03:
1629                         debug("<- Ctrl-C\n");
1630                         io_buffer_consume(&cur_comm, 1);
1631
1632                         gdb_suspend_vcpus();
1633                         break;
1634                 case '+':
1635                         /* ACK of previous response. */
1636                         debug("<- +\n");
1637                         if (response_pending())
1638                                 io_buffer_reset(&cur_resp);
1639                         io_buffer_consume(&cur_comm, 1);
1640                         if (stopped_vcpu != -1 && report_next_stop) {
1641                                 report_stop(true);
1642                                 send_pending_data(fd);
1643                         }
1644                         break;
1645                 case '-':
1646                         /* NACK of previous response. */
1647                         debug("<- -\n");
1648                         if (response_pending()) {
1649                                 cur_resp.len += cur_resp.start;
1650                                 cur_resp.start = 0;
1651                                 if (cur_resp.data[0] == '+')
1652                                         io_buffer_advance(&cur_resp, 1);
1653                                 debug("-> %.*s\n", (int)cur_resp.len,
1654                                     io_buffer_head(&cur_resp));
1655                         }
1656                         io_buffer_consume(&cur_comm, 1);
1657                         send_pending_data(fd);
1658                         break;
1659                 case '$':
1660                         /* Packet. */
1661
1662                         if (response_pending()) {
1663                                 warnx("New GDB command while response in "
1664                                     "progress");
1665                                 io_buffer_reset(&cur_resp);
1666                         }
1667
1668                         /* Is packet complete? */
1669                         hash = memchr(head, '#', avail);
1670                         if (hash == NULL)
1671                                 return;
1672                         plen = (hash - head + 1) + 2;
1673                         if (avail < plen)
1674                                 return;
1675                         debug("<- %.*s\n", (int)plen, head);
1676
1677                         /* Verify checksum. */
1678                         for (sum = 0, p = head + 1; p < hash; p++)
1679                                 sum += *p;
1680                         if (sum != parse_byte(hash + 1)) {
1681                                 io_buffer_consume(&cur_comm, plen);
1682                                 debug("-> -\n");
1683                                 send_char('-');
1684                                 send_pending_data(fd);
1685                                 break;
1686                         }
1687                         send_char('+');
1688
1689                         handle_command(head + 1, hash - (head + 1));
1690                         io_buffer_consume(&cur_comm, plen);
1691                         if (!response_pending())
1692                                 debug("-> +\n");
1693                         send_pending_data(fd);
1694                         break;
1695                 default:
1696                         /* XXX: Possibly drop connection instead. */
1697                         debug("-> %02x\n", *head);
1698                         io_buffer_consume(&cur_comm, 1);
1699                         break;
1700                 }
1701         }
1702 }
1703
1704 static void
1705 gdb_readable(int fd, enum ev_type event, void *arg)
1706 {
1707         ssize_t nread;
1708         int pending;
1709
1710         if (ioctl(fd, FIONREAD, &pending) == -1) {
1711                 warn("FIONREAD on GDB socket");
1712                 return;
1713         }
1714
1715         /*
1716          * 'pending' might be zero due to EOF.  We need to call read
1717          * with a non-zero length to detect EOF.
1718          */
1719         if (pending == 0)
1720                 pending = 1;
1721
1722         /* Ensure there is room in the command buffer. */
1723         io_buffer_grow(&cur_comm, pending);
1724         assert(io_buffer_avail(&cur_comm) >= pending);
1725
1726         nread = read(fd, io_buffer_tail(&cur_comm), io_buffer_avail(&cur_comm));
1727         if (nread == 0) {
1728                 close_connection();
1729         } else if (nread == -1) {
1730                 if (errno == EAGAIN)
1731                         return;
1732
1733                 warn("Read from GDB socket");
1734                 close_connection();
1735         } else {
1736                 cur_comm.len += nread;
1737                 pthread_mutex_lock(&gdb_lock);
1738                 check_command(fd);
1739                 pthread_mutex_unlock(&gdb_lock);
1740         }
1741 }
1742
1743 static void
1744 gdb_writable(int fd, enum ev_type event, void *arg)
1745 {
1746
1747         send_pending_data(fd);
1748 }
1749
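/*
 * Accept a new debugger connection.  Only one connection is serviced
 * at a time; once the socket is registered with mevent, all vCPUs are
 * suspended so the debugger observes a stopped guest on attach.
 */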
1750 static void
1751 new_connection(int fd, enum ev_type event, void *arg)
1752 {
1753         int optval, s;
1754
1755         s = accept4(fd, NULL, NULL, SOCK_NONBLOCK);
1756         if (s == -1) {
1757                 if (arg != NULL)
1758                         err(1, "Failed accepting initial GDB connection");
1759
1760                 /* Silently ignore errors post-startup. */
1761                 return;
1762         }
1763
1764         optval = 1;
1765         if (setsockopt(s, SOL_SOCKET, SO_NOSIGPIPE, &optval, sizeof(optval)) ==
1766             -1) {
1767                 warn("Failed to disable SIGPIPE for GDB connection");
1768                 close(s);
1769                 return;
1770         }
1771
1772         pthread_mutex_lock(&gdb_lock);
1773         if (cur_fd != -1) {
1774                 close(s);
1775                 warnx("Ignoring additional GDB connection.");
                     pthread_mutex_unlock(&gdb_lock);
                     return;
1776         }
1777
1778         read_event = mevent_add(s, EVF_READ, gdb_readable, NULL);
1779         if (read_event == NULL) {
1780                 if (arg != NULL)
1781                         err(1, "Failed to setup initial GDB connection");
1782                 pthread_mutex_unlock(&gdb_lock);
1783                 return;
1784         }
1785         write_event = mevent_add(s, EVF_WRITE, gdb_writable, NULL);
1786         if (write_event == NULL) {
1787                 if (arg != NULL)
1788                         err(1, "Failed to setup initial GDB connection");
1789                 mevent_delete_close(read_event);
1790                 read_event = NULL;
1791         }
1792
1793         cur_fd = s;
1794         cur_vcpu = 0;
1795         stopped_vcpu = -1;
1796
1797         /* Break on attach. */
1798         first_stop = true;
1799         report_next_stop = false;
1800         gdb_suspend_vcpus();
1801         pthread_mutex_unlock(&gdb_lock);
1802 }
1803
1804 #ifndef WITHOUT_CAPSICUM
1805 void
1806 limit_gdb_socket(int s)
1807 {
1808         cap_rights_t rights;
1809         unsigned long ioctls[] = { FIONREAD };
1810
1811         cap_rights_init(&rights, CAP_ACCEPT, CAP_EVENT, CAP_READ, CAP_WRITE,
1812             CAP_SETSOCKOPT, CAP_IOCTL);
1813         if (caph_rights_limit(s, &rights) == -1)
1814                 errx(EX_OSERR, "Unable to apply rights for sandbox");
1815         if (caph_ioctls_limit(s, ioctls, nitems(ioctls)) == -1)
1816                 errx(EX_OSERR, "Unable to apply rights for sandbox");
1817 }
1818 #endif
1819
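/*
 * Start the debug server: listen for TCP connections to port 'sport'
 * on INADDR_ANY and register the listening socket with mevent.  If
 * 'wait' is true, vCPU 0 remains suspended until a debugger attaches.
 */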
1820 void
1821 init_gdb(struct vmctx *_ctx, int sport, bool wait)
1822 {
1823         struct sockaddr_in sin;
1824         int error, flags, s;
1825
1826         debug("==> starting on %d, %swaiting\n", sport, wait ? "" : "not ");
1827
1828         error = pthread_mutex_init(&gdb_lock, NULL);
1829         if (error != 0)
1830                 errc(1, error, "gdb mutex init");
1831         error = pthread_cond_init(&idle_vcpus, NULL);
1832         if (error != 0)
1833                 errc(1, error, "gdb cv init");
1834
1835         ctx = _ctx;
1836         s = socket(PF_INET, SOCK_STREAM, 0);
1837         if (s < 0)
1838                 err(1, "gdb socket create");
1839
1840         sin.sin_len = sizeof(sin);
1841         sin.sin_family = AF_INET;
1842         sin.sin_addr.s_addr = htonl(INADDR_ANY);
1843         sin.sin_port = htons(sport);
1844
1845         if (bind(s, (struct sockaddr *)&sin, sizeof(sin)) < 0)
1846                 err(1, "gdb socket bind");
1847
1848         if (listen(s, 1) < 0)
1849                 err(1, "gdb socket listen");
1850
1851         stopped_vcpu = -1;
1852         TAILQ_INIT(&breakpoints);
1853         vcpu_state = calloc(guest_ncpus, sizeof(*vcpu_state));
1854         if (wait) {
1855                 /*
1856                  * Set vcpu 0 in vcpus_suspended.  This will trigger the
1857                  * logic in gdb_cpu_add() to suspend the first vcpu before
1858                  * it starts execution.  The vcpu will remain suspended
1859                  * until a debugger connects.
1860                  */
1861                 CPU_SET(0, &vcpus_suspended);
1862                 stopped_vcpu = 0;
1863         }
1864
1865         flags = fcntl(s, F_GETFL);
1866         if (fcntl(s, F_SETFL, flags | O_NONBLOCK) == -1)
1867                 err(1, "Failed to mark gdb socket non-blocking");
1868
1869 #ifndef WITHOUT_CAPSICUM
1870         limit_gdb_socket(s);
1871 #endif
1872         mevent_add(s, EVF_READ, new_connection, NULL);
1873         gdb_active = true;
1874 }