/*
 * usr.sbin/bhyve/gdb.c — bhyve GDB remote debug-server stub (FreeBSD).
 */
1 /*-
2  * SPDX-License-Identifier: BSD-2-Clause-FreeBSD
3  *
4  * Copyright (c) 2017-2018 John H. Baldwin <jhb@FreeBSD.org>
5  *
6  * Redistribution and use in source and binary forms, with or without
7  * modification, are permitted provided that the following conditions
8  * are met:
9  * 1. Redistributions of source code must retain the above copyright
10  *    notice, this list of conditions and the following disclaimer.
11  * 2. Redistributions in binary form must reproduce the above copyright
12  *    notice, this list of conditions and the following disclaimer in the
13  *    documentation and/or other materials provided with the distribution.
14  *
15  * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
16  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
17  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
18  * ARE DISCLAIMED.  IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
19  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
20  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
21  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
22  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
23  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
24  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
25  * SUCH DAMAGE.
26  */
27
28 #include <sys/cdefs.h>
29 __FBSDID("$FreeBSD$");
30
31 #include <sys/param.h>
32 #ifndef WITHOUT_CAPSICUM
33 #include <sys/capsicum.h>
34 #endif
35 #include <sys/endian.h>
36 #include <sys/ioctl.h>
37 #include <sys/mman.h>
38 #include <sys/queue.h>
39 #include <sys/socket.h>
40 #include <machine/atomic.h>
41 #include <machine/specialreg.h>
42 #include <machine/vmm.h>
43 #include <netinet/in.h>
44 #include <assert.h>
45 #ifndef WITHOUT_CAPSICUM
46 #include <capsicum_helpers.h>
47 #endif
48 #include <err.h>
49 #include <errno.h>
50 #include <fcntl.h>
51 #include <pthread.h>
52 #include <pthread_np.h>
53 #include <stdbool.h>
54 #include <stdio.h>
55 #include <stdlib.h>
56 #include <string.h>
57 #include <sysexits.h>
58 #include <unistd.h>
59 #include <vmmapi.h>
60
61 #include "bhyverun.h"
62 #include "gdb.h"
63 #include "mem.h"
64 #include "mevent.h"
65
66 /*
67  * GDB_SIGNAL_* numbers are part of the GDB remote protocol.  Most stops
68  * use SIGTRAP.
69  */
70 #define GDB_SIGNAL_TRAP         5
71
72 static void gdb_resume_vcpus(void);
73 static void check_command(int fd);
74
75 static struct mevent *read_event, *write_event;
76
77 static cpuset_t vcpus_active, vcpus_suspended, vcpus_waiting;
78 static pthread_mutex_t gdb_lock;
79 static pthread_cond_t idle_vcpus;
80 static bool first_stop, report_next_stop, swbreak_enabled;
81
82 /*
83  * An I/O buffer contains 'capacity' bytes of room at 'data'.  For a
84  * read buffer, 'start' is unused and 'len' contains the number of
85  * valid bytes in the buffer.  For a write buffer, 'start' is set to
86  * the index of the next byte in 'data' to send, and 'len' contains
87  * the remaining number of valid bytes to send.
88  */
89 struct io_buffer {
90         uint8_t *data;
91         size_t capacity;
92         size_t start;
93         size_t len;
94 };
95
96 struct breakpoint {
97         uint64_t gpa;
98         uint8_t shadow_inst;
99         TAILQ_ENTRY(breakpoint) link;
100 };
101
102 /*
103  * When a vCPU stops to due to an event that should be reported to the
104  * debugger, information about the event is stored in this structure.
105  * The vCPU thread then sets 'stopped_vcpu' if it is not already set
106  * and stops other vCPUs so the event can be reported.  The
107  * report_stop() function reports the event for the 'stopped_vcpu'
108  * vCPU.  When the debugger resumes execution via continue or step,
109  * the event for 'stopped_vcpu' is cleared.  vCPUs will loop in their
110  * event handlers until the associated event is reported or disabled.
111  *
112  * An idle vCPU will have all of the boolean fields set to false.
113  *
114  * When a vCPU is stepped, 'stepping' is set to true when the vCPU is
115  * released to execute the stepped instruction.  When the vCPU reports
116  * the stepping trap, 'stepped' is set.
117  *
118  * When a vCPU hits a breakpoint set by the debug server,
119  * 'hit_swbreak' is set to true.
120  */
121 struct vcpu_state {
122         bool stepping;
123         bool stepped;
124         bool hit_swbreak;
125 };
126
127 static struct io_buffer cur_comm, cur_resp;
128 static uint8_t cur_csum;
129 static struct vmctx *ctx;
130 static int cur_fd = -1;
131 static TAILQ_HEAD(, breakpoint) breakpoints;
132 static struct vcpu_state *vcpu_state;
133 static int cur_vcpu, stopped_vcpu;
134
135 const int gdb_regset[] = {
136         VM_REG_GUEST_RAX,
137         VM_REG_GUEST_RBX,
138         VM_REG_GUEST_RCX,
139         VM_REG_GUEST_RDX,
140         VM_REG_GUEST_RSI,
141         VM_REG_GUEST_RDI,
142         VM_REG_GUEST_RBP,
143         VM_REG_GUEST_RSP,
144         VM_REG_GUEST_R8,
145         VM_REG_GUEST_R9,
146         VM_REG_GUEST_R10,
147         VM_REG_GUEST_R11,
148         VM_REG_GUEST_R12,
149         VM_REG_GUEST_R13,
150         VM_REG_GUEST_R14,
151         VM_REG_GUEST_R15,
152         VM_REG_GUEST_RIP,
153         VM_REG_GUEST_RFLAGS,
154         VM_REG_GUEST_CS,
155         VM_REG_GUEST_SS,
156         VM_REG_GUEST_DS,
157         VM_REG_GUEST_ES,
158         VM_REG_GUEST_FS,
159         VM_REG_GUEST_GS
160 };
161
162 const int gdb_regsize[] = {
163         8,
164         8,
165         8,
166         8,
167         8,
168         8,
169         8,
170         8,
171         8,
172         8,
173         8,
174         8,
175         8,
176         8,
177         8,
178         8,
179         8,
180         4,
181         4,
182         4,
183         4,
184         4,
185         4,
186         4
187 };
188
189 #ifdef GDB_LOG
190 #include <stdarg.h>
191 #include <stdio.h>
192
193 static void __printflike(1, 2)
194 debug(const char *fmt, ...)
195 {
196         static FILE *logfile;
197         va_list ap;
198
199         if (logfile == NULL) {
200                 logfile = fopen("/tmp/bhyve_gdb.log", "w");
201                 if (logfile == NULL)
202                         return;
203 #ifndef WITHOUT_CAPSICUM
204                 if (caph_limit_stream(fileno(logfile), CAPH_WRITE) == -1) {
205                         fclose(logfile);
206                         logfile = NULL;
207                         return;
208                 }
209 #endif
210                 setlinebuf(logfile);
211         }
212         va_start(ap, fmt);
213         vfprintf(logfile, fmt, ap);
214         va_end(ap);
215 }
216 #else
217 #define debug(...)
218 #endif
219
220 static void     remove_all_sw_breakpoints(void);
221
222 static int
223 guest_paging_info(int vcpu, struct vm_guest_paging *paging)
224 {
225         uint64_t regs[4];
226         const int regset[4] = {
227                 VM_REG_GUEST_CR0,
228                 VM_REG_GUEST_CR3,
229                 VM_REG_GUEST_CR4,
230                 VM_REG_GUEST_EFER
231         };
232
233         if (vm_get_register_set(ctx, vcpu, nitems(regset), regset, regs) == -1)
234                 return (-1);
235
236         /*
237          * For the debugger, always pretend to be the kernel (CPL 0),
238          * and if long-mode is enabled, always parse addresses as if
239          * in 64-bit mode.
240          */
241         paging->cr3 = regs[1];
242         paging->cpl = 0;
243         if (regs[3] & EFER_LMA)
244                 paging->cpu_mode = CPU_MODE_64BIT;
245         else if (regs[0] & CR0_PE)
246                 paging->cpu_mode = CPU_MODE_PROTECTED;
247         else
248                 paging->cpu_mode = CPU_MODE_REAL;
249         if (!(regs[0] & CR0_PG))
250                 paging->paging_mode = PAGING_MODE_FLAT;
251         else if (!(regs[2] & CR4_PAE))
252                 paging->paging_mode = PAGING_MODE_32;
253         else if (regs[3] & EFER_LME)
254                 paging->paging_mode = (regs[2] & CR4_LA57) ?
255                     PAGING_MODE_64_LA57 :  PAGING_MODE_64;
256         else
257                 paging->paging_mode = PAGING_MODE_PAE;
258         return (0);
259 }
260
261 /*
262  * Map a guest virtual address to a physical address (for a given vcpu).
263  * If a guest virtual address is valid, return 1.  If the address is
264  * not valid, return 0.  If an error occurs obtaining the mapping,
265  * return -1.
266  */
267 static int
268 guest_vaddr2paddr(int vcpu, uint64_t vaddr, uint64_t *paddr)
269 {
270         struct vm_guest_paging paging;
271         int fault;
272
273         if (guest_paging_info(vcpu, &paging) == -1)
274                 return (-1);
275
276         /*
277          * Always use PROT_READ.  We really care if the VA is
278          * accessible, not if the current vCPU can write.
279          */
280         if (vm_gla2gpa_nofault(ctx, vcpu, &paging, vaddr, PROT_READ, paddr,
281             &fault) == -1)
282                 return (-1);
283         if (fault)
284                 return (0);
285         return (1);
286 }
287
288 static void
289 io_buffer_reset(struct io_buffer *io)
290 {
291
292         io->start = 0;
293         io->len = 0;
294 }
295
296 /* Available room for adding data. */
297 static size_t
298 io_buffer_avail(struct io_buffer *io)
299 {
300
301         return (io->capacity - (io->start + io->len));
302 }
303
304 static uint8_t *
305 io_buffer_head(struct io_buffer *io)
306 {
307
308         return (io->data + io->start);
309 }
310
311 static uint8_t *
312 io_buffer_tail(struct io_buffer *io)
313 {
314
315         return (io->data + io->start + io->len);
316 }
317
318 static void
319 io_buffer_advance(struct io_buffer *io, size_t amount)
320 {
321
322         assert(amount <= io->len);
323         io->start += amount;
324         io->len -= amount;
325 }
326
327 static void
328 io_buffer_consume(struct io_buffer *io, size_t amount)
329 {
330
331         io_buffer_advance(io, amount);
332         if (io->len == 0) {
333                 io->start = 0;
334                 return;
335         }
336
337         /*
338          * XXX: Consider making this move optional and compacting on a
339          * future read() before realloc().
340          */
341         memmove(io->data, io_buffer_head(io), io->len);
342         io->start = 0;
343 }
344
345 static void
346 io_buffer_grow(struct io_buffer *io, size_t newsize)
347 {
348         uint8_t *new_data;
349         size_t avail, new_cap;
350
351         avail = io_buffer_avail(io);
352         if (newsize <= avail)
353                 return;
354
355         new_cap = io->capacity + (newsize - avail);
356         new_data = realloc(io->data, new_cap);
357         if (new_data == NULL)
358                 err(1, "Failed to grow GDB I/O buffer");
359         io->data = new_data;
360         io->capacity = new_cap;
361 }
362
363 static bool
364 response_pending(void)
365 {
366
367         if (cur_resp.start == 0 && cur_resp.len == 0)
368                 return (false);
369         if (cur_resp.start + cur_resp.len == 1 && cur_resp.data[0] == '+')
370                 return (false);
371         return (true);
372 }
373
374 static void
375 close_connection(void)
376 {
377
378         /*
379          * XXX: This triggers a warning because mevent does the close
380          * before the EV_DELETE.
381          */
382         pthread_mutex_lock(&gdb_lock);
383         mevent_delete(write_event);
384         mevent_delete_close(read_event);
385         write_event = NULL;
386         read_event = NULL;
387         io_buffer_reset(&cur_comm);
388         io_buffer_reset(&cur_resp);
389         cur_fd = -1;
390
391         remove_all_sw_breakpoints();
392
393         /* Clear any pending events. */
394         memset(vcpu_state, 0, guest_ncpus * sizeof(*vcpu_state));
395
396         /* Resume any stopped vCPUs. */
397         gdb_resume_vcpus();
398         pthread_mutex_unlock(&gdb_lock);
399 }
400
401 static uint8_t
402 hex_digit(uint8_t nibble)
403 {
404
405         if (nibble <= 9)
406                 return (nibble + '0');
407         else
408                 return (nibble + 'a' - 10);
409 }
410
411 static uint8_t
412 parse_digit(uint8_t v)
413 {
414
415         if (v >= '0' && v <= '9')
416                 return (v - '0');
417         if (v >= 'a' && v <= 'f')
418                 return (v - 'a' + 10);
419         if (v >= 'A' && v <= 'F')
420                 return (v - 'A' + 10);
421         return (0xF);
422 }
423
424 /* Parses big-endian hexadecimal. */
425 static uintmax_t
426 parse_integer(const uint8_t *p, size_t len)
427 {
428         uintmax_t v;
429
430         v = 0;
431         while (len > 0) {
432                 v <<= 4;
433                 v |= parse_digit(*p);
434                 p++;
435                 len--;
436         }
437         return (v);
438 }
439
440 static uint8_t
441 parse_byte(const uint8_t *p)
442 {
443
444         return (parse_digit(p[0]) << 4 | parse_digit(p[1]));
445 }
446
447 static void
448 send_pending_data(int fd)
449 {
450         ssize_t nwritten;
451
452         if (cur_resp.len == 0) {
453                 mevent_disable(write_event);
454                 return;
455         }
456         nwritten = write(fd, io_buffer_head(&cur_resp), cur_resp.len);
457         if (nwritten == -1) {
458                 warn("Write to GDB socket failed");
459                 close_connection();
460         } else {
461                 io_buffer_advance(&cur_resp, nwritten);
462                 if (cur_resp.len == 0)
463                         mevent_disable(write_event);
464                 else
465                         mevent_enable(write_event);
466         }
467 }
468
469 /* Append a single character to the output buffer. */
470 static void
471 send_char(uint8_t data)
472 {
473         io_buffer_grow(&cur_resp, 1);
474         *io_buffer_tail(&cur_resp) = data;
475         cur_resp.len++;
476 }
477
478 /* Append an array of bytes to the output buffer. */
479 static void
480 send_data(const uint8_t *data, size_t len)
481 {
482
483         io_buffer_grow(&cur_resp, len);
484         memcpy(io_buffer_tail(&cur_resp), data, len);
485         cur_resp.len += len;
486 }
487
488 static void
489 format_byte(uint8_t v, uint8_t *buf)
490 {
491
492         buf[0] = hex_digit(v >> 4);
493         buf[1] = hex_digit(v & 0xf);
494 }
495
496 /*
497  * Append a single byte (formatted as two hex characters) to the
498  * output buffer.
499  */
500 static void
501 send_byte(uint8_t v)
502 {
503         uint8_t buf[2];
504
505         format_byte(v, buf);
506         send_data(buf, sizeof(buf));
507 }
508
509 static void
510 start_packet(void)
511 {
512
513         send_char('$');
514         cur_csum = 0;
515 }
516
517 static void
518 finish_packet(void)
519 {
520
521         send_char('#');
522         send_byte(cur_csum);
523         debug("-> %.*s\n", (int)cur_resp.len, io_buffer_head(&cur_resp));
524 }
525
526 /*
527  * Append a single character (for the packet payload) and update the
528  * checksum.
529  */
530 static void
531 append_char(uint8_t v)
532 {
533
534         send_char(v);
535         cur_csum += v;
536 }
537
538 /*
539  * Append an array of bytes (for the packet payload) and update the
540  * checksum.
541  */
542 static void
543 append_packet_data(const uint8_t *data, size_t len)
544 {
545
546         send_data(data, len);
547         while (len > 0) {
548                 cur_csum += *data;
549                 data++;
550                 len--;
551         }
552 }
553
554 static void
555 append_string(const char *str)
556 {
557
558         append_packet_data(str, strlen(str));
559 }
560
561 static void
562 append_byte(uint8_t v)
563 {
564         uint8_t buf[2];
565
566         format_byte(v, buf);
567         append_packet_data(buf, sizeof(buf));
568 }
569
570 static void
571 append_unsigned_native(uintmax_t value, size_t len)
572 {
573         size_t i;
574
575         for (i = 0; i < len; i++) {
576                 append_byte(value);
577                 value >>= 8;
578         }
579 }
580
581 static void
582 append_unsigned_be(uintmax_t value, size_t len)
583 {
584         char buf[len * 2];
585         size_t i;
586
587         for (i = 0; i < len; i++) {
588                 format_byte(value, buf + (len - i - 1) * 2);
589                 value >>= 8;
590         }
591         append_packet_data(buf, sizeof(buf));
592 }
593
594 static void
595 append_integer(unsigned int value)
596 {
597
598         if (value == 0)
599                 append_char('0');
600         else
601                 append_unsigned_be(value, (fls(value) + 7) / 8);
602 }
603
604 static void
605 append_asciihex(const char *str)
606 {
607
608         while (*str != '\0') {
609                 append_byte(*str);
610                 str++;
611         }
612 }
613
614 static void
615 send_empty_response(void)
616 {
617
618         start_packet();
619         finish_packet();
620 }
621
622 static void
623 send_error(int error)
624 {
625
626         start_packet();
627         append_char('E');
628         append_byte(error);
629         finish_packet();
630 }
631
632 static void
633 send_ok(void)
634 {
635
636         start_packet();
637         append_string("OK");
638         finish_packet();
639 }
640
641 static int
642 parse_threadid(const uint8_t *data, size_t len)
643 {
644
645         if (len == 1 && *data == '0')
646                 return (0);
647         if (len == 2 && memcmp(data, "-1", 2) == 0)
648                 return (-1);
649         if (len == 0)
650                 return (-2);
651         return (parse_integer(data, len));
652 }
653
654 /*
655  * Report the current stop event to the debugger.  If the stop is due
656  * to an event triggered on a specific vCPU such as a breakpoint or
657  * stepping trap, stopped_vcpu will be set to the vCPU triggering the
658  * stop.  If 'set_cur_vcpu' is true, then cur_vcpu will be updated to
659  * the reporting vCPU for vCPU events.
660  */
661 static void
662 report_stop(bool set_cur_vcpu)
663 {
664         struct vcpu_state *vs;
665
666         start_packet();
667         if (stopped_vcpu == -1) {
668                 append_char('S');
669                 append_byte(GDB_SIGNAL_TRAP);
670         } else {
671                 vs = &vcpu_state[stopped_vcpu];
672                 if (set_cur_vcpu)
673                         cur_vcpu = stopped_vcpu;
674                 append_char('T');
675                 append_byte(GDB_SIGNAL_TRAP);
676                 append_string("thread:");
677                 append_integer(stopped_vcpu + 1);
678                 append_char(';');
679                 if (vs->hit_swbreak) {
680                         debug("$vCPU %d reporting swbreak\n", stopped_vcpu);
681                         if (swbreak_enabled)
682                                 append_string("swbreak:;");
683                 } else if (vs->stepped)
684                         debug("$vCPU %d reporting step\n", stopped_vcpu);
685                 else
686                         debug("$vCPU %d reporting ???\n", stopped_vcpu);
687         }
688         finish_packet();
689         report_next_stop = false;
690 }
691
692 /*
693  * If this stop is due to a vCPU event, clear that event to mark it as
694  * acknowledged.
695  */
696 static void
697 discard_stop(void)
698 {
699         struct vcpu_state *vs;
700
701         if (stopped_vcpu != -1) {
702                 vs = &vcpu_state[stopped_vcpu];
703                 vs->hit_swbreak = false;
704                 vs->stepped = false;
705                 stopped_vcpu = -1;
706         }
707         report_next_stop = true;
708 }
709
710 static void
711 gdb_finish_suspend_vcpus(void)
712 {
713
714         if (first_stop) {
715                 first_stop = false;
716                 stopped_vcpu = -1;
717         } else if (report_next_stop) {
718                 assert(!response_pending());
719                 report_stop(true);
720                 send_pending_data(cur_fd);
721         }
722 }
723
724 /*
725  * vCPU threads invoke this function whenever the vCPU enters the
726  * debug server to pause or report an event.  vCPU threads wait here
727  * as long as the debug server keeps them suspended.
728  */
729 static void
730 _gdb_cpu_suspend(int vcpu, bool report_stop)
731 {
732
733         debug("$vCPU %d suspending\n", vcpu);
734         CPU_SET(vcpu, &vcpus_waiting);
735         if (report_stop && CPU_CMP(&vcpus_waiting, &vcpus_suspended) == 0)
736                 gdb_finish_suspend_vcpus();
737         while (CPU_ISSET(vcpu, &vcpus_suspended))
738                 pthread_cond_wait(&idle_vcpus, &gdb_lock);
739         CPU_CLR(vcpu, &vcpus_waiting);
740         debug("$vCPU %d resuming\n", vcpu);
741 }
742
743 /*
744  * Invoked at the start of a vCPU thread's execution to inform the
745  * debug server about the new thread.
746  */
747 void
748 gdb_cpu_add(int vcpu)
749 {
750
751         debug("$vCPU %d starting\n", vcpu);
752         pthread_mutex_lock(&gdb_lock);
753         assert(vcpu < guest_ncpus);
754         CPU_SET(vcpu, &vcpus_active);
755         if (!TAILQ_EMPTY(&breakpoints)) {
756                 vm_set_capability(ctx, vcpu, VM_CAP_BPT_EXIT, 1);
757                 debug("$vCPU %d enabled breakpoint exits\n", vcpu);
758         }
759
760         /*
761          * If a vcpu is added while vcpus are stopped, suspend the new
762          * vcpu so that it will pop back out with a debug exit before
763          * executing the first instruction.
764          */
765         if (!CPU_EMPTY(&vcpus_suspended)) {
766                 CPU_SET(vcpu, &vcpus_suspended);
767                 _gdb_cpu_suspend(vcpu, false);
768         }
769         pthread_mutex_unlock(&gdb_lock);
770 }
771
772 /*
773  * Invoked by vCPU before resuming execution.  This enables stepping
774  * if the vCPU is marked as stepping.
775  */
776 static void
777 gdb_cpu_resume(int vcpu)
778 {
779         struct vcpu_state *vs;
780         int error;
781
782         vs = &vcpu_state[vcpu];
783
784         /*
785          * Any pending event should already be reported before
786          * resuming.
787          */
788         assert(vs->hit_swbreak == false);
789         assert(vs->stepped == false);
790         if (vs->stepping) {
791                 error = vm_set_capability(ctx, vcpu, VM_CAP_MTRAP_EXIT, 1);
792                 assert(error == 0);
793         }
794 }
795
796 /*
797  * Handler for VM_EXITCODE_DEBUG used to suspend a vCPU when the guest
798  * has been suspended due to an event on different vCPU or in response
799  * to a guest-wide suspend such as Ctrl-C or the stop on attach.
800  */
801 void
802 gdb_cpu_suspend(int vcpu)
803 {
804
805         pthread_mutex_lock(&gdb_lock);
806         _gdb_cpu_suspend(vcpu, true);
807         gdb_cpu_resume(vcpu);
808         pthread_mutex_unlock(&gdb_lock);
809 }
810
811 static void
812 gdb_suspend_vcpus(void)
813 {
814
815         assert(pthread_mutex_isowned_np(&gdb_lock));
816         debug("suspending all CPUs\n");
817         vcpus_suspended = vcpus_active;
818         vm_suspend_cpu(ctx, -1);
819         if (CPU_CMP(&vcpus_waiting, &vcpus_suspended) == 0)
820                 gdb_finish_suspend_vcpus();
821 }
822
823 /*
824  * Handler for VM_EXITCODE_MTRAP reported when a vCPU single-steps via
825  * the VT-x-specific MTRAP exit.
826  */
827 void
828 gdb_cpu_mtrap(int vcpu)
829 {
830         struct vcpu_state *vs;
831
832         debug("$vCPU %d MTRAP\n", vcpu);
833         pthread_mutex_lock(&gdb_lock);
834         vs = &vcpu_state[vcpu];
835         if (vs->stepping) {
836                 vs->stepping = false;
837                 vs->stepped = true;
838                 vm_set_capability(ctx, vcpu, VM_CAP_MTRAP_EXIT, 0);
839                 while (vs->stepped) {
840                         if (stopped_vcpu == -1) {
841                                 debug("$vCPU %d reporting step\n", vcpu);
842                                 stopped_vcpu = vcpu;
843                                 gdb_suspend_vcpus();
844                         }
845                         _gdb_cpu_suspend(vcpu, true);
846                 }
847                 gdb_cpu_resume(vcpu);
848         }
849         pthread_mutex_unlock(&gdb_lock);
850 }
851
852 static struct breakpoint *
853 find_breakpoint(uint64_t gpa)
854 {
855         struct breakpoint *bp;
856
857         TAILQ_FOREACH(bp, &breakpoints, link) {
858                 if (bp->gpa == gpa)
859                         return (bp);
860         }
861         return (NULL);
862 }
863
864 void
865 gdb_cpu_breakpoint(int vcpu, struct vm_exit *vmexit)
866 {
867         struct breakpoint *bp;
868         struct vcpu_state *vs;
869         uint64_t gpa;
870         int error;
871
872         pthread_mutex_lock(&gdb_lock);
873         error = guest_vaddr2paddr(vcpu, vmexit->rip, &gpa);
874         assert(error == 1);
875         bp = find_breakpoint(gpa);
876         if (bp != NULL) {
877                 vs = &vcpu_state[vcpu];
878                 assert(vs->stepping == false);
879                 assert(vs->stepped == false);
880                 assert(vs->hit_swbreak == false);
881                 vs->hit_swbreak = true;
882                 vm_set_register(ctx, vcpu, VM_REG_GUEST_RIP, vmexit->rip);
883                 for (;;) {
884                         if (stopped_vcpu == -1) {
885                                 debug("$vCPU %d reporting breakpoint at rip %#lx\n", vcpu,
886                                     vmexit->rip);
887                                 stopped_vcpu = vcpu;
888                                 gdb_suspend_vcpus();
889                         }
890                         _gdb_cpu_suspend(vcpu, true);
891                         if (!vs->hit_swbreak) {
892                                 /* Breakpoint reported. */
893                                 break;
894                         }
895                         bp = find_breakpoint(gpa);
896                         if (bp == NULL) {
897                                 /* Breakpoint was removed. */
898                                 vs->hit_swbreak = false;
899                                 break;
900                         }
901                 }
902                 gdb_cpu_resume(vcpu);
903         } else {
904                 debug("$vCPU %d injecting breakpoint at rip %#lx\n", vcpu,
905                     vmexit->rip);
906                 error = vm_set_register(ctx, vcpu,
907                     VM_REG_GUEST_ENTRY_INST_LENGTH, vmexit->u.bpt.inst_length);
908                 assert(error == 0);
909                 error = vm_inject_exception(ctx, vcpu, IDT_BP, 0, 0, 0);
910                 assert(error == 0);
911         }
912         pthread_mutex_unlock(&gdb_lock);
913 }
914
915 static bool
916 gdb_step_vcpu(int vcpu)
917 {
918         int error, val;
919
920         debug("$vCPU %d step\n", vcpu);
921         error = vm_get_capability(ctx, vcpu, VM_CAP_MTRAP_EXIT, &val);
922         if (error < 0)
923                 return (false);
924
925         discard_stop();
926         vcpu_state[vcpu].stepping = true;
927         vm_resume_cpu(ctx, vcpu);
928         CPU_CLR(vcpu, &vcpus_suspended);
929         pthread_cond_broadcast(&idle_vcpus);
930         return (true);
931 }
932
933 static void
934 gdb_resume_vcpus(void)
935 {
936
937         assert(pthread_mutex_isowned_np(&gdb_lock));
938         vm_resume_cpu(ctx, -1);
939         debug("resuming all CPUs\n");
940         CPU_ZERO(&vcpus_suspended);
941         pthread_cond_broadcast(&idle_vcpus);
942 }
943
944 static void
945 gdb_read_regs(void)
946 {
947         uint64_t regvals[nitems(gdb_regset)];
948         int i;
949
950         if (vm_get_register_set(ctx, cur_vcpu, nitems(gdb_regset),
951             gdb_regset, regvals) == -1) {
952                 send_error(errno);
953                 return;
954         }
955         start_packet();
956         for (i = 0; i < nitems(regvals); i++)
957                 append_unsigned_native(regvals[i], gdb_regsize[i]);
958         finish_packet();
959 }
960
961 static void
962 gdb_read_mem(const uint8_t *data, size_t len)
963 {
964         uint64_t gpa, gva, val;
965         uint8_t *cp;
966         size_t resid, todo, bytes;
967         bool started;
968         int error;
969
970         /* Skip 'm' */
971         data += 1;
972         len -= 1;
973
974         /* Parse and consume address. */
975         cp = memchr(data, ',', len);
976         if (cp == NULL || cp == data) {
977                 send_error(EINVAL);
978                 return;
979         }
980         gva = parse_integer(data, cp - data);
981         len -= (cp - data) + 1;
982         data += (cp - data) + 1;
983
984         /* Parse length. */
985         resid = parse_integer(data, len);
986
987         started = false;
988         while (resid > 0) {
989                 error = guest_vaddr2paddr(cur_vcpu, gva, &gpa);
990                 if (error == -1) {
991                         if (started)
992                                 finish_packet();
993                         else
994                                 send_error(errno);
995                         return;
996                 }
997                 if (error == 0) {
998                         if (started)
999                                 finish_packet();
1000                         else
1001                                 send_error(EFAULT);
1002                         return;
1003                 }
1004
1005                 /* Read bytes from current page. */
1006                 todo = getpagesize() - gpa % getpagesize();
1007                 if (todo > resid)
1008                         todo = resid;
1009
1010                 cp = paddr_guest2host(ctx, gpa, todo);
1011                 if (cp != NULL) {
1012                         /*
1013                          * If this page is guest RAM, read it a byte
1014                          * at a time.
1015                          */
1016                         if (!started) {
1017                                 start_packet();
1018                                 started = true;
1019                         }
1020                         while (todo > 0) {
1021                                 append_byte(*cp);
1022                                 cp++;
1023                                 gpa++;
1024                                 gva++;
1025                                 resid--;
1026                                 todo--;
1027                         }
1028                 } else {
1029                         /*
1030                          * If this page isn't guest RAM, try to handle
1031                          * it via MMIO.  For MMIO requests, use
1032                          * aligned reads of words when possible.
1033                          */
1034                         while (todo > 0) {
1035                                 if (gpa & 1 || todo == 1)
1036                                         bytes = 1;
1037                                 else if (gpa & 2 || todo == 2)
1038                                         bytes = 2;
1039                                 else
1040                                         bytes = 4;
1041                                 error = read_mem(ctx, cur_vcpu, gpa, &val,
1042                                     bytes);
1043                                 if (error == 0) {
1044                                         if (!started) {
1045                                                 start_packet();
1046                                                 started = true;
1047                                         }
1048                                         gpa += bytes;
1049                                         gva += bytes;
1050                                         resid -= bytes;
1051                                         todo -= bytes;
1052                                         while (bytes > 0) {
1053                                                 append_byte(val);
1054                                                 val >>= 8;
1055                                                 bytes--;
1056                                         }
1057                                 } else {
1058                                         if (started)
1059                                                 finish_packet();
1060                                         else
1061                                                 send_error(EFAULT);
1062                                         return;
1063                                 }
1064                         }
1065                 }
1066                 assert(resid == 0 || gpa % getpagesize() == 0);
1067         }
1068         if (!started)
1069                 start_packet();
1070         finish_packet();
1071 }
1072
/*
 * Handle an 'M' (write memory) packet: "M<addr>,<length>:<hex data>".
 * The guest virtual address range is translated page by page; pages
 * backed by guest RAM are written directly a byte at a time, anything
 * else is written through MMIO using naturally-aligned 1/2/4 byte
 * accesses.  Replies "OK" on success or an error packet on failure.
 */
static void
gdb_write_mem(const uint8_t *data, size_t len)
{
	uint64_t gpa, gva, val;
	uint8_t *cp;
	size_t resid, todo, bytes;
	int error;

	/* Skip 'M' */
	data += 1;
	len -= 1;

	/* Parse and consume address. */
	cp = memchr(data, ',', len);
	if (cp == NULL || cp == data) {
		send_error(EINVAL);
		return;
	}
	gva = parse_integer(data, cp - data);
	len -= (cp - data) + 1;
	data += (cp - data) + 1;

	/* Parse and consume length. */
	cp = memchr(data, ':', len);
	if (cp == NULL || cp == data) {
		send_error(EINVAL);
		return;
	}
	resid = parse_integer(data, cp - data);
	len -= (cp - data) + 1;
	data += (cp - data) + 1;

	/* Verify the available bytes match the length (2 hex digits/byte). */
	if (len != resid * 2) {
		send_error(EINVAL);
		return;
	}

	while (resid > 0) {
		/* Translate the current guest virtual address. */
		error = guest_vaddr2paddr(cur_vcpu, gva, &gpa);
		if (error == -1) {
			send_error(errno);
			return;
		}
		if (error == 0) {
			/* Address is not mapped in the guest. */
			send_error(EFAULT);
			return;
		}

		/* Write bytes to current page. */
		todo = getpagesize() - gpa % getpagesize();
		if (todo > resid)
			todo = resid;

		cp = paddr_guest2host(ctx, gpa, todo);
		if (cp != NULL) {
			/*
			 * If this page is guest RAM, write it a byte
			 * at a time.
			 */
			while (todo > 0) {
				assert(len >= 2);
				*cp = parse_byte(data);
				data += 2;
				len -= 2;
				cp++;
				gpa++;
				gva++;
				resid--;
				todo--;
			}
		} else {
			/*
			 * If this page isn't guest RAM, try to handle
			 * it via MMIO.  For MMIO requests, use
			 * aligned writes of words when possible.
			 */
			while (todo > 0) {
				if (gpa & 1 || todo == 1) {
					bytes = 1;
					val = parse_byte(data);
				} else if (gpa & 2 || todo == 2) {
					bytes = 2;
					/*
					 * The hex stream is in guest
					 * memory order; parse_integer()
					 * yields a big-endian value, so
					 * convert to host order before
					 * the MMIO write.
					 */
					val = be16toh(parse_integer(data, 4));
				} else {
					bytes = 4;
					val = be32toh(parse_integer(data, 8));
				}
				error = write_mem(ctx, cur_vcpu, gpa, val,
				    bytes);
				if (error == 0) {
					gpa += bytes;
					gva += bytes;
					resid -= bytes;
					todo -= bytes;
					data += 2 * bytes;
					len -= 2 * bytes;
				} else {
					send_error(EFAULT);
					return;
				}
			}
		}
		/* Either done, or we advanced exactly to a page boundary. */
		assert(resid == 0 || gpa % getpagesize() == 0);
	}
	assert(len == 0);
	send_ok();
}
1181
1182 static bool
1183 set_breakpoint_caps(bool enable)
1184 {
1185         cpuset_t mask;
1186         int vcpu;
1187
1188         mask = vcpus_active;
1189         while (!CPU_EMPTY(&mask)) {
1190                 vcpu = CPU_FFS(&mask) - 1;
1191                 CPU_CLR(vcpu, &mask);
1192                 if (vm_set_capability(ctx, vcpu, VM_CAP_BPT_EXIT,
1193                     enable ? 1 : 0) < 0)
1194                         return (false);
1195                 debug("$vCPU %d %sabled breakpoint exits\n", vcpu,
1196                     enable ? "en" : "dis");
1197         }
1198         return (true);
1199 }
1200
1201 static void
1202 remove_all_sw_breakpoints(void)
1203 {
1204         struct breakpoint *bp, *nbp;
1205         uint8_t *cp;
1206
1207         if (TAILQ_EMPTY(&breakpoints))
1208                 return;
1209
1210         TAILQ_FOREACH_SAFE(bp, &breakpoints, link, nbp) {
1211                 debug("remove breakpoint at %#lx\n", bp->gpa);
1212                 cp = paddr_guest2host(ctx, bp->gpa, 1);
1213                 *cp = bp->shadow_inst;
1214                 TAILQ_REMOVE(&breakpoints, bp, link);
1215                 free(bp);
1216         }
1217         TAILQ_INIT(&breakpoints);
1218         set_breakpoint_caps(false);
1219 }
1220
1221 static void
1222 update_sw_breakpoint(uint64_t gva, int kind, bool insert)
1223 {
1224         struct breakpoint *bp;
1225         uint64_t gpa;
1226         uint8_t *cp;
1227         int error;
1228
1229         if (kind != 1) {
1230                 send_error(EINVAL);
1231                 return;
1232         }
1233
1234         error = guest_vaddr2paddr(cur_vcpu, gva, &gpa);
1235         if (error == -1) {
1236                 send_error(errno);
1237                 return;
1238         }
1239         if (error == 0) {
1240                 send_error(EFAULT);
1241                 return;
1242         }
1243
1244         cp = paddr_guest2host(ctx, gpa, 1);
1245
1246         /* Only permit breakpoints in guest RAM. */
1247         if (cp == NULL) {
1248                 send_error(EFAULT);
1249                 return;
1250         }
1251
1252         /* Find any existing breakpoint. */
1253         bp = find_breakpoint(gpa);
1254
1255         /*
1256          * Silently ignore duplicate commands since the protocol
1257          * requires these packets to be idempotent.
1258          */
1259         if (insert) {
1260                 if (bp == NULL) {
1261                         if (TAILQ_EMPTY(&breakpoints) &&
1262                             !set_breakpoint_caps(true)) {
1263                                 send_empty_response();
1264                                 return;
1265                         }
1266                         bp = malloc(sizeof(*bp));
1267                         bp->gpa = gpa;
1268                         bp->shadow_inst = *cp;
1269                         *cp = 0xcc;     /* INT 3 */
1270                         TAILQ_INSERT_TAIL(&breakpoints, bp, link);
1271                         debug("new breakpoint at %#lx\n", gpa);
1272                 }
1273         } else {
1274                 if (bp != NULL) {
1275                         debug("remove breakpoint at %#lx\n", gpa);
1276                         *cp = bp->shadow_inst;
1277                         TAILQ_REMOVE(&breakpoints, bp, link);
1278                         free(bp);
1279                         if (TAILQ_EMPTY(&breakpoints))
1280                                 set_breakpoint_caps(false);
1281                 }
1282         }
1283         send_ok();
1284 }
1285
1286 static void
1287 parse_breakpoint(const uint8_t *data, size_t len)
1288 {
1289         uint64_t gva;
1290         uint8_t *cp;
1291         bool insert;
1292         int kind, type;
1293
1294         insert = data[0] == 'Z';
1295
1296         /* Skip 'Z/z' */
1297         data += 1;
1298         len -= 1;
1299
1300         /* Parse and consume type. */
1301         cp = memchr(data, ',', len);
1302         if (cp == NULL || cp == data) {
1303                 send_error(EINVAL);
1304                 return;
1305         }
1306         type = parse_integer(data, cp - data);
1307         len -= (cp - data) + 1;
1308         data += (cp - data) + 1;
1309
1310         /* Parse and consume address. */
1311         cp = memchr(data, ',', len);
1312         if (cp == NULL || cp == data) {
1313                 send_error(EINVAL);
1314                 return;
1315         }
1316         gva = parse_integer(data, cp - data);
1317         len -= (cp - data) + 1;
1318         data += (cp - data) + 1;
1319
1320         /* Parse and consume kind. */
1321         cp = memchr(data, ';', len);
1322         if (cp == data) {
1323                 send_error(EINVAL);
1324                 return;
1325         }
1326         if (cp != NULL) {
1327                 /*
1328                  * We do not advertise support for either the
1329                  * ConditionalBreakpoints or BreakpointCommands
1330                  * features, so we should not be getting conditions or
1331                  * commands from the remote end.
1332                  */
1333                 send_empty_response();
1334                 return;
1335         }
1336         kind = parse_integer(data, len);
1337         data += len;
1338         len = 0;
1339
1340         switch (type) {
1341         case 0:
1342                 update_sw_breakpoint(gva, kind, insert);
1343                 break;
1344         default:
1345                 send_empty_response();
1346                 break;
1347         }
1348 }
1349
1350 static bool
1351 command_equals(const uint8_t *data, size_t len, const char *cmd)
1352 {
1353
1354         if (strlen(cmd) > len)
1355                 return (false);
1356         return (memcmp(data, cmd, strlen(cmd)) == 0);
1357 }
1358
1359 static void
1360 check_features(const uint8_t *data, size_t len)
1361 {
1362         char *feature, *next_feature, *str, *value;
1363         bool supported;
1364
1365         str = malloc(len + 1);
1366         memcpy(str, data, len);
1367         str[len] = '\0';
1368         next_feature = str;
1369
1370         while ((feature = strsep(&next_feature, ";")) != NULL) {
1371                 /*
1372                  * Null features shouldn't exist, but skip if they
1373                  * do.
1374                  */
1375                 if (strcmp(feature, "") == 0)
1376                         continue;
1377
1378                 /*
1379                  * Look for the value or supported / not supported
1380                  * flag.
1381                  */
1382                 value = strchr(feature, '=');
1383                 if (value != NULL) {
1384                         *value = '\0';
1385                         value++;
1386                         supported = true;
1387                 } else {
1388                         value = feature + strlen(feature) - 1;
1389                         switch (*value) {
1390                         case '+':
1391                                 supported = true;
1392                                 break;
1393                         case '-':
1394                                 supported = false;
1395                                 break;
1396                         default:
1397                                 /*
1398                                  * This is really a protocol error,
1399                                  * but we just ignore malformed
1400                                  * features for ease of
1401                                  * implementation.
1402                                  */
1403                                 continue;
1404                         }
1405                         value = NULL;
1406                 }
1407
1408                 if (strcmp(feature, "swbreak") == 0)
1409                         swbreak_enabled = supported;
1410         }
1411         free(str);
1412
1413         start_packet();
1414
1415         /* This is an arbitrary limit. */
1416         append_string("PacketSize=4096");
1417         append_string(";swbreak+");
1418         finish_packet();
1419 }
1420
1421 static void
1422 gdb_query(const uint8_t *data, size_t len)
1423 {
1424
1425         /*
1426          * TODO:
1427          * - qSearch
1428          */
1429         if (command_equals(data, len, "qAttached")) {
1430                 start_packet();
1431                 append_char('1');
1432                 finish_packet();
1433         } else if (command_equals(data, len, "qC")) {
1434                 start_packet();
1435                 append_string("QC");
1436                 append_integer(cur_vcpu + 1);
1437                 finish_packet();
1438         } else if (command_equals(data, len, "qfThreadInfo")) {
1439                 cpuset_t mask;
1440                 bool first;
1441                 int vcpu;
1442
1443                 if (CPU_EMPTY(&vcpus_active)) {
1444                         send_error(EINVAL);
1445                         return;
1446                 }
1447                 mask = vcpus_active;
1448                 start_packet();
1449                 append_char('m');
1450                 first = true;
1451                 while (!CPU_EMPTY(&mask)) {
1452                         vcpu = CPU_FFS(&mask) - 1;
1453                         CPU_CLR(vcpu, &mask);
1454                         if (first)
1455                                 first = false;
1456                         else
1457                                 append_char(',');
1458                         append_integer(vcpu + 1);
1459                 }
1460                 finish_packet();
1461         } else if (command_equals(data, len, "qsThreadInfo")) {
1462                 start_packet();
1463                 append_char('l');
1464                 finish_packet();
1465         } else if (command_equals(data, len, "qSupported")) {
1466                 data += strlen("qSupported");
1467                 len -= strlen("qSupported");
1468                 check_features(data, len);
1469         } else if (command_equals(data, len, "qThreadExtraInfo")) {
1470                 char buf[16];
1471                 int tid;
1472
1473                 data += strlen("qThreadExtraInfo");
1474                 len -= strlen("qThreadExtraInfo");
1475                 if (*data != ',') {
1476                         send_error(EINVAL);
1477                         return;
1478                 }
1479                 tid = parse_threadid(data + 1, len - 1);
1480                 if (tid <= 0 || !CPU_ISSET(tid - 1, &vcpus_active)) {
1481                         send_error(EINVAL);
1482                         return;
1483                 }
1484
1485                 snprintf(buf, sizeof(buf), "vCPU %d", tid - 1);
1486                 start_packet();
1487                 append_asciihex(buf);
1488                 finish_packet();
1489         } else
1490                 send_empty_response();
1491 }
1492
1493 static void
1494 handle_command(const uint8_t *data, size_t len)
1495 {
1496
1497         /* Reject packets with a sequence-id. */
1498         if (len >= 3 && data[0] >= '0' && data[0] <= '9' &&
1499             data[0] >= '0' && data[0] <= '9' && data[2] == ':') {
1500                 send_empty_response();
1501                 return;
1502         }
1503
1504         switch (*data) {
1505         case 'c':
1506                 if (len != 1) {
1507                         send_error(EINVAL);
1508                         break;
1509                 }
1510
1511                 discard_stop();
1512                 gdb_resume_vcpus();
1513                 break;
1514         case 'D':
1515                 send_ok();
1516
1517                 /* TODO: Resume any stopped CPUs. */
1518                 break;
1519         case 'g': {
1520                 gdb_read_regs();
1521                 break;
1522         }
1523         case 'H': {
1524                 int tid;
1525
1526                 if (data[1] != 'g' && data[1] != 'c') {
1527                         send_error(EINVAL);
1528                         break;
1529                 }
1530                 tid = parse_threadid(data + 2, len - 2);
1531                 if (tid == -2) {
1532                         send_error(EINVAL);
1533                         break;
1534                 }
1535
1536                 if (CPU_EMPTY(&vcpus_active)) {
1537                         send_error(EINVAL);
1538                         break;
1539                 }
1540                 if (tid == -1 || tid == 0)
1541                         cur_vcpu = CPU_FFS(&vcpus_active) - 1;
1542                 else if (CPU_ISSET(tid - 1, &vcpus_active))
1543                         cur_vcpu = tid - 1;
1544                 else {
1545                         send_error(EINVAL);
1546                         break;
1547                 }
1548                 send_ok();
1549                 break;
1550         }
1551         case 'm':
1552                 gdb_read_mem(data, len);
1553                 break;
1554         case 'M':
1555                 gdb_write_mem(data, len);
1556                 break;
1557         case 'T': {
1558                 int tid;
1559
1560                 tid = parse_threadid(data + 1, len - 1);
1561                 if (tid <= 0 || !CPU_ISSET(tid - 1, &vcpus_active)) {
1562                         send_error(EINVAL);
1563                         return;
1564                 }
1565                 send_ok();
1566                 break;
1567         }
1568         case 'q':
1569                 gdb_query(data, len);
1570                 break;
1571         case 's':
1572                 if (len != 1) {
1573                         send_error(EINVAL);
1574                         break;
1575                 }
1576
1577                 /* Don't send a reply until a stop occurs. */
1578                 if (!gdb_step_vcpu(cur_vcpu)) {
1579                         send_error(EOPNOTSUPP);
1580                         break;
1581                 }
1582                 break;
1583         case 'z':
1584         case 'Z':
1585                 parse_breakpoint(data, len);
1586                 break;
1587         case '?':
1588                 report_stop(false);
1589                 break;
1590         case 'G': /* TODO */
1591         case 'v':
1592                 /* Handle 'vCont' */
1593                 /* 'vCtrlC' */
1594         case 'p': /* TODO */
1595         case 'P': /* TODO */
1596         case 'Q': /* TODO */
1597         case 't': /* TODO */
1598         case 'X': /* TODO */
1599         default:
1600                 send_empty_response();
1601         }
1602 }
1603
/*
 * Check for a valid packet in the command buffer.
 *
 * Consumes every complete item currently buffered in cur_comm —
 * interrupt bytes (0x03), ACK ('+') / NACK ('-') characters, and
 * framed "$<payload>#<xx>" packets — dispatching each in turn.
 * Returns when the buffer is empty or holds only the prefix of an
 * incomplete packet.
 */
static void
check_command(int fd)
{
	uint8_t *head, *hash, *p, sum;
	size_t avail, plen;

	for (;;) {
		avail = cur_comm.len;
		if (avail == 0)
			return;
		head = io_buffer_head(&cur_comm);
		switch (*head) {
		case 0x03:
			/* Ctrl-C from the debugger: stop the guest. */
			debug("<- Ctrl-C\n");
			io_buffer_consume(&cur_comm, 1);

			gdb_suspend_vcpus();
			break;
		case '+':
			/* ACK of previous response. */
			debug("<- +\n");
			if (response_pending())
				io_buffer_reset(&cur_resp);
			io_buffer_consume(&cur_comm, 1);
			/* Deliver a deferred stop reply, if one is due. */
			if (stopped_vcpu != -1 && report_next_stop) {
				report_stop(true);
				send_pending_data(fd);
			}
			break;
		case '-':
			/* NACK of previous response. */
			debug("<- -\n");
			if (response_pending()) {
				/*
				 * Rewind cur_resp to its start so the
				 * whole reply is retransmitted, but skip
				 * a leading ACK '+' that was already
				 * delivered with the original send.
				 */
				cur_resp.len += cur_resp.start;
				cur_resp.start = 0;
				if (cur_resp.data[0] == '+')
					io_buffer_advance(&cur_resp, 1);
				debug("-> %.*s\n", (int)cur_resp.len,
				    io_buffer_head(&cur_resp));
			}
			io_buffer_consume(&cur_comm, 1);
			send_pending_data(fd);
			break;
		case '$':
			/* Packet. */

			if (response_pending()) {
				warnx("New GDB command while response in "
				    "progress");
				io_buffer_reset(&cur_resp);
			}

			/* Is packet complete? */
			hash = memchr(head, '#', avail);
			if (hash == NULL)
				return;
			/* '$'..payload..'#' plus two checksum hex digits. */
			plen = (hash - head + 1) + 2;
			if (avail < plen)
				return;
			debug("<- %.*s\n", (int)plen, head);

			/* Verify checksum (mod-256 sum of the payload). */
			for (sum = 0, p = head + 1; p < hash; p++)
				sum += *p;
			if (sum != parse_byte(hash + 1)) {
				/* Bad checksum: NACK and drop the packet. */
				io_buffer_consume(&cur_comm, plen);
				debug("-> -\n");
				send_char('-');
				send_pending_data(fd);
				break;
			}
			send_char('+');

			handle_command(head + 1, hash - (head + 1));
			io_buffer_consume(&cur_comm, plen);
			if (!response_pending())
				debug("-> +\n");
			send_pending_data(fd);
			break;
		default:
			/* XXX: Possibly drop connection instead. */
			debug("-> %02x\n", *head);
			io_buffer_consume(&cur_comm, 1);
			break;
		}
	}
}
1692
/*
 * mevent callback: data is available on the GDB socket.  Reads
 * pending bytes into cur_comm and processes any complete commands.
 * EOF or a fatal read error tears down the connection.
 */
static void
gdb_readable(int fd, enum ev_type event, void *arg)
{
	ssize_t nread;
	int pending;

	if (ioctl(fd, FIONREAD, &pending) == -1) {
		warn("FIONREAD on GDB socket");
		return;
	}

	/*
	 * 'pending' might be zero due to EOF.  We need to call read
	 * with a non-zero length to detect EOF.
	 */
	if (pending == 0)
		pending = 1;

	/* Ensure there is room in the command buffer. */
	io_buffer_grow(&cur_comm, pending);
	assert(io_buffer_avail(&cur_comm) >= pending);

	/* May read more than 'pending' if the buffer has extra room. */
	nread = read(fd, io_buffer_tail(&cur_comm), io_buffer_avail(&cur_comm));
	if (nread == 0) {
		/* EOF: the peer closed the connection. */
		close_connection();
	} else if (nread == -1) {
		if (errno == EAGAIN)
			return;

		warn("Read from GDB socket");
		close_connection();
	} else {
		/*
		 * NOTE(review): cur_comm.len is updated before taking
		 * gdb_lock — presumably cur_comm is only touched from
		 * the mevent thread; confirm.
		 */
		cur_comm.len += nread;
		pthread_mutex_lock(&gdb_lock);
		check_command(fd);
		pthread_mutex_unlock(&gdb_lock);
	}
}
1731
1732 static void
1733 gdb_writable(int fd, enum ev_type event, void *arg)
1734 {
1735
1736         send_pending_data(fd);
1737 }
1738
1739 static void
1740 new_connection(int fd, enum ev_type event, void *arg)
1741 {
1742         int optval, s;
1743
1744         s = accept4(fd, NULL, NULL, SOCK_NONBLOCK);
1745         if (s == -1) {
1746                 if (arg != NULL)
1747                         err(1, "Failed accepting initial GDB connection");
1748
1749                 /* Silently ignore errors post-startup. */
1750                 return;
1751         }
1752
1753         optval = 1;
1754         if (setsockopt(s, SOL_SOCKET, SO_NOSIGPIPE, &optval, sizeof(optval)) ==
1755             -1) {
1756                 warn("Failed to disable SIGPIPE for GDB connection");
1757                 close(s);
1758                 return;
1759         }
1760
1761         pthread_mutex_lock(&gdb_lock);
1762         if (cur_fd != -1) {
1763                 close(s);
1764                 warnx("Ignoring additional GDB connection.");
1765         }
1766
1767         read_event = mevent_add(s, EVF_READ, gdb_readable, NULL);
1768         if (read_event == NULL) {
1769                 if (arg != NULL)
1770                         err(1, "Failed to setup initial GDB connection");
1771                 pthread_mutex_unlock(&gdb_lock);
1772                 return;
1773         }
1774         write_event = mevent_add(s, EVF_WRITE, gdb_writable, NULL);
1775         if (write_event == NULL) {
1776                 if (arg != NULL)
1777                         err(1, "Failed to setup initial GDB connection");
1778                 mevent_delete_close(read_event);
1779                 read_event = NULL;
1780         }
1781
1782         cur_fd = s;
1783         cur_vcpu = 0;
1784         stopped_vcpu = -1;
1785
1786         /* Break on attach. */
1787         first_stop = true;
1788         report_next_stop = false;
1789         gdb_suspend_vcpus();
1790         pthread_mutex_unlock(&gdb_lock);
1791 }
1792
1793 #ifndef WITHOUT_CAPSICUM
1794 void
1795 limit_gdb_socket(int s)
1796 {
1797         cap_rights_t rights;
1798         unsigned long ioctls[] = { FIONREAD };
1799
1800         cap_rights_init(&rights, CAP_ACCEPT, CAP_EVENT, CAP_READ, CAP_WRITE,
1801             CAP_SETSOCKOPT, CAP_IOCTL);
1802         if (caph_rights_limit(s, &rights) == -1)
1803                 errx(EX_OSERR, "Unable to apply rights for sandbox");
1804         if (caph_ioctls_limit(s, ioctls, nitems(ioctls)) == -1)
1805                 errx(EX_OSERR, "Unable to apply rights for sandbox");
1806 }
1807 #endif
1808
1809 void
1810 init_gdb(struct vmctx *_ctx, int sport, bool wait)
1811 {
1812         struct sockaddr_in sin;
1813         int error, flags, s;
1814
1815         debug("==> starting on %d, %swaiting\n", sport, wait ? "" : "not ");
1816
1817         error = pthread_mutex_init(&gdb_lock, NULL);
1818         if (error != 0)
1819                 errc(1, error, "gdb mutex init");
1820         error = pthread_cond_init(&idle_vcpus, NULL);
1821         if (error != 0)
1822                 errc(1, error, "gdb cv init");
1823
1824         ctx = _ctx;
1825         s = socket(PF_INET, SOCK_STREAM, 0);
1826         if (s < 0)
1827                 err(1, "gdb socket create");
1828
1829         sin.sin_len = sizeof(sin);
1830         sin.sin_family = AF_INET;
1831         sin.sin_addr.s_addr = htonl(INADDR_ANY);
1832         sin.sin_port = htons(sport);
1833
1834         if (bind(s, (struct sockaddr *)&sin, sizeof(sin)) < 0)
1835                 err(1, "gdb socket bind");
1836
1837         if (listen(s, 1) < 0)
1838                 err(1, "gdb socket listen");
1839
1840         stopped_vcpu = -1;
1841         TAILQ_INIT(&breakpoints);
1842         vcpu_state = calloc(guest_ncpus, sizeof(*vcpu_state));
1843         if (wait) {
1844                 /*
1845                  * Set vcpu 0 in vcpus_suspended.  This will trigger the
1846                  * logic in gdb_cpu_add() to suspend the first vcpu before
1847                  * it starts execution.  The vcpu will remain suspended
1848                  * until a debugger connects.
1849                  */
1850                 CPU_SET(0, &vcpus_suspended);
1851                 stopped_vcpu = 0;
1852         }
1853
1854         flags = fcntl(s, F_GETFL);
1855         if (fcntl(s, F_SETFL, flags | O_NONBLOCK) == -1)
1856                 err(1, "Failed to mark gdb socket non-blocking");
1857
1858 #ifndef WITHOUT_CAPSICUM
1859         limit_gdb_socket(s);
1860 #endif
1861         mevent_add(s, EVF_READ, new_connection, NULL);
1862 }