]> CyberLeo.Net >> Repos - FreeBSD/FreeBSD.git/blob - usr.sbin/bhyve/gdb.c
bhyve: Cast away const when fetching a config nvlist
[FreeBSD/FreeBSD.git] / usr.sbin / bhyve / gdb.c
1 /*-
2  * SPDX-License-Identifier: BSD-2-Clause-FreeBSD
3  *
4  * Copyright (c) 2017-2018 John H. Baldwin <jhb@FreeBSD.org>
5  *
6  * Redistribution and use in source and binary forms, with or without
7  * modification, are permitted provided that the following conditions
8  * are met:
9  * 1. Redistributions of source code must retain the above copyright
10  *    notice, this list of conditions and the following disclaimer.
11  * 2. Redistributions in binary form must reproduce the above copyright
12  *    notice, this list of conditions and the following disclaimer in the
13  *    documentation and/or other materials provided with the distribution.
14  *
15  * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
16  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
17  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
18  * ARE DISCLAIMED.  IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
19  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
20  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
21  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
22  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
23  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
24  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
25  * SUCH DAMAGE.
26  */
27
28 #include <sys/cdefs.h>
29 __FBSDID("$FreeBSD$");
30
31 #include <sys/param.h>
32 #ifndef WITHOUT_CAPSICUM
33 #include <sys/capsicum.h>
34 #endif
35 #include <sys/endian.h>
36 #include <sys/ioctl.h>
37 #include <sys/mman.h>
38 #include <sys/queue.h>
39 #include <sys/socket.h>
40 #include <machine/atomic.h>
41 #include <machine/specialreg.h>
42 #include <machine/vmm.h>
43 #include <netinet/in.h>
44 #include <assert.h>
45 #ifndef WITHOUT_CAPSICUM
46 #include <capsicum_helpers.h>
47 #endif
48 #include <err.h>
49 #include <errno.h>
50 #include <fcntl.h>
51 #include <netdb.h>
52 #include <pthread.h>
53 #include <pthread_np.h>
54 #include <stdbool.h>
55 #include <stdio.h>
56 #include <stdlib.h>
57 #include <string.h>
58 #include <sysexits.h>
59 #include <unistd.h>
60 #include <vmmapi.h>
61
62 #include "bhyverun.h"
63 #include "config.h"
64 #include "gdb.h"
65 #include "mem.h"
66 #include "mevent.h"
67
68 /*
69  * GDB_SIGNAL_* numbers are part of the GDB remote protocol.  Most stops
70  * use SIGTRAP.
71  */
72 #define GDB_SIGNAL_TRAP         5
73
74 static void gdb_resume_vcpus(void);
75 static void check_command(int fd);
76
77 static struct mevent *read_event, *write_event;
78
79 static cpuset_t vcpus_active, vcpus_suspended, vcpus_waiting;
80 static pthread_mutex_t gdb_lock;
81 static pthread_cond_t idle_vcpus;
82 static bool first_stop, report_next_stop, swbreak_enabled;
83
/*
 * An I/O buffer contains 'capacity' bytes of room at 'data'.  For a
 * read buffer, 'start' is unused and 'len' contains the number of
 * valid bytes in the buffer.  For a write buffer, 'start' is set to
 * the index of the next byte in 'data' to send, and 'len' contains
 * the remaining number of valid bytes to send.
 */
struct io_buffer {
        uint8_t *data;          /* backing storage, resized by io_buffer_grow() */
        size_t capacity;        /* total number of bytes allocated at 'data' */
        size_t start;           /* index in 'data' of the first valid byte */
        size_t len;             /* number of valid bytes beginning at 'start' */
};
97
/*
 * One breakpoint tracked by the debug server.  Entries are kept on
 * the global 'breakpoints' list and are looked up by guest physical
 * address in find_breakpoint().
 */
struct breakpoint {
        uint64_t gpa;           /* guest physical address of the breakpoint */
        /*
         * NOTE(review): presumably the guest instruction byte that was
         * displaced by the breakpoint; the code that reads and writes
         * this field is not visible here -- confirm.
         */
        uint8_t shadow_inst;
        TAILQ_ENTRY(breakpoint) link;   /* linkage on 'breakpoints' */
};
103
/*
 * When a vCPU stops due to an event that should be reported to the
 * debugger, information about the event is stored in this structure.
 * The vCPU thread then sets 'stopped_vcpu' if it is not already set
 * and stops other vCPUs so the event can be reported.  The
 * report_stop() function reports the event for the 'stopped_vcpu'
 * vCPU.  When the debugger resumes execution via continue or step,
 * the event for 'stopped_vcpu' is cleared.  vCPUs will loop in their
 * event handlers until the associated event is reported or disabled.
 *
 * An idle vCPU will have all of the boolean fields set to false.
 *
 * When a vCPU is stepped, 'stepping' is set to true when the vCPU is
 * released to execute the stepped instruction.  When the vCPU reports
 * the stepping trap, 'stepped' is set.
 *
 * When a vCPU hits a breakpoint set by the debug server,
 * 'hit_swbreak' is set to true.
 */
struct vcpu_state {
        bool stepping;          /* released to execute a single instruction */
        bool stepped;           /* step trap taken, not yet acknowledged */
        bool hit_swbreak;       /* breakpoint hit, not yet acknowledged */
};
128
129 static struct io_buffer cur_comm, cur_resp;
130 static uint8_t cur_csum;
131 static struct vmctx *ctx;
132 static int cur_fd = -1;
133 static TAILQ_HEAD(, breakpoint) breakpoints;
134 static struct vcpu_state *vcpu_state;
135 static int cur_vcpu, stopped_vcpu;
136 static bool gdb_active = false;
137
138 static const int gdb_regset[] = {
139         VM_REG_GUEST_RAX,
140         VM_REG_GUEST_RBX,
141         VM_REG_GUEST_RCX,
142         VM_REG_GUEST_RDX,
143         VM_REG_GUEST_RSI,
144         VM_REG_GUEST_RDI,
145         VM_REG_GUEST_RBP,
146         VM_REG_GUEST_RSP,
147         VM_REG_GUEST_R8,
148         VM_REG_GUEST_R9,
149         VM_REG_GUEST_R10,
150         VM_REG_GUEST_R11,
151         VM_REG_GUEST_R12,
152         VM_REG_GUEST_R13,
153         VM_REG_GUEST_R14,
154         VM_REG_GUEST_R15,
155         VM_REG_GUEST_RIP,
156         VM_REG_GUEST_RFLAGS,
157         VM_REG_GUEST_CS,
158         VM_REG_GUEST_SS,
159         VM_REG_GUEST_DS,
160         VM_REG_GUEST_ES,
161         VM_REG_GUEST_FS,
162         VM_REG_GUEST_GS
163 };
164
165 static const int gdb_regsize[] = {
166         8,
167         8,
168         8,
169         8,
170         8,
171         8,
172         8,
173         8,
174         8,
175         8,
176         8,
177         8,
178         8,
179         8,
180         8,
181         8,
182         8,
183         4,
184         4,
185         4,
186         4,
187         4,
188         4,
189         4
190 };
191
#ifdef GDB_LOG
#include <stdarg.h>
#include <stdio.h>

/*
 * printf-style trace helper, only compiled in when GDB_LOG is defined.
 * The log file is opened on the first call; if it cannot be opened (or
 * restricted to writes under Capsicum) the message is dropped and the
 * open is attempted again on the next call.
 */
static void __printflike(1, 2)
debug(const char *fmt, ...)
{
        static FILE *logfile;
        va_list ap;
        FILE *fp;

        if (logfile == NULL) {
                fp = fopen("/tmp/bhyve_gdb.log", "w");
                if (fp == NULL)
                        return;
#ifndef WITHOUT_CAPSICUM
                if (caph_limit_stream(fileno(fp), CAPH_WRITE) == -1) {
                        fclose(fp);
                        return;
                }
#endif
                setlinebuf(fp);
                logfile = fp;
        }
        va_start(ap, fmt);
        vfprintf(logfile, fmt, ap);
        va_end(ap);
}
#else
#define debug(...)
#endif
222
223 static void     remove_all_sw_breakpoints(void);
224
225 static int
226 guest_paging_info(int vcpu, struct vm_guest_paging *paging)
227 {
228         uint64_t regs[4];
229         const int regset[4] = {
230                 VM_REG_GUEST_CR0,
231                 VM_REG_GUEST_CR3,
232                 VM_REG_GUEST_CR4,
233                 VM_REG_GUEST_EFER
234         };
235
236         if (vm_get_register_set(ctx, vcpu, nitems(regset), regset, regs) == -1)
237                 return (-1);
238
239         /*
240          * For the debugger, always pretend to be the kernel (CPL 0),
241          * and if long-mode is enabled, always parse addresses as if
242          * in 64-bit mode.
243          */
244         paging->cr3 = regs[1];
245         paging->cpl = 0;
246         if (regs[3] & EFER_LMA)
247                 paging->cpu_mode = CPU_MODE_64BIT;
248         else if (regs[0] & CR0_PE)
249                 paging->cpu_mode = CPU_MODE_PROTECTED;
250         else
251                 paging->cpu_mode = CPU_MODE_REAL;
252         if (!(regs[0] & CR0_PG))
253                 paging->paging_mode = PAGING_MODE_FLAT;
254         else if (!(regs[2] & CR4_PAE))
255                 paging->paging_mode = PAGING_MODE_32;
256         else if (regs[3] & EFER_LME)
257                 paging->paging_mode = (regs[2] & CR4_LA57) ?
258                     PAGING_MODE_64_LA57 :  PAGING_MODE_64;
259         else
260                 paging->paging_mode = PAGING_MODE_PAE;
261         return (0);
262 }
263
264 /*
265  * Map a guest virtual address to a physical address (for a given vcpu).
266  * If a guest virtual address is valid, return 1.  If the address is
267  * not valid, return 0.  If an error occurs obtaining the mapping,
268  * return -1.
269  */
270 static int
271 guest_vaddr2paddr(int vcpu, uint64_t vaddr, uint64_t *paddr)
272 {
273         struct vm_guest_paging paging;
274         int fault;
275
276         if (guest_paging_info(vcpu, &paging) == -1)
277                 return (-1);
278
279         /*
280          * Always use PROT_READ.  We really care if the VA is
281          * accessible, not if the current vCPU can write.
282          */
283         if (vm_gla2gpa_nofault(ctx, vcpu, &paging, vaddr, PROT_READ, paddr,
284             &fault) == -1)
285                 return (-1);
286         if (fault)
287                 return (0);
288         return (1);
289 }
290
291 static void
292 io_buffer_reset(struct io_buffer *io)
293 {
294
295         io->start = 0;
296         io->len = 0;
297 }
298
299 /* Available room for adding data. */
300 static size_t
301 io_buffer_avail(struct io_buffer *io)
302 {
303
304         return (io->capacity - (io->start + io->len));
305 }
306
307 static uint8_t *
308 io_buffer_head(struct io_buffer *io)
309 {
310
311         return (io->data + io->start);
312 }
313
314 static uint8_t *
315 io_buffer_tail(struct io_buffer *io)
316 {
317
318         return (io->data + io->start + io->len);
319 }
320
321 static void
322 io_buffer_advance(struct io_buffer *io, size_t amount)
323 {
324
325         assert(amount <= io->len);
326         io->start += amount;
327         io->len -= amount;
328 }
329
330 static void
331 io_buffer_consume(struct io_buffer *io, size_t amount)
332 {
333
334         io_buffer_advance(io, amount);
335         if (io->len == 0) {
336                 io->start = 0;
337                 return;
338         }
339
340         /*
341          * XXX: Consider making this move optional and compacting on a
342          * future read() before realloc().
343          */
344         memmove(io->data, io_buffer_head(io), io->len);
345         io->start = 0;
346 }
347
348 static void
349 io_buffer_grow(struct io_buffer *io, size_t newsize)
350 {
351         uint8_t *new_data;
352         size_t avail, new_cap;
353
354         avail = io_buffer_avail(io);
355         if (newsize <= avail)
356                 return;
357
358         new_cap = io->capacity + (newsize - avail);
359         new_data = realloc(io->data, new_cap);
360         if (new_data == NULL)
361                 err(1, "Failed to grow GDB I/O buffer");
362         io->data = new_data;
363         io->capacity = new_cap;
364 }
365
366 static bool
367 response_pending(void)
368 {
369
370         if (cur_resp.start == 0 && cur_resp.len == 0)
371                 return (false);
372         if (cur_resp.start + cur_resp.len == 1 && cur_resp.data[0] == '+')
373                 return (false);
374         return (true);
375 }
376
377 static void
378 close_connection(void)
379 {
380
381         /*
382          * XXX: This triggers a warning because mevent does the close
383          * before the EV_DELETE.
384          */
385         pthread_mutex_lock(&gdb_lock);
386         mevent_delete(write_event);
387         mevent_delete_close(read_event);
388         write_event = NULL;
389         read_event = NULL;
390         io_buffer_reset(&cur_comm);
391         io_buffer_reset(&cur_resp);
392         cur_fd = -1;
393
394         remove_all_sw_breakpoints();
395
396         /* Clear any pending events. */
397         memset(vcpu_state, 0, guest_ncpus * sizeof(*vcpu_state));
398
399         /* Resume any stopped vCPUs. */
400         gdb_resume_vcpus();
401         pthread_mutex_unlock(&gdb_lock);
402 }
403
/* Convert a value in the range 0-15 to its lowercase hex character. */
static uint8_t
hex_digit(uint8_t nibble)
{

        return (nibble <= 9 ? nibble + '0' : nibble + 'a' - 10);
}
413
/*
 * Convert one hex character (either case) to its value.  Characters
 * that are not hex digits yield 0xF.
 */
static uint8_t
parse_digit(uint8_t v)
{
        uint8_t off;

        /* Unsigned wrap-around makes anything below the base compare large. */
        off = (uint8_t)(v - '0');
        if (off <= 9)
                return (off);
        off = (uint8_t)(v - 'a');
        if (off <= 5)
                return (off + 10);
        off = (uint8_t)(v - 'A');
        if (off <= 5)
                return (off + 10);
        return (0xF);
}
426
/*
 * Parse 'len' hex characters at 'p', most significant digit first,
 * and return the resulting value.  An empty string yields 0.
 */
static uintmax_t
parse_integer(const uint8_t *p, size_t len)
{
        uintmax_t result;
        size_t i;

        result = 0;
        for (i = 0; i < len; i++)
                result = (result << 4) | parse_digit(p[i]);
        return (result);
}
442
/* Parse the two hex characters at 'p' into a single byte. */
static uint8_t
parse_byte(const uint8_t *p)
{
        uint8_t hi, lo;

        hi = parse_digit(p[0]);
        lo = parse_digit(p[1]);
        return (hi << 4 | lo);
}
449
450 static void
451 send_pending_data(int fd)
452 {
453         ssize_t nwritten;
454
455         if (cur_resp.len == 0) {
456                 mevent_disable(write_event);
457                 return;
458         }
459         nwritten = write(fd, io_buffer_head(&cur_resp), cur_resp.len);
460         if (nwritten == -1) {
461                 warn("Write to GDB socket failed");
462                 close_connection();
463         } else {
464                 io_buffer_advance(&cur_resp, nwritten);
465                 if (cur_resp.len == 0)
466                         mevent_disable(write_event);
467                 else
468                         mevent_enable(write_event);
469         }
470 }
471
472 /* Append a single character to the output buffer. */
473 static void
474 send_char(uint8_t data)
475 {
476         io_buffer_grow(&cur_resp, 1);
477         *io_buffer_tail(&cur_resp) = data;
478         cur_resp.len++;
479 }
480
481 /* Append an array of bytes to the output buffer. */
482 static void
483 send_data(const uint8_t *data, size_t len)
484 {
485
486         io_buffer_grow(&cur_resp, len);
487         memcpy(io_buffer_tail(&cur_resp), data, len);
488         cur_resp.len += len;
489 }
490
/*
 * Write the two lowercase hex characters for 'v' to buf[0] (high
 * nibble) and buf[1] (low nibble).  No terminator is written.
 */
static void
format_byte(uint8_t v, uint8_t *buf)
{
        uint8_t hi, lo;

        hi = v >> 4;
        lo = v & 0xf;
        buf[0] = hex_digit(hi);
        buf[1] = hex_digit(lo);
}
498
/*
 * Queue 'v' as two hex characters.  Unlike append_byte(), this does
 * not touch the running packet checksum.
 */
static void
send_byte(uint8_t v)
{
        uint8_t hex[2];

        format_byte(v, &hex[0]);
        send_data(&hex[0], sizeof(hex));
}
511
512 static void
513 start_packet(void)
514 {
515
516         send_char('$');
517         cur_csum = 0;
518 }
519
520 static void
521 finish_packet(void)
522 {
523
524         send_char('#');
525         send_byte(cur_csum);
526         debug("-> %.*s\n", (int)cur_resp.len, io_buffer_head(&cur_resp));
527 }
528
529 /*
530  * Append a single character (for the packet payload) and update the
531  * checksum.
532  */
533 static void
534 append_char(uint8_t v)
535 {
536
537         send_char(v);
538         cur_csum += v;
539 }
540
541 /*
542  * Append an array of bytes (for the packet payload) and update the
543  * checksum.
544  */
545 static void
546 append_packet_data(const uint8_t *data, size_t len)
547 {
548
549         send_data(data, len);
550         while (len > 0) {
551                 cur_csum += *data;
552                 data++;
553                 len--;
554         }
555 }
556
/*
 * Add the characters of NUL-terminated 'str' (without the terminator)
 * to the packet payload.
 */
static void
append_string(const char *str)
{

        /*
         * append_packet_data() takes bytes as uint8_t; 'char *' and
         * 'uint8_t *' are distinct pointer types, so convert explicitly
         * rather than relying on an implicit conversion.
         */
        append_packet_data((const uint8_t *)str, strlen(str));
}
563
/* Add 'v' to the packet payload as two hex characters. */
static void
append_byte(uint8_t v)
{
        uint8_t hex[2];

        format_byte(v, &hex[0]);
        append_packet_data(&hex[0], sizeof(hex));
}
572
/*
 * Add the low 'len' bytes of 'value' to the packet payload, least
 * significant byte first, each as two hex characters.
 */
static void
append_unsigned_native(uintmax_t value, size_t len)
{

        while (len > 0) {
                append_byte(value & 0xff);
                value >>= 8;
                len--;
        }
}
583
/*
 * Add the low 'len' bytes of 'value' to the packet payload, most
 * significant byte first, each as two hex characters.  A 'len' larger
 * than the width of 'value' pads with leading zero bytes; a 'len' of
 * zero adds nothing.
 *
 * Bytes are emitted one at a time rather than staged in a
 * variable-length array, which would be zero-sized (undefined) for
 * len == 0 and would hand 'char' storage to uint8_t-based helpers.
 */
static void
append_unsigned_be(uintmax_t value, size_t len)
{
        size_t i;

        for (i = len; i > 0; i--) {
                /* Shifting by the full width or more is undefined. */
                if (i > sizeof(value))
                        append_byte(0);
                else
                        append_byte(value >> ((i - 1) * 8));
        }
}
596
/*
 * Add 'value' to the packet payload as big-endian hex using the
 * smallest whole number of bytes; zero is sent as a single '0'.
 */
static void
append_integer(unsigned int value)
{
        size_t nbytes;

        if (value != 0) {
                nbytes = (fls(value) + 7) / 8;
                append_unsigned_be(value, nbytes);
        } else {
                append_char('0');
        }
}
606
/*
 * Add each character of NUL-terminated 'str' to the packet payload as
 * two hex characters.
 */
static void
append_asciihex(const char *str)
{
        const char *cp;

        for (cp = str; *cp != '\0'; cp++)
                append_byte(*cp);
}
616
/* Queue a packet with an empty payload. */
static void
send_empty_response(void)
{

        start_packet();
        finish_packet();
}
624
/*
 * Queue an error reply: 'E' followed by the low byte of 'error' as
 * two hex characters.
 */
static void
send_error(int error)
{
        uint8_t code;

        code = (uint8_t)error;
        start_packet();
        append_char('E');
        append_byte(code);
        finish_packet();
}
634
/* Queue an "OK" reply packet. */
static void
send_ok(void)
{

        start_packet();
        append_char('O');
        append_char('K');
        finish_packet();
}
643
/*
 * Parse a thread id of 'len' characters at 'data'.  Returns -2 for an
 * empty string, 0 for "0", -1 for "-1", and otherwise the value of
 * the string parsed as hexadecimal.
 */
static int
parse_threadid(const uint8_t *data, size_t len)
{

        switch (len) {
        case 0:
                return (-2);
        case 1:
                if (data[0] == '0')
                        return (0);
                break;
        case 2:
                if (memcmp(data, "-1", 2) == 0)
                        return (-1);
                break;
        default:
                break;
        }
        return (parse_integer(data, len));
}
656
657 /*
658  * Report the current stop event to the debugger.  If the stop is due
659  * to an event triggered on a specific vCPU such as a breakpoint or
660  * stepping trap, stopped_vcpu will be set to the vCPU triggering the
661  * stop.  If 'set_cur_vcpu' is true, then cur_vcpu will be updated to
662  * the reporting vCPU for vCPU events.
663  */
664 static void
665 report_stop(bool set_cur_vcpu)
666 {
667         struct vcpu_state *vs;
668
669         start_packet();
670         if (stopped_vcpu == -1) {
671                 append_char('S');
672                 append_byte(GDB_SIGNAL_TRAP);
673         } else {
674                 vs = &vcpu_state[stopped_vcpu];
675                 if (set_cur_vcpu)
676                         cur_vcpu = stopped_vcpu;
677                 append_char('T');
678                 append_byte(GDB_SIGNAL_TRAP);
679                 append_string("thread:");
680                 append_integer(stopped_vcpu + 1);
681                 append_char(';');
682                 if (vs->hit_swbreak) {
683                         debug("$vCPU %d reporting swbreak\n", stopped_vcpu);
684                         if (swbreak_enabled)
685                                 append_string("swbreak:;");
686                 } else if (vs->stepped)
687                         debug("$vCPU %d reporting step\n", stopped_vcpu);
688                 else
689                         debug("$vCPU %d reporting ???\n", stopped_vcpu);
690         }
691         finish_packet();
692         report_next_stop = false;
693 }
694
695 /*
696  * If this stop is due to a vCPU event, clear that event to mark it as
697  * acknowledged.
698  */
699 static void
700 discard_stop(void)
701 {
702         struct vcpu_state *vs;
703
704         if (stopped_vcpu != -1) {
705                 vs = &vcpu_state[stopped_vcpu];
706                 vs->hit_swbreak = false;
707                 vs->stepped = false;
708                 stopped_vcpu = -1;
709         }
710         report_next_stop = true;
711 }
712
713 static void
714 gdb_finish_suspend_vcpus(void)
715 {
716
717         if (first_stop) {
718                 first_stop = false;
719                 stopped_vcpu = -1;
720         } else if (report_next_stop) {
721                 assert(!response_pending());
722                 report_stop(true);
723                 send_pending_data(cur_fd);
724         }
725 }
726
727 /*
728  * vCPU threads invoke this function whenever the vCPU enters the
729  * debug server to pause or report an event.  vCPU threads wait here
730  * as long as the debug server keeps them suspended.
731  */
732 static void
733 _gdb_cpu_suspend(int vcpu, bool report_stop)
734 {
735
736         debug("$vCPU %d suspending\n", vcpu);
737         CPU_SET(vcpu, &vcpus_waiting);
738         if (report_stop && CPU_CMP(&vcpus_waiting, &vcpus_suspended) == 0)
739                 gdb_finish_suspend_vcpus();
740         while (CPU_ISSET(vcpu, &vcpus_suspended))
741                 pthread_cond_wait(&idle_vcpus, &gdb_lock);
742         CPU_CLR(vcpu, &vcpus_waiting);
743         debug("$vCPU %d resuming\n", vcpu);
744 }
745
746 /*
747  * Invoked at the start of a vCPU thread's execution to inform the
748  * debug server about the new thread.
749  */
750 void
751 gdb_cpu_add(int vcpu)
752 {
753
754         if (!gdb_active)
755                 return;
756         debug("$vCPU %d starting\n", vcpu);
757         pthread_mutex_lock(&gdb_lock);
758         assert(vcpu < guest_ncpus);
759         CPU_SET(vcpu, &vcpus_active);
760         if (!TAILQ_EMPTY(&breakpoints)) {
761                 vm_set_capability(ctx, vcpu, VM_CAP_BPT_EXIT, 1);
762                 debug("$vCPU %d enabled breakpoint exits\n", vcpu);
763         }
764
765         /*
766          * If a vcpu is added while vcpus are stopped, suspend the new
767          * vcpu so that it will pop back out with a debug exit before
768          * executing the first instruction.
769          */
770         if (!CPU_EMPTY(&vcpus_suspended)) {
771                 CPU_SET(vcpu, &vcpus_suspended);
772                 _gdb_cpu_suspend(vcpu, false);
773         }
774         pthread_mutex_unlock(&gdb_lock);
775 }
776
777 /*
778  * Invoked by vCPU before resuming execution.  This enables stepping
779  * if the vCPU is marked as stepping.
780  */
781 static void
782 gdb_cpu_resume(int vcpu)
783 {
784         struct vcpu_state *vs;
785         int error;
786
787         vs = &vcpu_state[vcpu];
788
789         /*
790          * Any pending event should already be reported before
791          * resuming.
792          */
793         assert(vs->hit_swbreak == false);
794         assert(vs->stepped == false);
795         if (vs->stepping) {
796                 error = vm_set_capability(ctx, vcpu, VM_CAP_MTRAP_EXIT, 1);
797                 assert(error == 0);
798         }
799 }
800
801 /*
802  * Handler for VM_EXITCODE_DEBUG used to suspend a vCPU when the guest
803  * has been suspended due to an event on different vCPU or in response
804  * to a guest-wide suspend such as Ctrl-C or the stop on attach.
805  */
806 void
807 gdb_cpu_suspend(int vcpu)
808 {
809
810         if (!gdb_active)
811                 return;
812         pthread_mutex_lock(&gdb_lock);
813         _gdb_cpu_suspend(vcpu, true);
814         gdb_cpu_resume(vcpu);
815         pthread_mutex_unlock(&gdb_lock);
816 }
817
/*
 * Ask every active vCPU to stop.  The caller must hold 'gdb_lock'
 * (asserted below).
 */
static void
gdb_suspend_vcpus(void)
{

        assert(pthread_mutex_isowned_np(&gdb_lock));
        debug("suspending all CPUs\n");
        /* Every vCPU that has registered is now expected to park. */
        vcpus_suspended = vcpus_active;
        vm_suspend_cpu(ctx, -1);
        /* If all of them are already waiting, finish the stop right away. */
        if (CPU_CMP(&vcpus_waiting, &vcpus_suspended) == 0)
                gdb_finish_suspend_vcpus();
}
829
830 /*
831  * Handler for VM_EXITCODE_MTRAP reported when a vCPU single-steps via
832  * the VT-x-specific MTRAP exit.
833  */
834 void
835 gdb_cpu_mtrap(int vcpu)
836 {
837         struct vcpu_state *vs;
838
839         if (!gdb_active)
840                 return;
841         debug("$vCPU %d MTRAP\n", vcpu);
842         pthread_mutex_lock(&gdb_lock);
843         vs = &vcpu_state[vcpu];
844         if (vs->stepping) {
845                 vs->stepping = false;
846                 vs->stepped = true;
847                 vm_set_capability(ctx, vcpu, VM_CAP_MTRAP_EXIT, 0);
848                 while (vs->stepped) {
849                         if (stopped_vcpu == -1) {
850                                 debug("$vCPU %d reporting step\n", vcpu);
851                                 stopped_vcpu = vcpu;
852                                 gdb_suspend_vcpus();
853                         }
854                         _gdb_cpu_suspend(vcpu, true);
855                 }
856                 gdb_cpu_resume(vcpu);
857         }
858         pthread_mutex_unlock(&gdb_lock);
859 }
860
861 static struct breakpoint *
862 find_breakpoint(uint64_t gpa)
863 {
864         struct breakpoint *bp;
865
866         TAILQ_FOREACH(bp, &breakpoints, link) {
867                 if (bp->gpa == gpa)
868                         return (bp);
869         }
870         return (NULL);
871 }
872
873 void
874 gdb_cpu_breakpoint(int vcpu, struct vm_exit *vmexit)
875 {
876         struct breakpoint *bp;
877         struct vcpu_state *vs;
878         uint64_t gpa;
879         int error;
880
881         if (!gdb_active) {
882                 fprintf(stderr, "vm_loop: unexpected VMEXIT_DEBUG\n");
883                 exit(4);
884         }
885         pthread_mutex_lock(&gdb_lock);
886         error = guest_vaddr2paddr(vcpu, vmexit->rip, &gpa);
887         assert(error == 1);
888         bp = find_breakpoint(gpa);
889         if (bp != NULL) {
890                 vs = &vcpu_state[vcpu];
891                 assert(vs->stepping == false);
892                 assert(vs->stepped == false);
893                 assert(vs->hit_swbreak == false);
894                 vs->hit_swbreak = true;
895                 vm_set_register(ctx, vcpu, VM_REG_GUEST_RIP, vmexit->rip);
896                 for (;;) {
897                         if (stopped_vcpu == -1) {
898                                 debug("$vCPU %d reporting breakpoint at rip %#lx\n", vcpu,
899                                     vmexit->rip);
900                                 stopped_vcpu = vcpu;
901                                 gdb_suspend_vcpus();
902                         }
903                         _gdb_cpu_suspend(vcpu, true);
904                         if (!vs->hit_swbreak) {
905                                 /* Breakpoint reported. */
906                                 break;
907                         }
908                         bp = find_breakpoint(gpa);
909                         if (bp == NULL) {
910                                 /* Breakpoint was removed. */
911                                 vs->hit_swbreak = false;
912                                 break;
913                         }
914                 }
915                 gdb_cpu_resume(vcpu);
916         } else {
917                 debug("$vCPU %d injecting breakpoint at rip %#lx\n", vcpu,
918                     vmexit->rip);
919                 error = vm_set_register(ctx, vcpu,
920                     VM_REG_GUEST_ENTRY_INST_LENGTH, vmexit->u.bpt.inst_length);
921                 assert(error == 0);
922                 error = vm_inject_exception(ctx, vcpu, IDT_BP, 0, 0, 0);
923                 assert(error == 0);
924         }
925         pthread_mutex_unlock(&gdb_lock);
926 }
927
928 static bool
929 gdb_step_vcpu(int vcpu)
930 {
931         int error, val;
932
933         debug("$vCPU %d step\n", vcpu);
934         error = vm_get_capability(ctx, vcpu, VM_CAP_MTRAP_EXIT, &val);
935         if (error < 0)
936                 return (false);
937
938         discard_stop();
939         vcpu_state[vcpu].stepping = true;
940         vm_resume_cpu(ctx, vcpu);
941         CPU_CLR(vcpu, &vcpus_suspended);
942         pthread_cond_broadcast(&idle_vcpus);
943         return (true);
944 }
945
/*
 * Let every suspended vCPU run again.  The caller must hold
 * 'gdb_lock' (asserted below).
 */
static void
gdb_resume_vcpus(void)
{

        assert(pthread_mutex_isowned_np(&gdb_lock));
        vm_resume_cpu(ctx, -1);
        debug("resuming all CPUs\n");
        /* No vCPU is suspended any more; wake the threads parked on idle_vcpus. */
        CPU_ZERO(&vcpus_suspended);
        pthread_cond_broadcast(&idle_vcpus);
}
956
957 static void
958 gdb_read_regs(void)
959 {
960         uint64_t regvals[nitems(gdb_regset)];
961
962         if (vm_get_register_set(ctx, cur_vcpu, nitems(gdb_regset),
963             gdb_regset, regvals) == -1) {
964                 send_error(errno);
965                 return;
966         }
967         start_packet();
968         for (size_t i = 0; i < nitems(regvals); i++)
969                 append_unsigned_native(regvals[i], gdb_regsize[i]);
970         finish_packet();
971 }
972
973 static void
974 gdb_read_mem(const uint8_t *data, size_t len)
975 {
976         uint64_t gpa, gva, val;
977         uint8_t *cp;
978         size_t resid, todo, bytes;
979         bool started;
980         int error;
981
982         /* Skip 'm' */
983         data += 1;
984         len -= 1;
985
986         /* Parse and consume address. */
987         cp = memchr(data, ',', len);
988         if (cp == NULL || cp == data) {
989                 send_error(EINVAL);
990                 return;
991         }
992         gva = parse_integer(data, cp - data);
993         len -= (cp - data) + 1;
994         data += (cp - data) + 1;
995
996         /* Parse length. */
997         resid = parse_integer(data, len);
998
999         started = false;
1000         while (resid > 0) {
1001                 error = guest_vaddr2paddr(cur_vcpu, gva, &gpa);
1002                 if (error == -1) {
1003                         if (started)
1004                                 finish_packet();
1005                         else
1006                                 send_error(errno);
1007                         return;
1008                 }
1009                 if (error == 0) {
1010                         if (started)
1011                                 finish_packet();
1012                         else
1013                                 send_error(EFAULT);
1014                         return;
1015                 }
1016
1017                 /* Read bytes from current page. */
1018                 todo = getpagesize() - gpa % getpagesize();
1019                 if (todo > resid)
1020                         todo = resid;
1021
1022                 cp = paddr_guest2host(ctx, gpa, todo);
1023                 if (cp != NULL) {
1024                         /*
1025                          * If this page is guest RAM, read it a byte
1026                          * at a time.
1027                          */
1028                         if (!started) {
1029                                 start_packet();
1030                                 started = true;
1031                         }
1032                         while (todo > 0) {
1033                                 append_byte(*cp);
1034                                 cp++;
1035                                 gpa++;
1036                                 gva++;
1037                                 resid--;
1038                                 todo--;
1039                         }
1040                 } else {
1041                         /*
1042                          * If this page isn't guest RAM, try to handle
1043                          * it via MMIO.  For MMIO requests, use
1044                          * aligned reads of words when possible.
1045                          */
1046                         while (todo > 0) {
1047                                 if (gpa & 1 || todo == 1)
1048                                         bytes = 1;
1049                                 else if (gpa & 2 || todo == 2)
1050                                         bytes = 2;
1051                                 else
1052                                         bytes = 4;
1053                                 error = read_mem(ctx, cur_vcpu, gpa, &val,
1054                                     bytes);
1055                                 if (error == 0) {
1056                                         if (!started) {
1057                                                 start_packet();
1058                                                 started = true;
1059                                         }
1060                                         gpa += bytes;
1061                                         gva += bytes;
1062                                         resid -= bytes;
1063                                         todo -= bytes;
1064                                         while (bytes > 0) {
1065                                                 append_byte(val);
1066                                                 val >>= 8;
1067                                                 bytes--;
1068                                         }
1069                                 } else {
1070                                         if (started)
1071                                                 finish_packet();
1072                                         else
1073                                                 send_error(EFAULT);
1074                                         return;
1075                                 }
1076                         }
1077                 }
1078                 assert(resid == 0 || gpa % getpagesize() == 0);
1079         }
1080         if (!started)
1081                 start_packet();
1082         finish_packet();
1083 }
1084
1085 static void
1086 gdb_write_mem(const uint8_t *data, size_t len)
1087 {
1088         uint64_t gpa, gva, val;
1089         uint8_t *cp;
1090         size_t resid, todo, bytes;
1091         int error;
1092
1093         /* Skip 'M' */
1094         data += 1;
1095         len -= 1;
1096
1097         /* Parse and consume address. */
1098         cp = memchr(data, ',', len);
1099         if (cp == NULL || cp == data) {
1100                 send_error(EINVAL);
1101                 return;
1102         }
1103         gva = parse_integer(data, cp - data);
1104         len -= (cp - data) + 1;
1105         data += (cp - data) + 1;
1106
1107         /* Parse and consume length. */
1108         cp = memchr(data, ':', len);
1109         if (cp == NULL || cp == data) {
1110                 send_error(EINVAL);
1111                 return;
1112         }
1113         resid = parse_integer(data, cp - data);
1114         len -= (cp - data) + 1;
1115         data += (cp - data) + 1;
1116
1117         /* Verify the available bytes match the length. */
1118         if (len != resid * 2) {
1119                 send_error(EINVAL);
1120                 return;
1121         }
1122
1123         while (resid > 0) {
1124                 error = guest_vaddr2paddr(cur_vcpu, gva, &gpa);
1125                 if (error == -1) {
1126                         send_error(errno);
1127                         return;
1128                 }
1129                 if (error == 0) {
1130                         send_error(EFAULT);
1131                         return;
1132                 }
1133
1134                 /* Write bytes to current page. */
1135                 todo = getpagesize() - gpa % getpagesize();
1136                 if (todo > resid)
1137                         todo = resid;
1138
1139                 cp = paddr_guest2host(ctx, gpa, todo);
1140                 if (cp != NULL) {
1141                         /*
1142                          * If this page is guest RAM, write it a byte
1143                          * at a time.
1144                          */
1145                         while (todo > 0) {
1146                                 assert(len >= 2);
1147                                 *cp = parse_byte(data);
1148                                 data += 2;
1149                                 len -= 2;
1150                                 cp++;
1151                                 gpa++;
1152                                 gva++;
1153                                 resid--;
1154                                 todo--;
1155                         }
1156                 } else {
1157                         /*
1158                          * If this page isn't guest RAM, try to handle
1159                          * it via MMIO.  For MMIO requests, use
1160                          * aligned writes of words when possible.
1161                          */
1162                         while (todo > 0) {
1163                                 if (gpa & 1 || todo == 1) {
1164                                         bytes = 1;
1165                                         val = parse_byte(data);
1166                                 } else if (gpa & 2 || todo == 2) {
1167                                         bytes = 2;
1168                                         val = be16toh(parse_integer(data, 4));
1169                                 } else {
1170                                         bytes = 4;
1171                                         val = be32toh(parse_integer(data, 8));
1172                                 }
1173                                 error = write_mem(ctx, cur_vcpu, gpa, val,
1174                                     bytes);
1175                                 if (error == 0) {
1176                                         gpa += bytes;
1177                                         gva += bytes;
1178                                         resid -= bytes;
1179                                         todo -= bytes;
1180                                         data += 2 * bytes;
1181                                         len -= 2 * bytes;
1182                                 } else {
1183                                         send_error(EFAULT);
1184                                         return;
1185                                 }
1186                         }
1187                 }
1188                 assert(resid == 0 || gpa % getpagesize() == 0);
1189         }
1190         assert(len == 0);
1191         send_ok();
1192 }
1193
1194 static bool
1195 set_breakpoint_caps(bool enable)
1196 {
1197         cpuset_t mask;
1198         int vcpu;
1199
1200         mask = vcpus_active;
1201         while (!CPU_EMPTY(&mask)) {
1202                 vcpu = CPU_FFS(&mask) - 1;
1203                 CPU_CLR(vcpu, &mask);
1204                 if (vm_set_capability(ctx, vcpu, VM_CAP_BPT_EXIT,
1205                     enable ? 1 : 0) < 0)
1206                         return (false);
1207                 debug("$vCPU %d %sabled breakpoint exits\n", vcpu,
1208                     enable ? "en" : "dis");
1209         }
1210         return (true);
1211 }
1212
1213 static void
1214 remove_all_sw_breakpoints(void)
1215 {
1216         struct breakpoint *bp, *nbp;
1217         uint8_t *cp;
1218
1219         if (TAILQ_EMPTY(&breakpoints))
1220                 return;
1221
1222         TAILQ_FOREACH_SAFE(bp, &breakpoints, link, nbp) {
1223                 debug("remove breakpoint at %#lx\n", bp->gpa);
1224                 cp = paddr_guest2host(ctx, bp->gpa, 1);
1225                 *cp = bp->shadow_inst;
1226                 TAILQ_REMOVE(&breakpoints, bp, link);
1227                 free(bp);
1228         }
1229         TAILQ_INIT(&breakpoints);
1230         set_breakpoint_caps(false);
1231 }
1232
1233 static void
1234 update_sw_breakpoint(uint64_t gva, int kind, bool insert)
1235 {
1236         struct breakpoint *bp;
1237         uint64_t gpa;
1238         uint8_t *cp;
1239         int error;
1240
1241         if (kind != 1) {
1242                 send_error(EINVAL);
1243                 return;
1244         }
1245
1246         error = guest_vaddr2paddr(cur_vcpu, gva, &gpa);
1247         if (error == -1) {
1248                 send_error(errno);
1249                 return;
1250         }
1251         if (error == 0) {
1252                 send_error(EFAULT);
1253                 return;
1254         }
1255
1256         cp = paddr_guest2host(ctx, gpa, 1);
1257
1258         /* Only permit breakpoints in guest RAM. */
1259         if (cp == NULL) {
1260                 send_error(EFAULT);
1261                 return;
1262         }
1263
1264         /* Find any existing breakpoint. */
1265         bp = find_breakpoint(gpa);
1266
1267         /*
1268          * Silently ignore duplicate commands since the protocol
1269          * requires these packets to be idempotent.
1270          */
1271         if (insert) {
1272                 if (bp == NULL) {
1273                         if (TAILQ_EMPTY(&breakpoints) &&
1274                             !set_breakpoint_caps(true)) {
1275                                 send_empty_response();
1276                                 return;
1277                         }
1278                         bp = malloc(sizeof(*bp));
1279                         bp->gpa = gpa;
1280                         bp->shadow_inst = *cp;
1281                         *cp = 0xcc;     /* INT 3 */
1282                         TAILQ_INSERT_TAIL(&breakpoints, bp, link);
1283                         debug("new breakpoint at %#lx\n", gpa);
1284                 }
1285         } else {
1286                 if (bp != NULL) {
1287                         debug("remove breakpoint at %#lx\n", gpa);
1288                         *cp = bp->shadow_inst;
1289                         TAILQ_REMOVE(&breakpoints, bp, link);
1290                         free(bp);
1291                         if (TAILQ_EMPTY(&breakpoints))
1292                                 set_breakpoint_caps(false);
1293                 }
1294         }
1295         send_ok();
1296 }
1297
/*
 * Parse a 'Z' (insert) or 'z' (remove) packet of the form
 * "<Z|z><type>,<addr>,<kind>" and act on it.
 *
 * Only type 0 is handled, via update_sw_breakpoint(); any other type
 * gets an empty response.  An empty field gets an EINVAL error, and a
 * ';'-separated trailer after the kind gets an empty response.
 */
static void
parse_breakpoint(const uint8_t *data, size_t len)
{
        uint64_t addr;
        uint8_t *sep;
        size_t used;
        int bp_kind, bp_type;
        bool inserting;

        inserting = (data[0] == 'Z');

        /* Step over the 'Z' / 'z' command byte. */
        data++;
        len--;

        /* Type: everything up to the first ','. */
        sep = memchr(data, ',', len);
        if (sep == NULL || sep == data) {
                send_error(EINVAL);
                return;
        }
        used = sep - data;
        bp_type = parse_integer(data, used);
        data += used + 1;
        len -= used + 1;

        /* Address: everything up to the next ','. */
        sep = memchr(data, ',', len);
        if (sep == NULL || sep == data) {
                send_error(EINVAL);
                return;
        }
        used = sep - data;
        addr = parse_integer(data, used);
        data += used + 1;
        len -= used + 1;

        /* Kind: the remainder of the packet. */
        sep = memchr(data, ';', len);
        if (sep == data) {
                send_error(EINVAL);
                return;
        }
        if (sep != NULL) {
                /*
                 * Neither ConditionalBreakpoints nor BreakpointCommands
                 * is advertised, so the remote end should never send
                 * conditions or commands after the kind.
                 */
                send_empty_response();
                return;
        }
        bp_kind = parse_integer(data, len);

        if (bp_type == 0)
                update_sw_breakpoint(addr, bp_kind, inserting);
        else
                send_empty_response();
}
1361
/*
 * Return true when the first strlen(cmd) bytes of the 'len'-byte buffer
 * 'data' are exactly 'cmd'.  This is a prefix match: bytes following the
 * command name are not examined.
 */
static bool
command_equals(const uint8_t *data, size_t len, const char *cmd)
{
        size_t cmdlen;

        cmdlen = strlen(cmd);
        return (len >= cmdlen && memcmp(data, cmd, cmdlen) == 0);
}
1370
1371 static void
1372 check_features(const uint8_t *data, size_t len)
1373 {
1374         char *feature, *next_feature, *str, *value;
1375         bool supported;
1376
1377         str = malloc(len + 1);
1378         memcpy(str, data, len);
1379         str[len] = '\0';
1380         next_feature = str;
1381
1382         while ((feature = strsep(&next_feature, ";")) != NULL) {
1383                 /*
1384                  * Null features shouldn't exist, but skip if they
1385                  * do.
1386                  */
1387                 if (strcmp(feature, "") == 0)
1388                         continue;
1389
1390                 /*
1391                  * Look for the value or supported / not supported
1392                  * flag.
1393                  */
1394                 value = strchr(feature, '=');
1395                 if (value != NULL) {
1396                         *value = '\0';
1397                         value++;
1398                         supported = true;
1399                 } else {
1400                         value = feature + strlen(feature) - 1;
1401                         switch (*value) {
1402                         case '+':
1403                                 supported = true;
1404                                 break;
1405                         case '-':
1406                                 supported = false;
1407                                 break;
1408                         default:
1409                                 /*
1410                                  * This is really a protocol error,
1411                                  * but we just ignore malformed
1412                                  * features for ease of
1413                                  * implementation.
1414                                  */
1415                                 continue;
1416                         }
1417                         value = NULL;
1418                 }
1419
1420                 if (strcmp(feature, "swbreak") == 0)
1421                         swbreak_enabled = supported;
1422         }
1423         free(str);
1424
1425         start_packet();
1426
1427         /* This is an arbitrary limit. */
1428         append_string("PacketSize=4096");
1429         append_string(";swbreak+");
1430         finish_packet();
1431 }
1432
1433 static void
1434 gdb_query(const uint8_t *data, size_t len)
1435 {
1436
1437         /*
1438          * TODO:
1439          * - qSearch
1440          */
1441         if (command_equals(data, len, "qAttached")) {
1442                 start_packet();
1443                 append_char('1');
1444                 finish_packet();
1445         } else if (command_equals(data, len, "qC")) {
1446                 start_packet();
1447                 append_string("QC");
1448                 append_integer(cur_vcpu + 1);
1449                 finish_packet();
1450         } else if (command_equals(data, len, "qfThreadInfo")) {
1451                 cpuset_t mask;
1452                 bool first;
1453                 int vcpu;
1454
1455                 if (CPU_EMPTY(&vcpus_active)) {
1456                         send_error(EINVAL);
1457                         return;
1458                 }
1459                 mask = vcpus_active;
1460                 start_packet();
1461                 append_char('m');
1462                 first = true;
1463                 while (!CPU_EMPTY(&mask)) {
1464                         vcpu = CPU_FFS(&mask) - 1;
1465                         CPU_CLR(vcpu, &mask);
1466                         if (first)
1467                                 first = false;
1468                         else
1469                                 append_char(',');
1470                         append_integer(vcpu + 1);
1471                 }
1472                 finish_packet();
1473         } else if (command_equals(data, len, "qsThreadInfo")) {
1474                 start_packet();
1475                 append_char('l');
1476                 finish_packet();
1477         } else if (command_equals(data, len, "qSupported")) {
1478                 data += strlen("qSupported");
1479                 len -= strlen("qSupported");
1480                 check_features(data, len);
1481         } else if (command_equals(data, len, "qThreadExtraInfo")) {
1482                 char buf[16];
1483                 int tid;
1484
1485                 data += strlen("qThreadExtraInfo");
1486                 len -= strlen("qThreadExtraInfo");
1487                 if (*data != ',') {
1488                         send_error(EINVAL);
1489                         return;
1490                 }
1491                 tid = parse_threadid(data + 1, len - 1);
1492                 if (tid <= 0 || !CPU_ISSET(tid - 1, &vcpus_active)) {
1493                         send_error(EINVAL);
1494                         return;
1495                 }
1496
1497                 snprintf(buf, sizeof(buf), "vCPU %d", tid - 1);
1498                 start_packet();
1499                 append_asciihex(buf);
1500                 finish_packet();
1501         } else
1502                 send_empty_response();
1503 }
1504
1505 static void
1506 handle_command(const uint8_t *data, size_t len)
1507 {
1508
1509         /* Reject packets with a sequence-id. */
1510         if (len >= 3 && data[0] >= '0' && data[0] <= '9' &&
1511             data[0] >= '0' && data[0] <= '9' && data[2] == ':') {
1512                 send_empty_response();
1513                 return;
1514         }
1515
1516         switch (*data) {
1517         case 'c':
1518                 if (len != 1) {
1519                         send_error(EINVAL);
1520                         break;
1521                 }
1522
1523                 discard_stop();
1524                 gdb_resume_vcpus();
1525                 break;
1526         case 'D':
1527                 send_ok();
1528
1529                 /* TODO: Resume any stopped CPUs. */
1530                 break;
1531         case 'g': {
1532                 gdb_read_regs();
1533                 break;
1534         }
1535         case 'H': {
1536                 int tid;
1537
1538                 if (data[1] != 'g' && data[1] != 'c') {
1539                         send_error(EINVAL);
1540                         break;
1541                 }
1542                 tid = parse_threadid(data + 2, len - 2);
1543                 if (tid == -2) {
1544                         send_error(EINVAL);
1545                         break;
1546                 }
1547
1548                 if (CPU_EMPTY(&vcpus_active)) {
1549                         send_error(EINVAL);
1550                         break;
1551                 }
1552                 if (tid == -1 || tid == 0)
1553                         cur_vcpu = CPU_FFS(&vcpus_active) - 1;
1554                 else if (CPU_ISSET(tid - 1, &vcpus_active))
1555                         cur_vcpu = tid - 1;
1556                 else {
1557                         send_error(EINVAL);
1558                         break;
1559                 }
1560                 send_ok();
1561                 break;
1562         }
1563         case 'm':
1564                 gdb_read_mem(data, len);
1565                 break;
1566         case 'M':
1567                 gdb_write_mem(data, len);
1568                 break;
1569         case 'T': {
1570                 int tid;
1571
1572                 tid = parse_threadid(data + 1, len - 1);
1573                 if (tid <= 0 || !CPU_ISSET(tid - 1, &vcpus_active)) {
1574                         send_error(EINVAL);
1575                         return;
1576                 }
1577                 send_ok();
1578                 break;
1579         }
1580         case 'q':
1581                 gdb_query(data, len);
1582                 break;
1583         case 's':
1584                 if (len != 1) {
1585                         send_error(EINVAL);
1586                         break;
1587                 }
1588
1589                 /* Don't send a reply until a stop occurs. */
1590                 if (!gdb_step_vcpu(cur_vcpu)) {
1591                         send_error(EOPNOTSUPP);
1592                         break;
1593                 }
1594                 break;
1595         case 'z':
1596         case 'Z':
1597                 parse_breakpoint(data, len);
1598                 break;
1599         case '?':
1600                 report_stop(false);
1601                 break;
1602         case 'G': /* TODO */
1603         case 'v':
1604                 /* Handle 'vCont' */
1605                 /* 'vCtrlC' */
1606         case 'p': /* TODO */
1607         case 'P': /* TODO */
1608         case 'Q': /* TODO */
1609         case 't': /* TODO */
1610         case 'X': /* TODO */
1611         default:
1612                 send_empty_response();
1613         }
1614 }
1615
1616 /* Check for a valid packet in the command buffer. */
1617 static void
1618 check_command(int fd)
1619 {
1620         uint8_t *head, *hash, *p, sum;
1621         size_t avail, plen;
1622
1623         for (;;) {
1624                 avail = cur_comm.len;
1625                 if (avail == 0)
1626                         return;
1627                 head = io_buffer_head(&cur_comm);
1628                 switch (*head) {
1629                 case 0x03:
1630                         debug("<- Ctrl-C\n");
1631                         io_buffer_consume(&cur_comm, 1);
1632
1633                         gdb_suspend_vcpus();
1634                         break;
1635                 case '+':
1636                         /* ACK of previous response. */
1637                         debug("<- +\n");
1638                         if (response_pending())
1639                                 io_buffer_reset(&cur_resp);
1640                         io_buffer_consume(&cur_comm, 1);
1641                         if (stopped_vcpu != -1 && report_next_stop) {
1642                                 report_stop(true);
1643                                 send_pending_data(fd);
1644                         }
1645                         break;
1646                 case '-':
1647                         /* NACK of previous response. */
1648                         debug("<- -\n");
1649                         if (response_pending()) {
1650                                 cur_resp.len += cur_resp.start;
1651                                 cur_resp.start = 0;
1652                                 if (cur_resp.data[0] == '+')
1653                                         io_buffer_advance(&cur_resp, 1);
1654                                 debug("-> %.*s\n", (int)cur_resp.len,
1655                                     io_buffer_head(&cur_resp));
1656                         }
1657                         io_buffer_consume(&cur_comm, 1);
1658                         send_pending_data(fd);
1659                         break;
1660                 case '$':
1661                         /* Packet. */
1662
1663                         if (response_pending()) {
1664                                 warnx("New GDB command while response in "
1665                                     "progress");
1666                                 io_buffer_reset(&cur_resp);
1667                         }
1668
1669                         /* Is packet complete? */
1670                         hash = memchr(head, '#', avail);
1671                         if (hash == NULL)
1672                                 return;
1673                         plen = (hash - head + 1) + 2;
1674                         if (avail < plen)
1675                                 return;
1676                         debug("<- %.*s\n", (int)plen, head);
1677
1678                         /* Verify checksum. */
1679                         for (sum = 0, p = head + 1; p < hash; p++)
1680                                 sum += *p;
1681                         if (sum != parse_byte(hash + 1)) {
1682                                 io_buffer_consume(&cur_comm, plen);
1683                                 debug("-> -\n");
1684                                 send_char('-');
1685                                 send_pending_data(fd);
1686                                 break;
1687                         }
1688                         send_char('+');
1689
1690                         handle_command(head + 1, hash - (head + 1));
1691                         io_buffer_consume(&cur_comm, plen);
1692                         if (!response_pending())
1693                                 debug("-> +\n");
1694                         send_pending_data(fd);
1695                         break;
1696                 default:
1697                         /* XXX: Possibly drop connection instead. */
1698                         debug("-> %02x\n", *head);
1699                         io_buffer_consume(&cur_comm, 1);
1700                         break;
1701                 }
1702         }
1703 }
1704
1705 static void
1706 gdb_readable(int fd, enum ev_type event __unused, void *arg __unused)
1707 {
1708         size_t pending;
1709         ssize_t nread;
1710         int n;
1711
1712         if (ioctl(fd, FIONREAD, &n) == -1) {
1713                 warn("FIONREAD on GDB socket");
1714                 return;
1715         }
1716         assert(n >= 0);
1717         pending = n;
1718
1719         /*
1720          * 'pending' might be zero due to EOF.  We need to call read
1721          * with a non-zero length to detect EOF.
1722          */
1723         if (pending == 0)
1724                 pending = 1;
1725
1726         /* Ensure there is room in the command buffer. */
1727         io_buffer_grow(&cur_comm, pending);
1728         assert(io_buffer_avail(&cur_comm) >= pending);
1729
1730         nread = read(fd, io_buffer_tail(&cur_comm), io_buffer_avail(&cur_comm));
1731         if (nread == 0) {
1732                 close_connection();
1733         } else if (nread == -1) {
1734                 if (errno == EAGAIN)
1735                         return;
1736
1737                 warn("Read from GDB socket");
1738                 close_connection();
1739         } else {
1740                 cur_comm.len += nread;
1741                 pthread_mutex_lock(&gdb_lock);
1742                 check_command(fd);
1743                 pthread_mutex_unlock(&gdb_lock);
1744         }
1745 }
1746
1747 static void
1748 gdb_writable(int fd, enum ev_type event __unused, void *arg __unused)
1749 {
1750
1751         send_pending_data(fd);
1752 }
1753
1754 static void
1755 new_connection(int fd, enum ev_type event __unused, void *arg)
1756 {
1757         int optval, s;
1758
1759         s = accept4(fd, NULL, NULL, SOCK_NONBLOCK);
1760         if (s == -1) {
1761                 if (arg != NULL)
1762                         err(1, "Failed accepting initial GDB connection");
1763
1764                 /* Silently ignore errors post-startup. */
1765                 return;
1766         }
1767
1768         optval = 1;
1769         if (setsockopt(s, SOL_SOCKET, SO_NOSIGPIPE, &optval, sizeof(optval)) ==
1770             -1) {
1771                 warn("Failed to disable SIGPIPE for GDB connection");
1772                 close(s);
1773                 return;
1774         }
1775
1776         pthread_mutex_lock(&gdb_lock);
1777         if (cur_fd != -1) {
1778                 close(s);
1779                 warnx("Ignoring additional GDB connection.");
1780         }
1781
1782         read_event = mevent_add(s, EVF_READ, gdb_readable, NULL);
1783         if (read_event == NULL) {
1784                 if (arg != NULL)
1785                         err(1, "Failed to setup initial GDB connection");
1786                 pthread_mutex_unlock(&gdb_lock);
1787                 return;
1788         }
1789         write_event = mevent_add(s, EVF_WRITE, gdb_writable, NULL);
1790         if (write_event == NULL) {
1791                 if (arg != NULL)
1792                         err(1, "Failed to setup initial GDB connection");
1793                 mevent_delete_close(read_event);
1794                 read_event = NULL;
1795         }
1796
1797         cur_fd = s;
1798         cur_vcpu = 0;
1799         stopped_vcpu = -1;
1800
1801         /* Break on attach. */
1802         first_stop = true;
1803         report_next_stop = false;
1804         gdb_suspend_vcpus();
1805         pthread_mutex_unlock(&gdb_lock);
1806 }
1807
1808 #ifndef WITHOUT_CAPSICUM
1809 static void
1810 limit_gdb_socket(int s)
1811 {
1812         cap_rights_t rights;
1813         unsigned long ioctls[] = { FIONREAD };
1814
1815         cap_rights_init(&rights, CAP_ACCEPT, CAP_EVENT, CAP_READ, CAP_WRITE,
1816             CAP_SETSOCKOPT, CAP_IOCTL);
1817         if (caph_rights_limit(s, &rights) == -1)
1818                 errx(EX_OSERR, "Unable to apply rights for sandbox");
1819         if (caph_ioctls_limit(s, ioctls, nitems(ioctls)) == -1)
1820                 errx(EX_OSERR, "Unable to apply rights for sandbox");
1821 }
1822 #endif
1823
/*
 * Set up the GDB stub for the VM described by '_ctx'.
 *
 * Does nothing unless the "gdb.port" config value is set.  Otherwise
 * creates a non-blocking listening socket on "gdb.address" (default
 * "localhost") and that port, and registers new_connection() as its
 * read handler.  When "gdb.wait" is true, vCPU 0 is marked suspended so
 * that it does not start running until a debugger connects.
 *
 * Any failure during setup is fatal.
 */
void
init_gdb(struct vmctx *_ctx)
{
        int error, flags, optval, s;
        struct addrinfo hints;
        struct addrinfo *gdbaddr;
        const char *saddr, *value;
        char *sport;
        bool wait;

        value = get_config_value("gdb.port");
        if (value == NULL)
                return;
        sport = strdup(value);
        if (sport == NULL)
                errx(4, "Failed to allocate memory");

        wait = get_config_bool_default("gdb.wait", false);

        saddr = get_config_value("gdb.address");
        if (saddr == NULL) {
                saddr = "localhost";
        }

        debug("==> starting on %s:%s, %swaiting\n",
            saddr, sport, wait ? "" : "not ");

        error = pthread_mutex_init(&gdb_lock, NULL);
        if (error != 0)
                errc(1, error, "gdb mutex init");
        error = pthread_cond_init(&idle_vcpus, NULL);
        if (error != 0)
                errc(1, error, "gdb cv init");

        memset(&hints, 0, sizeof(hints));
        hints.ai_family = AF_UNSPEC;
        hints.ai_socktype = SOCK_STREAM;
        hints.ai_flags = AI_NUMERICSERV | AI_PASSIVE;

        error = getaddrinfo(saddr, sport, &hints, &gdbaddr);
        if (error != 0)
                errx(1, "gdb address resolution: %s", gai_strerror(error));

        ctx = _ctx;
        /* Only the first address returned by getaddrinfo() is used. */
        s = socket(gdbaddr->ai_family, gdbaddr->ai_socktype, 0);
        if (s < 0)
                err(1, "gdb socket create");

        /* Best effort: a failure here is not fatal. */
        optval = 1;
        (void)setsockopt(s, SOL_SOCKET, SO_REUSEADDR, &optval, sizeof(optval));

        if (bind(s, gdbaddr->ai_addr, gdbaddr->ai_addrlen) < 0)
                err(1, "gdb socket bind");

        if (listen(s, 1) < 0)
                err(1, "gdb socket listen");

        stopped_vcpu = -1;
        TAILQ_INIT(&breakpoints);
        vcpu_state = calloc(guest_ncpus, sizeof(*vcpu_state));
        if (vcpu_state == NULL)
                errx(4, "Failed to allocate memory");
        if (wait) {
                /*
                 * Set vcpu 0 in vcpus_suspended.  This will trigger the
                 * logic in gdb_cpu_add() to suspend the first vcpu before
                 * it starts execution.  The vcpu will remain suspended
                 * until a debugger connects.
                 */
                CPU_SET(0, &vcpus_suspended);
                stopped_vcpu = 0;
        }

        flags = fcntl(s, F_GETFL);
        if (flags == -1)
                err(1, "Failed to get gdb socket flags");
        if (fcntl(s, F_SETFL, flags | O_NONBLOCK) == -1)
                err(1, "Failed to mark gdb socket non-blocking");

#ifndef WITHOUT_CAPSICUM
        limit_gdb_socket(s);
#endif
        /*
         * Without the accept handler no debugger could ever attach,
         * and with "gdb.wait" the guest would never start.
         */
        if (mevent_add(s, EVF_READ, new_connection, NULL) == NULL)
                errx(1, "Failed to register gdb listen socket");
        gdb_active = true;
        freeaddrinfo(gdbaddr);
        free(sport);
}