1 /*-
2  * SPDX-License-Identifier: BSD-2-Clause-FreeBSD
3  *
4  * Copyright (c) 2017-2018 John H. Baldwin <jhb@FreeBSD.org>
5  *
6  * Redistribution and use in source and binary forms, with or without
7  * modification, are permitted provided that the following conditions
8  * are met:
9  * 1. Redistributions of source code must retain the above copyright
10  *    notice, this list of conditions and the following disclaimer.
11  * 2. Redistributions in binary form must reproduce the above copyright
12  *    notice, this list of conditions and the following disclaimer in the
13  *    documentation and/or other materials provided with the distribution.
14  *
15  * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
16  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
17  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
18  * ARE DISCLAIMED.  IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
19  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
20  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
21  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
22  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
23  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
24  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
25  * SUCH DAMAGE.
26  */
27
28 #include <sys/cdefs.h>
29 __FBSDID("$FreeBSD$");
30
31 #include <sys/param.h>
32 #ifndef WITHOUT_CAPSICUM
33 #include <sys/capsicum.h>
34 #endif
35 #include <sys/endian.h>
36 #include <sys/ioctl.h>
37 #include <sys/mman.h>
38 #include <sys/queue.h>
39 #include <sys/socket.h>
40 #include <machine/atomic.h>
41 #include <machine/specialreg.h>
42 #include <machine/vmm.h>
43 #include <netinet/in.h>
44 #include <assert.h>
45 #ifndef WITHOUT_CAPSICUM
46 #include <capsicum_helpers.h>
47 #endif
48 #include <err.h>
49 #include <errno.h>
50 #include <fcntl.h>
51 #include <pthread.h>
52 #include <pthread_np.h>
53 #include <stdbool.h>
54 #include <stdio.h>
55 #include <stdlib.h>
56 #include <string.h>
57 #include <sysexits.h>
58 #include <unistd.h>
59 #include <vmmapi.h>
60
61 #include "bhyverun.h"
62 #include "gdb.h"
63 #include "mem.h"
64 #include "mevent.h"
65
66 /*
67  * GDB_SIGNAL_* numbers are part of the GDB remote protocol.  Most stops
68  * use SIGTRAP.
69  */
70 #define GDB_SIGNAL_TRAP         5
71
72 static void gdb_resume_vcpus(void);
73 static void check_command(int fd);
74
75 static struct mevent *read_event, *write_event;
76
77 static cpuset_t vcpus_active, vcpus_suspended, vcpus_waiting;
78 static pthread_mutex_t gdb_lock;
79 static pthread_cond_t idle_vcpus;
80 static bool first_stop, report_next_stop, swbreak_enabled;
81
82 /*
83  * An I/O buffer contains 'capacity' bytes of room at 'data'.  For a
84  * read buffer, 'start' is unused and 'len' contains the number of
85  * valid bytes in the buffer.  For a write buffer, 'start' is set to
86  * the index of the next byte in 'data' to send, and 'len' contains
87  * the remaining number of valid bytes to send.
88  */
89 struct io_buffer {
90         uint8_t *data;
91         size_t capacity;
92         size_t start;
93         size_t len;
94 };
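/*
 * For example, a write buffer with capacity 8, start 2, and len 3 holds
 * its pending bytes at data[2..4]: io_buffer_head() points at data[2],
 * io_buffer_tail() at data[5], and io_buffer_avail() reports 3 bytes of
 * room.
 */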
95
96 struct breakpoint {
97         uint64_t gpa;
98         uint8_t shadow_inst;
99         TAILQ_ENTRY(breakpoint) link;
100 };
101
102 /*
103  * When a vCPU stops due to an event that should be reported to the
104  * debugger, information about the event is stored in this structure.
105  * The vCPU thread then sets 'stopped_vcpu' if it is not already set
106  * and stops other vCPUs so the event can be reported.  The
107  * report_stop() function reports the event for the 'stopped_vcpu'
108  * vCPU.  When the debugger resumes execution via continue or step,
109  * the event for 'stopped_vcpu' is cleared.  vCPUs will loop in their
110  * event handlers until the associated event is reported or disabled.
111  *
112  * An idle vCPU will have all of the boolean fields set to false.
113  *
114  * When a vCPU is stepped, 'stepping' is set to true when the vCPU is
115  * released to execute the stepped instruction.  When the vCPU reports
116  * the stepping trap, 'stepped' is set.
117  *
118  * When a vCPU hits a breakpoint set by the debug server,
119  * 'hit_swbreak' is set to true.
120  */
121 struct vcpu_state {
122         bool stepping;
123         bool stepped;
124         bool hit_swbreak;
125 };
126
127 static struct io_buffer cur_comm, cur_resp;
128 static uint8_t cur_csum;
129 static struct vmctx *ctx;
130 static int cur_fd = -1;
131 static TAILQ_HEAD(, breakpoint) breakpoints;
132 static struct vcpu_state *vcpu_state;
133 static int cur_vcpu, stopped_vcpu;
134
135 const int gdb_regset[] = {
136         VM_REG_GUEST_RAX,
137         VM_REG_GUEST_RBX,
138         VM_REG_GUEST_RCX,
139         VM_REG_GUEST_RDX,
140         VM_REG_GUEST_RSI,
141         VM_REG_GUEST_RDI,
142         VM_REG_GUEST_RBP,
143         VM_REG_GUEST_RSP,
144         VM_REG_GUEST_R8,
145         VM_REG_GUEST_R9,
146         VM_REG_GUEST_R10,
147         VM_REG_GUEST_R11,
148         VM_REG_GUEST_R12,
149         VM_REG_GUEST_R13,
150         VM_REG_GUEST_R14,
151         VM_REG_GUEST_R15,
152         VM_REG_GUEST_RIP,
153         VM_REG_GUEST_RFLAGS,
154         VM_REG_GUEST_CS,
155         VM_REG_GUEST_SS,
156         VM_REG_GUEST_DS,
157         VM_REG_GUEST_ES,
158         VM_REG_GUEST_FS,
159         VM_REG_GUEST_GS
160 };
161
162 const int gdb_regsize[] = {
163         8,
164         8,
165         8,
166         8,
167         8,
168         8,
169         8,
170         8,
171         8,
172         8,
173         8,
174         8,
175         8,
176         8,
177         8,
178         8,
179         8,
180         4,
181         4,
182         4,
183         4,
184         4,
185         4,
186         4
187 };
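/*
 * The two arrays above are parallel: gdb_regsize[i] is the width, in
 * bytes, used when register gdb_regset[i] is marshalled for the
 * debugger.  The general purpose registers and RIP are sent as 64-bit
 * values, while RFLAGS and the segment registers are sent as 32-bit
 * values, matching the register layout GDB expects for an amd64 target.
 */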
188
189 #ifdef GDB_LOG
190 #include <stdarg.h>
191 #include <stdio.h>
192
193 static void __printflike(1, 2)
194 debug(const char *fmt, ...)
195 {
196         static FILE *logfile;
197         va_list ap;
198
199         if (logfile == NULL) {
200                 logfile = fopen("/tmp/bhyve_gdb.log", "w");
201                 if (logfile == NULL)
202                         return;
203 #ifndef WITHOUT_CAPSICUM
204                 if (caph_limit_stream(fileno(logfile), CAPH_WRITE) == -1) {
205                         fclose(logfile);
206                         logfile = NULL;
207                         return;
208                 }
209 #endif
210                 setlinebuf(logfile);
211         }
212         va_start(ap, fmt);
213         vfprintf(logfile, fmt, ap);
214         va_end(ap);
215 }
216 #else
217 #define debug(...)
218 #endif
219
220 static void     remove_all_sw_breakpoints(void);
221
222 static int
223 guest_paging_info(int vcpu, struct vm_guest_paging *paging)
224 {
225         uint64_t regs[4];
226         const int regset[4] = {
227                 VM_REG_GUEST_CR0,
228                 VM_REG_GUEST_CR3,
229                 VM_REG_GUEST_CR4,
230                 VM_REG_GUEST_EFER
231         };
232
233         if (vm_get_register_set(ctx, vcpu, nitems(regset), regset, regs) == -1)
234                 return (-1);
235
236         /*
237          * For the debugger, always pretend to be the kernel (CPL 0),
238          * and if long-mode is enabled, always parse addresses as if
239          * in 64-bit mode.
240          */
241         paging->cr3 = regs[1];
242         paging->cpl = 0;
243         if (regs[3] & EFER_LMA)
244                 paging->cpu_mode = CPU_MODE_64BIT;
245         else if (regs[0] & CR0_PE)
246                 paging->cpu_mode = CPU_MODE_PROTECTED;
247         else
248                 paging->cpu_mode = CPU_MODE_REAL;
249         if (!(regs[0] & CR0_PG))
250                 paging->paging_mode = PAGING_MODE_FLAT;
251         else if (!(regs[2] & CR4_PAE))
252                 paging->paging_mode = PAGING_MODE_32;
253         else if (regs[3] & EFER_LME)
254                 paging->paging_mode = PAGING_MODE_64;
255         else
256                 paging->paging_mode = PAGING_MODE_PAE;
257         return (0);
258 }
259
260 /*
261  * Map a guest virtual address to a physical address (for a given vcpu).
262  * If a guest virtual address is valid, return 1.  If the address is
263  * not valid, return 0.  If an error occurs obtaining the mapping,
264  * return -1.
265  */
266 static int
267 guest_vaddr2paddr(int vcpu, uint64_t vaddr, uint64_t *paddr)
268 {
269         struct vm_guest_paging paging;
270         int fault;
271
272         if (guest_paging_info(vcpu, &paging) == -1)
273                 return (-1);
274
275         /*
276          * Always use PROT_READ.  We really care if the VA is
277          * accessible, not if the current vCPU can write.
278          */
279         if (vm_gla2gpa_nofault(ctx, vcpu, &paging, vaddr, PROT_READ, paddr,
280             &fault) == -1)
281                 return (-1);
282         if (fault)
283                 return (0);
284         return (1);
285 }
286
287 static void
288 io_buffer_reset(struct io_buffer *io)
289 {
290
291         io->start = 0;
292         io->len = 0;
293 }
294
295 /* Available room for adding data. */
296 static size_t
297 io_buffer_avail(struct io_buffer *io)
298 {
299
300         return (io->capacity - (io->start + io->len));
301 }
302
303 static uint8_t *
304 io_buffer_head(struct io_buffer *io)
305 {
306
307         return (io->data + io->start);
308 }
309
310 static uint8_t *
311 io_buffer_tail(struct io_buffer *io)
312 {
313
314         return (io->data + io->start + io->len);
315 }
316
317 static void
318 io_buffer_advance(struct io_buffer *io, size_t amount)
319 {
320
321         assert(amount <= io->len);
322         io->start += amount;
323         io->len -= amount;
324 }
325
326 static void
327 io_buffer_consume(struct io_buffer *io, size_t amount)
328 {
329
330         io_buffer_advance(io, amount);
331         if (io->len == 0) {
332                 io->start = 0;
333                 return;
334         }
335
336         /*
337          * XXX: Consider making this move optional and compacting on a
338          * future read() before realloc().
339          */
340         memmove(io->data, io_buffer_head(io), io->len);
341         io->start = 0;
342 }
343
344 static void
345 io_buffer_grow(struct io_buffer *io, size_t newsize)
346 {
347         uint8_t *new_data;
348         size_t avail, new_cap;
349
350         avail = io_buffer_avail(io);
351         if (newsize <= avail)
352                 return;
353
354         new_cap = io->capacity + (newsize - avail);
355         new_data = realloc(io->data, new_cap);
356         if (new_data == NULL)
357                 err(1, "Failed to grow GDB I/O buffer");
358         io->data = new_data;
359         io->capacity = new_cap;
360 }
361
362 static bool
363 response_pending(void)
364 {
365
366         if (cur_resp.start == 0 && cur_resp.len == 0)
367                 return (false);
368         if (cur_resp.start + cur_resp.len == 1 && cur_resp.data[0] == '+')
369                 return (false);
370         return (true);
371 }
372
373 static void
374 close_connection(void)
375 {
376
377         /*
378          * XXX: This triggers a warning because mevent does the close
379          * before the EV_DELETE.
380          */
381         pthread_mutex_lock(&gdb_lock);
382         mevent_delete(write_event);
383         mevent_delete_close(read_event);
384         write_event = NULL;
385         read_event = NULL;
386         io_buffer_reset(&cur_comm);
387         io_buffer_reset(&cur_resp);
388         cur_fd = -1;
389
390         remove_all_sw_breakpoints();
391
392         /* Clear any pending events. */
393         memset(vcpu_state, 0, guest_ncpus * sizeof(*vcpu_state));
394
395         /* Resume any stopped vCPUs. */
396         gdb_resume_vcpus();
397         pthread_mutex_unlock(&gdb_lock);
398 }
399
400 static uint8_t
401 hex_digit(uint8_t nibble)
402 {
403
404         if (nibble <= 9)
405                 return (nibble + '0');
406         else
407                 return (nibble + 'a' - 10);
408 }
409
410 static uint8_t
411 parse_digit(uint8_t v)
412 {
413
414         if (v >= '0' && v <= '9')
415                 return (v - '0');
416         if (v >= 'a' && v <= 'f')
417                 return (v - 'a' + 10);
418         if (v >= 'A' && v <= 'F')
419                 return (v - 'A' + 10);
420         return (0xF);
421 }
422
423 /* Parses big-endian hexadecimal. */
424 static uintmax_t
425 parse_integer(const uint8_t *p, size_t len)
426 {
427         uintmax_t v;
428
429         v = 0;
430         while (len > 0) {
431                 v <<= 4;
432                 v |= parse_digit(*p);
433                 p++;
434                 len--;
435         }
436         return (v);
437 }
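/*
 * For example, parse_integer((const uint8_t *)"1f4", 3) returns 0x1f4.
 * There is no overflow or validity checking here; callers bound 'len',
 * and a malformed digit simply maps to 0xf via parse_digit().
 */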
438
439 static uint8_t
440 parse_byte(const uint8_t *p)
441 {
442
443         return (parse_digit(p[0]) << 4 | parse_digit(p[1]));
444 }
445
446 static void
447 send_pending_data(int fd)
448 {
449         ssize_t nwritten;
450
451         if (cur_resp.len == 0) {
452                 mevent_disable(write_event);
453                 return;
454         }
455         nwritten = write(fd, io_buffer_head(&cur_resp), cur_resp.len);
456         if (nwritten == -1) {
457                 warn("Write to GDB socket failed");
458                 close_connection();
459         } else {
460                 io_buffer_advance(&cur_resp, nwritten);
461                 if (cur_resp.len == 0)
462                         mevent_disable(write_event);
463                 else
464                         mevent_enable(write_event);
465         }
466 }
467
468 /* Append a single character to the output buffer. */
469 static void
470 send_char(uint8_t data)
471 {
472         io_buffer_grow(&cur_resp, 1);
473         *io_buffer_tail(&cur_resp) = data;
474         cur_resp.len++;
475 }
476
477 /* Append an array of bytes to the output buffer. */
478 static void
479 send_data(const uint8_t *data, size_t len)
480 {
481
482         io_buffer_grow(&cur_resp, len);
483         memcpy(io_buffer_tail(&cur_resp), data, len);
484         cur_resp.len += len;
485 }
486
487 static void
488 format_byte(uint8_t v, uint8_t *buf)
489 {
490
491         buf[0] = hex_digit(v >> 4);
492         buf[1] = hex_digit(v & 0xf);
493 }
494
495 /*
496  * Append a single byte (formatted as two hex characters) to the
497  * output buffer.
498  */
499 static void
500 send_byte(uint8_t v)
501 {
502         uint8_t buf[2];
503
504         format_byte(v, buf);
505         send_data(buf, sizeof(buf));
506 }
507
508 static void
509 start_packet(void)
510 {
511
512         send_char('$');
513         cur_csum = 0;
514 }
515
516 static void
517 finish_packet(void)
518 {
519
520         send_char('#');
521         send_byte(cur_csum);
522         debug("-> %.*s\n", (int)cur_resp.len, io_buffer_head(&cur_resp));
523 }
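/*
 * Worked example of the framing built by start_packet(), the append_*()
 * helpers, and finish_packet(): appending the payload "OK" produces
 * "$OK#9a" in cur_resp, since 'O' + 'K' == 0x9a modulo 256.  The
 * compiled-out sketch below is illustrative only (not part of bhyve)
 * and reproduces the checksum calculation standalone.
 */
#if 0
#include <stdio.h>
#include <string.h>

int
main(void)
{
        const char *payload = "OK";
        unsigned char csum = 0;
        size_t i;

        /*
         * Sum the payload bytes modulo 256, as the append_*() helpers
         * accumulate into cur_csum.
         */
        for (i = 0; i < strlen(payload); i++)
                csum += (unsigned char)payload[i];
        printf("$%s#%02x\n", payload, csum);
        return (0);
}
#endif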
524
525 /*
526  * Append a single character (for the packet payload) and update the
527  * checksum.
528  */
529 static void
530 append_char(uint8_t v)
531 {
532
533         send_char(v);
534         cur_csum += v;
535 }
536
537 /*
538  * Append an array of bytes (for the packet payload) and update the
539  * checksum.
540  */
541 static void
542 append_packet_data(const uint8_t *data, size_t len)
543 {
544
545         send_data(data, len);
546         while (len > 0) {
547                 cur_csum += *data;
548                 data++;
549                 len--;
550         }
551 }
552
553 static void
554 append_string(const char *str)
555 {
556
557         append_packet_data(str, strlen(str));
558 }
559
560 static void
561 append_byte(uint8_t v)
562 {
563         uint8_t buf[2];
564
565         format_byte(v, buf);
566         append_packet_data(buf, sizeof(buf));
567 }
568
569 static void
570 append_unsigned_native(uintmax_t value, size_t len)
571 {
572         size_t i;
573
574         for (i = 0; i < len; i++) {
575                 append_byte(value);
576                 value >>= 8;
577         }
578 }
579
580 static void
581 append_unsigned_be(uintmax_t value, size_t len)
582 {
583         char buf[len * 2];
584         size_t i;
585
586         for (i = 0; i < len; i++) {
587                 format_byte(value, buf + (len - i - 1) * 2);
588                 value >>= 8;
589         }
590         append_packet_data(buf, sizeof(buf));
591 }
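/*
 * For example, append_unsigned_native(0x1234, 2) appends "3412"
 * (least significant byte first, the target byte order gdb_read_regs()
 * needs for register values), while append_unsigned_be(0x1234, 2)
 * appends "1234".
 */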
592
593 static void
594 append_integer(unsigned int value)
595 {
596
597         if (value == 0)
598                 append_char('0');
599         else
600                 append_unsigned_be(value, (fls(value) + 7) / 8);
601 }
602
603 static void
604 append_asciihex(const char *str)
605 {
606
607         while (*str != '\0') {
608                 append_byte(*str);
609                 str++;
610         }
611 }
612
613 static void
614 send_empty_response(void)
615 {
616
617         start_packet();
618         finish_packet();
619 }
620
621 static void
622 send_error(int error)
623 {
624
625         start_packet();
626         append_char('E');
627         append_byte(error);
628         finish_packet();
629 }
630
631 static void
632 send_ok(void)
633 {
634
635         start_packet();
636         append_string("OK");
637         finish_packet();
638 }
639
640 static int
641 parse_threadid(const uint8_t *data, size_t len)
642 {
643
644         if (len == 1 && *data == '0')
645                 return (0);
646         if (len == 2 && memcmp(data, "-1", 2) == 0)
647                 return (-1);
648         if (len == 0)
649                 return (-2);
650         return (parse_integer(data, len));
651 }
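/*
 * GDB thread ids map to vCPUs with a +1 offset, since thread id 0 is
 * reserved to mean "any thread" and -1 means "all threads".  For
 * example, the packet "Hg1" selects thread 1, which handle_command()
 * translates to cur_vcpu 0.
 */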
652
653 /*
654  * Report the current stop event to the debugger.  If the stop is due
655  * to an event triggered on a specific vCPU such as a breakpoint or
656  * stepping trap, stopped_vcpu will be set to the vCPU triggering the
657  * stop.  If 'set_cur_vcpu' is true, then cur_vcpu will be updated to
658  * the reporting vCPU for vCPU events.
659  */
660 static void
661 report_stop(bool set_cur_vcpu)
662 {
663         struct vcpu_state *vs;
664
665         start_packet();
666         if (stopped_vcpu == -1) {
667                 append_char('S');
668                 append_byte(GDB_SIGNAL_TRAP);
669         } else {
670                 vs = &vcpu_state[stopped_vcpu];
671                 if (set_cur_vcpu)
672                         cur_vcpu = stopped_vcpu;
673                 append_char('T');
674                 append_byte(GDB_SIGNAL_TRAP);
675                 append_string("thread:");
676                 append_integer(stopped_vcpu + 1);
677                 append_char(';');
678                 if (vs->hit_swbreak) {
679                         debug("$vCPU %d reporting swbreak\n", stopped_vcpu);
680                         if (swbreak_enabled)
681                                 append_string("swbreak:;");
682                 } else if (vs->stepped)
683                         debug("$vCPU %d reporting step\n", stopped_vcpu);
684                 else
685                         debug("$vCPU %d reporting ???\n", stopped_vcpu);
686         }
687         finish_packet();
688         report_next_stop = false;
689 }
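/*
 * Example stop replies built above: "S05" when no specific vCPU is
 * responsible for the stop, and "T05thread:1;swbreak:;" when vCPU 0 hit
 * a software breakpoint and the client negotiated the swbreak feature
 * ("05" is GDB_SIGNAL_TRAP).
 */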
690
691 /*
692  * If this stop is due to a vCPU event, clear that event to mark it as
693  * acknowledged.
694  */
695 static void
696 discard_stop(void)
697 {
698         struct vcpu_state *vs;
699
700         if (stopped_vcpu != -1) {
701                 vs = &vcpu_state[stopped_vcpu];
702                 vs->hit_swbreak = false;
703                 vs->stepped = false;
704                 stopped_vcpu = -1;
705         }
706         report_next_stop = true;
707 }
708
709 static void
710 gdb_finish_suspend_vcpus(void)
711 {
712
713         if (first_stop) {
714                 first_stop = false;
715                 stopped_vcpu = -1;
716         } else if (report_next_stop) {
717                 assert(!response_pending());
718                 report_stop(true);
719                 send_pending_data(cur_fd);
720         }
721 }
722
723 /*
724  * vCPU threads invoke this function whenever the vCPU enters the
725  * debug server to pause or report an event.  vCPU threads wait here
726  * as long as the debug server keeps them suspended.
727  */
728 static void
729 _gdb_cpu_suspend(int vcpu, bool report_stop)
730 {
731
732         debug("$vCPU %d suspending\n", vcpu);
733         CPU_SET(vcpu, &vcpus_waiting);
734         if (report_stop && CPU_CMP(&vcpus_waiting, &vcpus_suspended) == 0)
735                 gdb_finish_suspend_vcpus();
736         while (CPU_ISSET(vcpu, &vcpus_suspended))
737                 pthread_cond_wait(&idle_vcpus, &gdb_lock);
738         CPU_CLR(vcpu, &vcpus_waiting);
739         debug("$vCPU %d resuming\n", vcpu);
740 }
741
742 /*
743  * Invoked at the start of a vCPU thread's execution to inform the
744  * debug server about the new thread.
745  */
746 void
747 gdb_cpu_add(int vcpu)
748 {
749
750         debug("$vCPU %d starting\n", vcpu);
751         pthread_mutex_lock(&gdb_lock);
752         assert(vcpu < guest_ncpus);
753         CPU_SET(vcpu, &vcpus_active);
754         if (!TAILQ_EMPTY(&breakpoints)) {
755                 vm_set_capability(ctx, vcpu, VM_CAP_BPT_EXIT, 1);
756                 debug("$vCPU %d enabled breakpoint exits\n", vcpu);
757         }
758
759         /*
760          * If a vcpu is added while vcpus are stopped, suspend the new
761          * vcpu so that it will pop back out with a debug exit before
762          * executing the first instruction.
763          */
764         if (!CPU_EMPTY(&vcpus_suspended)) {
765                 CPU_SET(vcpu, &vcpus_suspended);
766                 _gdb_cpu_suspend(vcpu, false);
767         }
768         pthread_mutex_unlock(&gdb_lock);
769 }
770
771 /*
772  * Invoked by vCPU before resuming execution.  This enables stepping
773  * if the vCPU is marked as stepping.
774  */
775 static void
776 gdb_cpu_resume(int vcpu)
777 {
778         struct vcpu_state *vs;
779         int error;
780
781         vs = &vcpu_state[vcpu];
782
783         /*
784          * Any pending event should already be reported before
785          * resuming.
786          */
787         assert(vs->hit_swbreak == false);
788         assert(vs->stepped == false);
789         if (vs->stepping) {
790                 error = vm_set_capability(ctx, vcpu, VM_CAP_MTRAP_EXIT, 1);
791                 assert(error == 0);
792         }
793 }
794
795 /*
796  * Handler for VM_EXITCODE_DEBUG used to suspend a vCPU when the guest
797  * has been suspended due to an event on a different vCPU or in response
798  * to a guest-wide suspend such as Ctrl-C or the stop on attach.
799  */
800 void
801 gdb_cpu_suspend(int vcpu)
802 {
803
804         pthread_mutex_lock(&gdb_lock);
805         _gdb_cpu_suspend(vcpu, true);
806         gdb_cpu_resume(vcpu);
807         pthread_mutex_unlock(&gdb_lock);
808 }
809
810 static void
811 gdb_suspend_vcpus(void)
812 {
813
814         assert(pthread_mutex_isowned_np(&gdb_lock));
815         debug("suspending all CPUs\n");
816         vcpus_suspended = vcpus_active;
817         vm_suspend_cpu(ctx, -1);
818         if (CPU_CMP(&vcpus_waiting, &vcpus_suspended) == 0)
819                 gdb_finish_suspend_vcpus();
820 }
821
822 /*
823  * Handler for VM_EXITCODE_MTRAP reported when a vCPU single-steps via
824  * the VT-x-specific MTRAP exit.
825  */
826 void
827 gdb_cpu_mtrap(int vcpu)
828 {
829         struct vcpu_state *vs;
830
831         debug("$vCPU %d MTRAP\n", vcpu);
832         pthread_mutex_lock(&gdb_lock);
833         vs = &vcpu_state[vcpu];
834         if (vs->stepping) {
835                 vs->stepping = false;
836                 vs->stepped = true;
837                 vm_set_capability(ctx, vcpu, VM_CAP_MTRAP_EXIT, 0);
838                 while (vs->stepped) {
839                         if (stopped_vcpu == -1) {
840                                 debug("$vCPU %d reporting step\n", vcpu);
841                                 stopped_vcpu = vcpu;
842                                 gdb_suspend_vcpus();
843                         }
844                         _gdb_cpu_suspend(vcpu, true);
845                 }
846                 gdb_cpu_resume(vcpu);
847         }
848         pthread_mutex_unlock(&gdb_lock);
849 }
850
851 static struct breakpoint *
852 find_breakpoint(uint64_t gpa)
853 {
854         struct breakpoint *bp;
855
856         TAILQ_FOREACH(bp, &breakpoints, link) {
857                 if (bp->gpa == gpa)
858                         return (bp);
859         }
860         return (NULL);
861 }
862
863 void
864 gdb_cpu_breakpoint(int vcpu, struct vm_exit *vmexit)
865 {
866         struct breakpoint *bp;
867         struct vcpu_state *vs;
868         uint64_t gpa;
869         int error;
870
871         pthread_mutex_lock(&gdb_lock);
872         error = guest_vaddr2paddr(vcpu, vmexit->rip, &gpa);
873         assert(error == 1);
874         bp = find_breakpoint(gpa);
875         if (bp != NULL) {
876                 vs = &vcpu_state[vcpu];
877                 assert(vs->stepping == false);
878                 assert(vs->stepped == false);
879                 assert(vs->hit_swbreak == false);
880                 vs->hit_swbreak = true;
881                 vm_set_register(ctx, vcpu, VM_REG_GUEST_RIP, vmexit->rip);
882                 for (;;) {
883                         if (stopped_vcpu == -1) {
884                                 debug("$vCPU %d reporting breakpoint at rip %#lx\n", vcpu,
885                                     vmexit->rip);
886                                 stopped_vcpu = vcpu;
887                                 gdb_suspend_vcpus();
888                         }
889                         _gdb_cpu_suspend(vcpu, true);
890                         if (!vs->hit_swbreak) {
891                                 /* Breakpoint reported. */
892                                 break;
893                         }
894                         bp = find_breakpoint(gpa);
895                         if (bp == NULL) {
896                                 /* Breakpoint was removed. */
897                                 vs->hit_swbreak = false;
898                                 break;
899                         }
900                 }
901                 gdb_cpu_resume(vcpu);
902         } else {
903                 debug("$vCPU %d injecting breakpoint at rip %#lx\n", vcpu,
904                     vmexit->rip);
905                 error = vm_set_register(ctx, vcpu,
906                     VM_REG_GUEST_ENTRY_INST_LENGTH, vmexit->u.bpt.inst_length);
907                 assert(error == 0);
908                 error = vm_inject_exception(ctx, vcpu, IDT_BP, 0, 0, 0);
909                 assert(error == 0);
910         }
911         pthread_mutex_unlock(&gdb_lock);
912 }
913
914 static bool
915 gdb_step_vcpu(int vcpu)
916 {
917         int error, val;
918
919         debug("$vCPU %d step\n", vcpu);
920         error = vm_get_capability(ctx, vcpu, VM_CAP_MTRAP_EXIT, &val);
921         if (error < 0)
922                 return (false);
923
924         discard_stop();
925         vcpu_state[vcpu].stepping = true;
926         vm_resume_cpu(ctx, vcpu);
927         CPU_CLR(vcpu, &vcpus_suspended);
928         pthread_cond_broadcast(&idle_vcpus);
929         return (true);
930 }
931
932 static void
933 gdb_resume_vcpus(void)
934 {
935
936         assert(pthread_mutex_isowned_np(&gdb_lock));
937         vm_resume_cpu(ctx, -1);
938         debug("resuming all CPUs\n");
939         CPU_ZERO(&vcpus_suspended);
940         pthread_cond_broadcast(&idle_vcpus);
941 }
942
943 static void
944 gdb_read_regs(void)
945 {
946         uint64_t regvals[nitems(gdb_regset)];
947         int i;
948
949         if (vm_get_register_set(ctx, cur_vcpu, nitems(gdb_regset),
950             gdb_regset, regvals) == -1) {
951                 send_error(errno);
952                 return;
953         }
954         start_packet();
955         for (i = 0; i < nitems(regvals); i++)
956                 append_unsigned_native(regvals[i], gdb_regsize[i]);
957         finish_packet();
958 }
959
960 static void
961 gdb_read_mem(const uint8_t *data, size_t len)
962 {
963         uint64_t gpa, gva, val;
964         uint8_t *cp;
965         size_t resid, todo, bytes;
966         bool started;
967         int error;
968
969         /* Skip 'm' */
970         data += 1;
971         len -= 1;
972
973         /* Parse and consume address. */
974         cp = memchr(data, ',', len);
975         if (cp == NULL || cp == data) {
976                 send_error(EINVAL);
977                 return;
978         }
979         gva = parse_integer(data, cp - data);
980         len -= (cp - data) + 1;
981         data += (cp - data) + 1;
982
983         /* Parse length. */
984         resid = parse_integer(data, len);
985
986         started = false;
987         while (resid > 0) {
988                 error = guest_vaddr2paddr(cur_vcpu, gva, &gpa);
989                 if (error == -1) {
990                         if (started)
991                                 finish_packet();
992                         else
993                                 send_error(errno);
994                         return;
995                 }
996                 if (error == 0) {
997                         if (started)
998                                 finish_packet();
999                         else
1000                                 send_error(EFAULT);
1001                         return;
1002                 }
1003
1004                 /* Read bytes from current page. */
1005                 todo = getpagesize() - gpa % getpagesize();
1006                 if (todo > resid)
1007                         todo = resid;
1008
1009                 cp = paddr_guest2host(ctx, gpa, todo);
1010                 if (cp != NULL) {
1011                         /*
1012                          * If this page is guest RAM, read it a byte
1013                          * at a time.
1014                          */
1015                         if (!started) {
1016                                 start_packet();
1017                                 started = true;
1018                         }
1019                         while (todo > 0) {
1020                                 append_byte(*cp);
1021                                 cp++;
1022                                 gpa++;
1023                                 gva++;
1024                                 resid--;
1025                                 todo--;
1026                         }
1027                 } else {
1028                         /*
1029                          * If this page isn't guest RAM, try to handle
1030                          * it via MMIO.  For MMIO requests, use
1031                          * aligned reads of words when possible.
1032                          */
1033                         while (todo > 0) {
1034                                 if (gpa & 1 || todo == 1)
1035                                         bytes = 1;
1036                                 else if (gpa & 2 || todo == 2)
1037                                         bytes = 2;
1038                                 else
1039                                         bytes = 4;
1040                                 error = read_mem(ctx, cur_vcpu, gpa, &val,
1041                                     bytes);
1042                                 if (error == 0) {
1043                                         if (!started) {
1044                                                 start_packet();
1045                                                 started = true;
1046                                         }
1047                                         gpa += bytes;
1048                                         gva += bytes;
1049                                         resid -= bytes;
1050                                         todo -= bytes;
1051                                         while (bytes > 0) {
1052                                                 append_byte(val);
1053                                                 val >>= 8;
1054                                                 bytes--;
1055                                         }
1056                                 } else {
1057                                         if (started)
1058                                                 finish_packet();
1059                                         else
1060                                                 send_error(EFAULT);
1061                                         return;
1062                                 }
1063                         }
1064                 }
1065                 assert(resid == 0 || gpa % getpagesize() == 0);
1066         }
1067         if (!started)
1068                 start_packet();
1069         finish_packet();
1070 }
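/*
 * Example 'm' packet: "mffffffff80000000,10" asks for 0x10 bytes at
 * that guest virtual address.  A successful reply carries the data as
 * hex pairs; an address that does not translate produces an error
 * reply such as "E0e" (EFAULT).
 */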
1071
1072 static void
1073 gdb_write_mem(const uint8_t *data, size_t len)
1074 {
1075         uint64_t gpa, gva, val;
1076         uint8_t *cp;
1077         size_t resid, todo, bytes;
1078         int error;
1079
1080         /* Skip 'M' */
1081         data += 1;
1082         len -= 1;
1083
1084         /* Parse and consume address. */
1085         cp = memchr(data, ',', len);
1086         if (cp == NULL || cp == data) {
1087                 send_error(EINVAL);
1088                 return;
1089         }
1090         gva = parse_integer(data, cp - data);
1091         len -= (cp - data) + 1;
1092         data += (cp - data) + 1;
1093
1094         /* Parse and consume length. */
1095         cp = memchr(data, ':', len);
1096         if (cp == NULL || cp == data) {
1097                 send_error(EINVAL);
1098                 return;
1099         }
1100         resid = parse_integer(data, cp - data);
1101         len -= (cp - data) + 1;
1102         data += (cp - data) + 1;
1103
1104         /* Verify the available bytes match the length. */
1105         if (len != resid * 2) {
1106                 send_error(EINVAL);
1107                 return;
1108         }
1109
1110         while (resid > 0) {
1111                 error = guest_vaddr2paddr(cur_vcpu, gva, &gpa);
1112                 if (error == -1) {
1113                         send_error(errno);
1114                         return;
1115                 }
1116                 if (error == 0) {
1117                         send_error(EFAULT);
1118                         return;
1119                 }
1120
1121                 /* Write bytes to current page. */
1122                 todo = getpagesize() - gpa % getpagesize();
1123                 if (todo > resid)
1124                         todo = resid;
1125
1126                 cp = paddr_guest2host(ctx, gpa, todo);
1127                 if (cp != NULL) {
1128                         /*
1129                          * If this page is guest RAM, write it a byte
1130                          * at a time.
1131                          */
1132                         while (todo > 0) {
1133                                 assert(len >= 2);
1134                                 *cp = parse_byte(data);
1135                                 data += 2;
1136                                 len -= 2;
1137                                 cp++;
1138                                 gpa++;
1139                                 gva++;
1140                                 resid--;
1141                                 todo--;
1142                         }
1143                 } else {
1144                         /*
1145                          * If this page isn't guest RAM, try to handle
1146                          * it via MMIO.  For MMIO requests, use
1147                          * aligned writes of words when possible.
1148                          */
1149                         while (todo > 0) {
1150                                 if (gpa & 1 || todo == 1) {
1151                                         bytes = 1;
1152                                         val = parse_byte(data);
1153                                 } else if (gpa & 2 || todo == 2) {
1154                                         bytes = 2;
1155                                         val = be16toh(parse_integer(data, 4));
1156                                 } else {
1157                                         bytes = 4;
1158                                         val = be32toh(parse_integer(data, 8));
1159                                 }
1160                                 error = write_mem(ctx, cur_vcpu, gpa, val,
1161                                     bytes);
1162                                 if (error == 0) {
1163                                         gpa += bytes;
1164                                         gva += bytes;
1165                                         resid -= bytes;
1166                                         todo -= bytes;
1167                                         data += 2 * bytes;
1168                                         len -= 2 * bytes;
1169                                 } else {
1170                                         send_error(EFAULT);
1171                                         return;
1172                                 }
1173                         }
1174                 }
1175                 assert(resid == 0 || gpa % getpagesize() == 0);
1176         }
1177         assert(len == 0);
1178         send_ok();
1179 }
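/*
 * Example 'M' packet: "Mffffffff80000000,2:9090" writes two 0x90 (NOP)
 * bytes at that guest virtual address; on success the stub replies
 * "OK".
 */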
1180
1181 static bool
1182 set_breakpoint_caps(bool enable)
1183 {
1184         cpuset_t mask;
1185         int vcpu;
1186
1187         mask = vcpus_active;
1188         while (!CPU_EMPTY(&mask)) {
1189                 vcpu = CPU_FFS(&mask) - 1;
1190                 CPU_CLR(vcpu, &mask);
1191                 if (vm_set_capability(ctx, vcpu, VM_CAP_BPT_EXIT,
1192                     enable ? 1 : 0) < 0)
1193                         return (false);
1194                 debug("$vCPU %d %sabled breakpoint exits\n", vcpu,
1195                     enable ? "en" : "dis");
1196         }
1197         return (true);
1198 }
1199
1200 static void
1201 remove_all_sw_breakpoints(void)
1202 {
1203         struct breakpoint *bp, *nbp;
1204         uint8_t *cp;
1205
1206         if (TAILQ_EMPTY(&breakpoints))
1207                 return;
1208
1209         TAILQ_FOREACH_SAFE(bp, &breakpoints, link, nbp) {
1210                 debug("remove breakpoint at %#lx\n", bp->gpa);
1211                 cp = paddr_guest2host(ctx, bp->gpa, 1);
1212                 *cp = bp->shadow_inst;
1213                 TAILQ_REMOVE(&breakpoints, bp, link);
1214                 free(bp);
1215         }
1216         TAILQ_INIT(&breakpoints);
1217         set_breakpoint_caps(false);
1218 }
1219
1220 static void
1221 update_sw_breakpoint(uint64_t gva, int kind, bool insert)
1222 {
1223         struct breakpoint *bp;
1224         uint64_t gpa;
1225         uint8_t *cp;
1226         int error;
1227
1228         if (kind != 1) {
1229                 send_error(EINVAL);
1230                 return;
1231         }
1232
1233         error = guest_vaddr2paddr(cur_vcpu, gva, &gpa);
1234         if (error == -1) {
1235                 send_error(errno);
1236                 return;
1237         }
1238         if (error == 0) {
1239                 send_error(EFAULT);
1240                 return;
1241         }
1242
1243         cp = paddr_guest2host(ctx, gpa, 1);
1244
1245         /* Only permit breakpoints in guest RAM. */
1246         if (cp == NULL) {
1247                 send_error(EFAULT);
1248                 return;
1249         }
1250
1251         /* Find any existing breakpoint. */
1252         bp = find_breakpoint(gpa);
1253
1254         /*
1255          * Silently ignore duplicate commands since the protocol
1256          * requires these packets to be idempotent.
1257          */
1258         if (insert) {
1259                 if (bp == NULL) {
1260                         if (TAILQ_EMPTY(&breakpoints) &&
1261                             !set_breakpoint_caps(true)) {
1262                                 send_empty_response();
1263                                 return;
1264                         }
1265                         bp = malloc(sizeof(*bp));
1266                         bp->gpa = gpa;
1267                         bp->shadow_inst = *cp;
1268                         *cp = 0xcc;     /* INT 3 */
1269                         TAILQ_INSERT_TAIL(&breakpoints, bp, link);
1270                         debug("new breakpoint at %#lx\n", gpa);
1271                 }
1272         } else {
1273                 if (bp != NULL) {
1274                         debug("remove breakpoint at %#lx\n", gpa);
1275                         *cp = bp->shadow_inst;
1276                         TAILQ_REMOVE(&breakpoints, bp, link);
1277                         free(bp);
1278                         if (TAILQ_EMPTY(&breakpoints))
1279                                 set_breakpoint_caps(false);
1280                 }
1281         }
1282         send_ok();
1283 }
1284
1285 static void
1286 parse_breakpoint(const uint8_t *data, size_t len)
1287 {
1288         uint64_t gva;
1289         uint8_t *cp;
1290         bool insert;
1291         int kind, type;
1292
1293         insert = data[0] == 'Z';
1294
1295         /* Skip 'Z/z' */
1296         data += 1;
1297         len -= 1;
1298
1299         /* Parse and consume type. */
1300         cp = memchr(data, ',', len);
1301         if (cp == NULL || cp == data) {
1302                 send_error(EINVAL);
1303                 return;
1304         }
1305         type = parse_integer(data, cp - data);
1306         len -= (cp - data) + 1;
1307         data += (cp - data) + 1;
1308
1309         /* Parse and consume address. */
1310         cp = memchr(data, ',', len);
1311         if (cp == NULL || cp == data) {
1312                 send_error(EINVAL);
1313                 return;
1314         }
1315         gva = parse_integer(data, cp - data);
1316         len -= (cp - data) + 1;
1317         data += (cp - data) + 1;
1318
1319         /* Parse and consume kind. */
1320         cp = memchr(data, ';', len);
1321         if (cp == data) {
1322                 send_error(EINVAL);
1323                 return;
1324         }
1325         if (cp != NULL) {
1326                 /*
1327                  * We do not advertise support for either the
1328                  * ConditionalBreakpoints or BreakpointCommands
1329                  * features, so we should not be getting conditions or
1330                  * commands from the remote end.
1331                  */
1332                 send_empty_response();
1333                 return;
1334         }
1335         kind = parse_integer(data, len);
1336         data += len;
1337         len = 0;
1338
1339         switch (type) {
1340         case 0:
1341                 update_sw_breakpoint(gva, kind, insert);
1342                 break;
1343         default:
1344                 send_empty_response();
1345                 break;
1346         }
1347 }
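/*
 * Example breakpoint packets: "Z0,ffffffff80200120,1" inserts a
 * software breakpoint at that guest virtual address (type 0, kind 1),
 * and "z0,ffffffff80200120,1" removes it.  Any other breakpoint type
 * is answered with an empty response, i.e. "not supported".
 */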
1348
1349 static bool
1350 command_equals(const uint8_t *data, size_t len, const char *cmd)
1351 {
1352
1353         if (strlen(cmd) > len)
1354                 return (false);
1355         return (memcmp(data, cmd, strlen(cmd)) == 0);
1356 }
1357
1358 static void
1359 check_features(const uint8_t *data, size_t len)
1360 {
1361         char *feature, *next_feature, *str, *value;
1362         bool supported;
1363
1364         str = malloc(len + 1);
1365         memcpy(str, data, len);
1366         str[len] = '\0';
1367         next_feature = str;
1368
1369         while ((feature = strsep(&next_feature, ";")) != NULL) {
1370                 /*
1371                  * Null features shouldn't exist, but skip if they
1372                  * do.
1373                  */
1374                 if (strcmp(feature, "") == 0)
1375                         continue;
1376
1377                 /*
1378                  * Look for the value or supported / not supported
1379                  * flag.
1380                  */
1381                 value = strchr(feature, '=');
1382                 if (value != NULL) {
1383                         *value = '\0';
1384                         value++;
1385                         supported = true;
1386                 } else {
1387                         value = feature + strlen(feature) - 1;
1388                         switch (*value) {
1389                         case '+':
1390                                 supported = true;
1391                                 break;
1392                         case '-':
1393                                 supported = false;
1394                                 break;
1395                         default:
1396                                 /*
1397                                  * This is really a protocol error,
1398                                  * but we just ignore malformed
1399                                  * features for ease of
1400                                  * implementation.
1401                                  */
1402                                 continue;
1403                         }
1404                         *value = '\0';  /* strip the +/- flag */
                             value = NULL;
1405                 }
1406
1407                 if (strcmp(feature, "swbreak") == 0)
1408                         swbreak_enabled = supported;
1409         }
1410         free(str);
1411
1412         start_packet();
1413
1414         /* This is an arbitrary limit. */
1415         append_string("PacketSize=4096");
1416         append_string(";swbreak+");
1417         finish_packet();
1418 }
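/*
 * Example feature negotiation: for a client packet such as
 * "qSupported:swbreak+;hwbreak+;vContSupported+" the stub records that
 * swbreak stop reasons may be reported and replies with
 * "PacketSize=4096;swbreak+".
 */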
1419
1420 static void
1421 gdb_query(const uint8_t *data, size_t len)
1422 {
1423
1424         /*
1425          * TODO:
1426          * - qSearch
1427          */
1428         if (command_equals(data, len, "qAttached")) {
1429                 start_packet();
1430                 append_char('1');
1431                 finish_packet();
1432         } else if (command_equals(data, len, "qC")) {
1433                 start_packet();
1434                 append_string("QC");
1435                 append_integer(cur_vcpu + 1);
1436                 finish_packet();
1437         } else if (command_equals(data, len, "qfThreadInfo")) {
1438                 cpuset_t mask;
1439                 bool first;
1440                 int vcpu;
1441
1442                 if (CPU_EMPTY(&vcpus_active)) {
1443                         send_error(EINVAL);
1444                         return;
1445                 }
1446                 mask = vcpus_active;
1447                 start_packet();
1448                 append_char('m');
1449                 first = true;
1450                 while (!CPU_EMPTY(&mask)) {
1451                         vcpu = CPU_FFS(&mask) - 1;
1452                         CPU_CLR(vcpu, &mask);
1453                         if (first)
1454                                 first = false;
1455                         else
1456                                 append_char(',');
1457                         append_integer(vcpu + 1);
1458                 }
1459                 finish_packet();
1460         } else if (command_equals(data, len, "qsThreadInfo")) {
1461                 start_packet();
1462                 append_char('l');
1463                 finish_packet();
1464         } else if (command_equals(data, len, "qSupported")) {
1465                 data += strlen("qSupported");
1466                 len -= strlen("qSupported");
1467                 check_features(data, len);
1468         } else if (command_equals(data, len, "qThreadExtraInfo")) {
1469                 char buf[16];
1470                 int tid;
1471
1472                 data += strlen("qThreadExtraInfo");
1473                 len -= strlen("qThreadExtraInfo");
1474                 if (*data != ',') {
1475                         send_error(EINVAL);
1476                         return;
1477                 }
1478                 tid = parse_threadid(data + 1, len - 1);
1479                 if (tid <= 0 || !CPU_ISSET(tid - 1, &vcpus_active)) {
1480                         send_error(EINVAL);
1481                         return;
1482                 }
1483
1484                 snprintf(buf, sizeof(buf), "vCPU %d", tid - 1);
1485                 start_packet();
1486                 append_asciihex(buf);
1487                 finish_packet();
1488         } else
1489                 send_empty_response();
1490 }
1491
1492 static void
1493 handle_command(const uint8_t *data, size_t len)
1494 {
1495
1496         /* Reject packets with a sequence-id. */
1497         if (len >= 3 && data[0] >= '0' && data[0] <= '9' &&
1498             data[1] >= '0' && data[1] <= '9' && data[2] == ':') {
1499                 send_empty_response();
1500                 return;
1501         }
1502
1503         switch (*data) {
1504         case 'c':
1505                 if (len != 1) {
1506                         send_error(EINVAL);
1507                         break;
1508                 }
1509
1510                 discard_stop();
1511                 gdb_resume_vcpus();
1512                 break;
1513         case 'D':
1514                 send_ok();
1515
1516                 /* TODO: Resume any stopped CPUs. */
1517                 break;
1518         case 'g': {
1519                 gdb_read_regs();
1520                 break;
1521         }
1522         case 'H': {
1523                 int tid;
1524
1525                 if (data[1] != 'g' && data[1] != 'c') {
1526                         send_error(EINVAL);
1527                         break;
1528                 }
1529                 tid = parse_threadid(data + 2, len - 2);
1530                 if (tid == -2) {
1531                         send_error(EINVAL);
1532                         break;
1533                 }
1534
1535                 if (CPU_EMPTY(&vcpus_active)) {
1536                         send_error(EINVAL);
1537                         break;
1538                 }
1539                 if (tid == -1 || tid == 0)
1540                         cur_vcpu = CPU_FFS(&vcpus_active) - 1;
1541                 else if (CPU_ISSET(tid - 1, &vcpus_active))
1542                         cur_vcpu = tid - 1;
1543                 else {
1544                         send_error(EINVAL);
1545                         break;
1546                 }
1547                 send_ok();
1548                 break;
1549         }
1550         case 'm':
1551                 gdb_read_mem(data, len);
1552                 break;
1553         case 'M':
1554                 gdb_write_mem(data, len);
1555                 break;
1556         case 'T': {
1557                 int tid;
1558
1559                 tid = parse_threadid(data + 1, len - 1);
1560                 if (tid <= 0 || !CPU_ISSET(tid - 1, &vcpus_active)) {
1561                         send_error(EINVAL);
1562                         return;
1563                 }
1564                 send_ok();
1565                 break;
1566         }
1567         case 'q':
1568                 gdb_query(data, len);
1569                 break;
1570         case 's':
1571                 if (len != 1) {
1572                         send_error(EINVAL);
1573                         break;
1574                 }
1575
1576                 /* Don't send a reply until a stop occurs. */
1577                 if (!gdb_step_vcpu(cur_vcpu)) {
1578                         send_error(EOPNOTSUPP);
1579                         break;
1580                 }
1581                 break;
1582         case 'z':
1583         case 'Z':
1584                 parse_breakpoint(data, len);
1585                 break;
1586         case '?':
1587                 report_stop(false);
1588                 break;
1589         case 'G': /* TODO */
1590         case 'v':
1591                 /* Handle 'vCont' */
1592                 /* 'vCtrlC' */
1593         case 'p': /* TODO */
1594         case 'P': /* TODO */
1595         case 'Q': /* TODO */
1596         case 't': /* TODO */
1597         case 'X': /* TODO */
1598         default:
1599                 send_empty_response();
1600         }
1601 }
1602
1603 /* Check for a valid packet in the command buffer. */
1604 static void
1605 check_command(int fd)
1606 {
1607         uint8_t *head, *hash, *p, sum;
1608         size_t avail, plen;
1609
1610         for (;;) {
1611                 avail = cur_comm.len;
1612                 if (avail == 0)
1613                         return;
1614                 head = io_buffer_head(&cur_comm);
1615                 switch (*head) {
1616                 case 0x03:
1617                         debug("<- Ctrl-C\n");
1618                         io_buffer_consume(&cur_comm, 1);
1619
1620                         gdb_suspend_vcpus();
1621                         break;
1622                 case '+':
1623                         /* ACK of previous response. */
1624                         debug("<- +\n");
1625                         if (response_pending())
1626                                 io_buffer_reset(&cur_resp);
1627                         io_buffer_consume(&cur_comm, 1);
1628                         if (stopped_vcpu != -1 && report_next_stop) {
1629                                 report_stop(true);
1630                                 send_pending_data(fd);
1631                         }
1632                         break;
1633                 case '-':
1634                         /* NACK of previous response. */
1635                         debug("<- -\n");
1636                         if (response_pending()) {
1637                                 cur_resp.len += cur_resp.start;
1638                                 cur_resp.start = 0;
1639                                 if (cur_resp.data[0] == '+')
1640                                         io_buffer_advance(&cur_resp, 1);
1641                                 debug("-> %.*s\n", (int)cur_resp.len,
1642                                     io_buffer_head(&cur_resp));
1643                         }
1644                         io_buffer_consume(&cur_comm, 1);
1645                         send_pending_data(fd);
1646                         break;
1647                 case '$':
1648                         /* Packet. */
1649
1650                         if (response_pending()) {
1651                                 warnx("New GDB command while response in "
1652                                     "progress");
1653                                 io_buffer_reset(&cur_resp);
1654                         }
1655
1656                         /* Is packet complete? */
1657                         hash = memchr(head, '#', avail);
1658                         if (hash == NULL)
1659                                 return;
1660                         plen = (hash - head + 1) + 2;
1661                         if (avail < plen)
1662                                 return;
1663                         debug("<- %.*s\n", (int)plen, head);
1664
1665                         /* Verify checksum. */
1666                         for (sum = 0, p = head + 1; p < hash; p++)
1667                                 sum += *p;
1668                         if (sum != parse_byte(hash + 1)) {
1669                                 io_buffer_consume(&cur_comm, plen);
1670                                 debug("-> -\n");
1671                                 send_char('-');
1672                                 send_pending_data(fd);
1673                                 break;
1674                         }
1675                         send_char('+');
1676
1677                         handle_command(head + 1, hash - (head + 1));
1678                         io_buffer_consume(&cur_comm, plen);
1679                         if (!response_pending())
1680                                 debug("-> +\n");
1681                         send_pending_data(fd);
1682                         break;
1683                 default:
1684                         /* XXX: Possibly drop connection instead. */
1685                         debug("-> %02x\n", *head);
1686                         io_buffer_consume(&cur_comm, 1);
1687                         break;
1688                 }
1689         }
1690 }
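/*
 * Example of the acknowledgment flow handled above: for a well-formed
 * command such as "$g#67" ('g' == 0x67) the stub queues a '+' followed
 * by its own "$...#nn" reply; on a checksum mismatch it queues a '-' so
 * the client retransmits the packet.
 */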
1691
1692 static void
1693 gdb_readable(int fd, enum ev_type event, void *arg)
1694 {
1695         ssize_t nread;
1696         int pending;
1697
1698         if (ioctl(fd, FIONREAD, &pending) == -1) {
1699                 warn("FIONREAD on GDB socket");
1700                 return;
1701         }
1702
1703         /*
1704          * 'pending' might be zero due to EOF.  We need to call read
1705          * with a non-zero length to detect EOF.
1706          */
1707         if (pending == 0)
1708                 pending = 1;
1709
1710         /* Ensure there is room in the command buffer. */
1711         io_buffer_grow(&cur_comm, pending);
1712         assert(io_buffer_avail(&cur_comm) >= pending);
1713
1714         nread = read(fd, io_buffer_tail(&cur_comm), io_buffer_avail(&cur_comm));
1715         if (nread == 0) {
1716                 close_connection();
1717         } else if (nread == -1) {
1718                 if (errno == EAGAIN)
1719                         return;
1720
1721                 warn("Read from GDB socket");
1722                 close_connection();
1723         } else {
1724                 cur_comm.len += nread;
1725                 pthread_mutex_lock(&gdb_lock);
1726                 check_command(fd);
1727                 pthread_mutex_unlock(&gdb_lock);
1728         }
1729 }
1730
1731 static void
1732 gdb_writable(int fd, enum ev_type event, void *arg)
1733 {
1734
1735         send_pending_data(fd);
1736 }
1737
1738 static void
1739 new_connection(int fd, enum ev_type event, void *arg)
1740 {
1741         int optval, s;
1742
1743         s = accept4(fd, NULL, NULL, SOCK_NONBLOCK);
1744         if (s == -1) {
1745                 if (arg != NULL)
1746                         err(1, "Failed accepting initial GDB connection");
1747
1748                 /* Silently ignore errors post-startup. */
1749                 return;
1750         }
1751
1752         optval = 1;
1753         if (setsockopt(s, SOL_SOCKET, SO_NOSIGPIPE, &optval, sizeof(optval)) ==
1754             -1) {
1755                 warn("Failed to disable SIGPIPE for GDB connection");
1756                 close(s);
1757                 return;
1758         }
1759
1760         pthread_mutex_lock(&gdb_lock);
1761         if (cur_fd != -1) {
1762                 close(s);
1763                 warnx("Ignoring additional GDB connection.");
                     pthread_mutex_unlock(&gdb_lock);
                     return;
1764         }
1765
1766         read_event = mevent_add(s, EVF_READ, gdb_readable, NULL);
1767         if (read_event == NULL) {
1768                 if (arg != NULL)
1769                         err(1, "Failed to setup initial GDB connection");
1770                 pthread_mutex_unlock(&gdb_lock);
1771                 return;
1772         }
1773         write_event = mevent_add(s, EVF_WRITE, gdb_writable, NULL);
1774         if (write_event == NULL) {
1775                 if (arg != NULL)
1776                         err(1, "Failed to setup initial GDB connection");
1777                 mevent_delete_close(read_event);
1778                 read_event = NULL;
1779         }
1780
1781         cur_fd = s;
1782         cur_vcpu = 0;
1783         stopped_vcpu = -1;
1784
1785         /* Break on attach. */
1786         first_stop = true;
1787         report_next_stop = false;
1788         gdb_suspend_vcpus();
1789         pthread_mutex_unlock(&gdb_lock);
1790 }
1791
1792 #ifndef WITHOUT_CAPSICUM
1793 void
1794 limit_gdb_socket(int s)
1795 {
1796         cap_rights_t rights;
1797         unsigned long ioctls[] = { FIONREAD };
1798
1799         cap_rights_init(&rights, CAP_ACCEPT, CAP_EVENT, CAP_READ, CAP_WRITE,
1800             CAP_SETSOCKOPT, CAP_IOCTL);
1801         if (caph_rights_limit(s, &rights) == -1)
1802                 errx(EX_OSERR, "Unable to apply rights for sandbox");
1803         if (caph_ioctls_limit(s, ioctls, nitems(ioctls)) == -1)
1804                 errx(EX_OSERR, "Unable to apply rights for sandbox");
1805 }
1806 #endif
1807
1808 void
1809 init_gdb(struct vmctx *_ctx, int sport, bool wait)
1810 {
1811         struct sockaddr_in sin;
1812         int error, flags, optval, s;
1813
1814         debug("==> starting on %d, %swaiting\n", sport, wait ? "" : "not ");
1815
1816         error = pthread_mutex_init(&gdb_lock, NULL);
1817         if (error != 0)
1818                 errc(1, error, "gdb mutex init");
1819         error = pthread_cond_init(&idle_vcpus, NULL);
1820         if (error != 0)
1821                 errc(1, error, "gdb cv init");
1822
1823         ctx = _ctx;
1824         s = socket(PF_INET, SOCK_STREAM, 0);
1825         if (s < 0)
1826                 err(1, "gdb socket create");
1827
1828         optval = 1;
1829         (void)setsockopt(s, SOL_SOCKET, SO_REUSEADDR, &optval, sizeof(optval));
1830
1831         sin.sin_len = sizeof(sin);
1832         sin.sin_family = AF_INET;
1833         sin.sin_addr.s_addr = htonl(INADDR_ANY);
1834         sin.sin_port = htons(sport);
1835
1836         if (bind(s, (struct sockaddr *)&sin, sizeof(sin)) < 0)
1837                 err(1, "gdb socket bind");
1838
1839         if (listen(s, 1) < 0)
1840                 err(1, "gdb socket listen");
1841
1842         stopped_vcpu = -1;
1843         TAILQ_INIT(&breakpoints);
1844         vcpu_state = calloc(guest_ncpus, sizeof(*vcpu_state));
1845         if (wait) {
1846                 /*
1847                  * Set vcpu 0 in vcpus_suspended.  This will trigger the
1848                  * logic in gdb_cpu_add() to suspend the first vcpu before
1849                  * it starts execution.  The vcpu will remain suspended
1850                  * until a debugger connects.
1851                  */
1852                 CPU_SET(0, &vcpus_suspended);
1853                 stopped_vcpu = 0;
1854         }
1855
1856         flags = fcntl(s, F_GETFL);
1857         if (fcntl(s, F_SETFL, flags | O_NONBLOCK) == -1)
1858                 err(1, "Failed to mark gdb socket non-blocking");
1859
1860 #ifndef WITHOUT_CAPSICUM
1861         limit_gdb_socket(s);
1862 #endif
1863         mevent_add(s, EVF_READ, new_connection, NULL);
1864 }
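/*
 * Usage note (illustrative): once the debug server is listening on
 * 'sport' (typically wired up to bhyve's -G option), a debugger on the
 * host attaches with GDB's remote target command, e.g.:
 *
 *      (gdb) target remote localhost:1234
 *
 * With 'wait' set, vCPU 0 is held suspended until that first
 * connection arrives, so the guest can be inspected before it executes
 * its first instruction.
 */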