]> CyberLeo.Net >> Repos - FreeBSD/FreeBSD.git/blob - usr.sbin/bhyve/gdb.c
MFV r348573: 9993 zil writes can get delayed in zio pipeline
[FreeBSD/FreeBSD.git] / usr.sbin / bhyve / gdb.c
1 /*-
2  * SPDX-License-Identifier: BSD-2-Clause-FreeBSD
3  *
4  * Copyright (c) 2017-2018 John H. Baldwin <jhb@FreeBSD.org>
5  *
6  * Redistribution and use in source and binary forms, with or without
7  * modification, are permitted provided that the following conditions
8  * are met:
9  * 1. Redistributions of source code must retain the above copyright
10  *    notice, this list of conditions and the following disclaimer.
11  * 2. Redistributions in binary form must reproduce the above copyright
12  *    notice, this list of conditions and the following disclaimer in the
13  *    documentation and/or other materials provided with the distribution.
14  *
15  * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
16  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
17  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
18  * ARE DISCLAIMED.  IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
19  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
20  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
21  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
22  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
23  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
24  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
25  * SUCH DAMAGE.
26  */
27
28 #include <sys/cdefs.h>
29 __FBSDID("$FreeBSD$");
30
31 #include <sys/param.h>
32 #ifndef WITHOUT_CAPSICUM
33 #include <sys/capsicum.h>
34 #endif
35 #include <sys/ioctl.h>
36 #include <sys/mman.h>
37 #include <sys/socket.h>
38 #include <machine/atomic.h>
39 #include <machine/specialreg.h>
40 #include <machine/vmm.h>
41 #include <netinet/in.h>
42 #include <assert.h>
43 #ifndef WITHOUT_CAPSICUM
44 #include <capsicum_helpers.h>
45 #endif
46 #include <err.h>
47 #include <errno.h>
48 #include <fcntl.h>
49 #include <pthread.h>
50 #include <pthread_np.h>
51 #include <stdbool.h>
52 #include <stdio.h>
53 #include <stdlib.h>
54 #include <string.h>
55 #include <sysexits.h>
56 #include <unistd.h>
57 #include <vmmapi.h>
58
59 #include "bhyverun.h"
60 #include "mem.h"
61 #include "mevent.h"
62
63 /*
64  * GDB_SIGNAL_* numbers are part of the GDB remote protocol.  Most stops
65  * use SIGTRAP.
66  */
67 #define GDB_SIGNAL_TRAP         5
68
69 static void gdb_resume_vcpus(void);
70 static void check_command(int fd);
71
72 static struct mevent *read_event, *write_event;
73
74 static cpuset_t vcpus_active, vcpus_suspended, vcpus_waiting;
75 static pthread_mutex_t gdb_lock;
76 static pthread_cond_t idle_vcpus;
77 static bool stop_pending, first_stop;
78 static int stepping_vcpu, stopped_vcpu;
79
/*
 * Byte buffer used for socket I/O.  'data' points at 'capacity' bytes
 * of storage.  When used for reading, 'start' is unused and 'len' is
 * the count of valid bytes held.  When used for writing, 'start' is the
 * index in 'data' of the next byte to send and 'len' is the number of
 * valid bytes that still have to be sent.
 */
struct io_buffer {
        uint8_t *data;
        size_t capacity, start, len;
};
93
94 static struct io_buffer cur_comm, cur_resp;
95 static uint8_t cur_csum;
96 static int cur_vcpu;
97 static struct vmctx *ctx;
98 static int cur_fd = -1;
99
100 const int gdb_regset[] = {
101         VM_REG_GUEST_RAX,
102         VM_REG_GUEST_RBX,
103         VM_REG_GUEST_RCX,
104         VM_REG_GUEST_RDX,
105         VM_REG_GUEST_RSI,
106         VM_REG_GUEST_RDI,
107         VM_REG_GUEST_RBP,
108         VM_REG_GUEST_RSP,
109         VM_REG_GUEST_R8,
110         VM_REG_GUEST_R9,
111         VM_REG_GUEST_R10,
112         VM_REG_GUEST_R11,
113         VM_REG_GUEST_R12,
114         VM_REG_GUEST_R13,
115         VM_REG_GUEST_R14,
116         VM_REG_GUEST_R15,
117         VM_REG_GUEST_RIP,
118         VM_REG_GUEST_RFLAGS,
119         VM_REG_GUEST_CS,
120         VM_REG_GUEST_SS,
121         VM_REG_GUEST_DS,
122         VM_REG_GUEST_ES,
123         VM_REG_GUEST_FS,
124         VM_REG_GUEST_GS
125 };
126
/*
 * Number of bytes emitted for each register, indexed in parallel with
 * gdb_regset: seventeen 8-byte entries followed by seven 4-byte entries.
 */
const int gdb_regsize[] = {
        8, 8, 8, 8, 8, 8, 8, 8,
        8, 8, 8, 8, 8, 8, 8, 8,
        8,
        4, 4, 4, 4, 4, 4, 4
};
153
#ifdef GDB_LOG
#include <stdarg.h>
#include <stdio.h>

/*
 * printf-style trace output, compiled in only when GDB_LOG is defined.
 * The log file /tmp/bhyve_gdb.log is opened on first use and switched
 * to line buffering.  If the file cannot be opened (or, with Capsicum,
 * cannot be limited to writes) the message is dropped and the open is
 * retried on the next call.
 */
static void __printflike(1, 2)
debug(const char *fmt, ...)
{
        static FILE *logfile;
        va_list args;

        if (logfile == NULL) {
                FILE *fp;

                fp = fopen("/tmp/bhyve_gdb.log", "w");
                if (fp == NULL)
                        return;
#ifndef WITHOUT_CAPSICUM
                if (caph_limit_stream(fileno(fp), CAPH_WRITE) == -1) {
                        fclose(fp);
                        return;
                }
#endif
                setlinebuf(fp);
                logfile = fp;
        }
        va_start(args, fmt);
        vfprintf(logfile, fmt, args);
        va_end(args);
}
#else
/* Logging disabled: debug() calls expand to nothing. */
#define debug(...)
#endif
184
185 static int
186 guest_paging_info(int vcpu, struct vm_guest_paging *paging)
187 {
188         uint64_t regs[4];
189         const int regset[4] = {
190                 VM_REG_GUEST_CR0,
191                 VM_REG_GUEST_CR3,
192                 VM_REG_GUEST_CR4,
193                 VM_REG_GUEST_EFER
194         };
195
196         if (vm_get_register_set(ctx, vcpu, nitems(regset), regset, regs) == -1)
197                 return (-1);
198
199         /*
200          * For the debugger, always pretend to be the kernel (CPL 0),
201          * and if long-mode is enabled, always parse addresses as if
202          * in 64-bit mode.
203          */
204         paging->cr3 = regs[1];
205         paging->cpl = 0;
206         if (regs[3] & EFER_LMA)
207                 paging->cpu_mode = CPU_MODE_64BIT;
208         else if (regs[0] & CR0_PE)
209                 paging->cpu_mode = CPU_MODE_PROTECTED;
210         else
211                 paging->cpu_mode = CPU_MODE_REAL;
212         if (!(regs[0] & CR0_PG))
213                 paging->paging_mode = PAGING_MODE_FLAT;
214         else if (!(regs[2] & CR4_PAE))
215                 paging->paging_mode = PAGING_MODE_32;
216         else if (regs[3] & EFER_LME)
217                 paging->paging_mode = PAGING_MODE_64;
218         else
219                 paging->paging_mode = PAGING_MODE_PAE;
220         return (0);
221 }
222
223 /*
224  * Map a guest virtual address to a physical address (for a given vcpu).
225  * If a guest virtual address is valid, return 1.  If the address is
226  * not valid, return 0.  If an error occurs obtaining the mapping,
227  * return -1.
228  */
229 static int
230 guest_vaddr2paddr(int vcpu, uint64_t vaddr, uint64_t *paddr)
231 {
232         struct vm_guest_paging paging;
233         int fault;
234
235         if (guest_paging_info(vcpu, &paging) == -1)
236                 return (-1);
237
238         /*
239          * Always use PROT_READ.  We really care if the VA is
240          * accessible, not if the current vCPU can write.
241          */
242         if (vm_gla2gpa_nofault(ctx, vcpu, &paging, vaddr, PROT_READ, paddr,
243             &fault) == -1)
244                 return (-1);
245         if (fault)
246                 return (0);
247         return (1);
248 }
249
250 static void
251 io_buffer_reset(struct io_buffer *io)
252 {
253
254         io->start = 0;
255         io->len = 0;
256 }
257
258 /* Available room for adding data. */
259 static size_t
260 io_buffer_avail(struct io_buffer *io)
261 {
262
263         return (io->capacity - (io->start + io->len));
264 }
265
266 static uint8_t *
267 io_buffer_head(struct io_buffer *io)
268 {
269
270         return (io->data + io->start);
271 }
272
273 static uint8_t *
274 io_buffer_tail(struct io_buffer *io)
275 {
276
277         return (io->data + io->start + io->len);
278 }
279
280 static void
281 io_buffer_advance(struct io_buffer *io, size_t amount)
282 {
283
284         assert(amount <= io->len);
285         io->start += amount;
286         io->len -= amount;
287 }
288
289 static void
290 io_buffer_consume(struct io_buffer *io, size_t amount)
291 {
292
293         io_buffer_advance(io, amount);
294         if (io->len == 0) {
295                 io->start = 0;
296                 return;
297         }
298
299         /*
300          * XXX: Consider making this move optional and compacting on a
301          * future read() before realloc().
302          */
303         memmove(io->data, io_buffer_head(io), io->len);
304         io->start = 0;
305 }
306
307 static void
308 io_buffer_grow(struct io_buffer *io, size_t newsize)
309 {
310         uint8_t *new_data;
311         size_t avail, new_cap;
312
313         avail = io_buffer_avail(io);
314         if (newsize <= avail)
315                 return;
316
317         new_cap = io->capacity + (newsize - avail);
318         new_data = realloc(io->data, new_cap);
319         if (new_data == NULL)
320                 err(1, "Failed to grow GDB I/O buffer");
321         io->data = new_data;
322         io->capacity = new_cap;
323 }
324
325 static bool
326 response_pending(void)
327 {
328
329         if (cur_resp.start == 0 && cur_resp.len == 0)
330                 return (false);
331         if (cur_resp.start + cur_resp.len == 1 && cur_resp.data[0] == '+')
332                 return (false);
333         return (true);
334 }
335
336 static void
337 close_connection(void)
338 {
339
340         /*
341          * XXX: This triggers a warning because mevent does the close
342          * before the EV_DELETE.
343          */
344         pthread_mutex_lock(&gdb_lock);
345         mevent_delete(write_event);
346         mevent_delete_close(read_event);
347         write_event = NULL;
348         read_event = NULL;
349         io_buffer_reset(&cur_comm);
350         io_buffer_reset(&cur_resp);
351         cur_fd = -1;
352
353         /* Resume any stopped vCPUs. */
354         gdb_resume_vcpus();
355         pthread_mutex_unlock(&gdb_lock);
356 }
357
/*
 * Convert a value in the range 0-15 to its lower-case hexadecimal
 * character.  Larger values are not checked.
 */
static uint8_t
hex_digit(uint8_t nibble)
{
        return (nibble <= 9 ? nibble + '0' : nibble + 'a' - 10);
}
367
/*
 * Convert one hexadecimal character (either case) to its value.
 * Characters that are not hex digits yield 0xF.
 */
static uint8_t
parse_digit(uint8_t v)
{
        uint8_t folded;

        if (v >= '0' && v <= '9')
                return (v - '0');

        /* Setting bit 0x20 maps ASCII 'A'-'F' onto 'a'-'f'. */
        folded = v | 0x20;
        if (folded >= 'a' && folded <= 'f')
                return (folded - 'a' + 10);
        return (0xF);
}
380
/*
 * Parse 'len' characters at 'p' as a big-endian (most significant digit
 * first) hexadecimal number.  An empty string yields 0.
 */
static uintmax_t
parse_integer(const uint8_t *p, size_t len)
{
        uintmax_t acc;
        size_t i;

        acc = 0;
        for (i = 0; i < len; i++)
                acc = (acc << 4) | parse_digit(p[i]);
        return (acc);
}
396
/* Decode the two hex characters at 'p' into a single byte. */
static uint8_t
parse_byte(const uint8_t *p)
{
        uint8_t hi, lo;

        hi = parse_digit(p[0]);
        lo = parse_digit(p[1]);
        return ((hi << 4) | lo);
}
403
404 static void
405 send_pending_data(int fd)
406 {
407         ssize_t nwritten;
408
409         if (cur_resp.len == 0) {
410                 mevent_disable(write_event);
411                 return;
412         }
413         nwritten = write(fd, io_buffer_head(&cur_resp), cur_resp.len);
414         if (nwritten == -1) {
415                 warn("Write to GDB socket failed");
416                 close_connection();
417         } else {
418                 io_buffer_advance(&cur_resp, nwritten);
419                 if (cur_resp.len == 0)
420                         mevent_disable(write_event);
421                 else
422                         mevent_enable(write_event);
423         }
424 }
425
426 /* Append a single character to the output buffer. */
427 static void
428 send_char(uint8_t data)
429 {
430         io_buffer_grow(&cur_resp, 1);
431         *io_buffer_tail(&cur_resp) = data;
432         cur_resp.len++;
433 }
434
435 /* Append an array of bytes to the output buffer. */
436 static void
437 send_data(const uint8_t *data, size_t len)
438 {
439
440         io_buffer_grow(&cur_resp, len);
441         memcpy(io_buffer_tail(&cur_resp), data, len);
442         cur_resp.len += len;
443 }
444
/*
 * Write the two hex characters for 'v' into buf[0] (high nibble) and
 * buf[1] (low nibble).  No terminator is stored.
 */
static void
format_byte(uint8_t v, uint8_t *buf)
{
        uint8_t high, low;

        high = v >> 4;
        low = v & 0xf;
        buf[0] = hex_digit(high);
        buf[1] = hex_digit(low);
}
452
/*
 * Queue one byte as two hex characters.  The packet checksum is not
 * updated.
 */
static void
send_byte(uint8_t v)
{
        uint8_t hex[2];

        format_byte(v, hex);
        send_data(hex, sizeof(hex));
}
465
466 static void
467 start_packet(void)
468 {
469
470         send_char('$');
471         cur_csum = 0;
472 }
473
474 static void
475 finish_packet(void)
476 {
477
478         send_char('#');
479         send_byte(cur_csum);
480         debug("-> %.*s\n", (int)cur_resp.len, io_buffer_head(&cur_resp));
481 }
482
483 /*
484  * Append a single character (for the packet payload) and update the
485  * checksum.
486  */
487 static void
488 append_char(uint8_t v)
489 {
490
491         send_char(v);
492         cur_csum += v;
493 }
494
495 /*
496  * Append an array of bytes (for the packet payload) and update the
497  * checksum.
498  */
499 static void
500 append_packet_data(const uint8_t *data, size_t len)
501 {
502
503         send_data(data, len);
504         while (len > 0) {
505                 cur_csum += *data;
506                 data++;
507                 len--;
508         }
509 }
510
/*
 * Add the characters of NUL-terminated 'str' (terminator excluded) to
 * the packet payload.
 */
static void
append_string(const char *str)
{
        size_t n;

        n = strlen(str);
        append_packet_data((const uint8_t *)str, n);
}
517
/* Add one byte to the packet payload as two hex characters. */
static void
append_byte(uint8_t v)
{
        uint8_t hex[2];

        format_byte(v, hex);
        append_packet_data(hex, sizeof(hex));
}
526
/*
 * Add the low 'len' bytes of 'value' to the packet payload, least
 * significant byte first, each as two hex characters.
 */
static void
append_unsigned_native(uintmax_t value, size_t len)
{
        while (len > 0) {
                append_byte((uint8_t)(value & 0xff));
                value >>= 8;
                len--;
        }
}
537
/*
 * Add the low 'len' bytes of 'value' to the packet payload, most
 * significant byte first, each as two hex characters.  Byte positions
 * beyond the width of uintmax_t are emitted as zero.  A 'len' of zero
 * emits nothing.
 *
 * The bytes are emitted directly rather than staged in a
 * variable-length array, so there is no zero-sized or unbounded stack
 * buffer and no char/uint8_t pointer mismatch.
 */
static void
append_unsigned_be(uintmax_t value, size_t len)
{
        size_t i;

        for (i = len; i > 0; i--) {
                /* Shifting by the full width or more is undefined. */
                if (i > sizeof(value))
                        append_byte(0);
                else
                        append_byte((uint8_t)(value >> ((i - 1) * 8)));
        }
}
550
/*
 * Add 'value' to the packet payload as big-endian hexadecimal using
 * the minimum number of whole bytes.  Zero is emitted as the single
 * character '0'.
 */
static void
append_integer(unsigned int value)
{

        if (value == 0)
                append_char('0');
        else {
                /*
                 * fls() gives the number of significant bits; round
                 * that up to whole bytes.  The parentheses matter:
                 * without them "7 / 8" is evaluated first and is 0.
                 */
                append_unsigned_be(value, (fls(value) + 7) / 8);
        }
}
560
/*
 * Add each character of NUL-terminated 'str' to the packet payload as
 * two hex characters.
 */
static void
append_asciihex(const char *str)
{
        const char *p;

        for (p = str; *p != '\0'; p++)
                append_byte(*p);
}
570
/* Queue a packet with an empty payload. */
static void
send_empty_response(void)
{
        start_packet();
        finish_packet();
}
578
/*
 * Queue an error packet: 'E' followed by the low byte of 'error' as
 * two hex characters.
 */
static void
send_error(int error)
{
        start_packet();
        append_char('E');
        append_byte((uint8_t)error);
        finish_packet();
}
588
/* Queue a packet whose payload is "OK". */
static void
send_ok(void)
{
        start_packet();
        append_char('O');
        append_char('K');
        finish_packet();
}
597
/*
 * Decode a thread-id field of 'len' characters.  "0" yields 0, "-1"
 * yields -1, and an empty field yields -2.  Anything else is parsed as
 * a hexadecimal number.
 */
static int
parse_threadid(const uint8_t *data, size_t len)
{
        switch (len) {
        case 0:
                return (-2);
        case 1:
                if (data[0] == '0')
                        return (0);
                break;
        case 2:
                if (memcmp(data, "-1", 2) == 0)
                        return (-1);
                break;
        default:
                break;
        }
        return (parse_integer(data, len));
}
610
611 static void
612 report_stop(void)
613 {
614
615         start_packet();
616         if (stopped_vcpu == -1)
617                 append_char('S');
618         else
619                 append_char('T');
620         append_byte(GDB_SIGNAL_TRAP);
621         if (stopped_vcpu != -1) {
622                 append_string("thread:");
623                 append_integer(stopped_vcpu + 1);
624                 append_char(';');
625         }
626         stopped_vcpu = -1;
627         finish_packet();
628 }
629
630 static void
631 gdb_finish_suspend_vcpus(void)
632 {
633
634         if (first_stop) {
635                 first_stop = false;
636                 stopped_vcpu = -1;
637         } else if (response_pending())
638                 stop_pending = true;
639         else {
640                 report_stop();
641                 send_pending_data(cur_fd);
642         }
643 }
644
645 static void
646 _gdb_cpu_suspend(int vcpu, bool report_stop)
647 {
648
649         debug("$vCPU %d suspending\n", vcpu);
650         CPU_SET(vcpu, &vcpus_waiting);
651         if (report_stop && CPU_CMP(&vcpus_waiting, &vcpus_suspended) == 0)
652                 gdb_finish_suspend_vcpus();
653         while (CPU_ISSET(vcpu, &vcpus_suspended) && vcpu != stepping_vcpu)
654                 pthread_cond_wait(&idle_vcpus, &gdb_lock);
655         CPU_CLR(vcpu, &vcpus_waiting);
656         debug("$vCPU %d resuming\n", vcpu);
657 }
658
659 void
660 gdb_cpu_add(int vcpu)
661 {
662
663         debug("$vCPU %d starting\n", vcpu);
664         pthread_mutex_lock(&gdb_lock);
665         CPU_SET(vcpu, &vcpus_active);
666
667         /*
668          * If a vcpu is added while vcpus are stopped, suspend the new
669          * vcpu so that it will pop back out with a debug exit before
670          * executing the first instruction.
671          */
672         if (!CPU_EMPTY(&vcpus_suspended)) {
673                 CPU_SET(vcpu, &vcpus_suspended);
674                 _gdb_cpu_suspend(vcpu, false);
675         }
676         pthread_mutex_unlock(&gdb_lock);
677 }
678
679 void
680 gdb_cpu_suspend(int vcpu)
681 {
682
683         pthread_mutex_lock(&gdb_lock);
684         _gdb_cpu_suspend(vcpu, true);
685         pthread_mutex_unlock(&gdb_lock);
686 }
687
688 void
689 gdb_cpu_mtrap(int vcpu)
690 {
691
692         debug("$vCPU %d MTRAP\n", vcpu);
693         pthread_mutex_lock(&gdb_lock);
694         if (vcpu == stepping_vcpu) {
695                 stepping_vcpu = -1;
696                 vm_set_capability(ctx, vcpu, VM_CAP_MTRAP_EXIT, 0);
697                 vm_suspend_cpu(ctx, vcpu);
698                 assert(stopped_vcpu == -1);
699                 stopped_vcpu = vcpu;
700                 _gdb_cpu_suspend(vcpu, true);
701         }
702         pthread_mutex_unlock(&gdb_lock);
703 }
704
705 static void
706 gdb_suspend_vcpus(void)
707 {
708
709         assert(pthread_mutex_isowned_np(&gdb_lock));
710         debug("suspending all CPUs\n");
711         vcpus_suspended = vcpus_active;
712         vm_suspend_cpu(ctx, -1);
713         if (CPU_CMP(&vcpus_waiting, &vcpus_suspended) == 0)
714                 gdb_finish_suspend_vcpus();
715 }
716
717 static bool
718 gdb_step_vcpu(int vcpu)
719 {
720         int error, val;
721
722         debug("$vCPU %d step\n", vcpu);
723         error = vm_get_capability(ctx, vcpu, VM_CAP_MTRAP_EXIT, &val);
724         if (error < 0)
725                 return (false);
726         error = vm_set_capability(ctx, vcpu, VM_CAP_MTRAP_EXIT, 1);
727         vm_resume_cpu(ctx, vcpu);
728         stepping_vcpu = vcpu;
729         pthread_cond_broadcast(&idle_vcpus);
730         return (true);
731 }
732
733 static void
734 gdb_resume_vcpus(void)
735 {
736
737         assert(pthread_mutex_isowned_np(&gdb_lock));
738         vm_resume_cpu(ctx, -1);
739         debug("resuming all CPUs\n");
740         CPU_ZERO(&vcpus_suspended);
741         pthread_cond_broadcast(&idle_vcpus);
742 }
743
744 static void
745 gdb_read_regs(void)
746 {
747         uint64_t regvals[nitems(gdb_regset)];
748         int i;
749
750         if (vm_get_register_set(ctx, cur_vcpu, nitems(gdb_regset),
751             gdb_regset, regvals) == -1) {
752                 send_error(errno);
753                 return;
754         }
755         start_packet();
756         for (i = 0; i < nitems(regvals); i++)
757                 append_unsigned_native(regvals[i], gdb_regsize[i]);
758         finish_packet();
759 }
760
761 static void
762 gdb_read_mem(const uint8_t *data, size_t len)
763 {
764         uint64_t gpa, gva, val;
765         uint8_t *cp;
766         size_t resid, todo, bytes;
767         bool started;
768         int error;
769
770         /* Skip 'm' */
771         data += 1;
772         len -= 1;
773
774         /* Parse and consume address. */
775         cp = memchr(data, ',', len);
776         if (cp == NULL || cp == data) {
777                 send_error(EINVAL);
778                 return;
779         }
780         gva = parse_integer(data, cp - data);
781         len -= (cp - data) + 1;
782         data += (cp - data) + 1;
783
784         /* Parse length. */
785         resid = parse_integer(data, len);
786
787         started = false;
788         while (resid > 0) {
789                 error = guest_vaddr2paddr(cur_vcpu, gva, &gpa);
790                 if (error == -1) {
791                         if (started)
792                                 finish_packet();
793                         else
794                                 send_error(errno);
795                         return;
796                 }
797                 if (error == 0) {
798                         if (started)
799                                 finish_packet();
800                         else
801                                 send_error(EFAULT);
802                         return;
803                 }
804
805                 /* Read bytes from current page. */
806                 todo = getpagesize() - gpa % getpagesize();
807                 if (todo > resid)
808                         todo = resid;
809
810                 cp = paddr_guest2host(ctx, gpa, todo);
811                 if (cp != NULL) {
812                         /*
813                          * If this page is guest RAM, read it a byte
814                          * at a time.
815                          */
816                         if (!started) {
817                                 start_packet();
818                                 started = true;
819                         }
820                         while (todo > 0) {
821                                 append_byte(*cp);
822                                 cp++;
823                                 gpa++;
824                                 gva++;
825                                 resid--;
826                                 todo--;
827                         }
828                 } else {
829                         /*
830                          * If this page isn't guest RAM, try to handle
831                          * it via MMIO.  For MMIO requests, use
832                          * aligned reads of words when possible.
833                          */
834                         while (todo > 0) {
835                                 if (gpa & 1 || todo == 1)
836                                         bytes = 1;
837                                 else if (gpa & 2 || todo == 2)
838                                         bytes = 2;
839                                 else
840                                         bytes = 4;
841                                 error = read_mem(ctx, cur_vcpu, gpa, &val,
842                                     bytes);
843                                 if (error == 0) {
844                                         if (!started) {
845                                                 start_packet();
846                                                 started = true;
847                                         }
848                                         gpa += bytes;
849                                         gva += bytes;
850                                         resid -= bytes;
851                                         todo -= bytes;
852                                         while (bytes > 0) {
853                                                 append_byte(val);
854                                                 val >>= 8;
855                                                 bytes--;
856                                         }
857                                 } else {
858                                         if (started)
859                                                 finish_packet();
860                                         else
861                                                 send_error(EFAULT);
862                                         return;
863                                 }
864                         }
865                 }
866                 assert(resid == 0 || gpa % getpagesize() == 0);
867         }
868         if (!started)
869                 start_packet();
870         finish_packet();
871 }
872
873 static void
874 gdb_write_mem(const uint8_t *data, size_t len)
875 {
876         uint64_t gpa, gva, val;
877         uint8_t *cp;
878         size_t resid, todo, bytes;
879         int error;
880
881         /* Skip 'M' */
882         data += 1;
883         len -= 1;
884
885         /* Parse and consume address. */
886         cp = memchr(data, ',', len);
887         if (cp == NULL || cp == data) {
888                 send_error(EINVAL);
889                 return;
890         }
891         gva = parse_integer(data, cp - data);
892         len -= (cp - data) + 1;
893         data += (cp - data) + 1;
894
895         /* Parse and consume length. */
896         cp = memchr(data, ':', len);
897         if (cp == NULL || cp == data) {
898                 send_error(EINVAL);
899                 return;
900         }
901         resid = parse_integer(data, cp - data);
902         len -= (cp - data) + 1;
903         data += (cp - data) + 1;
904
905         /* Verify the available bytes match the length. */
906         if (len != resid * 2) {
907                 send_error(EINVAL);
908                 return;
909         }
910
911         while (resid > 0) {
912                 error = guest_vaddr2paddr(cur_vcpu, gva, &gpa);
913                 if (error == -1) {
914                         send_error(errno);
915                         return;
916                 }
917                 if (error == 0) {
918                         send_error(EFAULT);
919                         return;
920                 }
921
922                 /* Write bytes to current page. */
923                 todo = getpagesize() - gpa % getpagesize();
924                 if (todo > resid)
925                         todo = resid;
926
927                 cp = paddr_guest2host(ctx, gpa, todo);
928                 if (cp != NULL) {
929                         /*
930                          * If this page is guest RAM, write it a byte
931                          * at a time.
932                          */
933                         while (todo > 0) {
934                                 assert(len >= 2);
935                                 *cp = parse_byte(data);
936                                 data += 2;
937                                 len -= 2;
938                                 cp++;
939                                 gpa++;
940                                 gva++;
941                                 resid--;
942                                 todo--;
943                         }
944                 } else {
945                         /*
946                          * If this page isn't guest RAM, try to handle
947                          * it via MMIO.  For MMIO requests, use
948                          * aligned writes of words when possible.
949                          */
950                         while (todo > 0) {
951                                 if (gpa & 1 || todo == 1) {
952                                         bytes = 1;
953                                         val = parse_byte(data);
954                                 } else if (gpa & 2 || todo == 2) {
955                                         bytes = 2;
956                                         val = parse_byte(data) |
957                                             (parse_byte(data + 2) << 8);
958                                 } else {
959                                         bytes = 4;
960                                         val = parse_byte(data) |
961                                             (parse_byte(data + 2) << 8) |
962                                             (parse_byte(data + 4) << 16) |
963                                             (parse_byte(data + 6) << 24);
964                                 }
965                                 error = write_mem(ctx, cur_vcpu, gpa, val,
966                                     bytes);
967                                 if (error == 0) {
968                                         gpa += bytes;
969                                         gva += bytes;
970                                         resid -= bytes;
971                                         todo -= bytes;
972                                         data += 2 * bytes;
973                                         len -= 2 * bytes;
974                                 } else {
975                                         send_error(EFAULT);
976                                         return;
977                                 }
978                         }
979                 }
980                 assert(resid == 0 || gpa % getpagesize() == 0);
981         }
982         assert(len == 0);
983         send_ok();
984 }
985
/*
 * Return true if the 'len' bytes at 'data' begin with the string
 * 'cmd'.  This is a prefix match: 'data' may be longer than 'cmd'.
 */
static bool
command_equals(const uint8_t *data, size_t len, const char *cmd)
{
        size_t cmdlen;

        cmdlen = strlen(cmd);
        return (cmdlen <= len && memcmp(data, cmd, cmdlen) == 0);
}
994
/*
 * Walk the ';'-separated feature list in the 'len' bytes at 'data' and
 * reply with the features this stub supports.  Each feature is either
 * "name=value", "name+" or "name-"; malformed entries are skipped.
 * No features from the list are currently acted upon, so the reply is
 * always just "PacketSize=4096".
 *
 * NOTE(review): presumably 'data' is the argument part of a feature
 * negotiation query -- confirm against the caller.
 *
 * An ENOMEM error packet is sent if the scratch copy of the list
 * cannot be allocated.
 */
static void
check_features(const uint8_t *data, size_t len)
{
        char *feature, *next_feature, *str, *value;
        bool supported;

        /* strsep() modifies its input, so work on a terminated copy. */
        str = malloc(len + 1);
        if (str == NULL) {
                send_error(ENOMEM);
                return;
        }
        memcpy(str, data, len);
        str[len] = '\0';
        next_feature = str;

        while ((feature = strsep(&next_feature, ";")) != NULL) {
                /*
                 * Null features shouldn't exist, but skip if they
                 * do.
                 */
                if (strcmp(feature, "") == 0)
                        continue;

                /*
                 * Look for the value or supported / not supported
                 * flag.
                 */
                value = strchr(feature, '=');
                if (value != NULL) {
                        *value = '\0';
                        value++;
                        supported = true;
                } else {
                        value = feature + strlen(feature) - 1;
                        switch (*value) {
                        case '+':
                                supported = true;
                                break;
                        case '-':
                                supported = false;
                                break;
                        default:
                                /*
                                 * This is really a protocol error,
                                 * but we just ignore malformed
                                 * features for ease of
                                 * implementation.
                                 */
                                continue;
                        }
                        value = NULL;
                }

                /*
                 * No currently supported features.  The parsed results
                 * are unused until one is added.
                 */
                (void)supported;
                (void)value;
        }
        free(str);

        start_packet();

        /* This is an arbitrary limit. */
        append_string("PacketSize=4096");
        finish_packet();
}
1054
1055 static void
1056 gdb_query(const uint8_t *data, size_t len)
1057 {
1058
1059         /*
1060          * TODO:
1061          * - qSearch
1062          */
1063         if (command_equals(data, len, "qAttached")) {
1064                 start_packet();
1065                 append_char('1');
1066                 finish_packet();
1067         } else if (command_equals(data, len, "qC")) {
1068                 start_packet();
1069                 append_string("QC");
1070                 append_integer(cur_vcpu + 1);
1071                 finish_packet();
1072         } else if (command_equals(data, len, "qfThreadInfo")) {
1073                 cpuset_t mask;
1074                 bool first;
1075                 int vcpu;
1076
1077                 if (CPU_EMPTY(&vcpus_active)) {
1078                         send_error(EINVAL);
1079                         return;
1080                 }
1081                 mask = vcpus_active;
1082                 start_packet();
1083                 append_char('m');
1084                 first = true;
1085                 while (!CPU_EMPTY(&mask)) {
1086                         vcpu = CPU_FFS(&mask) - 1;
1087                         CPU_CLR(vcpu, &mask);
1088                         if (first)
1089                                 first = false;
1090                         else
1091                                 append_char(',');
1092                         append_integer(vcpu + 1);
1093                 }
1094                 finish_packet();
1095         } else if (command_equals(data, len, "qsThreadInfo")) {
1096                 start_packet();
1097                 append_char('l');
1098                 finish_packet();
1099         } else if (command_equals(data, len, "qSupported")) {
1100                 data += strlen("qSupported");
1101                 len -= strlen("qSupported");
1102                 check_features(data, len);
1103         } else if (command_equals(data, len, "qThreadExtraInfo")) {
1104                 char buf[16];
1105                 int tid;
1106
1107                 data += strlen("qThreadExtraInfo");
1108                 len -= strlen("qThreadExtraInfo");
1109                 if (*data != ',') {
1110                         send_error(EINVAL);
1111                         return;
1112                 }
1113                 tid = parse_threadid(data + 1, len - 1);
1114                 if (tid <= 0 || !CPU_ISSET(tid - 1, &vcpus_active)) {
1115                         send_error(EINVAL);
1116                         return;
1117                 }
1118
1119                 snprintf(buf, sizeof(buf), "vCPU %d", tid - 1);
1120                 start_packet();
1121                 append_asciihex(buf);
1122                 finish_packet();
1123         } else
1124                 send_empty_response();
1125 }
1126
1127 static void
1128 handle_command(const uint8_t *data, size_t len)
1129 {
1130
1131         /* Reject packets with a sequence-id. */
1132         if (len >= 3 && data[0] >= '0' && data[0] <= '9' &&
1133             data[0] >= '0' && data[0] <= '9' && data[2] == ':') {
1134                 send_empty_response();
1135                 return;
1136         }
1137
1138         switch (*data) {
1139         case 'c':
1140                 if (len != 1) {
1141                         send_error(EINVAL);
1142                         break;
1143                 }
1144
1145                 /* Don't send a reply until a stop occurs. */
1146                 gdb_resume_vcpus();
1147                 break;
1148         case 'D':
1149                 send_ok();
1150
1151                 /* TODO: Resume any stopped CPUs. */
1152                 break;
1153         case 'g': {
1154                 gdb_read_regs();
1155                 break;
1156         }
1157         case 'H': {
1158                 int tid;
1159
1160                 if (data[1] != 'g' && data[1] != 'c') {
1161                         send_error(EINVAL);
1162                         break;
1163                 }
1164                 tid = parse_threadid(data + 2, len - 2);
1165                 if (tid == -2) {
1166                         send_error(EINVAL);
1167                         break;
1168                 }
1169
1170                 if (CPU_EMPTY(&vcpus_active)) {
1171                         send_error(EINVAL);
1172                         break;
1173                 }
1174                 if (tid == -1 || tid == 0)
1175                         cur_vcpu = CPU_FFS(&vcpus_active) - 1;
1176                 else if (CPU_ISSET(tid - 1, &vcpus_active))
1177                         cur_vcpu = tid - 1;
1178                 else {
1179                         send_error(EINVAL);
1180                         break;
1181                 }
1182                 send_ok();
1183                 break;
1184         }
1185         case 'm':
1186                 gdb_read_mem(data, len);
1187                 break;
1188         case 'M':
1189                 gdb_write_mem(data, len);
1190                 break;
1191         case 'T': {
1192                 int tid;
1193
1194                 tid = parse_threadid(data + 1, len - 1);
1195                 if (tid <= 0 || !CPU_ISSET(tid - 1, &vcpus_active)) {
1196                         send_error(EINVAL);
1197                         return;
1198                 }
1199                 send_ok();
1200                 break;
1201         }
1202         case 'q':
1203                 gdb_query(data, len);
1204                 break;
1205         case 's':
1206                 if (len != 1) {
1207                         send_error(EINVAL);
1208                         break;
1209                 }
1210
1211                 /* Don't send a reply until a stop occurs. */
1212                 if (!gdb_step_vcpu(cur_vcpu)) {
1213                         send_error(EOPNOTSUPP);
1214                         break;
1215                 }
1216                 break;
1217         case '?':
1218                 /* XXX: Only if stopped? */
1219                 /* For now, just report that we are always stopped. */
1220                 start_packet();
1221                 append_char('S');
1222                 append_byte(GDB_SIGNAL_TRAP);
1223                 finish_packet();
1224                 break;
1225         case 'G': /* TODO */
1226         case 'v':
1227                 /* Handle 'vCont' */
1228                 /* 'vCtrlC' */
1229         case 'p': /* TODO */
1230         case 'P': /* TODO */
1231         case 'Q': /* TODO */
1232         case 't': /* TODO */
1233         case 'X': /* TODO */
1234         case 'z': /* TODO */
1235         case 'Z': /* TODO */
1236         default:
1237                 send_empty_response();
1238         }
1239 }
1240
1241 /* Check for a valid packet in the command buffer. */
1242 static void
1243 check_command(int fd)
1244 {
1245         uint8_t *head, *hash, *p, sum;
1246         size_t avail, plen;
1247
1248         for (;;) {
1249                 avail = cur_comm.len;
1250                 if (avail == 0)
1251                         return;
1252                 head = io_buffer_head(&cur_comm);
1253                 switch (*head) {
1254                 case 0x03:
1255                         debug("<- Ctrl-C\n");
1256                         io_buffer_consume(&cur_comm, 1);
1257
1258                         gdb_suspend_vcpus();
1259                         break;
1260                 case '+':
1261                         /* ACK of previous response. */
1262                         debug("<- +\n");
1263                         if (response_pending())
1264                                 io_buffer_reset(&cur_resp);
1265                         io_buffer_consume(&cur_comm, 1);
1266                         if (stop_pending) {
1267                                 stop_pending = false;
1268                                 report_stop();
1269                                 send_pending_data(fd);
1270                         }
1271                         break;
1272                 case '-':
1273                         /* NACK of previous response. */
1274                         debug("<- -\n");
1275                         if (response_pending()) {
1276                                 cur_resp.len += cur_resp.start;
1277                                 cur_resp.start = 0;
1278                                 if (cur_resp.data[0] == '+')
1279                                         io_buffer_advance(&cur_resp, 1);
1280                                 debug("-> %.*s\n", (int)cur_resp.len,
1281                                     io_buffer_head(&cur_resp));
1282                         }
1283                         io_buffer_consume(&cur_comm, 1);
1284                         send_pending_data(fd);
1285                         break;
1286                 case '$':
1287                         /* Packet. */
1288
1289                         if (response_pending()) {
1290                                 warnx("New GDB command while response in "
1291                                     "progress");
1292                                 io_buffer_reset(&cur_resp);
1293                         }
1294
1295                         /* Is packet complete? */
1296                         hash = memchr(head, '#', avail);
1297                         if (hash == NULL)
1298                                 return;
1299                         plen = (hash - head + 1) + 2;
1300                         if (avail < plen)
1301                                 return;
1302                         debug("<- %.*s\n", (int)plen, head);
1303
1304                         /* Verify checksum. */
1305                         for (sum = 0, p = head + 1; p < hash; p++)
1306                                 sum += *p;
1307                         if (sum != parse_byte(hash + 1)) {
1308                                 io_buffer_consume(&cur_comm, plen);
1309                                 debug("-> -\n");
1310                                 send_char('-');
1311                                 send_pending_data(fd);
1312                                 break;
1313                         }
1314                         send_char('+');
1315
1316                         handle_command(head + 1, hash - (head + 1));
1317                         io_buffer_consume(&cur_comm, plen);
1318                         if (!response_pending())
1319                                 debug("-> +\n");
1320                         send_pending_data(fd);
1321                         break;
1322                 default:
1323                         /* XXX: Possibly drop connection instead. */
1324                         debug("-> %02x\n", *head);
1325                         io_buffer_consume(&cur_comm, 1);
1326                         break;
1327                 }
1328         }
1329 }
1330
1331 static void
1332 gdb_readable(int fd, enum ev_type event, void *arg)
1333 {
1334         ssize_t nread;
1335         int pending;
1336
1337         if (ioctl(fd, FIONREAD, &pending) == -1) {
1338                 warn("FIONREAD on GDB socket");
1339                 return;
1340         }
1341
1342         /*
1343          * 'pending' might be zero due to EOF.  We need to call read
1344          * with a non-zero length to detect EOF.
1345          */
1346         if (pending == 0)
1347                 pending = 1;
1348
1349         /* Ensure there is room in the command buffer. */
1350         io_buffer_grow(&cur_comm, pending);
1351         assert(io_buffer_avail(&cur_comm) >= pending);
1352
1353         nread = read(fd, io_buffer_tail(&cur_comm), io_buffer_avail(&cur_comm));
1354         if (nread == 0) {
1355                 close_connection();
1356         } else if (nread == -1) {
1357                 if (errno == EAGAIN)
1358                         return;
1359
1360                 warn("Read from GDB socket");
1361                 close_connection();
1362         } else {
1363                 cur_comm.len += nread;
1364                 pthread_mutex_lock(&gdb_lock);
1365                 check_command(fd);
1366                 pthread_mutex_unlock(&gdb_lock);
1367         }
1368 }
1369
1370 static void
1371 gdb_writable(int fd, enum ev_type event, void *arg)
1372 {
1373
1374         send_pending_data(fd);
1375 }
1376
1377 static void
1378 new_connection(int fd, enum ev_type event, void *arg)
1379 {
1380         int optval, s;
1381
1382         s = accept4(fd, NULL, NULL, SOCK_NONBLOCK);
1383         if (s == -1) {
1384                 if (arg != NULL)
1385                         err(1, "Failed accepting initial GDB connection");
1386
1387                 /* Silently ignore errors post-startup. */
1388                 return;
1389         }
1390
1391         optval = 1;
1392         if (setsockopt(s, SOL_SOCKET, SO_NOSIGPIPE, &optval, sizeof(optval)) ==
1393             -1) {
1394                 warn("Failed to disable SIGPIPE for GDB connection");
1395                 close(s);
1396                 return;
1397         }
1398
1399         pthread_mutex_lock(&gdb_lock);
1400         if (cur_fd != -1) {
1401                 close(s);
1402                 warnx("Ignoring additional GDB connection.");
1403         }
1404
1405         read_event = mevent_add(s, EVF_READ, gdb_readable, NULL);
1406         if (read_event == NULL) {
1407                 if (arg != NULL)
1408                         err(1, "Failed to setup initial GDB connection");
1409                 pthread_mutex_unlock(&gdb_lock);
1410                 return;
1411         }
1412         write_event = mevent_add(s, EVF_WRITE, gdb_writable, NULL);
1413         if (write_event == NULL) {
1414                 if (arg != NULL)
1415                         err(1, "Failed to setup initial GDB connection");
1416                 mevent_delete_close(read_event);
1417                 read_event = NULL;
1418         }
1419
1420         cur_fd = s;
1421         cur_vcpu = 0;
1422         stepping_vcpu = -1;
1423         stopped_vcpu = -1;
1424         stop_pending = false;
1425
1426         /* Break on attach. */
1427         first_stop = true;
1428         gdb_suspend_vcpus();
1429         pthread_mutex_unlock(&gdb_lock);
1430 }
1431
1432 #ifndef WITHOUT_CAPSICUM
1433 void
1434 limit_gdb_socket(int s)
1435 {
1436         cap_rights_t rights;
1437         unsigned long ioctls[] = { FIONREAD };
1438
1439         cap_rights_init(&rights, CAP_ACCEPT, CAP_EVENT, CAP_READ, CAP_WRITE,
1440             CAP_SETSOCKOPT, CAP_IOCTL);
1441         if (caph_rights_limit(s, &rights) == -1)
1442                 errx(EX_OSERR, "Unable to apply rights for sandbox");
1443         if (caph_ioctls_limit(s, ioctls, nitems(ioctls)) == -1)
1444                 errx(EX_OSERR, "Unable to apply rights for sandbox");
1445 }
1446 #endif
1447
1448 void
1449 init_gdb(struct vmctx *_ctx, int sport, bool wait)
1450 {
1451         struct sockaddr_in sin;
1452         int error, flags, s;
1453
1454         debug("==> starting on %d, %swaiting\n", sport, wait ? "" : "not ");
1455
1456         error = pthread_mutex_init(&gdb_lock, NULL);
1457         if (error != 0)
1458                 errc(1, error, "gdb mutex init");
1459         error = pthread_cond_init(&idle_vcpus, NULL);
1460         if (error != 0)
1461                 errc(1, error, "gdb cv init");
1462
1463         ctx = _ctx;
1464         s = socket(PF_INET, SOCK_STREAM, 0);
1465         if (s < 0)
1466                 err(1, "gdb socket create");
1467
1468         sin.sin_len = sizeof(sin);
1469         sin.sin_family = AF_INET;
1470         sin.sin_addr.s_addr = htonl(INADDR_ANY);
1471         sin.sin_port = htons(sport);
1472
1473         if (bind(s, (struct sockaddr *)&sin, sizeof(sin)) < 0)
1474                 err(1, "gdb socket bind");
1475
1476         if (listen(s, 1) < 0)
1477                 err(1, "gdb socket listen");
1478
1479         if (wait) {
1480                 /*
1481                  * Set vcpu 0 in vcpus_suspended.  This will trigger the
1482                  * logic in gdb_cpu_add() to suspend the first vcpu before
1483                  * it starts execution.  The vcpu will remain suspended
1484                  * until a debugger connects.
1485                  */
1486                 stepping_vcpu = -1;
1487                 stopped_vcpu = -1;
1488                 CPU_SET(0, &vcpus_suspended);
1489         }
1490
1491         flags = fcntl(s, F_GETFL);
1492         if (fcntl(s, F_SETFL, flags | O_NONBLOCK) == -1)
1493                 err(1, "Failed to mark gdb socket non-blocking");
1494
1495 #ifndef WITHOUT_CAPSICUM
1496         limit_gdb_socket(s);
1497 #endif
1498         mevent_add(s, EVF_READ, new_connection, NULL);
1499 }