2 * SPDX-License-Identifier: BSD-2-Clause-FreeBSD
4 * Copyright (c) 2011 NetApp, Inc.
7 * Redistribution and use in source and binary forms, with or without
8 * modification, are permitted provided that the following conditions
10 * 1. Redistributions of source code must retain the above copyright
11 * notice, this list of conditions and the following disclaimer.
12 * 2. Redistributions in binary form must reproduce the above copyright
13 * notice, this list of conditions and the following disclaimer in the
14 * documentation and/or other materials provided with the distribution.
16 * THIS SOFTWARE IS PROVIDED BY NETAPP, INC ``AS IS'' AND
17 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
18 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
19 * ARE DISCLAIMED. IN NO EVENT SHALL NETAPP, INC OR CONTRIBUTORS BE LIABLE
20 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
21 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
22 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
23 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
24 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
25 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
31 #include "opt_bhyve_snapshot.h"
34 #include <sys/cdefs.h>
35 __FBSDID("$FreeBSD$");
37 #include <sys/param.h>
38 #include <sys/sysctl.h>
39 #include <sys/systm.h>
45 #include <machine/segments.h>
46 #include <machine/vmm.h>
47 #include <machine/vmm_snapshot.h>
49 #include "vmx_cpufunc.h"
/*
 * Tunable: when set non-zero, skip flushing the Return Stack Buffer on
 * VM exit (selects the non-flushing exit path in vmcs_init() below).
 * Exposed read/write under the hw.vmm.vmx sysctl tree.
 */
58 SYSCTL_DECL(_hw_vmm_vmx);
60 static int no_flush_rsb;
61 SYSCTL_INT(_hw_vmm_vmx, OID_AUTO, no_flush_rsb, CTLFLAG_RW,
62 &no_flush_rsb, 0, "Do not flush RSB upon vmexit");
/*
 * Sanitize 'val' before it is written into the VMCS.  The visible code
 * passes the value through vmx_fix_cr0()/vmx_fix_cr4(), which presumably
 * force the bits mandated by the VMX fixed-bit capability MSRs.
 * NOTE(review): the return type, switch header on 'encoding', and the
 * return statement are elided from this chunk (gaps in the embedded
 * numbering) — comments describe only the visible lines.
 */
65 vmcs_fix_regval(uint32_t encoding, uint64_t val)
70 val = vmx_fix_cr0(val);
73 val = vmx_fix_cr4(val);
/*
 * Translate a VM_REG_GUEST_* register identifier into the corresponding
 * VMCS field encoding.  Presumably returns (uint32_t)-1 (i.e.
 * VMCS_INVALID_ENCODING) for identifiers with no VMCS field — callers
 * vmcs_getreg()/vmcs_setreg() below check for that sentinel.
 * NOTE(review): the switch header, the VM_REG_GUEST_ES case label
 * (embedded line 99), and the default/closing lines are elided from
 * this chunk.
 */
82 vmcs_field_encoding(int ident)
85 case VM_REG_GUEST_CR0:
86 return (VMCS_GUEST_CR0);
87 case VM_REG_GUEST_CR3:
88 return (VMCS_GUEST_CR3);
89 case VM_REG_GUEST_CR4:
90 return (VMCS_GUEST_CR4);
91 case VM_REG_GUEST_DR7:
92 return (VMCS_GUEST_DR7);
93 case VM_REG_GUEST_RSP:
94 return (VMCS_GUEST_RSP);
95 case VM_REG_GUEST_RIP:
96 return (VMCS_GUEST_RIP);
97 case VM_REG_GUEST_RFLAGS:
98 return (VMCS_GUEST_RFLAGS);
/* NOTE(review): 'case VM_REG_GUEST_ES:' is elided before this return. */
100 return (VMCS_GUEST_ES_SELECTOR);
101 case VM_REG_GUEST_CS:
102 return (VMCS_GUEST_CS_SELECTOR);
103 case VM_REG_GUEST_SS:
104 return (VMCS_GUEST_SS_SELECTOR);
105 case VM_REG_GUEST_DS:
106 return (VMCS_GUEST_DS_SELECTOR);
107 case VM_REG_GUEST_FS:
108 return (VMCS_GUEST_FS_SELECTOR);
109 case VM_REG_GUEST_GS:
110 return (VMCS_GUEST_GS_SELECTOR);
111 case VM_REG_GUEST_TR:
112 return (VMCS_GUEST_TR_SELECTOR);
113 case VM_REG_GUEST_LDTR:
114 return (VMCS_GUEST_LDTR_SELECTOR);
115 case VM_REG_GUEST_EFER:
116 return (VMCS_GUEST_IA32_EFER);
117 case VM_REG_GUEST_PDPTE0:
118 return (VMCS_GUEST_PDPTE0);
119 case VM_REG_GUEST_PDPTE1:
120 return (VMCS_GUEST_PDPTE1);
121 case VM_REG_GUEST_PDPTE2:
122 return (VMCS_GUEST_PDPTE2);
123 case VM_REG_GUEST_PDPTE3:
124 return (VMCS_GUEST_PDPTE3);
125 case VM_REG_GUEST_ENTRY_INST_LENGTH:
126 return (VMCS_ENTRY_INST_LENGTH);
/*
 * For segment register 'seg', write the VMCS field encodings of its
 * base, limit and access-rights fields into *base, *lim and *acc.
 * IDTR and GDTR have no access-rights field, so *acc is set to
 * VMCS_INVALID_ENCODING for them — callers vmcs_setdesc()/vmcs_getdesc()
 * check for that sentinel before touching the access field.
 * NOTE(review): the switch header, per-case 'break' statements, the
 * default case and the function's return are elided from this chunk;
 * the visible callers panic on a non-zero return, suggesting an error
 * is returned for unknown 'seg' values — confirm against full source.
 */
134 vmcs_seg_desc_encoding(int seg, uint32_t *base, uint32_t *lim, uint32_t *acc)
138 case VM_REG_GUEST_ES:
139 *base = VMCS_GUEST_ES_BASE;
140 *lim = VMCS_GUEST_ES_LIMIT;
141 *acc = VMCS_GUEST_ES_ACCESS_RIGHTS;
143 case VM_REG_GUEST_CS:
144 *base = VMCS_GUEST_CS_BASE;
145 *lim = VMCS_GUEST_CS_LIMIT;
146 *acc = VMCS_GUEST_CS_ACCESS_RIGHTS;
148 case VM_REG_GUEST_SS:
149 *base = VMCS_GUEST_SS_BASE;
150 *lim = VMCS_GUEST_SS_LIMIT;
151 *acc = VMCS_GUEST_SS_ACCESS_RIGHTS;
153 case VM_REG_GUEST_DS:
154 *base = VMCS_GUEST_DS_BASE;
155 *lim = VMCS_GUEST_DS_LIMIT;
156 *acc = VMCS_GUEST_DS_ACCESS_RIGHTS;
158 case VM_REG_GUEST_FS:
159 *base = VMCS_GUEST_FS_BASE;
160 *lim = VMCS_GUEST_FS_LIMIT;
161 *acc = VMCS_GUEST_FS_ACCESS_RIGHTS;
163 case VM_REG_GUEST_GS:
164 *base = VMCS_GUEST_GS_BASE;
165 *lim = VMCS_GUEST_GS_LIMIT;
166 *acc = VMCS_GUEST_GS_ACCESS_RIGHTS;
168 case VM_REG_GUEST_TR:
169 *base = VMCS_GUEST_TR_BASE;
170 *lim = VMCS_GUEST_TR_LIMIT;
171 *acc = VMCS_GUEST_TR_ACCESS_RIGHTS;
173 case VM_REG_GUEST_LDTR:
174 *base = VMCS_GUEST_LDTR_BASE;
175 *lim = VMCS_GUEST_LDTR_LIMIT;
176 *acc = VMCS_GUEST_LDTR_ACCESS_RIGHTS;
178 case VM_REG_GUEST_IDTR:
179 *base = VMCS_GUEST_IDTR_BASE;
180 *lim = VMCS_GUEST_IDTR_LIMIT;
/* IDTR has no access-rights VMCS field. */
181 *acc = VMCS_INVALID_ENCODING;
183 case VM_REG_GUEST_GDTR:
184 *base = VMCS_GUEST_GDTR_BASE;
185 *lim = VMCS_GUEST_GDTR_LIMIT;
/* GDTR has no access-rights VMCS field. */
186 *acc = VMCS_INVALID_ENCODING;
/*
 * Read register 'ident' from the VMCS into *retval.  A negative 'ident'
 * bypasses vmcs_field_encoding(): the sign bit is stripped and the
 * remaining 31 bits are used directly as the VMCS field encoding (the
 * upper 16 bits of real encodings are reserved-zero, so the sign bit is
 * free for this purpose — see the comment carried over below).
 * NOTE(review): the branch structure around the two 'encoding ='
 * assignments, the error return for an invalid encoding, and the use of
 * 'running' are elided from this chunk.
 */
196 vmcs_getreg(struct vmcs *vmcs, int running, int ident, uint64_t *retval)
202 * If we need to get at vmx-specific state in the VMCS we can bypass
203 * the translation of 'ident' to 'encoding' by simply setting the
204 * sign bit. As it so happens the upper 16 bits are reserved (i.e
205 * set to 0) in the encodings for the VMCS so we are free to use the
209 encoding = ident & 0x7fffffff;
211 encoding = vmcs_field_encoding(ident);
213 if (encoding == (uint32_t)-1)
219 error = vmread(encoding, retval);
/*
 * Write 'val' to register 'ident' in the VMCS.  Mirrors vmcs_getreg():
 * a negative 'ident' is used (sans sign bit) directly as the VMCS field
 * encoding, otherwise vmcs_field_encoding() translates it.  The value
 * is passed through vmcs_fix_regval() so CR0/CR4 writes respect the
 * VMX fixed bits before the vmwrite.
 * NOTE(review): branch structure, the invalid-encoding error path and
 * the use of 'running' are elided from this chunk.
 */
228 vmcs_setreg(struct vmcs *vmcs, int running, int ident, uint64_t val)
234 encoding = ident & 0x7fffffff;
236 encoding = vmcs_field_encoding(ident);
238 if (encoding == (uint32_t)-1)
241 val = vmcs_fix_regval(encoding, val);
246 error = vmwrite(encoding, val);
/*
 * Write the base/limit/access fields of segment descriptor 'desc' into
 * the VMCS fields for segment 'seg'.  Panics on an invalid segment
 * (vmcs_seg_desc_encoding() failure).  The access field is only written
 * when the segment actually has one (IDTR/GDTR do not — their encoding
 * is VMCS_INVALID_ENCODING).
 * NOTE(review): error-path bodies after each vmwrite check and the use
 * of 'running' are elided from this chunk.
 */
255 vmcs_setdesc(struct vmcs *vmcs, int running, int seg, struct seg_desc *desc)
258 uint32_t base, limit, access;
260 error = vmcs_seg_desc_encoding(seg, &base, &limit, &access);
262 panic("vmcs_setdesc: invalid segment register %d", seg);
266 if ((error = vmwrite(base, desc->base)) != 0)
269 if ((error = vmwrite(limit, desc->limit)) != 0)
272 if (access != VMCS_INVALID_ENCODING) {
273 if ((error = vmwrite(access, desc->access)) != 0)
/*
 * Read the base/limit/access VMCS fields for segment 'seg' into *desc.
 * Counterpart of vmcs_setdesc(): panics on an invalid segment, reads
 * each field through a uint64_t scratch ('u64'), and skips the access
 * field for segments that lack one (IDTR/GDTR).
 * NOTE(review): the assignments from 'u64' into desc's members and the
 * error-path bodies are elided from this chunk.
 */
283 vmcs_getdesc(struct vmcs *vmcs, int running, int seg, struct seg_desc *desc)
286 uint32_t base, limit, access;
289 error = vmcs_seg_desc_encoding(seg, &base, &limit, &access);
291 panic("vmcs_getdesc: invalid segment register %d", seg);
295 if ((error = vmread(base, &u64)) != 0)
299 if ((error = vmread(limit, &u64)) != 0)
303 if (access != VMCS_INVALID_ENCODING) {
304 if ((error = vmread(access, &u64)) != 0)
/*
 * Point the VM-exit MSR-store and VM-entry MSR-load areas at the same
 * guest MSR array ('g_area', 'g_count' entries): guest MSRs are saved
 * there on exit and restored from there on entry.
 * NOTE(review): error-path bodies after each vmwrite and the function's
 * return are elided from this chunk.
 */
315 vmcs_set_msr_save(struct vmcs *vmcs, u_long g_area, u_int g_count)
322 * Guest MSRs are saved in the VM-exit MSR-store area.
323 * Guest MSRs are loaded from the VM-entry MSR-load area.
324 * Both areas point to the same location in memory.
326 if ((error = vmwrite(VMCS_EXIT_MSR_STORE, g_area)) != 0)
328 if ((error = vmwrite(VMCS_EXIT_MSR_STORE_COUNT, g_count)) != 0)
331 if ((error = vmwrite(VMCS_ENTRY_MSR_LOAD, g_area)) != 0)
333 if ((error = vmwrite(VMCS_ENTRY_MSR_LOAD_COUNT, g_count)) != 0)
/*
 * One-time initialization of the host-state area of a VMCS: host PAT
 * and EFER MSRs, CR0/CR4 (with CR4_VMXE forced on), segment selectors,
 * %fs and IDTR base addresses, the host RIP (the VM-exit entry point —
 * either vmx_exit_guest or the RSB-flushing variant, selected by the
 * no_flush_rsb sysctl above), and the VMCS link pointer (~0 = unused).
 * Per the carried-over comment, %gs/TSS/GDTR bases are excluded here
 * because they are per-CPU and presumably loaded elsewhere.
 * NOTE(review): the vmclear/vmptrld "current VMCS" setup, the 'done'
 * error-exit labels, and the if/else around the two VMCS_HOST_RIP
 * writes are elided from this chunk.
 */
343 vmcs_init(struct vmcs *vmcs)
345 int error, codesel, datasel, tsssel;
346 u_long cr0, cr4, efer;
347 uint64_t pat, fsbase, idtrbase;
349 codesel = vmm_get_host_codesel();
350 datasel = vmm_get_host_datasel();
351 tsssel = vmm_get_host_tsssel();
354 * Make sure we have a "current" VMCS to work with.
360 /* Initialize host IA32_PAT MSR */
361 pat = vmm_get_host_pat();
362 if ((error = vmwrite(VMCS_HOST_IA32_PAT, pat)) != 0)
365 /* Load the IA32_EFER MSR */
366 efer = vmm_get_host_efer();
367 if ((error = vmwrite(VMCS_HOST_IA32_EFER, efer)) != 0)
370 /* Load the control registers */
372 cr0 = vmm_get_host_cr0();
373 if ((error = vmwrite(VMCS_HOST_CR0, cr0)) != 0)
376 cr4 = vmm_get_host_cr4() | CR4_VMXE;
377 if ((error = vmwrite(VMCS_HOST_CR4, cr4)) != 0)
380 /* Load the segment selectors */
381 if ((error = vmwrite(VMCS_HOST_ES_SELECTOR, datasel)) != 0)
384 if ((error = vmwrite(VMCS_HOST_CS_SELECTOR, codesel)) != 0)
387 if ((error = vmwrite(VMCS_HOST_SS_SELECTOR, datasel)) != 0)
390 if ((error = vmwrite(VMCS_HOST_DS_SELECTOR, datasel)) != 0)
393 if ((error = vmwrite(VMCS_HOST_FS_SELECTOR, datasel)) != 0)
396 if ((error = vmwrite(VMCS_HOST_GS_SELECTOR, datasel)) != 0)
399 if ((error = vmwrite(VMCS_HOST_TR_SELECTOR, tsssel)) != 0)
403 * Load the Base-Address for %fs and idtr.
405 * Note that we exclude %gs, tss and gdtr here because their base
406 * address is pcpu specific.
408 fsbase = vmm_get_host_fsbase()
410 if ((error = vmwrite(VMCS_HOST_FS_BASE, fsbase)) != 0)
412 idtrbase = vmm_get_host_idtrbase();
413 if ((error = vmwrite(VMCS_HOST_IDTR_BASE, idtrbase)) != 0)
416 /* instruction pointer */
418 if ((error = vmwrite(VMCS_HOST_RIP,
419 (u_long)vmx_exit_guest)) != 0)
422 if ((error = vmwrite(VMCS_HOST_RIP,
423 (u_long)vmx_exit_guest_flush_rsb)) != 0)
428 if ((error = vmwrite(VMCS_LINK_POINTER, ~0)) != 0)
#ifdef BHYVE_SNAPSHOT
/*
 * Snapshot helper: read a VMCS field by its raw encoding ('ident' is
 * passed straight to vmread, with no VM_REG_GUEST_* translation).
 * NOTE(review): return type, the 'running' handling and the return
 * statement are elided from this chunk.
 */
437 vmcs_getany(struct vmcs *vmcs, int running, int ident, uint64_t *val)
444 error = vmread(ident, val);
/*
 * Snapshot helper: write a VMCS field by its raw encoding ('ident' is
 * passed straight to vmwrite).  Counterpart of vmcs_getany().
 * NOTE(review): most of this function's lines are elided from this
 * chunk; only the signature and the vmwrite call are visible.
 */
453 vmcs_setany(struct vmcs *vmcs, int running, int ident, uint64_t val)
460 error = vmwrite(ident, val);
/*
 * Save or restore a single guest register through the snapshot stream:
 * on VM_SNAPSHOT_SAVE read it with vmcs_getreg() then emit it; on
 * VM_SNAPSHOT_RESTORE consume it from the stream then write it back
 * with vmcs_setreg().  SNAPSHOT_VAR_OR_LEAVE jumps to the (elided)
 * 'done' label on stream errors.
 * NOTE(review): the error checks after get/set, the else branch for an
 * unknown meta->op, and the 'done' label/return are elided.
 */
469 vmcs_snapshot_reg(struct vmcs *vmcs, int running, int ident,
470 struct vm_snapshot_meta *meta)
475 if (meta->op == VM_SNAPSHOT_SAVE) {
476 ret = vmcs_getreg(vmcs, running, ident, &val);
480 SNAPSHOT_VAR_OR_LEAVE(val, meta, ret, done);
481 } else if (meta->op == VM_SNAPSHOT_RESTORE) {
482 SNAPSHOT_VAR_OR_LEAVE(val, meta, ret, done);
484 ret = vmcs_setreg(vmcs, running, ident, val);
/*
 * Save or restore one segment descriptor (base, limit, access) through
 * the snapshot stream, using vmcs_getdesc()/vmcs_setdesc() for the VMCS
 * side.  Field order in the stream is base, limit, access — identical
 * on save and restore.
 * NOTE(review): the error checks after get/set, the unknown-op branch,
 * and the 'done' label/return are elided from this chunk.
 */
497 vmcs_snapshot_desc(struct vmcs *vmcs, int running, int seg,
498 struct vm_snapshot_meta *meta)
501 struct seg_desc desc;
503 if (meta->op == VM_SNAPSHOT_SAVE) {
504 ret = vmcs_getdesc(vmcs, running, seg, &desc);
508 SNAPSHOT_VAR_OR_LEAVE(desc.base, meta, ret, done);
509 SNAPSHOT_VAR_OR_LEAVE(desc.limit, meta, ret, done);
510 SNAPSHOT_VAR_OR_LEAVE(desc.access, meta, ret, done);
511 } else if (meta->op == VM_SNAPSHOT_RESTORE) {
512 SNAPSHOT_VAR_OR_LEAVE(desc.base, meta, ret, done);
513 SNAPSHOT_VAR_OR_LEAVE(desc.limit, meta, ret, done);
514 SNAPSHOT_VAR_OR_LEAVE(desc.access, meta, ret, done);
516 ret = vmcs_setdesc(vmcs, running, seg, &desc);
/*
 * Save or restore an arbitrary VMCS field (raw encoding 'ident')
 * through the snapshot stream, via vmcs_getany()/vmcs_setany().
 * Structurally identical to vmcs_snapshot_reg() but without the
 * VM_REG_GUEST_* -> encoding translation.
 * NOTE(review): the error checks, unknown-op branch and 'done'
 * label/return are elided from this chunk.
 */
529 vmcs_snapshot_any(struct vmcs *vmcs, int running, int ident,
530 struct vm_snapshot_meta *meta)
535 if (meta->op == VM_SNAPSHOT_SAVE) {
536 ret = vmcs_getany(vmcs, running, ident, &val);
540 SNAPSHOT_VAR_OR_LEAVE(val, meta, ret, done);
541 } else if (meta->op == VM_SNAPSHOT_RESTORE) {
542 SNAPSHOT_VAR_OR_LEAVE(val, meta, ret, done);
544 ret = vmcs_setany(vmcs, running, ident, val);
/* Per-CPU flag set when VMX operation has been entered; defined elsewhere. */
558 extern int vmxon_enabled[];
/*
 * DDB "show vmcs" command: dump the current CPU's active VMCS —
 * VPID, guest activity state, exit reason (or entry-failure reason when
 * bit 31 of the exit reason is set), exit qualification, guest linear
 * address, and per-exit-reason detail (interrupt info for
 * exception/external-interrupt exits, guest-physical address for EPT
 * faults/misconfigurations), ending with the VM-instruction error.
 * Bails out early if VMX is not enabled on this CPU or no VMCS is
 * current (VMCS pointer still VMCS_INITIAL).
 * NOTE(review): this chunk elides the vmptrst-style read into
 * 'cur_vmcs', several activity-state case labels, 'break' statements,
 * and the closing lines — the command likely continues past the end of
 * the visible source.
 */
560 DB_SHOW_COMMAND(vmcs, db_show_vmcs)
562 uint64_t cur_vmcs, val;
565 if (!vmxon_enabled[curcpu]) {
566 db_printf("VMX not enabled\n");
571 db_printf("Only current VMCS supported\n");
576 if (cur_vmcs == VMCS_INITIAL) {
577 db_printf("No current VM context\n");
580 db_printf("VMCS: %jx\n", cur_vmcs);
581 db_printf("VPID: %lu\n", vmcs_read(VMCS_VPID));
582 db_printf("Activity: ");
583 val = vmcs_read(VMCS_GUEST_ACTIVITY);
592 db_printf("Shutdown");
595 db_printf("Wait for SIPI");
598 db_printf("Unknown: %#lx", val);
601 exit = vmcs_read(VMCS_EXIT_REASON);
/* Bit 31 of the exit reason distinguishes VM-entry failures. */
602 if (exit & 0x80000000)
603 db_printf("Entry Failure Reason: %u\n", exit & 0xffff);
605 db_printf("Exit Reason: %u\n", exit & 0xffff);
606 db_printf("Qualification: %#lx\n", vmcs_exit_qualification());
607 db_printf("Guest Linear Address: %#lx\n",
608 vmcs_read(VMCS_GUEST_LINEAR_ADDRESS));
609 switch (exit & 0x8000ffff) {
610 case EXIT_REASON_EXCEPTION:
611 case EXIT_REASON_EXT_INTR:
612 val = vmcs_read(VMCS_EXIT_INTR_INFO);
613 db_printf("Interrupt Type: ");
/* Bits 10:8 of the exit interruption info encode the event type. */
614 switch (val >> 8 & 0x7) {
616 db_printf("external");
622 db_printf("HW exception");
625 db_printf("SW exception");
628 db_printf("?? %lu", val >> 8 & 0x7);
631 db_printf(" Vector: %lu", val & 0xff);
633 db_printf(" Error Code: %lx",
634 vmcs_read(VMCS_EXIT_INTR_ERRCODE));
637 case EXIT_REASON_EPT_FAULT:
638 case EXIT_REASON_EPT_MISCONFIG:
639 db_printf("Guest Physical Address: %#lx\n",
640 vmcs_read(VMCS_GUEST_PHYSICAL_ADDRESS));
643 db_printf("VM-instruction error: %#lx\n", vmcs_instruction_error());