CyberLeo.Net >> Repos - FreeBSD/FreeBSD.git/blob - sys/amd64/vmm/intel/vmcs.c
Initial support for bhyve save and restore.
[FreeBSD/FreeBSD.git] / sys / amd64 / vmm / intel / vmcs.c
1 /*-
2  * SPDX-License-Identifier: BSD-2-Clause-FreeBSD
3  *
4  * Copyright (c) 2011 NetApp, Inc.
5  * All rights reserved.
6  *
7  * Redistribution and use in source and binary forms, with or without
8  * modification, are permitted provided that the following conditions
9  * are met:
10  * 1. Redistributions of source code must retain the above copyright
11  *    notice, this list of conditions and the following disclaimer.
12  * 2. Redistributions in binary form must reproduce the above copyright
13  *    notice, this list of conditions and the following disclaimer in the
14  *    documentation and/or other materials provided with the distribution.
15  *
16  * THIS SOFTWARE IS PROVIDED BY NETAPP, INC ``AS IS'' AND
17  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
18  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
19  * ARE DISCLAIMED.  IN NO EVENT SHALL NETAPP, INC OR CONTRIBUTORS BE LIABLE
20  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
21  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
22  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
23  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
24  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
25  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
26  * SUCH DAMAGE.
27  *
28  * $FreeBSD$
29  */
30
31 #include "opt_bhyve_snapshot.h"
32 #include "opt_ddb.h"
33
34 #include <sys/cdefs.h>
35 __FBSDID("$FreeBSD$");
36
37 #include <sys/param.h>
38 #include <sys/sysctl.h>
39 #include <sys/systm.h>
40 #include <sys/pcpu.h>
41
42 #include <vm/vm.h>
43 #include <vm/pmap.h>
44
45 #include <machine/segments.h>
46 #include <machine/vmm.h>
47 #include <machine/vmm_snapshot.h>
48 #include "vmm_host.h"
49 #include "vmx_cpufunc.h"
50 #include "vmcs.h"
51 #include "ept.h"
52 #include "vmx.h"
53
54 #ifdef DDB
55 #include <ddb/ddb.h>
56 #endif
57
58 SYSCTL_DECL(_hw_vmm_vmx);
59
60 static int no_flush_rsb;
61 SYSCTL_INT(_hw_vmm_vmx, OID_AUTO, no_flush_rsb, CTLFLAG_RW,
62     &no_flush_rsb, 0, "Do not flush RSB upon vmexit");
63
64 static uint64_t
65 vmcs_fix_regval(uint32_t encoding, uint64_t val)
66 {
67
68         switch (encoding) {
69         case VMCS_GUEST_CR0:
70                 val = vmx_fix_cr0(val);
71                 break;
72         case VMCS_GUEST_CR4:
73                 val = vmx_fix_cr4(val);
74                 break;
75         default:
76                 break;
77         }
78         return (val);
79 }
80
81 static uint32_t
82 vmcs_field_encoding(int ident)
83 {
84         switch (ident) {
85         case VM_REG_GUEST_CR0:
86                 return (VMCS_GUEST_CR0);
87         case VM_REG_GUEST_CR3:
88                 return (VMCS_GUEST_CR3);
89         case VM_REG_GUEST_CR4:
90                 return (VMCS_GUEST_CR4);
91         case VM_REG_GUEST_DR7:
92                 return (VMCS_GUEST_DR7);
93         case VM_REG_GUEST_RSP:
94                 return (VMCS_GUEST_RSP);
95         case VM_REG_GUEST_RIP:
96                 return (VMCS_GUEST_RIP);
97         case VM_REG_GUEST_RFLAGS:
98                 return (VMCS_GUEST_RFLAGS);
99         case VM_REG_GUEST_ES:
100                 return (VMCS_GUEST_ES_SELECTOR);
101         case VM_REG_GUEST_CS:
102                 return (VMCS_GUEST_CS_SELECTOR);
103         case VM_REG_GUEST_SS:
104                 return (VMCS_GUEST_SS_SELECTOR);
105         case VM_REG_GUEST_DS:
106                 return (VMCS_GUEST_DS_SELECTOR);
107         case VM_REG_GUEST_FS:
108                 return (VMCS_GUEST_FS_SELECTOR);
109         case VM_REG_GUEST_GS:
110                 return (VMCS_GUEST_GS_SELECTOR);
111         case VM_REG_GUEST_TR:
112                 return (VMCS_GUEST_TR_SELECTOR);
113         case VM_REG_GUEST_LDTR:
114                 return (VMCS_GUEST_LDTR_SELECTOR);
115         case VM_REG_GUEST_EFER:
116                 return (VMCS_GUEST_IA32_EFER);
117         case VM_REG_GUEST_PDPTE0:
118                 return (VMCS_GUEST_PDPTE0);
119         case VM_REG_GUEST_PDPTE1:
120                 return (VMCS_GUEST_PDPTE1);
121         case VM_REG_GUEST_PDPTE2:
122                 return (VMCS_GUEST_PDPTE2);
123         case VM_REG_GUEST_PDPTE3:
124                 return (VMCS_GUEST_PDPTE3);
125         case VM_REG_GUEST_ENTRY_INST_LENGTH:
126                 return (VMCS_ENTRY_INST_LENGTH);
127         default:
128                 return (-1);
129         }
130
131 }
132
133 static int
134 vmcs_seg_desc_encoding(int seg, uint32_t *base, uint32_t *lim, uint32_t *acc)
135 {
136
137         switch (seg) {
138         case VM_REG_GUEST_ES:
139                 *base = VMCS_GUEST_ES_BASE;
140                 *lim = VMCS_GUEST_ES_LIMIT;
141                 *acc = VMCS_GUEST_ES_ACCESS_RIGHTS;
142                 break;
143         case VM_REG_GUEST_CS:
144                 *base = VMCS_GUEST_CS_BASE;
145                 *lim = VMCS_GUEST_CS_LIMIT;
146                 *acc = VMCS_GUEST_CS_ACCESS_RIGHTS;
147                 break;
148         case VM_REG_GUEST_SS:
149                 *base = VMCS_GUEST_SS_BASE;
150                 *lim = VMCS_GUEST_SS_LIMIT;
151                 *acc = VMCS_GUEST_SS_ACCESS_RIGHTS;
152                 break;
153         case VM_REG_GUEST_DS:
154                 *base = VMCS_GUEST_DS_BASE;
155                 *lim = VMCS_GUEST_DS_LIMIT;
156                 *acc = VMCS_GUEST_DS_ACCESS_RIGHTS;
157                 break;
158         case VM_REG_GUEST_FS:
159                 *base = VMCS_GUEST_FS_BASE;
160                 *lim = VMCS_GUEST_FS_LIMIT;
161                 *acc = VMCS_GUEST_FS_ACCESS_RIGHTS;
162                 break;
163         case VM_REG_GUEST_GS:
164                 *base = VMCS_GUEST_GS_BASE;
165                 *lim = VMCS_GUEST_GS_LIMIT;
166                 *acc = VMCS_GUEST_GS_ACCESS_RIGHTS;
167                 break;
168         case VM_REG_GUEST_TR:
169                 *base = VMCS_GUEST_TR_BASE;
170                 *lim = VMCS_GUEST_TR_LIMIT;
171                 *acc = VMCS_GUEST_TR_ACCESS_RIGHTS;
172                 break;
173         case VM_REG_GUEST_LDTR:
174                 *base = VMCS_GUEST_LDTR_BASE;
175                 *lim = VMCS_GUEST_LDTR_LIMIT;
176                 *acc = VMCS_GUEST_LDTR_ACCESS_RIGHTS;
177                 break;
178         case VM_REG_GUEST_IDTR:
179                 *base = VMCS_GUEST_IDTR_BASE;
180                 *lim = VMCS_GUEST_IDTR_LIMIT;
181                 *acc = VMCS_INVALID_ENCODING;
182                 break;
183         case VM_REG_GUEST_GDTR:
184                 *base = VMCS_GUEST_GDTR_BASE;
185                 *lim = VMCS_GUEST_GDTR_LIMIT;
186                 *acc = VMCS_INVALID_ENCODING;
187                 break;
188         default:
189                 return (EINVAL);
190         }
191
192         return (0);
193 }
194
/*
 * Read a guest register, or a raw VMCS field, out of 'vmcs' into '*retval'.
 *
 * A non-negative 'ident' is a VM_REG_GUEST_* identifier that is translated
 * to a VMCS field encoding.  To reach vmx-specific state the translation can
 * be bypassed by passing the field encoding with the sign bit set; the upper
 * 16 bits of a VMCS encoding are reserved (i.e. zero), which leaves the sign
 * bit free for this purpose.
 *
 * When 'running' is zero the VMCS is loaded with VMPTRLD before the access
 * and released with VMCLEAR afterwards; otherwise the field is read directly.
 *
 * Returns EINVAL if 'ident' has no VMCS encoding, else the vmread() result.
 */
int
vmcs_getreg(struct vmcs *vmcs, int running, int ident, uint64_t *retval)
{
	uint32_t field;
	int rc;

	field = (ident < 0) ? (ident & 0x7fffffff) : vmcs_field_encoding(ident);
	if (field == (uint32_t)-1)
		return (EINVAL);

	if (running)
		return (vmread(field, retval));

	VMPTRLD(vmcs);
	rc = vmread(field, retval);
	VMCLEAR(vmcs);

	return (rc);
}
226
/*
 * Write 'val' to a guest register, or to a raw VMCS field, in 'vmcs'.
 *
 * 'ident' is interpreted as in vmcs_getreg(): a non-negative value is a
 * VM_REG_GUEST_* identifier, a negative value carries a raw VMCS field
 * encoding in its low 31 bits.  The value is passed through
 * vmcs_fix_regval() before it is written.
 *
 * When 'running' is zero the VMCS is loaded with VMPTRLD before the access
 * and released with VMCLEAR afterwards.
 *
 * Returns EINVAL if 'ident' has no VMCS encoding, else the vmwrite() result.
 */
int
vmcs_setreg(struct vmcs *vmcs, int running, int ident, uint64_t val)
{
	uint32_t field;
	int rc;

	field = (ident < 0) ? (ident & 0x7fffffff) : vmcs_field_encoding(ident);
	if (field == (uint32_t)-1)
		return (EINVAL);

	val = vmcs_fix_regval(field, val);

	if (running)
		return (vmwrite(field, val));

	VMPTRLD(vmcs);
	rc = vmwrite(field, val);
	VMCLEAR(vmcs);

	return (rc);
}
253
254 int
255 vmcs_setdesc(struct vmcs *vmcs, int running, int seg, struct seg_desc *desc)
256 {
257         int error;
258         uint32_t base, limit, access;
259
260         error = vmcs_seg_desc_encoding(seg, &base, &limit, &access);
261         if (error != 0)
262                 panic("vmcs_setdesc: invalid segment register %d", seg);
263
264         if (!running)
265                 VMPTRLD(vmcs);
266         if ((error = vmwrite(base, desc->base)) != 0)
267                 goto done;
268
269         if ((error = vmwrite(limit, desc->limit)) != 0)
270                 goto done;
271
272         if (access != VMCS_INVALID_ENCODING) {
273                 if ((error = vmwrite(access, desc->access)) != 0)
274                         goto done;
275         }
276 done:
277         if (!running)
278                 VMCLEAR(vmcs);
279         return (error);
280 }
281
282 int
283 vmcs_getdesc(struct vmcs *vmcs, int running, int seg, struct seg_desc *desc)
284 {
285         int error;
286         uint32_t base, limit, access;
287         uint64_t u64;
288
289         error = vmcs_seg_desc_encoding(seg, &base, &limit, &access);
290         if (error != 0)
291                 panic("vmcs_getdesc: invalid segment register %d", seg);
292
293         if (!running)
294                 VMPTRLD(vmcs);
295         if ((error = vmread(base, &u64)) != 0)
296                 goto done;
297         desc->base = u64;
298
299         if ((error = vmread(limit, &u64)) != 0)
300                 goto done;
301         desc->limit = u64;
302
303         if (access != VMCS_INVALID_ENCODING) {
304                 if ((error = vmread(access, &u64)) != 0)
305                         goto done;
306                 desc->access = u64;
307         }
308 done:
309         if (!running)
310                 VMCLEAR(vmcs);
311         return (error);
312 }
313
314 int
315 vmcs_set_msr_save(struct vmcs *vmcs, u_long g_area, u_int g_count)
316 {
317         int error;
318
319         VMPTRLD(vmcs);
320
321         /*
322          * Guest MSRs are saved in the VM-exit MSR-store area.
323          * Guest MSRs are loaded from the VM-entry MSR-load area.
324          * Both areas point to the same location in memory.
325          */
326         if ((error = vmwrite(VMCS_EXIT_MSR_STORE, g_area)) != 0)
327                 goto done;
328         if ((error = vmwrite(VMCS_EXIT_MSR_STORE_COUNT, g_count)) != 0)
329                 goto done;
330
331         if ((error = vmwrite(VMCS_ENTRY_MSR_LOAD, g_area)) != 0)
332                 goto done;
333         if ((error = vmwrite(VMCS_ENTRY_MSR_LOAD_COUNT, g_count)) != 0)
334                 goto done;
335
336         error = 0;
337 done:
338         VMCLEAR(vmcs);
339         return (error);
340 }
341
342 int
343 vmcs_init(struct vmcs *vmcs)
344 {
345         int error, codesel, datasel, tsssel;
346         u_long cr0, cr4, efer;
347         uint64_t pat, fsbase, idtrbase;
348
349         codesel = vmm_get_host_codesel();
350         datasel = vmm_get_host_datasel();
351         tsssel = vmm_get_host_tsssel();
352
353         /*
354          * Make sure we have a "current" VMCS to work with.
355          */
356         VMPTRLD(vmcs);
357
358         /* Host state */
359
360         /* Initialize host IA32_PAT MSR */
361         pat = vmm_get_host_pat();
362         if ((error = vmwrite(VMCS_HOST_IA32_PAT, pat)) != 0)
363                 goto done;
364
365         /* Load the IA32_EFER MSR */
366         efer = vmm_get_host_efer();
367         if ((error = vmwrite(VMCS_HOST_IA32_EFER, efer)) != 0)
368                 goto done;
369
370         /* Load the control registers */
371
372         cr0 = vmm_get_host_cr0();
373         if ((error = vmwrite(VMCS_HOST_CR0, cr0)) != 0)
374                 goto done;
375         
376         cr4 = vmm_get_host_cr4() | CR4_VMXE;
377         if ((error = vmwrite(VMCS_HOST_CR4, cr4)) != 0)
378                 goto done;
379
380         /* Load the segment selectors */
381         if ((error = vmwrite(VMCS_HOST_ES_SELECTOR, datasel)) != 0)
382                 goto done;
383
384         if ((error = vmwrite(VMCS_HOST_CS_SELECTOR, codesel)) != 0)
385                 goto done;
386
387         if ((error = vmwrite(VMCS_HOST_SS_SELECTOR, datasel)) != 0)
388                 goto done;
389
390         if ((error = vmwrite(VMCS_HOST_DS_SELECTOR, datasel)) != 0)
391                 goto done;
392
393         if ((error = vmwrite(VMCS_HOST_FS_SELECTOR, datasel)) != 0)
394                 goto done;
395
396         if ((error = vmwrite(VMCS_HOST_GS_SELECTOR, datasel)) != 0)
397                 goto done;
398
399         if ((error = vmwrite(VMCS_HOST_TR_SELECTOR, tsssel)) != 0)
400                 goto done;
401
402         /*
403          * Load the Base-Address for %fs and idtr.
404          *
405          * Note that we exclude %gs, tss and gdtr here because their base
406          * address is pcpu specific.
407          */
408         fsbase = vmm_get_host_fsbase();
409         if ((error = vmwrite(VMCS_HOST_FS_BASE, fsbase)) != 0)
410                 goto done;
411
412         idtrbase = vmm_get_host_idtrbase();
413         if ((error = vmwrite(VMCS_HOST_IDTR_BASE, idtrbase)) != 0)
414                 goto done;
415
416         /* instruction pointer */
417         if (no_flush_rsb) {
418                 if ((error = vmwrite(VMCS_HOST_RIP,
419                     (u_long)vmx_exit_guest)) != 0)
420                         goto done;
421         } else {
422                 if ((error = vmwrite(VMCS_HOST_RIP,
423                     (u_long)vmx_exit_guest_flush_rsb)) != 0)
424                         goto done;
425         }
426
427         /* link pointer */
428         if ((error = vmwrite(VMCS_LINK_POINTER, ~0)) != 0)
429                 goto done;
430 done:
431         VMCLEAR(vmcs);
432         return (error);
433 }
434
435 #ifdef BHYVE_SNAPSHOT
/*
 * Read the VMCS field whose encoding is 'ident' from 'vmcs' into '*val'.
 *
 * Unlike vmcs_getreg(), 'ident' is handed to vmread() as-is with no
 * translation or validation.  When 'running' is zero the VMCS is loaded with
 * VMPTRLD before the access and released with VMCLEAR afterwards.
 *
 * Returns the vmread() result.
 */
int
vmcs_getany(struct vmcs *vmcs, int running, int ident, uint64_t *val)
{
	int rc;

	if (running)
		return (vmread(ident, val));

	VMPTRLD(vmcs);
	rc = vmread(ident, val);
	VMCLEAR(vmcs);

	return (rc);
}
451
/*
 * Write 'val' to the VMCS field whose encoding is 'ident' in 'vmcs'.
 *
 * Unlike vmcs_setreg(), 'ident' is handed to vmwrite() as-is and the value
 * is not passed through vmcs_fix_regval().  When 'running' is zero the VMCS
 * is loaded with VMPTRLD before the access and released with VMCLEAR
 * afterwards.
 *
 * Returns the vmwrite() result.
 */
int
vmcs_setany(struct vmcs *vmcs, int running, int ident, uint64_t val)
{
	int rc;

	if (running)
		return (vmwrite(ident, val));

	VMPTRLD(vmcs);
	rc = vmwrite(ident, val);
	VMCLEAR(vmcs);

	return (rc);
}
467
468 int
469 vmcs_snapshot_reg(struct vmcs *vmcs, int running, int ident,
470                   struct vm_snapshot_meta *meta)
471 {
472         int ret;
473         uint64_t val;
474
475         if (meta->op == VM_SNAPSHOT_SAVE) {
476                 ret = vmcs_getreg(vmcs, running, ident, &val);
477                 if (ret != 0)
478                         goto done;
479
480                 SNAPSHOT_VAR_OR_LEAVE(val, meta, ret, done);
481         } else if (meta->op == VM_SNAPSHOT_RESTORE) {
482                 SNAPSHOT_VAR_OR_LEAVE(val, meta, ret, done);
483
484                 ret = vmcs_setreg(vmcs, running, ident, val);
485                 if (ret != 0)
486                         goto done;
487         } else {
488                 ret = EINVAL;
489                 goto done;
490         }
491
492 done:
493         return (ret);
494 }
495
496 int
497 vmcs_snapshot_desc(struct vmcs *vmcs, int running, int seg,
498                    struct vm_snapshot_meta *meta)
499 {
500         int ret;
501         struct seg_desc desc;
502
503         if (meta->op == VM_SNAPSHOT_SAVE) {
504                 ret = vmcs_getdesc(vmcs, running, seg, &desc);
505                 if (ret != 0)
506                         goto done;
507
508                 SNAPSHOT_VAR_OR_LEAVE(desc.base, meta, ret, done);
509                 SNAPSHOT_VAR_OR_LEAVE(desc.limit, meta, ret, done);
510                 SNAPSHOT_VAR_OR_LEAVE(desc.access, meta, ret, done);
511         } else if (meta->op == VM_SNAPSHOT_RESTORE) {
512                 SNAPSHOT_VAR_OR_LEAVE(desc.base, meta, ret, done);
513                 SNAPSHOT_VAR_OR_LEAVE(desc.limit, meta, ret, done);
514                 SNAPSHOT_VAR_OR_LEAVE(desc.access, meta, ret, done);
515
516                 ret = vmcs_setdesc(vmcs, running, seg, &desc);
517                 if (ret != 0)
518                         goto done;
519         } else {
520                 ret = EINVAL;
521                 goto done;
522         }
523
524 done:
525         return (ret);
526 }
527
528 int
529 vmcs_snapshot_any(struct vmcs *vmcs, int running, int ident,
530                   struct vm_snapshot_meta *meta)
531 {
532         int ret;
533         uint64_t val;
534
535         if (meta->op == VM_SNAPSHOT_SAVE) {
536                 ret = vmcs_getany(vmcs, running, ident, &val);
537                 if (ret != 0)
538                         goto done;
539
540                 SNAPSHOT_VAR_OR_LEAVE(val, meta, ret, done);
541         } else if (meta->op == VM_SNAPSHOT_RESTORE) {
542                 SNAPSHOT_VAR_OR_LEAVE(val, meta, ret, done);
543
544                 ret = vmcs_setany(vmcs, running, ident, val);
545                 if (ret != 0)
546                         goto done;
547         } else {
548                 ret = EINVAL;
549                 goto done;
550         }
551
552 done:
553         return (ret);
554 }
555 #endif
556
557 #ifdef DDB
558 extern int vmxon_enabled[];
559
560 DB_SHOW_COMMAND(vmcs, db_show_vmcs)
561 {
562         uint64_t cur_vmcs, val;
563         uint32_t exit;
564
565         if (!vmxon_enabled[curcpu]) {
566                 db_printf("VMX not enabled\n");
567                 return;
568         }
569
570         if (have_addr) {
571                 db_printf("Only current VMCS supported\n");
572                 return;
573         }
574
575         vmptrst(&cur_vmcs);
576         if (cur_vmcs == VMCS_INITIAL) {
577                 db_printf("No current VM context\n");
578                 return;
579         }
580         db_printf("VMCS: %jx\n", cur_vmcs);
581         db_printf("VPID: %lu\n", vmcs_read(VMCS_VPID));
582         db_printf("Activity: ");
583         val = vmcs_read(VMCS_GUEST_ACTIVITY);
584         switch (val) {
585         case 0:
586                 db_printf("Active");
587                 break;
588         case 1:
589                 db_printf("HLT");
590                 break;
591         case 2:
592                 db_printf("Shutdown");
593                 break;
594         case 3:
595                 db_printf("Wait for SIPI");
596                 break;
597         default:
598                 db_printf("Unknown: %#lx", val);
599         }
600         db_printf("\n");
601         exit = vmcs_read(VMCS_EXIT_REASON);
602         if (exit & 0x80000000)
603                 db_printf("Entry Failure Reason: %u\n", exit & 0xffff);
604         else
605                 db_printf("Exit Reason: %u\n", exit & 0xffff);
606         db_printf("Qualification: %#lx\n", vmcs_exit_qualification());
607         db_printf("Guest Linear Address: %#lx\n",
608             vmcs_read(VMCS_GUEST_LINEAR_ADDRESS));
609         switch (exit & 0x8000ffff) {
610         case EXIT_REASON_EXCEPTION:
611         case EXIT_REASON_EXT_INTR:
612                 val = vmcs_read(VMCS_EXIT_INTR_INFO);
613                 db_printf("Interrupt Type: ");
614                 switch (val >> 8 & 0x7) {
615                 case 0:
616                         db_printf("external");
617                         break;
618                 case 2:
619                         db_printf("NMI");
620                         break;
621                 case 3:
622                         db_printf("HW exception");
623                         break;
624                 case 4:
625                         db_printf("SW exception");
626                         break;
627                 default:
628                         db_printf("?? %lu", val >> 8 & 0x7);
629                         break;
630                 }
631                 db_printf("  Vector: %lu", val & 0xff);
632                 if (val & 0x800)
633                         db_printf("  Error Code: %lx",
634                             vmcs_read(VMCS_EXIT_INTR_ERRCODE));
635                 db_printf("\n");
636                 break;
637         case EXIT_REASON_EPT_FAULT:
638         case EXIT_REASON_EPT_MISCONFIG:
639                 db_printf("Guest Physical Address: %#lx\n",
640                     vmcs_read(VMCS_GUEST_PHYSICAL_ADDRESS));
641                 break;
642         }
643         db_printf("VM-instruction error: %#lx\n", vmcs_instruction_error());
644 }
645 #endif