/*-
 * Copyright (c) 2011 NetApp, Inc.
 * All rights reserved.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 *
 * THIS SOFTWARE IS PROVIDED BY NETAPP, INC ``AS IS'' AND
 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
 * ARE DISCLAIMED. IN NO EVENT SHALL NETAPP, INC OR CONTRIBUTORS BE LIABLE
 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
 * SUCH DAMAGE.
 */
#include <sys/cdefs.h>
__FBSDID("$FreeBSD$");

#include <sys/param.h>
#include <sys/kernel.h>
#include <sys/module.h>
#include <sys/sysctl.h>
#include <sys/malloc.h>
#include <sys/pcpu.h>
#include <sys/lock.h>
#include <sys/mutex.h>
#include <sys/proc.h>
#include <sys/sched.h>
#include <sys/smp.h>
#include <sys/systm.h>

#include <vm/vm.h>

#include <machine/vm.h>
#include <machine/pcb.h>
#include <machine/fpu.h>
#include <x86/apicreg.h>

#include <machine/vmm.h>
#include <machine/vmm_dev.h>

#include "vmm_mem.h"
#include "vmm_util.h"
#include "vmm_stat.h"
#include "vmm_msr.h"
#include "vlapic.h"
#include "io/ppt.h"
#include "io/iommu.h"
struct vcpu {
	int		flags;
	int		pincpu;		/* host cpuid this vcpu is bound to */
	int		hostcpu;	/* host cpuid this vcpu last ran on */
	uint64_t	guest_msrs[VMM_MSR_NUM];
	struct vlapic	*vlapic;
	int		vcpuid;
	struct savefpu	savefpu;	/* guest fpu state */
	void		*stats;
};
#define	VCPU_F_PINNED	0x0001
#define	VCPU_F_RUNNING	0x0002

#define	VCPU_PINCPU(vm, vcpuid)						\
    ((vm->vcpu[vcpuid].flags & VCPU_F_PINNED) ? vm->vcpu[vcpuid].pincpu : -1)

#define	VCPU_UNPIN(vm, vcpuid)	(vm->vcpu[vcpuid].flags &= ~VCPU_F_PINNED)
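/*
 * VCPU_PIN is a multi-statement macro, so it is wrapped in do/while(0)
 * to expand as a single statement even in unbraced if/else bodies.
 */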
#define	VCPU_PIN(vm, vcpuid, host_cpuid)				\
do {									\
	vm->vcpu[vcpuid].flags |= VCPU_F_PINNED;			\
	vm->vcpu[vcpuid].pincpu = host_cpuid;				\
} while(0)
#define	VM_MAX_MEMORY_SEGMENTS	2

struct vm {
	void		*cookie;	/* processor-specific data */
	void		*iommu;		/* iommu-specific data */
	struct vcpu	vcpu[VM_MAXCPU];
	int		num_mem_segs;
	struct vm_memory_segment mem_segs[VM_MAX_MEMORY_SEGMENTS];
	char		name[VM_MAX_NAMELEN];

	/*
	 * Mask of active vcpus.
	 * An active vcpu is one that has been started implicitly (BSP) or
	 * explicitly (AP) by sending it a startup ipi.
	 */
	cpumask_t	active_cpus;
};
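/*
 * The hardware-specific backend (Intel VT-x or AMD SVM) registers its
 * entry points in 'ops' at module load time. Every wrapper macro below
 * degrades gracefully when no backend is present, returning ENXIO (or
 * NULL for the pointer-valued operations) instead of dereferencing a
 * NULL ops vector.
 */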
static struct vmm_ops *ops;
#define	VMM_INIT()	(ops != NULL ? (*ops->init)() : 0)
#define	VMM_CLEANUP()	(ops != NULL ? (*ops->cleanup)() : 0)

#define	VMINIT(vm)	(ops != NULL ? (*ops->vminit)(vm) : NULL)
#define	VMRUN(vmi, vcpu, rip, vmexit) \
	(ops != NULL ? (*ops->vmrun)(vmi, vcpu, rip, vmexit) : ENXIO)
#define	VMCLEANUP(vmi)	(ops != NULL ? (*ops->vmcleanup)(vmi) : NULL)
#define	VMMMAP(vmi, gpa, hpa, len, attr, prot, spm) \
	(ops != NULL ? (*ops->vmmmap)(vmi, gpa, hpa, len, attr, prot, spm) : ENXIO)
#define	VMGETREG(vmi, vcpu, num, retval) \
	(ops != NULL ? (*ops->vmgetreg)(vmi, vcpu, num, retval) : ENXIO)
#define	VMSETREG(vmi, vcpu, num, val) \
	(ops != NULL ? (*ops->vmsetreg)(vmi, vcpu, num, val) : ENXIO)
#define	VMGETDESC(vmi, vcpu, num, desc) \
	(ops != NULL ? (*ops->vmgetdesc)(vmi, vcpu, num, desc) : ENXIO)
#define	VMSETDESC(vmi, vcpu, num, desc) \
	(ops != NULL ? (*ops->vmsetdesc)(vmi, vcpu, num, desc) : ENXIO)
#define	VMINJECT(vmi, vcpu, type, vec, ec, ecv) \
	(ops != NULL ? (*ops->vminject)(vmi, vcpu, type, vec, ec, ecv) : ENXIO)
#define	VMNMI(vmi, vcpu) \
	(ops != NULL ? (*ops->vmnmi)(vmi, vcpu) : ENXIO)
#define	VMGETCAP(vmi, vcpu, num, retval) \
	(ops != NULL ? (*ops->vmgetcap)(vmi, vcpu, num, retval) : ENXIO)
#define	VMSETCAP(vmi, vcpu, num, val) \
	(ops != NULL ? (*ops->vmsetcap)(vmi, vcpu, num, val) : ENXIO)
#define	fxrstor(addr)		__asm("fxrstor %0" : : "m" (*(addr)))
#define	fxsave(addr)		__asm __volatile("fxsave %0" : "=m" (*(addr)))
#define	fpu_start_emulating()	__asm("smsw %%ax; orb %0,%%al; lmsw %%ax" \
				      : : "n" (CR0_TS) : "ax")
#define	fpu_stop_emulating()	__asm("clts")
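/*
 * fpu_stop_emulating() clears CR0.TS so that fxsave/fxrstor can execute
 * without faulting; fpu_start_emulating() sets CR0.TS again so that the
 * first FPU access after returning to the host traps (#NM) and the host
 * FPU state can be reloaded lazily.
 */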
static MALLOC_DEFINE(M_VM, "vm", "vm");
CTASSERT(VMM_MSR_NUM <= 64);	/* msr_mask can keep track of up to 64 msrs */

static VMM_STAT_DEFINE(VCPU_TOTAL_RUNTIME, "vcpu total runtime");
static void
vcpu_cleanup(struct vcpu *vcpu)
{
	vlapic_cleanup(vcpu->vlapic);
	vmm_stat_free(vcpu->stats);
}
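/*
 * Initialize a vcpu: hook up its local APIC and statistics buffer and
 * seed its guest FPU area from the current thread's clean user FPU
 * state (fpugetregs() flushes the live FPU contents to the pcb first).
 */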
static void
vcpu_init(struct vm *vm, uint32_t vcpu_id)
{
	struct vcpu *vcpu;

	vcpu = &vm->vcpu[vcpu_id];

	vcpu->vcpuid = vcpu_id;
	vcpu->vlapic = vlapic_init(vm, vcpu_id);
	fpugetregs(curthread);
	vcpu->savefpu = curthread->td_pcb->pcb_user_save;
	vcpu->stats = vmm_stat_alloc();
}
static int
vmm_init(void)
{
	int error;

	error = vmm_mem_init();
	if (error)
		return (error);

	if (vmm_is_intel())
		ops = &vmm_ops_intel;
	else if (vmm_is_amd())
		ops = &vmm_ops_amd;
	else
		return (ENXIO);

	return (VMM_INIT());
}
static int
vmm_handler(module_t mod, int what, void *arg)
{
	int error;

	switch (what) {
	case MOD_LOAD:
		vmmdev_init();
		iommu_init();
		error = vmm_init();
		break;
	case MOD_UNLOAD:
		vmmdev_cleanup();
		iommu_cleanup();
		error = VMM_CLEANUP();
		break;
	default:
		error = 0;
		break;
	}

	return (error);
}
static moduledata_t vmm_kmod = {
	"vmm",
	vmm_handler,
	NULL
};

/*
 * Execute the module load handler after the pci passthru driver has had
 * a chance to claim devices. We need this information at the time we do
 * iommu initialization.
 */
DECLARE_MODULE(vmm, vmm_kmod, SI_SUB_CONFIGURE + 1, SI_ORDER_ANY);
MODULE_VERSION(vmm, 1);
SYSCTL_NODE(_hw, OID_AUTO, vmm, CTLFLAG_RW, NULL, NULL);
struct vm *
vm_create(const char *name)
{
	int i;
	struct vm *vm;
	vm_paddr_t maxaddr;

	if (name == NULL || strlen(name) >= VM_MAX_NAMELEN)
		return (NULL);

	vm = malloc(sizeof(struct vm), M_VM, M_WAITOK | M_ZERO);
	strcpy(vm->name, name);
	vm->cookie = VMINIT(vm);

	for (i = 0; i < VM_MAXCPU; i++) {
		vcpu_init(vm, i);
		guest_msrs_init(vm, i);
	}

	maxaddr = vmm_mem_maxaddr();
	vm->iommu = iommu_create_domain(maxaddr);
	vm_activate_cpu(vm, BSP);

	return (vm);
}
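/*
 * A rough sketch of the expected consumer flow (the in-tree consumer is
 * the vmm device driver; names and error handling are elided here):
 *
 *	vm = vm_create("guest");
 *	vm_malloc(vm, 0, memsize, &hpa);	(back guest RAM)
 *	vmrun.cpuid = 0; vmrun.rip = entry;
 *	while (vm_run(vm, &vmrun) == 0)
 *		handle vmrun.vm_exit;
 *	vm_destroy(vm);
 */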
void
vm_destroy(struct vm *vm)
{
	int i;

	ppt_unassign_all(vm);

	for (i = 0; i < vm->num_mem_segs; i++)
		vmm_mem_free(vm->mem_segs[i].hpa, vm->mem_segs[i].len);

	for (i = 0; i < VM_MAXCPU; i++)
		vcpu_cleanup(&vm->vcpu[i]);

	iommu_destroy_domain(vm->iommu);

	VMCLEANUP(vm->cookie);

	free(vm, M_VM);
}
const char *
vm_name(struct vm *vm)
{
	return (vm->name);
}
int
vm_map_mmio(struct vm *vm, vm_paddr_t gpa, size_t len, vm_paddr_t hpa)
{
	const boolean_t spok = TRUE;	/* superpage mappings are ok */

	return (VMMMAP(vm->cookie, gpa, hpa, len, VM_MEMATTR_UNCACHEABLE,
		       VM_PROT_RW, spok));
}

int
vm_unmap_mmio(struct vm *vm, vm_paddr_t gpa, size_t len)
{
	const boolean_t spok = TRUE;	/* superpage mappings are ok */

	return (VMMMAP(vm->cookie, gpa, 0, len, VM_MEMATTR_UNCACHEABLE,
		       VM_PROT_NONE, spok));
}
int
vm_malloc(struct vm *vm, vm_paddr_t gpa, size_t len, vm_paddr_t *ret_hpa)
{
	int error;
	vm_paddr_t hpa;

	const boolean_t spok = TRUE;	/* superpage mappings are ok */

	/*
	 * Return the hpa if this range was already vm_malloc'd.
	 */
	hpa = vm_gpa2hpa(vm, gpa, len);
	if (hpa != ((vm_paddr_t)-1))
		goto out;

	if (vm->num_mem_segs >= VM_MAX_MEMORY_SEGMENTS)
		return (E2BIG);

	hpa = vmm_mem_alloc(len);
	if (hpa == 0)
		return (ENOMEM);

	error = VMMMAP(vm->cookie, gpa, hpa, len, VM_MEMATTR_WRITE_BACK,
		       VM_PROT_ALL, spok);
	if (error) {
		vmm_mem_free(hpa, len);
		return (error);
	}

	iommu_create_mapping(vm->iommu, gpa, hpa, len);

	vm->mem_segs[vm->num_mem_segs].gpa = gpa;
	vm->mem_segs[vm->num_mem_segs].hpa = hpa;
	vm->mem_segs[vm->num_mem_segs].len = len;
	vm->num_mem_segs++;
out:
	*ret_hpa = hpa;
	return (0);
}
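/*
 * Each vm_memory_segment is physically contiguous, so gpa-to-hpa
 * translation is linear within a segment: the offset from the segment's
 * guest base applies directly to its host base.
 */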
vm_paddr_t
vm_gpa2hpa(struct vm *vm, vm_paddr_t gpa, size_t len)
{
	int i;
	vm_paddr_t gpabase, gpalimit, hpabase;

	for (i = 0; i < vm->num_mem_segs; i++) {
		hpabase = vm->mem_segs[i].hpa;
		gpabase = vm->mem_segs[i].gpa;
		gpalimit = gpabase + vm->mem_segs[i].len;
		if (gpa >= gpabase && gpa + len <= gpalimit)
			return ((gpa - gpabase) + hpabase);
	}

	return ((vm_paddr_t)-1);
}
int
vm_gpabase2memseg(struct vm *vm, vm_paddr_t gpabase,
		  struct vm_memory_segment *seg)
{
	int i;

	for (i = 0; i < vm->num_mem_segs; i++) {
		if (gpabase == vm->mem_segs[i].gpa) {
			*seg = vm->mem_segs[i];
			return (0);
		}
	}

	return (-1);
}
int
vm_get_register(struct vm *vm, int vcpu, int reg, uint64_t *retval)
{

	if (vcpu < 0 || vcpu >= VM_MAXCPU)
		return (EINVAL);

	if (reg >= VM_REG_LAST)
		return (EINVAL);

	return (VMGETREG(vm->cookie, vcpu, reg, retval));
}

int
vm_set_register(struct vm *vm, int vcpu, int reg, uint64_t val)
{

	if (vcpu < 0 || vcpu >= VM_MAXCPU)
		return (EINVAL);

	if (reg >= VM_REG_LAST)
		return (EINVAL);

	return (VMSETREG(vm->cookie, vcpu, reg, val));
}
static boolean_t
is_descriptor_table(int reg)
{

	switch (reg) {
	case VM_REG_GUEST_IDTR:
	case VM_REG_GUEST_GDTR:
		return (TRUE);
	default:
		return (FALSE);
	}
}

static boolean_t
is_segment_register(int reg)
{

	switch (reg) {
	case VM_REG_GUEST_ES:
	case VM_REG_GUEST_CS:
	case VM_REG_GUEST_SS:
	case VM_REG_GUEST_DS:
	case VM_REG_GUEST_FS:
	case VM_REG_GUEST_GS:
	case VM_REG_GUEST_TR:
	case VM_REG_GUEST_LDTR:
		return (TRUE);
	default:
		return (FALSE);
	}
}
int
vm_get_seg_desc(struct vm *vm, int vcpu, int reg,
		struct seg_desc *desc)
{

	if (vcpu < 0 || vcpu >= VM_MAXCPU)
		return (EINVAL);

	if (!is_segment_register(reg) && !is_descriptor_table(reg))
		return (EINVAL);

	return (VMGETDESC(vm->cookie, vcpu, reg, desc));
}

int
vm_set_seg_desc(struct vm *vm, int vcpu, int reg,
		struct seg_desc *desc)
{

	if (vcpu < 0 || vcpu >= VM_MAXCPU)
		return (EINVAL);

	if (!is_segment_register(reg) && !is_descriptor_table(reg))
		return (EINVAL);

	return (VMSETDESC(vm->cookie, vcpu, reg, desc));
}
int
vm_get_pinning(struct vm *vm, int vcpuid, int *cpuid)
{

	if (vcpuid < 0 || vcpuid >= VM_MAXCPU)
		return (EINVAL);

	*cpuid = VCPU_PINCPU(vm, vcpuid);

	return (0);
}
int
vm_set_pinning(struct vm *vm, int vcpuid, int host_cpuid)
{
	struct thread *td;

	if (vcpuid < 0 || vcpuid >= VM_MAXCPU)
		return (EINVAL);

	td = curthread;		/* XXXSMP only safe when muxing vcpus */

	/* unpin */
	if (host_cpuid < 0) {
		VCPU_UNPIN(vm, vcpuid);
		thread_lock(td);
		sched_unbind(td);
		thread_unlock(td);
		return (0);
	}

	if (CPU_ABSENT(host_cpuid))
		return (EINVAL);

	/*
	 * XXX we should check that 'host_cpuid' has not already been pinned
	 * by another vm.
	 */
	thread_lock(td);
	sched_bind(td, host_cpuid);
	thread_unlock(td);
	VCPU_PIN(vm, vcpuid, host_cpuid);

	return (0);
}
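/*
 * Swap the guest FPU state in and out around a VMRUN. Interrupts are
 * disabled across the fxsave/fxrstor so the host cannot touch the FPU
 * (and fault on CR0.TS) while the guest state is live in the registers.
 */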
static void
restore_guest_fpustate(struct vcpu *vcpu)
{
	register_t s;

	s = intr_disable();
	fpu_stop_emulating();
	fxrstor(&vcpu->savefpu);
	fpu_start_emulating();
	intr_restore(s);
}
static void
save_guest_fpustate(struct vcpu *vcpu)
{
	register_t s;

	s = intr_disable();
	fpu_stop_emulating();
	fxsave(&vcpu->savefpu);
	fpu_start_emulating();
	intr_restore(s);
}
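/*
 * Run a vcpu until it exits back to the host. PCB_FULL_IRET forces the
 * eventual return to userspace to restore the full register state, since
 * entering the guest can clobber state that the fast syscall-return path
 * would otherwise leave untouched. Guest MSRs and FPU state are swapped
 * in around the call into the hardware backend.
 */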
int
vm_run(struct vm *vm, struct vm_run *vmrun)
{
	int error, vcpuid;
	struct vcpu *vcpu;
	struct pcb *pcb;
	uint64_t tscval;

	vcpuid = vmrun->cpuid;

	if (vcpuid < 0 || vcpuid >= VM_MAXCPU)
		return (EINVAL);

	vcpu = &vm->vcpu[vcpuid];

	critical_enter();

	tscval = rdtsc();

	pcb = PCPU_GET(curpcb);
	set_pcb_flags(pcb, PCB_FULL_IRET);

	vcpu->hostcpu = curcpu;

	restore_guest_msrs(vm, vcpuid);
	restore_guest_fpustate(vcpu);
	error = VMRUN(vm->cookie, vcpuid, vmrun->rip, &vmrun->vm_exit);
	save_guest_fpustate(vcpu);
	restore_host_msrs(vm, vcpuid);

	vmm_stat_incr(vm, vcpuid, VCPU_TOTAL_RUNTIME, rdtsc() - tscval);

	critical_exit();

	return (error);
}
int
vm_inject_event(struct vm *vm, int vcpuid, int type,
		int vector, uint32_t code, int code_valid)
{

	if (vcpuid < 0 || vcpuid >= VM_MAXCPU)
		return (EINVAL);

	if ((type > VM_EVENT_NONE && type < VM_EVENT_MAX) == 0)
		return (EINVAL);

	if (vector < 0 || vector > 255)
		return (EINVAL);

	return (VMINJECT(vm->cookie, vcpuid, type, vector, code, code_valid));
}
int
vm_inject_nmi(struct vm *vm, int vcpu)
{
	int error;

	if (vcpu < 0 || vcpu >= VM_MAXCPU)
		return (EINVAL);

	error = VMNMI(vm->cookie, vcpu);
	vm_interrupt_hostcpu(vm, vcpu);

	return (error);
}
int
vm_get_capability(struct vm *vm, int vcpu, int type, int *retval)
{

	if (vcpu < 0 || vcpu >= VM_MAXCPU)
		return (EINVAL);

	if (type < 0 || type >= VM_CAP_MAX)
		return (EINVAL);

	return (VMGETCAP(vm->cookie, vcpu, type, retval));
}

int
vm_set_capability(struct vm *vm, int vcpu, int type, int val)
{

	if (vcpu < 0 || vcpu >= VM_MAXCPU)
		return (EINVAL);

	if (type < 0 || type >= VM_CAP_MAX)
		return (EINVAL);

	return (VMSETCAP(vm->cookie, vcpu, type, val));
}
uint64_t *
vm_guest_msrs(struct vm *vm, int cpu)
{

	return (vm->vcpu[cpu].guest_msrs);
}

struct vlapic *
vm_lapic(struct vm *vm, int cpu)
{

	return (vm->vcpu[cpu].vlapic);
}
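/*
 * Note that vm_guest_msrs() and vm_lapic() do not range-check 'cpu';
 * callers are expected to pass a vcpuid already validated against
 * VM_MAXCPU.
 */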
boolean_t
vmm_is_pptdev(int bus, int slot, int func)
{
	int found, b, s, f, n;
	char *val, *cp, *cp2;

	/*
	 * The 'pptdevs' tunable lists passthru devices as space-separated
	 * bus/slot/func triples:
	 *
	 * setenv pptdevs "1/2/3 4/5/6 7/8/9 10/11/12"
	 */
	found = 0;
	cp = val = getenv("pptdevs");
	while (cp != NULL && *cp != '\0') {
		if ((cp2 = strchr(cp, ' ')) != NULL)
			*cp2 = '\0';

		n = sscanf(cp, "%d/%d/%d", &b, &s, &f);
		if (n == 3 && bus == b && slot == s && func == f) {
			found = 1;
			break;
		}

		if (cp2 != NULL)
			*cp2++ = ' ';

		cp = cp2;
	}
	freeenv(val);

	return (found);
}
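/*
 * For example, pptdevs="2/0/0 2/0/1" in loader.conf reserves both
 * functions of the device in bus 2, slot 0 for passthru instead of
 * attaching them to a host driver.
 */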
void *
vm_iommu_domain(struct vm *vm)
{

	return (vm->iommu);
}
void
vm_set_run_state(struct vm *vm, int vcpuid, int state)
{
	struct vcpu *vcpu;

	if (vcpuid < 0 || vcpuid >= VM_MAXCPU)
		panic("vm_set_run_state: invalid vcpuid %d", vcpuid);

	vcpu = &vm->vcpu[vcpuid];

	if (state == VCPU_RUNNING) {
		if (vcpu->flags & VCPU_F_RUNNING) {
			panic("vm_set_run_state: %s[%d] is already running",
			      vm_name(vm), vcpuid);
		}
		vcpu->flags |= VCPU_F_RUNNING;
	} else {
		if ((vcpu->flags & VCPU_F_RUNNING) == 0) {
			panic("vm_set_run_state: %s[%d] is already stopped",
			      vm_name(vm), vcpuid);
		}
		vcpu->flags &= ~VCPU_F_RUNNING;
	}
}
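/*
 * An invalid transition (starting a vcpu that is already running, or
 * stopping one that is already stopped) can only be caused by a bug in
 * the caller, hence the panic rather than an error return.
 */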
int
vm_get_run_state(struct vm *vm, int vcpuid, int *cpuptr)
{
	int retval, hostcpu;
	struct vcpu *vcpu;

	if (vcpuid < 0 || vcpuid >= VM_MAXCPU)
		panic("vm_get_run_state: invalid vcpuid %d", vcpuid);

	vcpu = &vm->vcpu[vcpuid];
	if (vcpu->flags & VCPU_F_RUNNING) {
		retval = VCPU_RUNNING;
		hostcpu = vcpu->hostcpu;
	} else {
		retval = VCPU_STOPPED;
		hostcpu = -1;
	}

	if (cpuptr != NULL)
		*cpuptr = hostcpu;

	return (retval);
}
void
vm_activate_cpu(struct vm *vm, int vcpuid)
{

	if (vcpuid >= 0 && vcpuid < VM_MAXCPU)
		vm->active_cpus |= vcpu_mask(vcpuid);
}
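/*
 * The BSP is activated implicitly at vm_create() time; APs are added to
 * the mask when they receive a startup IPI. vm_active_cpus() exposes the
 * mask so callers can tell which vcpus need to be scheduled.
 */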
cpumask_t
vm_active_cpus(struct vm *vm)
{

	return (vm->active_cpus);
}
void *
vcpu_stats(struct vm *vm, int vcpuid)
{

	return (vm->vcpu[vcpuid].stats);
}