From 27087c20242bc2222b63195bc86547848bcd3a55 Mon Sep 17 00:00:00 2001 From: jhb Date: Mon, 21 Jul 2014 19:08:02 +0000 Subject: [PATCH] MFC 264353,264509,264768,264770,264825,264846,264988,265114,265165,265365, 265941,265951,266390,266550,266910: Various bhyve fixes: - Don't save host's return address in 'struct vmxctx'. - Permit non-32-bit accesses to local APIC registers. - Factor out common ioport handler code. - Use calloc() in favor of malloc + memset. - Change the vlapic timer frequency to be in the ballpark of contemporary hardware. - Allow the guest to read the TSC via MSR 0x10. - A VMCS is always inactive when it exits the vmx_run() loop. Remove redundant code and the misleading comment that suggest otherwise. - Ignore writes to microcode update MSR. This MSR is accessed by RHEL7 guest. Add KTR tracepoints to annotate wrmsr and rdmsr VM exits. - Provide an alias for the userboot console and name it 'comconsole'. - Use EV_ADD to create an mevent and EV_ENABLE to enable it. - abort(3) the process in response to a VMEXIT_ABORT. - Don't include the guest memory segments in the bhyve(8) process core dump. - Make the vmx asm code dtrace-fbt-friendly. - Allow vmx_getdesc() and vmx_setdesc() to be called for a vcpu that is in the VCPU_RUNNING state. - Enable VMX in the IA32_FEATURE_CONTROL MSR if it not enabled and the MSR isn't locked. git-svn-id: svn://svn.freebsd.org/base/stable/10@268953 ccf9f872-aa2e-dd11-9fc8-001c23d0bc1f --- lib/libvmmapi/vmmapi.c | 18 +++++++-- lib/libvmmapi/vmmapi.h | 3 ++ sys/amd64/vmm/intel/vmcs.c | 16 +++++--- sys/amd64/vmm/intel/vmcs.h | 4 +- sys/amd64/vmm/intel/vmx.c | 45 ++++++++++++++++------ sys/amd64/vmm/intel/vmx.h | 6 +-- sys/amd64/vmm/intel/vmx_genassym.c | 1 - sys/amd64/vmm/intel/vmx_support.S | 35 ++++++++++------- sys/amd64/vmm/io/vlapic.c | 7 +++- sys/amd64/vmm/vmm_ioport.c | 34 ++++++---------- sys/amd64/vmm/vmm_lapic.c | 8 ++-- sys/boot/userboot/userboot/conf.c | 2 + sys/boot/userboot/userboot/userboot_cons.c | 44 +++++++++++++++++++++ usr.sbin/bhyve/bhyve.8 | 4 +- usr.sbin/bhyve/bhyverun.c | 22 ++++++----- usr.sbin/bhyve/block_if.c | 3 +- usr.sbin/bhyve/inout.c | 34 ++++++---------- usr.sbin/bhyve/mevent.c | 22 ++++++----- usr.sbin/bhyve/pci_ahci.c | 3 +- usr.sbin/bhyve/pci_emul.c | 9 ++--- usr.sbin/bhyve/pci_passthru.c | 6 +-- usr.sbin/bhyve/pci_virtio_block.c | 3 +- usr.sbin/bhyve/pci_virtio_net.c | 3 +- usr.sbin/bhyve/pci_virtio_rnd.c | 3 +- usr.sbin/bhyve/uart_emul.c | 3 +- usr.sbin/bhyve/xmsr.c | 2 + 26 files changed, 209 insertions(+), 131 deletions(-) diff --git a/lib/libvmmapi/vmmapi.c b/lib/libvmmapi/vmmapi.c index 4a7f852c6..b853ae7e2 100644 --- a/lib/libvmmapi/vmmapi.c +++ b/lib/libvmmapi/vmmapi.c @@ -57,6 +57,7 @@ struct vmctx { int fd; uint32_t lowmem_limit; enum vm_mmap_style vms; + int memflags; size_t lowmem; char *lowmem_addr; size_t highmem; @@ -101,6 +102,7 @@ vm_open(const char *name) assert(vm != NULL); vm->fd = -1; + vm->memflags = 0; vm->lowmem_limit = 3 * GB; vm->name = (char *)(vm + 1); strcpy(vm->name, name); @@ -180,10 +182,17 @@ vm_set_lowmem_limit(struct vmctx *ctx, uint32_t limit) ctx->lowmem_limit = limit; } +void +vm_set_memflags(struct vmctx *ctx, int flags) +{ + + ctx->memflags = flags; +} + static int setup_memory_segment(struct vmctx *ctx, vm_paddr_t gpa, size_t len, char **addr) { - int error; + int error, mmap_flags; struct vm_memory_segment seg; /* @@ -195,8 +204,11 @@ setup_memory_segment(struct vmctx *ctx, vm_paddr_t gpa, size_t len, char **addr) seg.len = len; error = ioctl(ctx->fd, VM_MAP_MEMORY, 
&seg); if (error == 0 && addr != NULL) { - *addr = mmap(NULL, len, PROT_READ | PROT_WRITE, MAP_SHARED, - ctx->fd, gpa); + mmap_flags = MAP_SHARED; + if ((ctx->memflags & VM_MEM_F_INCORE) == 0) + mmap_flags |= MAP_NOCORE; + *addr = mmap(NULL, len, PROT_READ | PROT_WRITE, mmap_flags, + ctx->fd, gpa); } return (error); } diff --git a/lib/libvmmapi/vmmapi.h b/lib/libvmmapi/vmmapi.h index 2a2ca6b7f..c1a4b35b3 100644 --- a/lib/libvmmapi/vmmapi.h +++ b/lib/libvmmapi/vmmapi.h @@ -42,6 +42,8 @@ enum vm_mmap_style { VM_MMAP_SPARSE, /* mappings created on-demand */ }; +#define VM_MEM_F_INCORE 0x01 /* include guest memory in core file */ + int vm_create(const char *name); struct vmctx *vm_open(const char *name); void vm_destroy(struct vmctx *ctx); @@ -53,6 +55,7 @@ void *vm_map_gpa(struct vmctx *ctx, vm_paddr_t gaddr, size_t len); int vm_get_gpa_pmap(struct vmctx *, uint64_t gpa, uint64_t *pte, int *num); uint32_t vm_get_lowmem_limit(struct vmctx *ctx); void vm_set_lowmem_limit(struct vmctx *ctx, uint32_t limit); +void vm_set_memflags(struct vmctx *ctx, int flags); int vm_set_desc(struct vmctx *ctx, int vcpu, int reg, uint64_t base, uint32_t limit, uint32_t access); int vm_get_desc(struct vmctx *ctx, int vcpu, int reg, diff --git a/sys/amd64/vmm/intel/vmcs.c b/sys/amd64/vmm/intel/vmcs.c index 1ddefe0d6..cc97d9516 100644 --- a/sys/amd64/vmm/intel/vmcs.c +++ b/sys/amd64/vmm/intel/vmcs.c @@ -231,7 +231,7 @@ vmcs_setreg(struct vmcs *vmcs, int running, int ident, uint64_t val) } int -vmcs_setdesc(struct vmcs *vmcs, int seg, struct seg_desc *desc) +vmcs_setdesc(struct vmcs *vmcs, int running, int seg, struct seg_desc *desc) { int error; uint32_t base, limit, access; @@ -240,7 +240,8 @@ vmcs_setdesc(struct vmcs *vmcs, int seg, struct seg_desc *desc) if (error != 0) panic("vmcs_setdesc: invalid segment register %d", seg); - VMPTRLD(vmcs); + if (!running) + VMPTRLD(vmcs); if ((error = vmwrite(base, desc->base)) != 0) goto done; @@ -252,12 +253,13 @@ vmcs_setdesc(struct vmcs *vmcs, int seg, struct seg_desc *desc) goto done; } done: - VMCLEAR(vmcs); + if (!running) + VMCLEAR(vmcs); return (error); } int -vmcs_getdesc(struct vmcs *vmcs, int seg, struct seg_desc *desc) +vmcs_getdesc(struct vmcs *vmcs, int running, int seg, struct seg_desc *desc) { int error; uint32_t base, limit, access; @@ -267,7 +269,8 @@ vmcs_getdesc(struct vmcs *vmcs, int seg, struct seg_desc *desc) if (error != 0) panic("vmcs_getdesc: invalid segment register %d", seg); - VMPTRLD(vmcs); + if (!running) + VMPTRLD(vmcs); if ((error = vmread(base, &u64)) != 0) goto done; desc->base = u64; @@ -282,7 +285,8 @@ vmcs_getdesc(struct vmcs *vmcs, int seg, struct seg_desc *desc) desc->access = u64; } done: - VMCLEAR(vmcs); + if (!running) + VMCLEAR(vmcs); return (error); } diff --git a/sys/amd64/vmm/intel/vmcs.h b/sys/amd64/vmm/intel/vmcs.h index 9cde99903..657d5b0f6 100644 --- a/sys/amd64/vmm/intel/vmcs.h +++ b/sys/amd64/vmm/intel/vmcs.h @@ -49,9 +49,9 @@ int vmcs_set_msr_save(struct vmcs *vmcs, u_long g_area, u_int g_count); int vmcs_init(struct vmcs *vmcs); int vmcs_getreg(struct vmcs *vmcs, int running, int ident, uint64_t *rv); int vmcs_setreg(struct vmcs *vmcs, int running, int ident, uint64_t val); -int vmcs_getdesc(struct vmcs *vmcs, int ident, +int vmcs_getdesc(struct vmcs *vmcs, int running, int ident, struct seg_desc *desc); -int vmcs_setdesc(struct vmcs *vmcs, int ident, +int vmcs_setdesc(struct vmcs *vmcs, int running, int ident, struct seg_desc *desc); static __inline uint64_t diff --git a/sys/amd64/vmm/intel/vmx.c 
b/sys/amd64/vmm/intel/vmx.c index 1c39552ad..e85e5e441 100644 --- a/sys/amd64/vmm/intel/vmx.c +++ b/sys/amd64/vmm/intel/vmx.c @@ -509,6 +509,15 @@ static void vmx_enable(void *arg __unused) { int error; + uint64_t feature_control; + + feature_control = rdmsr(MSR_IA32_FEATURE_CONTROL); + if ((feature_control & IA32_FEATURE_CONTROL_LOCK) == 0 || + (feature_control & IA32_FEATURE_CONTROL_VMX_EN) == 0) { + wrmsr(MSR_IA32_FEATURE_CONTROL, + feature_control | IA32_FEATURE_CONTROL_VMX_EN | + IA32_FEATURE_CONTROL_LOCK); + } load_cr4(rcr4() | CR4_VMXE); @@ -544,7 +553,7 @@ vmx_init(int ipinum) * are set (bits 0 and 2 respectively). */ feature_control = rdmsr(MSR_IA32_FEATURE_CONTROL); - if ((feature_control & IA32_FEATURE_CONTROL_LOCK) == 0 || + if ((feature_control & IA32_FEATURE_CONTROL_LOCK) == 1 && (feature_control & IA32_FEATURE_CONTROL_VMX_EN) == 0) { printf("vmx_init: VMX operation disabled by BIOS\n"); return (ENXIO); @@ -863,6 +872,11 @@ vmx_vminit(struct vm *vm, pmap_t pmap) * MSR_EFER is saved and restored in the guest VMCS area on a * VM exit and entry respectively. It is also restored from the * host VMCS area on a VM exit. + * + * The TSC MSR is exposed read-only. Writes are disallowed as that + * will impact the host TSC. + * XXX Writes would be implemented with a wrmsr trap, and + * then modifying the TSC offset in the VMCS. */ if (guest_msr_rw(vmx, MSR_GSBASE) || guest_msr_rw(vmx, MSR_FSBASE) || @@ -870,7 +884,8 @@ vmx_vminit(struct vm *vm, pmap_t pmap) guest_msr_rw(vmx, MSR_SYSENTER_ESP_MSR) || guest_msr_rw(vmx, MSR_SYSENTER_EIP_MSR) || guest_msr_rw(vmx, MSR_KGSBASE) || - guest_msr_rw(vmx, MSR_EFER)) + guest_msr_rw(vmx, MSR_EFER) || + guest_msr_ro(vmx, MSR_TSC)) panic("vmx_vminit: error setting guest msr access"); /* @@ -1829,6 +1844,7 @@ vmx_exit_process(struct vmx *vmx, int vcpu, struct vm_exit *vmexit) vmm_stat_incr(vmx->vm, vcpu, VMEXIT_RDMSR, 1); retu = false; ecx = vmxctx->guest_rcx; + VCPU_CTR1(vmx->vm, vcpu, "rdmsr 0x%08x", ecx); error = emulate_rdmsr(vmx->vm, vcpu, ecx, &retu); if (error) { vmexit->exitcode = VM_EXITCODE_RDMSR; @@ -1847,6 +1863,8 @@ vmx_exit_process(struct vmx *vmx, int vcpu, struct vm_exit *vmexit) eax = vmxctx->guest_rax; ecx = vmxctx->guest_rcx; edx = vmxctx->guest_rdx; + VCPU_CTR2(vmx->vm, vcpu, "wrmsr 0x%08x value 0x%016lx", + ecx, (uint64_t)edx << 32 | eax); error = emulate_wrmsr(vmx->vm, vcpu, ecx, (uint64_t)edx << 32 | eax, &retu); if (error) { @@ -2257,7 +2275,7 @@ vmx_run(void *arg, int vcpu, register_t startrip, pmap_t pmap, static void vmx_vmcleanup(void *arg) { - int i, error; + int i; struct vmx *vmx = arg; if (apic_access_virtualization(vmx, 0)) @@ -2266,13 +2284,6 @@ vmx_vmcleanup(void *arg) for (i = 0; i < VM_MAXCPU; i++) vpid_free(vmx->state[i].vpid); - /* - * XXXSMP we also need to clear the VMCS active on the other vcpus. 
- */ - error = vmclear(&vmx->vmcs[0]); - if (error != 0) - panic("vmx_vmcleanup: vmclear error %d on vcpu 0", error); - free(vmx, M_VMX); return; @@ -2430,17 +2441,27 @@ vmx_setreg(void *arg, int vcpu, int reg, uint64_t val) static int vmx_getdesc(void *arg, int vcpu, int reg, struct seg_desc *desc) { + int hostcpu, running; struct vmx *vmx = arg; - return (vmcs_getdesc(&vmx->vmcs[vcpu], reg, desc)); + running = vcpu_is_running(vmx->vm, vcpu, &hostcpu); + if (running && hostcpu != curcpu) + panic("vmx_getdesc: %s%d is running", vm_name(vmx->vm), vcpu); + + return (vmcs_getdesc(&vmx->vmcs[vcpu], running, reg, desc)); } static int vmx_setdesc(void *arg, int vcpu, int reg, struct seg_desc *desc) { + int hostcpu, running; struct vmx *vmx = arg; - return (vmcs_setdesc(&vmx->vmcs[vcpu], reg, desc)); + running = vcpu_is_running(vmx->vm, vcpu, &hostcpu); + if (running && hostcpu != curcpu) + panic("vmx_setdesc: %s%d is running", vm_name(vmx->vm), vcpu); + + return (vmcs_setdesc(&vmx->vmcs[vcpu], running, reg, desc)); } static int diff --git a/sys/amd64/vmm/intel/vmx.h b/sys/amd64/vmm/intel/vmx.h index 80bfd72a4..208fceed9 100644 --- a/sys/amd64/vmm/intel/vmx.h +++ b/sys/amd64/vmm/intel/vmx.h @@ -60,7 +60,6 @@ struct vmxctx { register_t host_rbp; register_t host_rsp; register_t host_rbx; - register_t host_rip; /* * XXX todo debug registers and fpu state */ @@ -68,7 +67,7 @@ struct vmxctx { int inst_fail_status; /* - * The pmap needs to be deactivated in vmx_exit_guest() + * The pmap needs to be deactivated in vmx_enter_guest() * so keep a copy of the 'pmap' in each vmxctx. */ struct pmap *pmap; @@ -122,10 +121,11 @@ CTASSERT((offsetof(struct vmx, pir_desc[0]) & 63) == 0); #define VMX_VMLAUNCH_ERROR 2 #define VMX_INVEPT_ERROR 3 int vmx_enter_guest(struct vmxctx *ctx, struct vmx *vmx, int launched); -void vmx_exit_guest(void); void vmx_call_isr(uintptr_t entry); u_long vmx_fix_cr0(u_long cr0); u_long vmx_fix_cr4(u_long cr4); +extern char vmx_exit_guest[]; + #endif diff --git a/sys/amd64/vmm/intel/vmx_genassym.c b/sys/amd64/vmm/intel/vmx_genassym.c index 5c91fecb5..e1b98d63f 100644 --- a/sys/amd64/vmm/intel/vmx_genassym.c +++ b/sys/amd64/vmm/intel/vmx_genassym.c @@ -65,7 +65,6 @@ ASSYM(VMXCTX_HOST_R12, offsetof(struct vmxctx, host_r12)); ASSYM(VMXCTX_HOST_RBP, offsetof(struct vmxctx, host_rbp)); ASSYM(VMXCTX_HOST_RSP, offsetof(struct vmxctx, host_rsp)); ASSYM(VMXCTX_HOST_RBX, offsetof(struct vmxctx, host_rbx)); -ASSYM(VMXCTX_HOST_RIP, offsetof(struct vmxctx, host_rip)); ASSYM(VMXCTX_INST_FAIL_STATUS, offsetof(struct vmxctx, inst_fail_status)); ASSYM(VMXCTX_PMAP, offsetof(struct vmxctx, pmap)); diff --git a/sys/amd64/vmm/intel/vmx_support.S b/sys/amd64/vmm/intel/vmx_support.S index 9e8cf2de7..840b7e061 100644 --- a/sys/amd64/vmm/intel/vmx_support.S +++ b/sys/amd64/vmm/intel/vmx_support.S @@ -37,6 +37,10 @@ #define LK #endif +/* Be friendly to DTrace FBT's prologue/epilogue pattern matching */ +#define VENTER push %rbp ; mov %rsp,%rbp +#define VLEAVE pop %rbp + /* * Assumes that %rdi holds a pointer to the 'vmxctx'. * @@ -72,8 +76,7 @@ * * Assumes that %rdi holds a pointer to the 'vmxctx'. 
*/ -#define VMX_HOST_SAVE(tmpreg) \ - movq (%rsp), tmpreg; /* return address */ \ +#define VMX_HOST_SAVE \ movq %r15, VMXCTX_HOST_R15(%rdi); \ movq %r14, VMXCTX_HOST_R14(%rdi); \ movq %r13, VMXCTX_HOST_R13(%rdi); \ @@ -81,9 +84,8 @@ movq %rbp, VMXCTX_HOST_RBP(%rdi); \ movq %rsp, VMXCTX_HOST_RSP(%rdi); \ movq %rbx, VMXCTX_HOST_RBX(%rdi); \ - movq tmpreg, VMXCTX_HOST_RIP(%rdi) -#define VMX_HOST_RESTORE(tmpreg) \ +#define VMX_HOST_RESTORE \ movq VMXCTX_HOST_R15(%rdi), %r15; \ movq VMXCTX_HOST_R14(%rdi), %r14; \ movq VMXCTX_HOST_R13(%rdi), %r13; \ @@ -91,8 +93,6 @@ movq VMXCTX_HOST_RBP(%rdi), %rbp; \ movq VMXCTX_HOST_RSP(%rdi), %rsp; \ movq VMXCTX_HOST_RBX(%rdi), %rbx; \ - movq VMXCTX_HOST_RIP(%rdi), tmpreg; \ - movq tmpreg, (%rsp) /* return address */ /* * vmx_enter_guest(struct vmxctx *vmxctx, int launched) @@ -102,10 +102,11 @@ * Interrupts must be disabled on entry. */ ENTRY(vmx_enter_guest) + VENTER /* * Save host state before doing anything else. */ - VMX_HOST_SAVE(%r10) + VMX_HOST_SAVE /* * Activate guest pmap on this cpu. @@ -186,15 +187,18 @@ inst_error: movl PCPU(CPUID), %r10d LK btrl %r10d, PM_ACTIVE(%r11) - VMX_HOST_RESTORE(%r10) + VMX_HOST_RESTORE + VLEAVE ret -END(vmx_enter_guest) /* - * void vmx_exit_guest(void) - * %rsp points to the struct vmxctx + * Non-error VM-exit from the guest. Make this a label so it can + * be used by C code when setting up the VMCS. + * The VMCS-restored %rsp points to the struct vmxctx */ -ENTRY(vmx_exit_guest) + ALIGN_TEXT + .globl vmx_exit_guest +vmx_exit_guest: /* * Save guest state that is not automatically saved in the vmcs. */ @@ -226,15 +230,16 @@ ENTRY(vmx_exit_guest) movl PCPU(CPUID), %r10d LK btrl %r10d, PM_ACTIVE(%r11) - VMX_HOST_RESTORE(%r10) + VMX_HOST_RESTORE /* * This will return to the caller of 'vmx_enter_guest()' with a return * value of VMX_GUEST_VMEXIT. */ movl $VMX_GUEST_VMEXIT, %eax + VLEAVE ret -END(vmx_exit_guest) +END(vmx_enter_guest) /* * %rdi = interrupt handler entry point @@ -243,6 +248,7 @@ END(vmx_exit_guest) * instruction in Intel SDM, Vol 2. */ ENTRY(vmx_call_isr) + VENTER mov %rsp, %r11 /* save %rsp */ and $~0xf, %rsp /* align on 16-byte boundary */ pushq $KERNEL_SS /* %ss */ @@ -251,5 +257,6 @@ ENTRY(vmx_call_isr) pushq $KERNEL_CS /* %cs */ cli /* disable interrupts */ callq *%rdi /* push %rip and call isr */ + VLEAVE ret END(vmx_call_isr) diff --git a/sys/amd64/vmm/io/vlapic.c b/sys/amd64/vmm/io/vlapic.c index 2e0a575c8..d93641c19 100644 --- a/sys/amd64/vmm/io/vlapic.c +++ b/sys/amd64/vmm/io/vlapic.c @@ -70,7 +70,12 @@ __FBSDID("$FreeBSD$"); #define VLAPIC_TIMER_UNLOCK(vlapic) mtx_unlock_spin(&((vlapic)->timer_mtx)) #define VLAPIC_TIMER_LOCKED(vlapic) mtx_owned(&((vlapic)->timer_mtx)) -#define VLAPIC_BUS_FREQ tsc_freq +/* + * APIC timer frequency: + * - arbitrary but chosen to be in the ballpark of contemporary hardware. + * - power-of-two to avoid loss of precision when converted to a bintime. 
+ */ +#define VLAPIC_BUS_FREQ (128 * 1024 * 1024) static __inline uint32_t vlapic_get_id(struct vlapic *vlapic) diff --git a/sys/amd64/vmm/vmm_ioport.c b/sys/amd64/vmm/vmm_ioport.c index ed17e40c3..eae45cc5c 100644 --- a/sys/amd64/vmm/vmm_ioport.c +++ b/sys/amd64/vmm/vmm_ioport.c @@ -69,18 +69,19 @@ emulate_ioport(struct vm *vm, int vcpuid, struct vm_exit *vmexit) if (handler == NULL) return (-1); + switch (vmexit->u.inout.bytes) { + case 1: + mask = 0xff; + break; + case 2: + mask = 0xffff; + break; + default: + mask = 0xffffffff; + break; + } + if (!vmexit->u.inout.in) { - switch (vmexit->u.inout.bytes) { - case 1: - mask = 0xff; - break; - case 2: - mask = 0xffff; - break; - default: - mask = 0xffffffff; - break; - } val = vmexit->u.inout.eax & mask; } @@ -88,17 +89,6 @@ emulate_ioport(struct vm *vm, int vcpuid, struct vm_exit *vmexit) vmexit->u.inout.port, vmexit->u.inout.bytes, &val); if (!error && vmexit->u.inout.in) { - switch (vmexit->u.inout.bytes) { - case 1: - mask = 0xff; - break; - case 2: - mask = 0xffff; - break; - default: - mask = 0xffffffff; - break; - } vmexit->u.inout.eax &= ~mask; vmexit->u.inout.eax |= val & mask; } diff --git a/sys/amd64/vmm/vmm_lapic.c b/sys/amd64/vmm/vmm_lapic.c index 640c779c3..fa9832e09 100644 --- a/sys/amd64/vmm/vmm_lapic.c +++ b/sys/amd64/vmm/vmm_lapic.c @@ -230,10 +230,12 @@ lapic_mmio_read(void *vm, int cpu, uint64_t gpa, uint64_t *rval, int size, off = gpa - DEFAULT_APIC_BASE; /* - * Memory mapped local apic accesses must be 4 bytes wide and - * aligned on a 16-byte boundary. + * Memory mapped local apic accesses should be aligned on a + * 16-byte boundary. They are also suggested to be 4 bytes + * wide, alas not all OSes follow suggestions. */ - if (size != 4 || off & 0xf) + off &= ~3; + if (off & 0xf) return (EINVAL); vlapic = vm_lapic(vm, cpu); diff --git a/sys/boot/userboot/userboot/conf.c b/sys/boot/userboot/userboot/conf.c index 5eac87da9..6727a223e 100644 --- a/sys/boot/userboot/userboot/conf.c +++ b/sys/boot/userboot/userboot/conf.c @@ -97,8 +97,10 @@ struct file_format *file_formats[] = { * data structures from bootstrap.h as well. 
*/ extern struct console userboot_console; +extern struct console userboot_comconsole; struct console *consoles[] = { &userboot_console, + &userboot_comconsole, NULL }; diff --git a/sys/boot/userboot/userboot/userboot_cons.c b/sys/boot/userboot/userboot/userboot_cons.c index 5ecb7c8b2..6f73ad563 100644 --- a/sys/boot/userboot/userboot/userboot_cons.c +++ b/sys/boot/userboot/userboot/userboot_cons.c @@ -33,8 +33,12 @@ __FBSDID("$FreeBSD$"); int console; +static struct console *userboot_comconsp; + static void userboot_cons_probe(struct console *cp); static int userboot_cons_init(int); +static void userboot_comcons_probe(struct console *cp); +static int userboot_comcons_init(int); static void userboot_cons_putchar(int); static int userboot_cons_getchar(void); static int userboot_cons_poll(void); @@ -50,6 +54,21 @@ struct console userboot_console = { userboot_cons_poll, }; +/* + * Provide a simple alias to allow loader scripts to set the + * console to comconsole without resulting in an error + */ +struct console userboot_comconsole = { + "comconsole", + "comconsole", + 0, + userboot_comcons_probe, + userboot_comcons_init, + userboot_cons_putchar, + userboot_cons_getchar, + userboot_cons_poll, +}; + static void userboot_cons_probe(struct console *cp) { @@ -64,6 +83,31 @@ userboot_cons_init(int arg) return (0); } +static void +userboot_comcons_probe(struct console *cp) +{ + + /* + * Save the console pointer so the comcons_init routine + * can set the C_PRESENT* flags. They are not set + * here to allow the existing userboot console to + * be elected the default. + */ + userboot_comconsp = cp; +} + +static int +userboot_comcons_init(int arg) +{ + + /* + * Set the C_PRESENT* flags to allow the comconsole + * to be selected as the active console + */ + userboot_comconsp->c_flags |= (C_PRESENTIN | C_PRESENTOUT); + return (0); +} + static void userboot_cons_putchar(int c) { diff --git a/usr.sbin/bhyve/bhyve.8 b/usr.sbin/bhyve/bhyve.8 index 2d2d3965c..aad1aef70 100644 --- a/usr.sbin/bhyve/bhyve.8 +++ b/usr.sbin/bhyve/bhyve.8 @@ -32,7 +32,7 @@ .Nd "run a guest operating system inside a virtual machine" .Sh SYNOPSIS .Nm -.Op Fl aehwxAHPW +.Op Fl aehwxACHPW .Op Fl c Ar numcpus .Op Fl g Ar gdbport .Op Fl p Ar vcpu:hostcpu @@ -69,6 +69,8 @@ guests. .It Fl c Ar numcpus Number of guest virtual CPUs. The default is 1 and the maximum is 16. +.It Fl C +Include guest memory in core file. .It Fl H Yield the virtual CPU thread when a HLT instruction is detected. If this option is not specified, virtual CPUs will use 100% of a host CPU. 
diff --git a/usr.sbin/bhyve/bhyverun.c b/usr.sbin/bhyve/bhyverun.c index e662ca39d..b2354c9e2 100644 --- a/usr.sbin/bhyve/bhyverun.c +++ b/usr.sbin/bhyve/bhyverun.c @@ -68,7 +68,6 @@ __FBSDID("$FreeBSD$"); #define GUEST_NIO_PORT 0x488 /* guest upcalls via i/o port */ -#define VMEXIT_SWITCH 0 /* force vcpu switch in mux mode */ #define VMEXIT_CONTINUE 1 /* continue from next instruction */ #define VMEXIT_RESTART 2 /* restart current instruction */ #define VMEXIT_ABORT 3 /* abort the vm run loop */ @@ -135,6 +134,7 @@ usage(int code) " -A: create an ACPI table\n" " -g: gdb port\n" " -c: # cpus (default 1)\n" + " -C: include guest memory in core file\n" " -p: pin 'vcpu' to 'hostcpu'\n" " -H: vmexit from the guest on hlt\n" " -P: vmexit from the guest on pause\n" @@ -271,12 +271,6 @@ fbsdrun_deletecpu(struct vmctx *ctx, int vcpu) return (CPU_EMPTY(&cpumask)); } -static int -vmexit_catch_inout(void) -{ - return (VMEXIT_ABORT); -} - static int vmexit_handle_notify(struct vmctx *ctx, struct vm_exit *vme, int *pvcpu, uint32_t eax) @@ -330,7 +324,7 @@ vmexit_inout(struct vmctx *ctx, struct vm_exit *vme, int *pvcpu) fprintf(stderr, "Unhandled %s%c 0x%04x\n", in ? "in" : "out", bytes == 1 ? 'b' : (bytes == 2 ? 'w' : 'l'), port); - return (vmexit_catch_inout()); + return (VMEXIT_ABORT); } } @@ -575,6 +569,8 @@ vm_loop(struct vmctx *ctx, int vcpu, uint64_t rip) assert(error == 0 || errno == EALREADY); rip = vmexit[vcpu].rip + vmexit[vcpu].inst_length; break; + case VMEXIT_ABORT: + abort(); default: exit(1); } @@ -647,19 +643,20 @@ int main(int argc, char *argv[]) { int c, error, gdb_port, err, bvmcons; - int max_vcpus, mptgen; + int dump_guest_memory, max_vcpus, mptgen; struct vmctx *ctx; uint64_t rip; size_t memsize; bvmcons = 0; + dump_guest_memory = 0; progname = basename(argv[0]); gdb_port = 0; guest_ncpus = 1; memsize = 256 * MB; mptgen = 1; - while ((c = getopt(argc, argv, "abehwxAHIPWYp:g:c:s:m:l:U:")) != -1) { + while ((c = getopt(argc, argv, "abehwxACHIPWYp:g:c:s:m:l:U:")) != -1) { switch (c) { case 'a': x2apic_mode = 0; @@ -679,6 +676,9 @@ main(int argc, char *argv[]) case 'c': guest_ncpus = atoi(optarg); break; + case 'C': + dump_guest_memory = 1; + break; case 'g': gdb_port = atoi(optarg); break; @@ -760,6 +760,8 @@ main(int argc, char *argv[]) fbsdrun_set_capabilities(ctx, BSP); + if (dump_guest_memory) + vm_set_memflags(ctx, VM_MEM_F_INCORE); err = vm_setup_memory(ctx, memsize, VM_MMAP_ALL); if (err) { fprintf(stderr, "Unable to setup memory (%d)\n", err); diff --git a/usr.sbin/bhyve/block_if.c b/usr.sbin/bhyve/block_if.c index ffa343c0d..b29bc7856 100644 --- a/usr.sbin/bhyve/block_if.c +++ b/usr.sbin/bhyve/block_if.c @@ -270,13 +270,12 @@ blockif_open(const char *optstr, const char *ident) assert(sectsz != 0); } - bc = malloc(sizeof(struct blockif_ctxt)); + bc = calloc(1, sizeof(struct blockif_ctxt)); if (bc == NULL) { close(fd); return (NULL); } - memset(bc, 0, sizeof(*bc)); bc->bc_magic = BLOCKIF_SIG; bc->bc_fd = fd; bc->bc_size = size; diff --git a/usr.sbin/bhyve/inout.c b/usr.sbin/bhyve/inout.c index 9fec70a33..5fbe99ba0 100644 --- a/usr.sbin/bhyve/inout.c +++ b/usr.sbin/bhyve/inout.c @@ -107,18 +107,19 @@ emulate_inout(struct vmctx *ctx, int vcpu, int in, int port, int bytes, if (strict && handler == default_inout) return (-1); + switch (bytes) { + case 1: + mask = 0xff; + break; + case 2: + mask = 0xffff; + break; + default: + mask = 0xffffffff; + break; + } + if (!in) { - switch (bytes) { - case 1: - mask = 0xff; - break; - case 2: - mask = 0xffff; - break; - default: - mask = 
0xffffffff; - break; - } val = *eax & mask; } @@ -131,17 +132,6 @@ emulate_inout(struct vmctx *ctx, int vcpu, int in, int port, int bytes, error = -1; if (!error && in) { - switch (bytes) { - case 1: - mask = 0xff; - break; - case 2: - mask = 0xffff; - break; - default: - mask = 0xffffffff; - break; - } *eax &= ~mask; *eax |= val & mask; } diff --git a/usr.sbin/bhyve/mevent.c b/usr.sbin/bhyve/mevent.c index 82faab2d2..07d3baf72 100644 --- a/usr.sbin/bhyve/mevent.c +++ b/usr.sbin/bhyve/mevent.c @@ -52,9 +52,10 @@ __FBSDID("$FreeBSD$"); #define MEVENT_MAX 64 -#define MEV_ENABLE 1 -#define MEV_DISABLE 2 -#define MEV_DEL_PENDING 3 +#define MEV_ADD 1 +#define MEV_ENABLE 2 +#define MEV_DISABLE 3 +#define MEV_DEL_PENDING 4 extern char *vmname; @@ -147,10 +148,11 @@ mevent_kq_flags(struct mevent *mevp) int ret; switch (mevp->me_state) { + case MEV_ADD: + ret = EV_ADD; /* implicitly enabled */ + break; case MEV_ENABLE: - ret = EV_ADD; - if (mevp->me_type == EVF_TIMER) - ret |= EV_ENABLE; + ret = EV_ENABLE; break; case MEV_DISABLE: ret = EV_DISABLE; @@ -158,6 +160,9 @@ mevent_kq_flags(struct mevent *mevp) case MEV_DEL_PENDING: ret = EV_DELETE; break; + default: + assert(0); + break; } return (ret); @@ -268,12 +273,11 @@ mevent_add(int tfd, enum ev_type type, /* * Allocate an entry, populate it, and add it to the change list. */ - mevp = malloc(sizeof(struct mevent)); + mevp = calloc(1, sizeof(struct mevent)); if (mevp == NULL) { goto exit; } - memset(mevp, 0, sizeof(struct mevent)); if (type == EVF_TIMER) { mevp->me_msecs = tfd; mevp->me_timid = mevent_timid++; @@ -285,7 +289,7 @@ mevent_add(int tfd, enum ev_type type, LIST_INSERT_HEAD(&change_head, mevp, me_list); mevp->me_cq = 1; - mevp->me_state = MEV_ENABLE; + mevp->me_state = MEV_ADD; mevent_notify(); exit: diff --git a/usr.sbin/bhyve/pci_ahci.c b/usr.sbin/bhyve/pci_ahci.c index dd9bfb908..9f6110730 100644 --- a/usr.sbin/bhyve/pci_ahci.c +++ b/usr.sbin/bhyve/pci_ahci.c @@ -1786,8 +1786,7 @@ pci_ahci_init(struct vmctx *ctx, struct pci_devinst *pi, char *opts, int atapi) dbg = fopen("/tmp/log", "w+"); #endif - sc = malloc(sizeof(struct pci_ahci_softc)); - memset(sc, 0, sizeof(struct pci_ahci_softc)); + sc = calloc(1, sizeof(struct pci_ahci_softc)); pi->pi_arg = sc; sc->asc_pi = pi; sc->ports = MAX_PORTS; diff --git a/usr.sbin/bhyve/pci_emul.c b/usr.sbin/bhyve/pci_emul.c index 8fd19ac16..5455f40c9 100644 --- a/usr.sbin/bhyve/pci_emul.c +++ b/usr.sbin/bhyve/pci_emul.c @@ -705,8 +705,7 @@ pci_emul_init(struct vmctx *ctx, struct pci_devemu *pde, int bus, int slot, struct pci_devinst *pdi; int err; - pdi = malloc(sizeof(struct pci_devinst)); - bzero(pdi, sizeof(*pdi)); + pdi = calloc(1, sizeof(struct pci_devinst)); pdi->pi_vmctx = ctx; pdi->pi_bus = bus; @@ -798,8 +797,7 @@ pci_msix_table_init(struct pci_devinst *pi, int table_entries) assert(table_entries <= MAX_MSIX_TABLE_ENTRIES); table_size = table_entries * MSIX_TABLE_ENTRY_SIZE; - pi->pi_msix.table = malloc(table_size); - bzero(pi->pi_msix.table, table_size); + pi->pi_msix.table = calloc(1, table_size); /* set mask bit of vector control register */ for (i = 0; i < table_entries; i++) @@ -1809,8 +1807,7 @@ pci_emul_dinit(struct vmctx *ctx, struct pci_devinst *pi, char *opts) int error; struct pci_emul_dsoftc *sc; - sc = malloc(sizeof(struct pci_emul_dsoftc)); - memset(sc, 0, sizeof(struct pci_emul_dsoftc)); + sc = calloc(1, sizeof(struct pci_emul_dsoftc)); pi->pi_arg = sc; diff --git a/usr.sbin/bhyve/pci_passthru.c b/usr.sbin/bhyve/pci_passthru.c index 562d53267..04d68c4c1 100644 --- 
a/usr.sbin/bhyve/pci_passthru.c +++ b/usr.sbin/bhyve/pci_passthru.c @@ -232,8 +232,7 @@ cfginitmsi(struct passthru_softc *sc) /* Allocate the emulated MSI-X table array */ table_size = pi->pi_msix.table_count * MSIX_TABLE_ENTRY_SIZE; - pi->pi_msix.table = malloc(table_size); - bzero(pi->pi_msix.table, table_size); + pi->pi_msix.table = calloc(1, table_size); /* Mask all table entries */ for (i = 0; i < pi->pi_msix.table_count; i++) { @@ -574,8 +573,7 @@ passthru_init(struct vmctx *ctx, struct pci_devinst *pi, char *opts) if (vm_assign_pptdev(ctx, bus, slot, func) != 0) goto done; - sc = malloc(sizeof(struct passthru_softc)); - memset(sc, 0, sizeof(struct passthru_softc)); + sc = calloc(1, sizeof(struct passthru_softc)); pi->pi_arg = sc; sc->psc_pi = pi; diff --git a/usr.sbin/bhyve/pci_virtio_block.c b/usr.sbin/bhyve/pci_virtio_block.c index 2f37804fe..3474fd7fb 100644 --- a/usr.sbin/bhyve/pci_virtio_block.c +++ b/usr.sbin/bhyve/pci_virtio_block.c @@ -299,8 +299,7 @@ pci_vtblk_init(struct vmctx *ctx, struct pci_devinst *pi, char *opts) assert(sectsz != 0); } - sc = malloc(sizeof(struct pci_vtblk_softc)); - memset(sc, 0, sizeof(struct pci_vtblk_softc)); + sc = calloc(1, sizeof(struct pci_vtblk_softc)); /* record fd of storage device/file */ sc->vbsc_fd = fd; diff --git a/usr.sbin/bhyve/pci_virtio_net.c b/usr.sbin/bhyve/pci_virtio_net.c index e7b54bca4..c3b8690bb 100644 --- a/usr.sbin/bhyve/pci_virtio_net.c +++ b/usr.sbin/bhyve/pci_virtio_net.c @@ -513,8 +513,7 @@ pci_vtnet_init(struct vmctx *ctx, struct pci_devinst *pi, char *opts) char *vtopts; int mac_provided; - sc = malloc(sizeof(struct pci_vtnet_softc)); - memset(sc, 0, sizeof(struct pci_vtnet_softc)); + sc = calloc(1, sizeof(struct pci_vtnet_softc)); pthread_mutex_init(&sc->vsc_mtx, NULL); diff --git a/usr.sbin/bhyve/pci_virtio_rnd.c b/usr.sbin/bhyve/pci_virtio_rnd.c index 38459d216..4d531834c 100644 --- a/usr.sbin/bhyve/pci_virtio_rnd.c +++ b/usr.sbin/bhyve/pci_virtio_rnd.c @@ -155,8 +155,7 @@ pci_vtrnd_init(struct vmctx *ctx, struct pci_devinst *pi, char *opts) return (1); } - sc = malloc(sizeof(struct pci_vtrnd_softc)); - memset(sc, 0, sizeof(struct pci_vtrnd_softc)); + sc = calloc(1, sizeof(struct pci_vtrnd_softc)); vi_softc_linkup(&sc->vrsc_vs, &vtrnd_vi_consts, sc, pi, &sc->vrsc_vq); sc->vrsc_vs.vs_mtx = &sc->vrsc_mtx; diff --git a/usr.sbin/bhyve/uart_emul.c b/usr.sbin/bhyve/uart_emul.c index ae4cab366..4242e5ce9 100644 --- a/usr.sbin/bhyve/uart_emul.c +++ b/usr.sbin/bhyve/uart_emul.c @@ -594,8 +594,7 @@ uart_init(uart_intr_func_t intr_assert, uart_intr_func_t intr_deassert, { struct uart_softc *sc; - sc = malloc(sizeof(struct uart_softc)); - bzero(sc, sizeof(struct uart_softc)); + sc = calloc(1, sizeof(struct uart_softc)); sc->arg = arg; sc->intr_assert = intr_assert; diff --git a/usr.sbin/bhyve/xmsr.c b/usr.sbin/bhyve/xmsr.c index ba94125a0..63522bf02 100644 --- a/usr.sbin/bhyve/xmsr.c +++ b/usr.sbin/bhyve/xmsr.c @@ -47,6 +47,8 @@ emulate_wrmsr(struct vmctx *ctx, int vcpu, uint32_t code, uint64_t val) case 0xd04: /* Sandy Bridge uncore PMC MSRs */ case 0xc24: return (0); + case 0x79: + return (0); /* IA32_BIOS_UPDT_TRIG MSR */ default: break; } -- 2.45.0
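
For reference, a minimal sketch of how a libvmmapi consumer could use the vm_set_memflags() interface introduced by this change. It mirrors the bhyverun.c hunk above (the -C flag path); the include list, the "demo" VM name, the memory size, and the error handling are illustrative assumptions and are not part of the patch — only vm_create(), vm_open(), vm_set_memflags(), vm_setup_memory(), VM_MEM_F_INCORE, and VM_MMAP_ALL are taken from the diff.

#include <sys/types.h>
#include <machine/vmm.h>	/* assumed include order, as in bhyverun.c */
#include <vmmapi.h>

#include <err.h>
#include <stdio.h>

int
main(void)
{
	struct vmctx *ctx;
	int error;

	/* "demo" is a hypothetical VM name; requires /dev/vmm and root. */
	if (vm_create("demo") != 0)
		err(1, "vm_create");
	ctx = vm_open("demo");
	if (ctx == NULL)
		err(1, "vm_open");

	/*
	 * Ask for guest memory segments to be included in a core dump of
	 * this process.  Without this call, setup_memory_segment() now maps
	 * the segments with MAP_NOCORE by default.
	 */
	vm_set_memflags(ctx, VM_MEM_F_INCORE);

	error = vm_setup_memory(ctx, 256 * 1024 * 1024, VM_MMAP_ALL);
	if (error != 0)
		errx(1, "vm_setup_memory: %d", error);

	printf("guest memory mapped; it will appear in this process's core dump\n");
	vm_destroy(ctx);
	return (0);
}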