2 * SPDX-License-Identifier: BSD-2-Clause
4 * Copyright (c) 2008, 2013 Citrix Systems, Inc.
5 * Copyright (c) 2012 Spectra Logic Corporation
8 * Redistribution and use in source and binary forms, with or without
9 * modification, are permitted provided that the following conditions
11 * 1. Redistributions of source code must retain the above copyright
12 * notice, this list of conditions and the following disclaimer.
13 * 2. Redistributions in binary form must reproduce the above copyright
14 * notice, this list of conditions and the following disclaimer in the
15 * documentation and/or other materials provided with the distribution.
17 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
18 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
19 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
20 * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
21 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
22 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
23 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
24 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
25 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
26 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
30 #include <sys/param.h>
32 #include <sys/kernel.h>
33 #include <sys/linker.h>
34 #include <sys/malloc.h>
37 #include <sys/systm.h>
41 #include <vm/vm_param.h>
43 #include <dev/pci/pcivar.h>
45 #include <machine/_inttypes.h>
46 #include <machine/cpufunc.h>
47 #include <machine/cpu.h>
48 #include <machine/md_var.h>
49 #include <machine/metadata.h>
50 #include <machine/smp.h>
52 #include <x86/apicreg.h>
54 #include <xen/xen-os.h>
55 #include <xen/error.h>
56 #include <xen/features.h>
57 #include <xen/gnttab.h>
58 #include <xen/hypervisor.h>
60 #include <xen/xen_intr.h>
62 #include <contrib/xen/arch-x86/cpuid.h>
63 #include <contrib/xen/hvm/params.h>
64 #include <contrib/xen/vcpu.h>
66 /*--------------------------- Forward Declarations ---------------------------*/
/* Forward declaration: the cpu_ops table below refers to it before its definition. */
67 static void xen_hvm_cpu_init(void);
69 /*-------------------------------- Global Data -------------------------------*/
/*
 * CPU operations for Xen HVM guests.  The same routine, xen_hvm_cpu_init(),
 * is installed as both the init hook and the resume hook.
 */
71 struct cpu_ops xen_hvm_cpu_ops = {
72 .cpu_init = xen_hvm_cpu_init,
73 .cpu_resume = xen_hvm_cpu_init
/* malloc(9) type used for the allocations made by this file. */
77 static MALLOC_DEFINE(M_XENHVM, "xen_hvm", "Xen HVM PV Support");
80 * If non-zero, the hypervisor has been configured to use a direct
81 * IDT event callback for interrupt injection.
/* Written by xen_hvm_set_callback() and cleared by xen_hvm_init(). */
83 int xen_vector_callback_enabled;
86 * Signals whether the vector injected for the event channel upcall needs to
87 * be EOI'ed on the local APIC.
/* Set to true by xen_hvm_set_callback() and reset to false by xen_hvm_init(). */
89 bool xen_evtchn_needs_ack;
91 /*------------------------------- Per-CPU Data -------------------------------*/
/*
 * Per-CPU vcpu_info pointer.  Within this file it is tested against NULL in
 * xen_hvm_cpu_init() and reset to NULL by xen_hvm_init() on resume.
 */
92 DPCPU_DECLARE(struct vcpu_info *, vcpu_info);
94 /*------------------------------ Sysctl tunables -----------------------------*/
/*
 * Boot-time tunables read by xen_hvm_disable_emulated_devices().  A value of
 * 0 (the default) makes that routine request the unplug of the corresponding
 * emulated devices (disks or NICs).
 */
95 int xen_disable_pv_disks = 0;
96 int xen_disable_pv_nics = 0;
97 TUNABLE_INT("hw.xen.disable_pv_disks", &xen_disable_pv_disks);
98 TUNABLE_INT("hw.xen.disable_pv_nics", &xen_disable_pv_nics);
100 /*---------------------- XEN Hypervisor Probe and Setup ----------------------*/
/*
 * Emit `size' bytes starting at `str' on the Xen HVM debug console I/O port
 * (XEN_HVM_DEBUGCONS_IOPORT) with a single outsb().
 */
102 void xen_emergency_print(const char *str, size_t size)
104 outsb(XEN_HVM_DEBUGCONS_IOPORT, str, size);
/*
 * Base Xen CPUID leaf, as returned by xen_hvm_cpuid_base().  The rest of this
 * file treats 0 as "Xen not detected".
 */
107 uint32_t xen_cpuid_base;
110 xen_hvm_cpuid_base(void)
112 uint32_t base, regs[4];
114 for (base = 0x40000000; base < 0x40010000; base += 0x100) {
115 do_cpuid(base, regs);
116 if (!memcmp("XenVMMXenVMM", ®s[1], 12)
117 && (regs[0] - base) >= 2)
/*
 * Apply workarounds keyed on the hypervisor version.
 *
 * major, minor: hypervisor version numbers, as decoded by
 *               hypervisor_version().
 *
 * For version 4.5 and older, and only while msix_disable_migration still
 * holds -1, a message is emitted and msix_disable_migration is set to 1.
 * NOTE(review): -1 presumably means "not configured by the administrator";
 * confirm against the definition of machdep.msix_disable_migration.
 */
124 hypervisor_quirks(unsigned int major, unsigned int minor)
127 if (((major < 4) || (major == 4 && minor <= 5)) &&
128 msix_disable_migration == -1) {
130 * Xen hypervisors prior to 4.6.0 do not properly
131 * handle updates to enabled MSI-X table entries,
132 * so disable MSI-X interrupt migration in that
137 "Disabling MSI-X interrupt migration due to Xen hypervisor bug.\n"
138 "Set machdep.msix_disable_migration=0 to forcefully enable it.\n");
139 msix_disable_migration = 1;
/*
 * Query CPUID leaf xen_cpuid_base + 1, decode the hypervisor version from
 * regs[0] (major in the upper 16 bits, minor in the lower 16 bits), print
 * it, and pass it on to hypervisor_quirks().
 */
145 hypervisor_version(void)
150 do_cpuid(xen_cpuid_base + 1, regs);
152 major = regs[0] >> 16;
153 minor = regs[0] & 0xffff;
154 printf("XEN: Hypervisor version %d.%d detected.\n", major, minor);
/* Version-dependent workarounds. */
156 hypervisor_quirks(major, minor);
160 * Allocate and fill in the hypercall page.
/*
 * init_type: selects how the physical address of hypercall_page is computed
 *            for the wrmsr() below.
 *
 * NOTE(review): the statements taken when xen_cpuid_base is already non-zero
 * and when detection fails are not visible here; confirm the return values
 * against the callers (xen_hvm_init() stores the result in `error').
 */
163 xen_hvm_init_hypercall_stubs(enum xen_hvm_init_type init_type)
167 if (xen_cpuid_base != 0)
/* Look for the Xen CPUID leaves; 0 means no Xen signature was found. */
171 xen_cpuid_base = xen_hvm_cpuid_base();
172 if (xen_cpuid_base == 0)
176 * Find the hypercall pages.
178 do_cpuid(xen_cpuid_base + 2, regs);
/*
 * regs[1] names the MSR to write.  The value written is the physical address
 * of hypercall_page: for XEN_HVM_INIT_EARLY it is derived by subtracting
 * KERNBASE from the linear address, otherwise vtophys() is used.
 */
182 wrmsr(regs[1], (init_type == XEN_HVM_INIT_EARLY)
183 ? (vm_paddr_t)((uintptr_t)&hypercall_page - KERNBASE)
184 : vtophys(&hypercall_page));
/* Print the hypervisor version and apply version-specific quirks. */
187 hypervisor_version();
192 * Translate linear to physical address when still running on the bootloader
193 * created page-tables.
/*
 * addr: kernel linear address.  Values below KERNBASE are reported through
 *       xc_printf() as invalid.
 *
 * The result is computed as addr - KERNBASE with kernphys - KERNLOAD added.
 * NOTE(review): confirm whether the kernphys - KERNLOAD adjustment is
 * conditional (e.g. per architecture); the lines around it are not visible.
 */
196 early_init_vtop(void *addr)
200 * Using a KASSERT won't print anything, as this is before console
203 if (__predict_false((uintptr_t)addr < KERNBASE)) {
204 xc_printf("invalid linear address: %#lx\n", (uintptr_t)addr);
208 return ((uintptr_t)addr - KERNBASE
210 + kernphys - KERNLOAD
/*
 * Ask Xen (XENMEM_add_to_physmap, XENMAPSPACE_shared_info) to place the
 * shared info page at the guest frame that backs the page-aligned object
 * shared_page.
 *
 * When the hypercall fails the error is printed and HYPERVISOR_shared_info is
 * set to NULL.  On success HYPERVISOR_shared_info is pointed at
 * shared_page.shared_info unless it was already non-NULL.
 * The hypercall result is kept in `rc'; the caller visible in this file
 * stores this function's result in its own `rc'.
 */
216 map_shared_info(void)
219 * TODO shared info page should be mapped in an unpopulated (IOW:
220 * non-RAM) address. But finding one at this point in boot is
221 * complicated, hence re-use a RAM address for the time being. This
222 * sadly causes super-page shattering in the second stage translation
/*
 * shared_info overlaid with a PAGE_SIZE byte array; the assert below checks
 * that the whole object is exactly one page.
 */
226 shared_info_t shared_info;
227 uint8_t raw[PAGE_SIZE];
228 } shared_page __attribute__((aligned(PAGE_SIZE)));
229 static struct xen_add_to_physmap xatp = {
231 .space = XENMAPSPACE_shared_info,
235 _Static_assert(sizeof(shared_page) == PAGE_SIZE,
236 "invalid Xen shared_info struct size");
/* Translate with early_init_vtop() and convert the address to a frame number. */
239 xatp.gpfn = atop(early_init_vtop(&shared_page.shared_info));
241 rc = HYPERVISOR_memory_op(XENMEM_add_to_physmap, &xatp);
243 xc_printf("cannot map shared info page: %d\n", rc);
244 HYPERVISOR_shared_info = NULL;
245 } else if (HYPERVISOR_shared_info == NULL)
246 HYPERVISOR_shared_info = &shared_page.shared_info;
/*
 * Refresh the framebuffer metadata found in the kernel's preload metadata
 * with the console information reported by Xen through
 * XENPF_get_dom0_console.
 */
254 struct xen_platform_op op = {
255 .cmd = XENPF_get_dom0_console,
257 xenpf_dom0_console_t *console = &op.u.dom0_console;
/* Locate the kernel's preload metadata, trying both type strings. */
265 kmdp = preload_search_by_type("elf kernel");
267 kmdp = preload_search_by_type("elf64 kernel");
269 xc_printf("Unable to find kernel metadata\n");
/*
 * The hypercall result is kept in `size'.  It is printed as an error code on
 * failure and is later related to the offset of ext_lfb_base.
 * NOTE(review): presumably a successful call returns the number of valid
 * bytes in *console; confirm against the Xen platform-op documentation.
 */
273 size = HYPERVISOR_platform_op(&op);
275 xc_printf("Failed to get video console info: %d\n", size);
279 switch (console->video_type) {
280 case XEN_VGATYPE_VESA_LFB:
281 fb = (__typeof__ (fb))preload_search_info(kmdp,
282 MODINFO_METADATA | MODINFOMD_VBE_FB);
285 xc_printf("No VBE FB in kernel metadata\n");
/*
 * The assert checks that vbe_fb::fb_bpp sits immediately after
 * efi_fb::fb_mask_reserved, so the VBE layout extends the EFI one.
 */
289 _Static_assert(offsetof(struct vbe_fb, fb_bpp) ==
290 offsetof(struct efi_fb, fb_mask_reserved) +
291 sizeof(fb->efi.fb_mask_reserved),
292 "Bad structure overlay\n");
293 fb->vbe.fb_bpp = console->u.vesa_lfb.bits_per_pixel;
295 case XEN_VGATYPE_EFI_LFB:
297 fb = (__typeof__ (fb))preload_search_info(kmdp,
298 MODINFO_METADATA | MODINFOMD_EFI_FB);
300 xc_printf("No EFI FB in kernel metadata\n");
/* Copy the geometry reported by Xen into the EFI framebuffer record. */
305 fb->efi.fb_addr = console->u.vesa_lfb.lfb_base;
307 offsetof(xenpf_dom0_console_t, u.vesa_lfb.ext_lfb_base))
309 (uint64_t)console->u.vesa_lfb.ext_lfb_base << 32;
/* NOTE(review): the << 16 suggests lfb_size is in 64 KiB units; confirm. */
310 fb->efi.fb_size = console->u.vesa_lfb.lfb_size << 16;
311 fb->efi.fb_height = console->u.vesa_lfb.height;
312 fb->efi.fb_width = console->u.vesa_lfb.width;
/* bytes_per_line is converted to bits and divided by bits_per_pixel. */
313 fb->efi.fb_stride = (console->u.vesa_lfb.bytes_per_line << 3) /
314 console->u.vesa_lfb.bits_per_pixel;
/*
 * FBMASK(c) builds a 32-bit mask with c_size bits set starting at bit c_pos.
 * NOTE(review): when c_pos + c_size is 0 the right-shift count becomes 32,
 * which is undefined for a 32-bit operand; confirm Xen never reports a
 * zero-sized field at position 0 (the reserved channel is the likely case).
 */
316 ((~0u << console->u.vesa_lfb.c ## _pos) & \
317 (~0u >> (32 - console->u.vesa_lfb.c ## _pos - \
318 console->u.vesa_lfb.c ## _size)))
319 fb->efi.fb_mask_red = FBMASK(red);
320 fb->efi.fb_mask_green = FBMASK(green);
321 fb->efi.fb_mask_blue = FBMASK(blue);
322 fb->efi.fb_mask_reserved = FBMASK(rsvd);
327 xc_printf("Video console type unsupported\n");
332 /* Early initialization when running as a Xen guest. */
/*
 * Visible steps: detect the Xen CPUID base, query leaf base + 2 for the
 * hypercall pages, write the early-translated address of hypercall_page to
 * the MSR named by regs[1], then map the shared info page.
 */
339 xen_cpuid_base = xen_hvm_cpuid_base();
340 if (xen_cpuid_base == 0)
343 /* Find the hypercall pages. */
344 do_cpuid(xen_cpuid_base + 2, regs);
/*
 * NOTE(review): vm_guest is set to VM_GUEST_VM next to both the
 * "invalid number of hypercall pages" message and the map_shared_info()
 * call; presumably these are the failure paths that give up on Xen-specific
 * setup.  Confirm against the conditions around them.
 */
346 xc_printf("Invalid number of hypercall pages %u\n",
348 vm_guest = VM_GUEST_VM;
/* Page tables are still the bootloader's, hence early_init_vtop(). */
352 wrmsr(regs[1], early_init_vtop(&hypercall_page));
354 rc = map_shared_info();
356 vm_guest = VM_GUEST_VM;
360 if (xen_initial_domain())
361 /* Fixup video console information in case Xen changed the mode. */
/*
 * Make sure the Xen shared info page is allocated and mapped.
 *
 * For PV domains nothing has to be set up (see the comment below).  Otherwise
 * a page is allocated when HYPERVISOR_shared_info is still NULL, and
 * XENMEM_add_to_physmap is issued for it.  Both an allocation failure and a
 * hypercall failure panic.
 */
366 xen_hvm_init_shared_info_page(void)
368 struct xen_add_to_physmap xatp;
370 if (xen_pv_domain()) {
372 * Already setup in the PV case, shared_info is passed inside
373 * of the start_info struct at start of day.
378 if (HYPERVISOR_shared_info == NULL) {
/* M_NOWAIT: a failed allocation is turned into a panic just below. */
379 HYPERVISOR_shared_info = malloc(PAGE_SIZE, M_XENHVM, M_NOWAIT);
380 if (HYPERVISOR_shared_info == NULL)
381 panic("Unable to allocate Xen shared info page");
384 xatp.domid = DOMID_SELF;
386 xatp.space = XENMAPSPACE_shared_info;
/* Guest frame number of the page that will hold the shared info. */
387 xatp.gpfn = vtophys(HYPERVISOR_shared_info) >> PAGE_SHIFT;
388 if (HYPERVISOR_memory_op(XENMEM_add_to_physmap, &xatp))
389 panic("HYPERVISOR_memory_op failed");
/*
 * Register IDT_EVTCHN as the event channel upcall vector through
 * HVMOP_set_evtchn_upcall_vector.
 *
 * vcpu: vCPU the vector is registered for.
 *       NOTE(review): the assignment of `vcpu' into `vec' is not visible
 *       here; confirm it is stored before the hypercall.
 *
 * Returns 0 on success, otherwise the hypercall error passed through
 * xen_translate_error().
 */
393 set_percpu_callback(unsigned int vcpu)
395 struct xen_hvm_evtchn_upcall_vector vec;
399 vec.vector = IDT_EVTCHN;
400 error = HYPERVISOR_hvm_op(HVMOP_set_evtchn_upcall_vector, &vec);
402 return (error != 0 ? xen_translate_error(error) : 0);
406 * Tell the hypervisor how to contact us for event channel callbacks.
/*
 * dev: device whose PCI interrupt (IRQ, or slot and pin) is used for the
 *      emulated-interrupt fallback.  xen_hvm_init() in this file calls this
 *      routine with NULL.
 *
 * Two mechanisms are visible:
 *  - When XENFEAT_hvm_callback_vector is available, the per-vCPU upcall
 *    vector is tried for vCPU 0 and HVM_PARAM_CALLBACK_IRQ is set to
 *    HVM_CALLBACK_VECTOR(IDT_EVTCHN); success sets
 *    xen_vector_callback_enabled to 1.
 *  - Otherwise HVM_PARAM_CALLBACK_IRQ is set to a GSI or PCI INTx value
 *    derived from `dev'; failure of that hypercall panics.
 */
409 xen_hvm_set_callback(device_t dev)
411 struct xen_hvm_param xhp;
/* NOTE(review): presumably nothing more to do once the vector callback is active. */
414 if (xen_vector_callback_enabled)
417 xhp.domid = DOMID_SELF;
418 xhp.index = HVM_PARAM_CALLBACK_IRQ;
419 if (xen_feature(XENFEAT_hvm_callback_vector) != 0) {
422 error = set_percpu_callback(0);
/* Also consulted by xen_hvm_cpu_init() to register the other vCPUs. */
424 xen_evtchn_needs_ack = true;
425 /* Trick toolstack to think we are enlightened */
428 xhp.value = HVM_CALLBACK_VECTOR(IDT_EVTCHN);
429 error = HYPERVISOR_hvm_op(HVMOP_set_param, &xhp);
431 xen_vector_callback_enabled = 1;
433 } else if (xen_evtchn_needs_ack)
434 panic("Unable to setup fake HVM param: %d", error);
436 printf("Xen HVM callback vector registration failed (%d). "
437 "Falling back to emulated device interrupt\n", error);
439 xen_vector_callback_enabled = 0;
442 * Called from early boot or resume.
443 * xenpci will invoke us again later.
448 irq = pci_get_irq(dev);
450 xhp.value = HVM_CALLBACK_GSI(irq);
455 slot = pci_get_slot(dev);
/* The pin value passed to HVM_CALLBACK_PCI_INTX() is pci_get_intpin() minus one. */
456 pin = pci_get_intpin(dev) - 1;
457 xhp.value = HVM_CALLBACK_PCI_INTX(slot, pin);
460 if (HYPERVISOR_hvm_op(HVMOP_set_param, &xhp) != 0)
461 panic("Can't set evtchn callback");
/*
 * I/O port used by xen_hvm_disable_emulated_devices(): the magic value is
 * read from it and the unplug mask is written to it.
 */
464 #define XEN_MAGIC_IOPORT 0x10
/* Bit flags that are OR'ed into the unplug mask written to XEN_MAGIC_IOPORT. */
467 XMI_UNPLUG_IDE_DISKS = 0x01,
468 XMI_UNPLUG_NICS = 0x02,
469 XMI_UNPLUG_IDE_EXCEPT_PRI_MASTER = 0x04
/*
 * Request the unplug of emulated disks and NICs through XEN_MAGIC_IOPORT.
 *
 * PV domains have no emulated devices (see the comment below); the only
 * visible action there is a message when either tunable is set.  Otherwise
 * the port is checked for XMI_MAGIC, each device class whose tunable is 0 is
 * added to the mask, and a non-empty mask is written to the port.
 */
473 xen_hvm_disable_emulated_devices(void)
475 u_short disable_devs = 0;
477 if (xen_pv_domain()) {
479 * No emulated devices in the PV case, so no need to unplug
482 if (xen_disable_pv_disks != 0 || xen_disable_pv_nics != 0)
483 printf("PV devices cannot be disabled in PV guests\n");
/* NOTE(review): presumably bails out when the magic value is absent; confirm. */
487 if (inw(XEN_MAGIC_IOPORT) != XMI_MAGIC)
/* Tunable left at 0: emulated disks are unplugged. */
490 if (xen_disable_pv_disks == 0) {
492 printf("XEN: disabling emulated disks\n");
493 disable_devs |= XMI_UNPLUG_IDE_DISKS;
/* Tunable left at 0: emulated NICs are unplugged. */
495 if (xen_disable_pv_nics == 0) {
497 printf("XEN: disabling emulated nics\n");
498 disable_devs |= XMI_UNPLUG_NICS;
501 if (disable_devs != 0)
502 outw(XEN_MAGIC_IOPORT, disable_devs);
/*
 * Common Xen HVM initialization, run at boot (XEN_HVM_INIT_LATE, from
 * xen_hvm_sysinit()) and on resume (from xen_hvm_resume()).
 *
 * init_type: XEN_HVM_INIT_LATE, XEN_HVM_INIT_RESUME or
 *            XEN_HVM_INIT_CANCELLED_SUSPEND; any other value reaching the
 *            switch panics.
 *
 * Visible sequence: set up the hypercall stubs, perform the per-type work,
 * reset the callback state and re-register the callback, then set up the
 * shared info page and unplug emulated devices.
 */
506 xen_hvm_init(enum xen_hvm_init_type init_type)
/* NOTE(review): presumably a cancelled suspend needs no re-initialization; confirm. */
512 init_type == XEN_HVM_INIT_CANCELLED_SUSPEND)
515 error = xen_hvm_init_hypercall_stubs(init_type);
518 case XEN_HVM_INIT_LATE:
522 setup_xen_features();
/* Install the Xen-specific CPU init and resume hooks. */
524 cpu_ops = xen_hvm_cpu_ops;
527 case XEN_HVM_INIT_RESUME:
529 panic("Unable to init Xen hypercall stubs on resume");
531 /* Clear stale vcpu_info. */
533 DPCPU_ID_SET(i, vcpu_info, NULL);
536 panic("Unsupported HVM initialization type");
/* Forget any earlier callback registration before registering again. */
539 xen_vector_callback_enabled = 0;
540 xen_evtchn_needs_ack = false;
541 xen_hvm_set_callback(NULL);
544 * On (PV)HVM domains we need to request the hypervisor to
545 * fill the shared info page, for PVH guest the shared_info page
546 * is passed inside the start_info struct and is already set, so this
547 * functions are no-ops.
549 xen_hvm_init_shared_info_page();
550 xen_hvm_disable_emulated_devices();
/*
 * Suspend-side counterpart of xen_hvm_resume().
 * NOTE(review): none of the body is visible here; confirm whether any work
 * is done before suspend.
 */
554 xen_hvm_suspend(void)
/*
 * Re-run the Xen HVM initialization after a suspend attempt.
 *
 * suspend_cancelled: true selects XEN_HVM_INIT_CANCELLED_SUSPEND, false
 *                    selects XEN_HVM_INIT_RESUME.
 */
559 xen_hvm_resume(bool suspend_cancelled)
562 xen_hvm_init(suspend_cancelled ?
563 XEN_HVM_INIT_CANCELLED_SUSPEND : XEN_HVM_INIT_RESUME);
565 /* Register vcpu_info area for CPU#0. */
/*
 * SYSINIT entry point: runs xen_hvm_init() with XEN_HVM_INIT_LATE.
 * The argument is unused.
 */
570 xen_hvm_sysinit(void *arg __unused)
572 xen_hvm_init(XEN_HVM_INIT_LATE);
/* Registered at SI_SUB_HYPERVISOR, SI_ORDER_FIRST. */
574 SYSINIT(xen_hvm_init, SI_SUB_HYPERVISOR, SI_ORDER_FIRST, xen_hvm_sysinit, NULL);
/*
 * Per-CPU Xen setup.  Installed as both cpu_init and cpu_resume in
 * xen_hvm_cpu_ops, and also registered through the SYSINIT below.
 *
 * Visible steps: check whether this CPU's vcpu_info is already set, record
 * the vCPU ID in the per-CPU data, register the event channel upcall vector
 * on non-boot CPUs when xen_evtchn_needs_ack is set (failure panics), and
 * call xen_setup_vcpu_info().
 */
577 xen_hvm_cpu_init(void)
585 if (DPCPU_GET(vcpu_info) != NULL) {
587 * vcpu_info is already set. We're resuming
588 * from a failed migration and our pre-suspend
589 * configuration is still valid.
595 * Set vCPU ID. If available fetch the ID from CPUID, if not just use
598 KASSERT(xen_cpuid_base != 0, ("Invalid base Xen CPUID leaf"));
/* Leaf xen_cpuid_base + 4: flags in regs[0], vCPU ID in regs[1]. */
599 cpuid_count(xen_cpuid_base + 4, 0, regs);
600 KASSERT((regs[0] & XEN_HVM_CPUID_VCPU_ID_PRESENT) ||
602 ("Xen PV domain without vcpu_id in cpuid"));
/* Fall back to the ACPI ID when the hypervisor does not supply a vCPU ID. */
603 PCPU_SET(vcpu_id, (regs[0] & XEN_HVM_CPUID_VCPU_ID_PRESENT) ?
604 regs[1] : PCPU_GET(acpi_id));
/*
 * xen_hvm_set_callback() already issues set_percpu_callback(0).
 * NOTE(review): presumably that covers the BSP, hence the !IS_BSP() test.
 */
606 if (xen_evtchn_needs_ack && !IS_BSP()) {
608 * Setup the per-vpcu event channel upcall vector. This is only
609 * required when using the new HVMOP_set_evtchn_upcall_vector
610 * hypercall, which allows using a different vector for each
611 * vCPU. Note that FreeBSD uses the same vector for all vCPUs
612 * because it's not dynamically allocated.
614 rc = set_percpu_callback(PCPU_GET(vcpu_id));
616 panic("Event channel upcall vector setup failed: %d",
620 xen_setup_vcpu_info();
/* Registered at SI_SUB_INTR, SI_ORDER_FIRST. */
622 SYSINIT(xen_hvm_cpu_init, SI_SUB_INTR, SI_ORDER_FIRST, xen_hvm_cpu_init, NULL);
/*
 * Report whether the XEN_HVM_CPUID_IOMMU_MAPPINGS flag is set in regs[0] of
 * CPUID leaf xen_cpuid_base + 4 (sub-leaf 0).
 *
 * Returns the masked flag value: non-zero when the flag is set, 0 otherwise.
 * Requires xen_cpuid_base to have been detected (asserted below).
 */
625 xen_has_iommu_maps(void)
629 KASSERT(xen_cpuid_base != 0, ("Invalid base Xen CPUID leaf"));
630 cpuid_count(xen_cpuid_base + 4, 0, regs);
632 return (regs[0] & XEN_HVM_CPUID_IOMMU_MAPPINGS);