]> CyberLeo.Net >> Repos - FreeBSD/FreeBSD.git/blob - sys/x86/xen/hvm.c
x86/xen: do video console fixup as part of early initialization
[FreeBSD/FreeBSD.git] / sys / x86 / xen / hvm.c
1 /*-
2  * SPDX-License-Identifier: BSD-2-Clause
3  *
4  * Copyright (c) 2008, 2013 Citrix Systems, Inc.
5  * Copyright (c) 2012 Spectra Logic Corporation
6  * All rights reserved.
7  *
8  * Redistribution and use in source and binary forms, with or without
9  * modification, are permitted provided that the following conditions
10  * are met:
11  * 1. Redistributions of source code must retain the above copyright
12  *    notice, this list of conditions and the following disclaimer.
13  * 2. Redistributions in binary form must reproduce the above copyright
14  *    notice, this list of conditions and the following disclaimer in the
15  *    documentation and/or other materials provided with the distribution.
16  *
17  * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
18  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
19  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
20  * ARE DISCLAIMED.  IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
21  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
22  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
23  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
24  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
25  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
26  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
27  * SUCH DAMAGE.
28  */
29
30 #include <sys/param.h>
31 #include <sys/bus.h>
32 #include <sys/kernel.h>
33 #include <sys/linker.h>
34 #include <sys/malloc.h>
35 #include <sys/proc.h>
36 #include <sys/smp.h>
37 #include <sys/systm.h>
38
39 #include <vm/vm.h>
40 #include <vm/pmap.h>
41 #include <vm/vm_param.h>
42
43 #include <dev/pci/pcivar.h>
44
45 #include <machine/_inttypes.h>
46 #include <machine/cpufunc.h>
47 #include <machine/cpu.h>
48 #include <machine/md_var.h>
49 #include <machine/metadata.h>
50 #include <machine/smp.h>
51
52 #include <x86/apicreg.h>
53
54 #include <xen/xen-os.h>
55 #include <xen/error.h>
56 #include <xen/features.h>
57 #include <xen/gnttab.h>
58 #include <xen/hypervisor.h>
59 #include <xen/hvm.h>
60 #include <xen/xen_intr.h>
61
62 #include <contrib/xen/arch-x86/cpuid.h>
63 #include <contrib/xen/hvm/params.h>
64 #include <contrib/xen/vcpu.h>
65
/*--------------------------- Forward Declarations ---------------------------*/
/* Needed early because xen_hvm_cpu_ops below takes this function's address. */
static void xen_hvm_cpu_init(void);

/*-------------------------------- Global Data -------------------------------*/
#ifdef SMP
/*
 * CPU hooks copied into cpu_ops by xen_hvm_init(); the same routine serves
 * both the init and the resume hook.
 */
struct cpu_ops xen_hvm_cpu_ops = {
	.cpu_init	= xen_hvm_cpu_init,
	.cpu_resume	= xen_hvm_cpu_init
};
#endif

/* Malloc type used for the allocations made by this file. */
static MALLOC_DEFINE(M_XENHVM, "xen_hvm", "Xen HVM PV Support");

/**
 * If non-zero, the hypervisor has been configured to use a direct
 * IDT event callback for interrupt injection.
 */
int xen_vector_callback_enabled;

/**
 * Signal whether the vector injected for the event channel upcall requires to
 * be EOI'ed on the local APIC.
 */
bool xen_evtchn_needs_ack;

/*------------------------------- Per-CPU Data -------------------------------*/
/* Per-CPU pointer to the vcpu_info area; this is a declaration only. */
DPCPU_DECLARE(struct vcpu_info *, vcpu_info);

/*------------------------------ Sysctl tunables -----------------------------*/
/*
 * Integer tunables.  While a knob is left at zero,
 * xen_hvm_disable_emulated_devices() requests the unplug of the matching
 * class of emulated devices; a non-zero value skips that request.
 */
int xen_disable_pv_disks = 0;
int xen_disable_pv_nics = 0;
TUNABLE_INT("hw.xen.disable_pv_disks", &xen_disable_pv_disks);
TUNABLE_INT("hw.xen.disable_pv_nics", &xen_disable_pv_nics);
99
100 /*---------------------- XEN Hypervisor Probe and Setup ----------------------*/
101
102 void xen_emergency_print(const char *str, size_t size)
103 {
104         outsb(XEN_HVM_DEBUGCONS_IOPORT, str, size);
105 }
106
/* Base of the Xen CPUID leaves; zero means no Xen signature has been found. */
uint32_t xen_cpuid_base;

/*
 * Walk the CPUID range 0x40000000-0x4000ff00 in steps of 0x100 looking for
 * the "XenVMMXenVMM" signature in the three registers following EAX.
 *
 * A candidate is only accepted when it advertises at least two leaves above
 * its own base, since the callers query base + 1 and base + 2.
 *
 * Returns the base leaf on success, or 0 when no Xen signature is present.
 */
static uint32_t
xen_hvm_cpuid_base(void)
{
	static const char signature[] = "XenVMMXenVMM";
	uint32_t leaf, regs[4];

	for (leaf = 0x40000000; leaf < 0x40010000; leaf += 0x100) {
		do_cpuid(leaf, regs);

		/* regs[1..3] hold the 12-byte hypervisor signature. */
		if (memcmp(signature, &regs[1], 12) != 0)
			continue;
		/* regs[0] is the highest leaf available from this base. */
		if (regs[0] - leaf < 2)
			continue;

		return (leaf);
	}

	return (0);
}
122
/*
 * Apply workarounds that depend on the hypervisor version.
 *
 * major, minor: version numbers as reported by the hypervisor.
 *
 * The only quirk handled is compiled in for SMP kernels; otherwise this
 * function does nothing.
 */
static void
hypervisor_quirks(unsigned int major, unsigned int minor)
{
#ifdef SMP
	bool older_than_4_6;

	older_than_4_6 = major < 4 || (major == 4 && minor <= 5);

	/*
	 * Leave the setting alone unless it still holds -1 (presumably the
	 * "not configured" default -- confirm against its definition).
	 */
	if (!older_than_4_6 || msix_disable_migration != -1)
		return;

	/*
	 * Xen hypervisors prior to 4.6.0 do not properly
	 * handle updates to enabled MSI-X table entries,
	 * so disable MSI-X interrupt migration in that
	 * case.
	 */
	if (bootverbose) {
		printf(
"Disabling MSI-X interrupt migration due to Xen hypervisor bug.\n"
"Set machdep.msix_disable_migration=0 to forcefully enable it.\n");
	}
	msix_disable_migration = 1;
#endif
}
143
144 static void
145 hypervisor_version(void)
146 {
147         uint32_t regs[4];
148         int major, minor;
149
150         do_cpuid(xen_cpuid_base + 1, regs);
151
152         major = regs[0] >> 16;
153         minor = regs[0] & 0xffff;
154         printf("XEN: Hypervisor version %d.%d detected.\n", major, minor);
155
156         hypervisor_quirks(major, minor);
157 }
158
159 /*
160  * Allocate and fill in the hypcall page.
161  */
162 int
163 xen_hvm_init_hypercall_stubs(enum xen_hvm_init_type init_type)
164 {
165         uint32_t regs[4];
166
167         if (xen_cpuid_base != 0)
168                 /* Already setup. */
169                 goto out;
170
171         xen_cpuid_base = xen_hvm_cpuid_base();
172         if (xen_cpuid_base == 0)
173                 return (ENXIO);
174
175         /*
176          * Find the hypercall pages.
177          */
178         do_cpuid(xen_cpuid_base + 2, regs);
179         if (regs[0] != 1)
180                 return (EINVAL);
181
182         wrmsr(regs[1], (init_type == XEN_HVM_INIT_EARLY)
183             ? (vm_paddr_t)((uintptr_t)&hypercall_page - KERNBASE)
184             : vtophys(&hypercall_page));
185
186 out:
187         hypervisor_version();
188         return (0);
189 }
190
191 /*
192  * Translate linear to physical address when still running on the bootloader
193  * created page-tables.
194  */
195 static vm_paddr_t
196 early_init_vtop(void *addr)
197 {
198
199         /*
200          * Using a KASSERT won't print anything, as this is before console
201          * initialization.
202          */
203         if (__predict_false((uintptr_t)addr < KERNBASE)) {
204                 xc_printf("invalid linear address: %#lx\n", (uintptr_t)addr);
205                 halt();
206         }
207
208         return ((uintptr_t)addr - KERNBASE
209 #ifdef __amd64__
210             + kernphys - KERNLOAD
211 #endif
212             );
213 }
214
215 static int
216 map_shared_info(void)
217 {
218         /*
219          * TODO shared info page should be mapped in an unpopulated (IOW:
220          * non-RAM) address.  But finding one at this point in boot is
221          * complicated, hence re-use a RAM address for the time being.  This
222          * sadly causes super-page shattering in the second stage translation
223          * page tables.
224          */
225         static union {
226                 shared_info_t shared_info;
227                 uint8_t raw[PAGE_SIZE];
228         } shared_page __attribute__((aligned(PAGE_SIZE)));
229         static struct xen_add_to_physmap xatp = {
230             .domid = DOMID_SELF,
231             .space = XENMAPSPACE_shared_info,
232         };
233         int rc;
234
235         _Static_assert(sizeof(shared_page) == PAGE_SIZE,
236             "invalid Xen shared_info struct size");
237
238         if (xatp.gpfn == 0)
239                 xatp.gpfn = atop(early_init_vtop(&shared_page.shared_info));
240
241         rc = HYPERVISOR_memory_op(XENMEM_add_to_physmap, &xatp);
242         if (rc != 0) {
243                 xc_printf("cannot map shared info page: %d\n", rc);
244                 HYPERVISOR_shared_info = NULL;
245         } else if (HYPERVISOR_shared_info == NULL)
246                 HYPERVISOR_shared_info = &shared_page.shared_info;
247
248         return (rc);
249 }
250
251 static void
252 fixup_console(void)
253 {
254         struct xen_platform_op op = {
255                 .cmd = XENPF_get_dom0_console,
256         };
257         xenpf_dom0_console_t *console = &op.u.dom0_console;
258         union {
259                 struct efi_fb efi;
260                 struct vbe_fb vbe;
261         } *fb = NULL;
262         int size;
263         caddr_t kmdp;
264
265         kmdp = preload_search_by_type("elf kernel");
266         if (kmdp == NULL)
267                 kmdp = preload_search_by_type("elf64 kernel");
268         if (kmdp == NULL) {
269                 xc_printf("Unable to find kernel metadata\n");
270                 return;
271         }
272
273         size = HYPERVISOR_platform_op(&op);
274         if (size < 0) {
275                 xc_printf("Failed to get video console info: %d\n", size);
276                 return;
277         }
278
279         switch (console->video_type) {
280         case XEN_VGATYPE_VESA_LFB:
281                 fb = (__typeof__ (fb))preload_search_info(kmdp,
282                     MODINFO_METADATA | MODINFOMD_VBE_FB);
283
284                 if (fb == NULL) {
285                         xc_printf("No VBE FB in kernel metadata\n");
286                         return;
287                 }
288
289                 _Static_assert(offsetof(struct vbe_fb, fb_bpp) ==
290                     offsetof(struct efi_fb, fb_mask_reserved) +
291                     sizeof(fb->efi.fb_mask_reserved),
292                     "Bad structure overlay\n");
293                 fb->vbe.fb_bpp = console->u.vesa_lfb.bits_per_pixel;
294                 /* FALLTHROUGH */
295         case XEN_VGATYPE_EFI_LFB:
296                 if (fb == NULL) {
297                         fb = (__typeof__ (fb))preload_search_info(kmdp,
298                             MODINFO_METADATA | MODINFOMD_EFI_FB);
299                         if (fb == NULL) {
300                                 xc_printf("No EFI FB in kernel metadata\n");
301                                 return;
302                         }
303                 }
304
305                 fb->efi.fb_addr = console->u.vesa_lfb.lfb_base;
306                 if (size >
307                     offsetof(xenpf_dom0_console_t, u.vesa_lfb.ext_lfb_base))
308                         fb->efi.fb_addr |=
309                             (uint64_t)console->u.vesa_lfb.ext_lfb_base << 32;
310                 fb->efi.fb_size = console->u.vesa_lfb.lfb_size << 16;
311                 fb->efi.fb_height = console->u.vesa_lfb.height;
312                 fb->efi.fb_width = console->u.vesa_lfb.width;
313                 fb->efi.fb_stride = (console->u.vesa_lfb.bytes_per_line << 3) /
314                     console->u.vesa_lfb.bits_per_pixel;
315 #define FBMASK(c) \
316     ((~0u << console->u.vesa_lfb.c ## _pos) & \
317     (~0u >> (32 - console->u.vesa_lfb.c ## _pos - \
318     console->u.vesa_lfb.c ## _size)))
319                 fb->efi.fb_mask_red = FBMASK(red);
320                 fb->efi.fb_mask_green = FBMASK(green);
321                 fb->efi.fb_mask_blue = FBMASK(blue);
322                 fb->efi.fb_mask_reserved = FBMASK(rsvd);
323 #undef FBMASK
324                 break;
325
326         default:
327                 xc_printf("Video console type unsupported\n");
328                 return;
329         }
330 }
331
332 /* Early initialization when running as a Xen guest. */
333 void
334 xen_early_init(void)
335 {
336         uint32_t regs[4];
337         int rc;
338
339         xen_cpuid_base = xen_hvm_cpuid_base();
340         if (xen_cpuid_base == 0)
341                 return;
342
343         /* Find the hypercall pages. */
344         do_cpuid(xen_cpuid_base + 2, regs);
345         if (regs[0] != 1) {
346                 xc_printf("Invalid number of hypercall pages %u\n",
347                     regs[0]);
348                 vm_guest = VM_GUEST_VM;
349                 return;
350         }
351
352         wrmsr(regs[1], early_init_vtop(&hypercall_page));
353
354         rc = map_shared_info();
355         if (rc != 0) {
356                 vm_guest = VM_GUEST_VM;
357                 return;
358         }
359
360         if (xen_initial_domain())
361             /* Fixup video console information in case Xen changed the mode. */
362             fixup_console();
363 }
364
365 static void
366 xen_hvm_init_shared_info_page(void)
367 {
368         struct xen_add_to_physmap xatp;
369
370         if (xen_pv_domain()) {
371                 /*
372                  * Already setup in the PV case, shared_info is passed inside
373                  * of the start_info struct at start of day.
374                  */
375                 return;
376         }
377
378         if (HYPERVISOR_shared_info == NULL) {
379                 HYPERVISOR_shared_info = malloc(PAGE_SIZE, M_XENHVM, M_NOWAIT);
380                 if (HYPERVISOR_shared_info == NULL)
381                         panic("Unable to allocate Xen shared info page");
382         }
383
384         xatp.domid = DOMID_SELF;
385         xatp.idx = 0;
386         xatp.space = XENMAPSPACE_shared_info;
387         xatp.gpfn = vtophys(HYPERVISOR_shared_info) >> PAGE_SHIFT;
388         if (HYPERVISOR_memory_op(XENMEM_add_to_physmap, &xatp))
389                 panic("HYPERVISOR_memory_op failed");
390 }
391
392 static int
393 set_percpu_callback(unsigned int vcpu)
394 {
395         struct xen_hvm_evtchn_upcall_vector vec;
396         int error;
397
398         vec.vcpu = vcpu;
399         vec.vector = IDT_EVTCHN;
400         error = HYPERVISOR_hvm_op(HVMOP_set_evtchn_upcall_vector, &vec);
401
402         return (error != 0 ? xen_translate_error(error) : 0);
403 }
404
405 /*
406  * Tell the hypervisor how to contact us for event channel callbacks.
407  */
408 void
409 xen_hvm_set_callback(device_t dev)
410 {
411         struct xen_hvm_param xhp;
412         int irq;
413
414         if (xen_vector_callback_enabled)
415                 return;
416
417         xhp.domid = DOMID_SELF;
418         xhp.index = HVM_PARAM_CALLBACK_IRQ;
419         if (xen_feature(XENFEAT_hvm_callback_vector) != 0) {
420                 int error;
421
422                 error = set_percpu_callback(0);
423                 if (error == 0) {
424                         xen_evtchn_needs_ack = true;
425                         /* Trick toolstack to think we are enlightened */
426                         xhp.value = 1;
427                 } else
428                         xhp.value = HVM_CALLBACK_VECTOR(IDT_EVTCHN);
429                 error = HYPERVISOR_hvm_op(HVMOP_set_param, &xhp);
430                 if (error == 0) {
431                         xen_vector_callback_enabled = 1;
432                         return;
433                 } else if (xen_evtchn_needs_ack)
434                         panic("Unable to setup fake HVM param: %d", error);
435
436                 printf("Xen HVM callback vector registration failed (%d). "
437                     "Falling back to emulated device interrupt\n", error);
438         }
439         xen_vector_callback_enabled = 0;
440         if (dev == NULL) {
441                 /*
442                  * Called from early boot or resume.
443                  * xenpci will invoke us again later.
444                  */
445                 return;
446         }
447
448         irq = pci_get_irq(dev);
449         if (irq < 16) {
450                 xhp.value = HVM_CALLBACK_GSI(irq);
451         } else {
452                 u_int slot;
453                 u_int pin;
454
455                 slot = pci_get_slot(dev);
456                 pin = pci_get_intpin(dev) - 1;
457                 xhp.value = HVM_CALLBACK_PCI_INTX(slot, pin);
458         }
459
460         if (HYPERVISOR_hvm_op(HVMOP_set_param, &xhp) != 0)
461                 panic("Can't set evtchn callback");
462 }
463
464 #define XEN_MAGIC_IOPORT 0x10
465 enum {
466         XMI_MAGIC                        = 0x49d2,
467         XMI_UNPLUG_IDE_DISKS             = 0x01,
468         XMI_UNPLUG_NICS                  = 0x02,
469         XMI_UNPLUG_IDE_EXCEPT_PRI_MASTER = 0x04
470 };
471
472 static void
473 xen_hvm_disable_emulated_devices(void)
474 {
475         u_short disable_devs = 0;
476
477         if (xen_pv_domain()) {
478                 /*
479                  * No emulated devices in the PV case, so no need to unplug
480                  * anything.
481                  */
482                 if (xen_disable_pv_disks != 0 || xen_disable_pv_nics != 0)
483                         printf("PV devices cannot be disabled in PV guests\n");
484                 return;
485         }
486
487         if (inw(XEN_MAGIC_IOPORT) != XMI_MAGIC)
488                 return;
489
490         if (xen_disable_pv_disks == 0) {
491                 if (bootverbose)
492                         printf("XEN: disabling emulated disks\n");
493                 disable_devs |= XMI_UNPLUG_IDE_DISKS;
494         }
495         if (xen_disable_pv_nics == 0) {
496                 if (bootverbose)
497                         printf("XEN: disabling emulated nics\n");
498                 disable_devs |= XMI_UNPLUG_NICS;
499         }
500
501         if (disable_devs != 0)
502                 outw(XEN_MAGIC_IOPORT, disable_devs);
503 }
504
505 static void
506 xen_hvm_init(enum xen_hvm_init_type init_type)
507 {
508         int error;
509         int i;
510
511         if (!xen_domain() ||
512             init_type == XEN_HVM_INIT_CANCELLED_SUSPEND)
513                 return;
514
515         error = xen_hvm_init_hypercall_stubs(init_type);
516
517         switch (init_type) {
518         case XEN_HVM_INIT_LATE:
519                 if (error != 0)
520                         return;
521
522                 setup_xen_features();
523 #ifdef SMP
524                 cpu_ops = xen_hvm_cpu_ops;
525 #endif
526                 break;
527         case XEN_HVM_INIT_RESUME:
528                 if (error != 0)
529                         panic("Unable to init Xen hypercall stubs on resume");
530
531                 /* Clear stale vcpu_info. */
532                 CPU_FOREACH(i)
533                         DPCPU_ID_SET(i, vcpu_info, NULL);
534                 break;
535         default:
536                 panic("Unsupported HVM initialization type");
537         }
538
539         xen_vector_callback_enabled = 0;
540         xen_evtchn_needs_ack = false;
541         xen_hvm_set_callback(NULL);
542
543         /*
544          * On (PV)HVM domains we need to request the hypervisor to
545          * fill the shared info page, for PVH guest the shared_info page
546          * is passed inside the start_info struct and is already set, so this
547          * functions are no-ops.
548          */
549         xen_hvm_init_shared_info_page();
550         xen_hvm_disable_emulated_devices();
551
552
/*
 * Suspend hook.  Currently a no-op: the body is empty.
 */
void
xen_hvm_suspend(void)
{
}
557
558 void
559 xen_hvm_resume(bool suspend_cancelled)
560 {
561
562         xen_hvm_init(suspend_cancelled ?
563             XEN_HVM_INIT_CANCELLED_SUSPEND : XEN_HVM_INIT_RESUME);
564
565         /* Register vcpu_info area for CPU#0. */
566         xen_hvm_cpu_init();
567 }
568
/*
 * SYSINIT entry point: perform the late (boot-time) Xen HVM initialization.
 * The argument is unused.
 */
static void
xen_hvm_sysinit(void *arg __unused)
{
	xen_hvm_init(XEN_HVM_INIT_LATE);
}
/* Registered at SI_SUB_HYPERVISOR with SI_ORDER_FIRST. */
SYSINIT(xen_hvm_init, SI_SUB_HYPERVISOR, SI_ORDER_FIRST, xen_hvm_sysinit, NULL);
575
576 static void
577 xen_hvm_cpu_init(void)
578 {
579         uint32_t regs[4];
580         int rc;
581
582         if (!xen_domain())
583                 return;
584
585         if (DPCPU_GET(vcpu_info) != NULL) {
586                 /*
587                  * vcpu_info is already set.  We're resuming
588                  * from a failed migration and our pre-suspend
589                  * configuration is still valid.
590                  */
591                 return;
592         }
593
594         /*
595          * Set vCPU ID. If available fetch the ID from CPUID, if not just use
596          * the ACPI ID.
597          */
598         KASSERT(xen_cpuid_base != 0, ("Invalid base Xen CPUID leaf"));
599         cpuid_count(xen_cpuid_base + 4, 0, regs);
600         KASSERT((regs[0] & XEN_HVM_CPUID_VCPU_ID_PRESENT) ||
601             !xen_pv_domain(),
602             ("Xen PV domain without vcpu_id in cpuid"));
603         PCPU_SET(vcpu_id, (regs[0] & XEN_HVM_CPUID_VCPU_ID_PRESENT) ?
604             regs[1] : PCPU_GET(acpi_id));
605
606         if (xen_evtchn_needs_ack && !IS_BSP()) {
607                 /*
608                  * Setup the per-vpcu event channel upcall vector. This is only
609                  * required when using the new HVMOP_set_evtchn_upcall_vector
610                  * hypercall, which allows using a different vector for each
611                  * vCPU. Note that FreeBSD uses the same vector for all vCPUs
612                  * because it's not dynamically allocated.
613                  */
614                 rc = set_percpu_callback(PCPU_GET(vcpu_id));
615                 if (rc != 0)
616                         panic("Event channel upcall vector setup failed: %d",
617                             rc);
618         }
619
620         xen_setup_vcpu_info();
621 }
622 SYSINIT(xen_hvm_cpu_init, SI_SUB_INTR, SI_ORDER_FIRST, xen_hvm_cpu_init, NULL);
623
624 bool
625 xen_has_iommu_maps(void)
626 {
627         uint32_t regs[4];
628
629         KASSERT(xen_cpuid_base != 0, ("Invalid base Xen CPUID leaf"));
630         cpuid_count(xen_cpuid_base + 4, 0, regs);
631
632         return (regs[0] & XEN_HVM_CPUID_IOMMU_MAPPINGS);
633 }