3 * Copyright (c) 2004 Christian Limpach.
4 * Copyright (c) 2004-2006,2008 Kip Macy
7 * Redistribution and use in source and binary forms, with or without
8 * modification, are permitted provided that the following conditions
10 * 1. Redistributions of source code must retain the above copyright
11 * notice, this list of conditions and the following disclaimer.
12 * 2. Redistributions in binary form must reproduce the above copyright
13 * notice, this list of conditions and the following disclaimer in the
14 * documentation and/or other materials provided with the distribution.
15 * 3. All advertising materials mentioning features or use of this software
16 * must display the following acknowledgement:
17 * This product includes software developed by Christian Limpach.
18 * 4. The name of the author may not be used to endorse or promote products
19 * derived from this software without specific prior written permission.
21 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
22 * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
23 * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
24 * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT,
25 * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
26 * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
27 * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
28 * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
29 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
30 * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
33 #include <sys/cdefs.h>
34 __FBSDID("$FreeBSD$");
36 #include <sys/param.h>
37 #include <sys/systm.h>
41 #include <sys/mount.h>
42 #include <sys/malloc.h>
43 #include <sys/mutex.h>
44 #include <sys/kernel.h>
46 #include <sys/reboot.h>
47 #include <sys/rwlock.h>
48 #include <sys/sysproto.h>
50 #include <machine/xen/xen-os.h>
54 #include <machine/segments.h>
55 #include <machine/pcb.h>
56 #include <machine/stdarg.h>
57 #include <machine/vmparam.h>
58 #include <machine/cpu.h>
59 #include <machine/intr_machdep.h>
60 #include <machine/md_var.h>
61 #include <machine/asmacros.h>
65 #include <xen/hypervisor.h>
66 #include <machine/xen/xenvar.h>
67 #include <machine/xen/xenfunc.h>
68 #include <machine/xen/xenpmap.h>
69 #include <machine/xen/xenfunc.h>
70 #include <xen/interface/memory.h>
71 #include <machine/xen/features.h>
73 #include <machine/privatespace.h>
77 #include <vm/vm_page.h>
/* IDTVEC(name) expands to __CONCAT(X,name); presumably IDTVEC(div) names the symbol Xdiv. */
80 #define IDTVEC(name) __CONCAT(X,name)
/* Trap entry-point symbols; trap_table further down stores the address of most of them. */
83 IDTVEC(div), IDTVEC(dbg), IDTVEC(nmi), IDTVEC(bpt), IDTVEC(ofl),
84 IDTVEC(bnd), IDTVEC(ill), IDTVEC(dna), IDTVEC(fpusegm),
85 IDTVEC(tss), IDTVEC(missing), IDTVEC(stk), IDTVEC(prot),
86 IDTVEC(page), IDTVEC(mchk), IDTVEC(rsvd), IDTVEC(fpu), IDTVEC(align),
87 IDTVEC(xmm), IDTVEC(lcall_syscall), IDTVEC(int0x80_syscall);
/* Start-of-day information from Xen; initvalues() stores its argument here. */
91 start_info_t *xen_start_info;
/* initvalues() points this at a page it maps onto start_info->shared_info. */
92 shared_info_t *HYPERVISOR_shared_info;
/* Alias for machine_to_phys_mapping (indexed by machine frame number in xpmap_mtop2()). */
93 xen_pfn_t *xen_machine_phys = machine_to_phys_mapping;
/* initvalues() sets this from start_info->mfn_list. */
94 xen_pfn_t *xen_phys_machine;
/* Directory of the pages backing xen_phys_machine; filled by init_frame_list_list(). */
95 xen_pfn_t *xen_pfn_to_mfn_frame_list[16];
96 xen_pfn_t *xen_pfn_to_mfn_frame_list_list;
/* init_first is set by initvalues() to physfree >> PAGE_SHIFT. */
97 int preemptable, init_first;
98 extern unsigned int avail_space;
/*
 * Fragments of two inline-assembly stubs; the trace message names the first
 * one "ni_cli".
 * NOTE(review): the function headers and the remainder of both asm
 * statements are not among the visible lines.
 */
107 CTR0(KTR_SPARE2, "ni_cli disabling interrupts");
108 __asm__("pushl %edx;"
121 __asm__("pushl %edx;"
133 * Modify the cmd_line by converting each ',' to a NUL byte so that it is in
134 * a format suitable for the static env vars.
137 xen_setbootenv(char *cmd_line)
/*
 * Steps visible here: advance cmd_line past leading blanks, log the string
 * with printk(), then walk it with strsep() so that every ',' is overwritten
 * in place with '\0'.  The caller's buffer is modified.
 */
141 /* Skip leading spaces */
142 for (; *cmd_line == ' '; cmd_line++);
144 printk("xen_setbootenv(): cmd_line='%s'\n", cmd_line);
/* Empty loop body: strsep() performs the ',' -> '\0' rewrite as a side effect. */
146 for (cmd_line_next = cmd_line; strsep(&cmd_line_next, ",") != NULL;);
155 {"boot_askname", RB_ASKNAME},
156 {"boot_single", RB_SINGLE},
157 {"boot_nosync", RB_NOSYNC},
158 {"boot_halt", RB_ASKNAME},
159 {"boot_serial", RB_SERIAL},
160 {"boot_cdrom", RB_CDROM},
161 {"boot_gdb", RB_GDB},
162 {"boot_gdb_pause", RB_RESERVED1},
163 {"boot_verbose", RB_VERBOSE},
164 {"boot_multicons", RB_MULTIPLE},
/*
 * Accumulate boothowto flags: for every howto_names[] entry whose variable
 * getenv() finds, OR that entry's mask into howto.
 * NOTE(review): envp is not referenced in the lines visible here.
 */
169 xen_boothowto(char *envp)
173 /* get equivalents from the environment */
/* The table is scanned up to the entry whose ev member is NULL. */
174 for (i = 0; howto_names[i].ev != NULL; i++)
175 if (getenv(howto_names[i].ev) != NULL)
176 howto |= howto_names[i].mask;
#define PRINTK_BUFSIZE 1024

/*
 * Format a message and write it to the Xen console.
 *
 * fmt and the variadic arguments follow printf conventions.  Output longer
 * than PRINTK_BUFSIZE - 1 characters is truncated.
 *
 * vsnprintf() returns the length the fully formatted string would have had,
 * which can exceed the buffer.  That value used to be passed on unchanged as
 * the byte count, so an over-long message made the console write read past
 * the end of buf.  The count is now clamped to what was actually stored.
 *
 * The buffer is static, so concurrent callers share it.
 */
void
printk(const char *fmt, ...)
{
	static char buf[PRINTK_BUFSIZE];
	va_list ap;
	int retval;

	va_start(ap, fmt);
	retval = vsnprintf(buf, sizeof(buf), fmt, ap);
	va_end(ap);

	/* Nothing usable was produced. */
	if (retval <= 0)
		return;
	/* Truncated: only sizeof(buf) - 1 characters are in the buffer. */
	if (retval >= (int)sizeof(buf))
		retval = (int)sizeof(buf) - 1;

	(void)HYPERVISOR_console_write(buf, retval);
}
/* Number of entries in each pending MMU-update queue. */
196 #define XPQUEUE_SIZE 128
204 /* per-cpu queues and indices */
206 static struct mmu_log xpq_queue_log[XEN_LEGACY_MAX_VCPUS][XPQUEUE_SIZE];
209 static int xpq_idx[XEN_LEGACY_MAX_VCPUS];
210 static mmu_update_t xpq_queue[XEN_LEGACY_MAX_VCPUS][XPQUEUE_SIZE];
/* Accessors that select the row of the local variable `vcpu`, which SET_VCPU() declares. */
212 #define XPQ_QUEUE_LOG xpq_queue_log[vcpu]
213 #define XPQ_QUEUE xpq_queue[vcpu]
214 #define XPQ_IDX xpq_idx[vcpu]
215 #define SET_VCPU() int vcpu = smp_processor_id()
/*
 * NOTE(review): a second, single-queue set of the same names follows.  The
 * two sets cannot both be compiled; presumably a preprocessor conditional
 * that is not visible here selects one of them - confirm.
 */
218 static mmu_update_t xpq_queue[XPQUEUE_SIZE];
220 static struct mmu_log xpq_queue_log[XPQUEUE_SIZE];
222 static int xpq_idx = 0;
224 #define XPQ_QUEUE_LOG xpq_queue_log
225 #define XPQ_QUEUE xpq_queue
226 #define XPQ_IDX xpq_idx
/* Note that the expansion already ends in ';'. */
230 #define XPQ_IDX_INC atomic_add_int(&XPQ_IDX, 1);
/*
 * Debug dump: print the number of pending MMU updates followed by the
 * val/ptr pair of each one, using printk().
 */
236 int _xpq_idx = XPQ_IDX;
242 printk("xen_dump_queue(): %u entries\n", _xpq_idx);
243 for (i = 0; i < _xpq_idx; i++) {
244 printk(" val: %llx ptr: %llx\n", XPQ_QUEUE[i].val, XPQ_QUEUE[i].ptr);
/*
 * Hand the pending MMU updates to Xen in a single HYPERVISOR_mmu_update()
 * call.  When gdtset is true the entries are also traced through KTR, up to
 * three per trace record.  A negative hypercall result dumps the queue with
 * printf() and panics.
 */
251 _xen_flush_queue(void)
/* Snapshot of the entry count; used for the hypercall, the tracing and the error dump. */
254 int _xpq_idx = XPQ_IDX;
258 if (__predict_true(gdtset))
259 CRITICAL_ASSERT(curthread);
/* NOTE(review): the statement that clears the index is not among the visible lines. */
263 /* Make sure index is cleared first to avoid double updates. */
264 error = HYPERVISOR_mmu_update((mmu_update_t *)&XPQ_QUEUE,
265 _xpq_idx, NULL, DOMID_SELF);
/* Trace from the last queued entry backwards; only the low 32 bits of each field are logged. */
268 if (__predict_true(gdtset))
269 for (i = _xpq_idx; i > 0;) {
271 CTR6(KTR_PMAP, "mmu:val: %lx ptr: %lx val: %lx "
272 "ptr: %lx val: %lx ptr: %lx",
273 (XPQ_QUEUE[i-1].val & 0xffffffff),
274 (XPQ_QUEUE[i-1].ptr & 0xffffffff),
275 (XPQ_QUEUE[i-2].val & 0xffffffff),
276 (XPQ_QUEUE[i-2].ptr & 0xffffffff),
277 (XPQ_QUEUE[i-3].val & 0xffffffff),
278 (XPQ_QUEUE[i-3].ptr & 0xffffffff));
281 CTR4(KTR_PMAP, "mmu: val: %lx ptr: %lx val: %lx ptr: %lx",
282 (XPQ_QUEUE[i-1].val & 0xffffffff),
283 (XPQ_QUEUE[i-1].ptr & 0xffffffff),
284 (XPQ_QUEUE[i-2].val & 0xffffffff),
285 (XPQ_QUEUE[i-2].ptr & 0xffffffff));
288 CTR2(KTR_PMAP, "mmu: val: %lx ptr: %lx",
289 (XPQ_QUEUE[i-1].val & 0xffffffff),
290 (XPQ_QUEUE[i-1].ptr & 0xffffffff));
/* Failure path: print every submitted entry in full, then panic with the error code. */
295 if (__predict_false(error < 0)) {
296 for (i = 0; i < _xpq_idx; i++)
297 printf("val: %llx ptr: %llx\n",
298 XPQ_QUEUE[i].val, XPQ_QUEUE[i].ptr);
299 panic("Failed to execute MMU updates: %d", error);
/*
 * Flush the pending MMU updates, calling _xen_flush_queue() only when the
 * queue index is non-zero.
 * NOTE(review): the statements controlled by the two gdtset tests are not
 * among the visible lines.
 */
305 xen_flush_queue(void)
309 if (__predict_true(gdtset))
311 if (XPQ_IDX != 0) _xen_flush_queue();
312 if (__predict_true(gdtset))
/*
 * Queue-index bookkeeping.  The only statement visible here tests whether
 * XPQ_IDX has reached XPQUEUE_SIZE, i.e. the queue is full.
 * NOTE(review): the increment and the action taken on a full queue are not
 * among the visible lines.
 */
317 xen_increment_idx(void)
322 if (__predict_false(XPQ_IDX == XPQUEUE_SIZE))
/* Assert (KASSERT) that no MMU updates are pending, i.e. XPQ_IDX is 0. */
327 xen_check_queue(void)
332 KASSERT(XPQ_IDX == 0, ("pending operations XPQ_IDX=%d", XPQ_IDX));
/*
 * Issue an MMUEXT_INVLPG_ALL mmuext operation for the address va with its
 * PAGE_MASK bits cleared.  A negative hypercall result trips PANIC_IF.
 */
337 xen_invlpg(vm_offset_t va)
340 op.cmd = MMUEXT_INVLPG_ALL;
341 op.arg1.linear_addr = va & ~PAGE_MASK;
342 PANIC_IF(HYPERVISOR_mmuext_op(&op, 1, NULL, DOMID_SELF) < 0);
/*
 * Install a new page-table base through the MMUEXT_NEW_BASEPTR operation.
 * val is passed through xpmap_ptom() and shifted down to a frame number
 * before being handed to Xen.  The update queue must be empty on entry
 * (KASSERT); a negative hypercall result trips PANIC_IF.
 */
346 xen_load_cr3(u_int val)
352 KASSERT(XPQ_IDX == 0, ("pending operations XPQ_IDX=%d", XPQ_IDX));
354 op.cmd = MMUEXT_NEW_BASEPTR;
355 op.arg1.mfn = xpmap_ptom(val) >> PAGE_SHIFT;
356 PANIC_IF(HYPERVISOR_mmuext_op(&op, 1, NULL, DOMID_SELF) < 0);
/*
 * Inline helper that loads the 32-bit word at offset 4 from %ebp into data.
 * NOTE(review): the function name is not visible here; presumably this is
 * the rebp() that the KTR trace calls nearby print - confirm.
 */
360 static __inline u_int
365 __asm __volatile("movl 4(%%ebp),%0" : "=r" (data));
/*
 * Fragment: fetch the flags with _read_eflags(), locate this CPU's
 * vcpu_info in the shared-info page and test its evtchn_upcall_mask.
 * NOTE(review): the function header and the statement controlled by the
 * test are not among the visible lines.
 */
376 eflags = _read_eflags();
377 _vcpu = &HYPERVISOR_shared_info->vcpu_info[smp_processor_id()];
378 if (_vcpu->evtchn_upcall_mask)
/*
 * Restore a saved flags word.  intr is 1 when PSL_I is clear in eflags and
 * 0 when it is set; that value is passed to __restore_flags() before the
 * flags are written with _write_eflags().
 */
385 write_eflags(u_int eflags)
389 CTR2(KTR_SPARE2, "%x xen_restore_flags eflags %x", rebp(), eflags);
390 intr = ((eflags & PSL_I) == 0);
391 __restore_flags(intr);
392 _write_eflags(eflags);
/*
 * Fragments of three small routines: two KTR trace statements whose
 * messages name "xen_cli" and "xen_sti", and a return of the current CPU's
 * arch.cr2 field from the shared-info page.
 * NOTE(review): the function headers and remaining statements are not among
 * the visible lines.
 */
398 CTR1(KTR_SPARE2, "%x xen_cli disabling interrupts", rebp());
405 CTR1(KTR_SPARE2, "%x xen_sti enabling interrupts", rebp());
413 return (HYPERVISOR_shared_info->vcpu_info[curcpu].arch.cr2);
/*
 * Queue an MMU_MACHPHYS_UPDATE request: ptr carries the machine frame mfn
 * shifted up to an address with the command in the low bits, val carries
 * pfn.  file and line are recorded in the parallel log entry.
 * NOTE(review): the statements controlled by the two gdtset tests, and the
 * index increment, are not among the visible lines.
 */
417 _xen_machphys_update(vm_paddr_t mfn, vm_paddr_t pfn, char *file, int line)
421 if (__predict_true(gdtset))
423 XPQ_QUEUE[XPQ_IDX].ptr = (mfn << PAGE_SHIFT) | MMU_MACHPHYS_UPDATE;
424 XPQ_QUEUE[XPQ_IDX].val = pfn;
426 XPQ_QUEUE_LOG[XPQ_IDX].file = file;
427 XPQ_QUEUE_LOG[XPQ_IDX].line = line;
430 if (__predict_true(gdtset))
/* Lock that _xen_queue_pt_update() asserts is write-held once gdtset is true. */
434 extern struct rwlock pvh_global_lock;
/*
 * Queue an MMU_NORMAL_PT_UPDATE request that writes val to the page-table
 * entry at ptr.  ptr must be 8-byte aligned (KASSERT).  file and line are
 * recorded in the parallel log entry.
 * NOTE(review): the statements controlled by the later gdtset tests, and the
 * index increment, are not among the visible lines.
 */
437 _xen_queue_pt_update(vm_paddr_t ptr, vm_paddr_t val, char *file, int line)
441 if (__predict_true(gdtset))
442 rw_assert(&pvh_global_lock, RA_WLOCKED);
444 KASSERT((ptr & 7) == 0, ("misaligned update"));
446 if (__predict_true(gdtset))
/* The alignment check above keeps the low bits of ptr free for the command. */
449 XPQ_QUEUE[XPQ_IDX].ptr = ((uint64_t)ptr) | MMU_NORMAL_PT_UPDATE;
450 XPQ_QUEUE[XPQ_IDX].val = (uint64_t)val;
452 XPQ_QUEUE_LOG[XPQ_IDX].file = file;
453 XPQ_QUEUE_LOG[XPQ_IDX].line = line;
456 if (__predict_true(gdtset))
/*
 * Issue MMUEXT_PIN_L3_TABLE for the frame containing machine address ma.
 * A negative hypercall result trips PANIC_IF.
 */
461 xen_pgdpt_pin(vm_paddr_t ma)
464 op.cmd = MMUEXT_PIN_L3_TABLE;
465 op.arg1.mfn = ma >> PAGE_SHIFT;
467 PANIC_IF(HYPERVISOR_mmuext_op(&op, 1, NULL, DOMID_SELF) < 0);
/*
 * Issue MMUEXT_PIN_L2_TABLE for the frame containing machine address ma.
 * A negative hypercall result trips PANIC_IF.
 */
471 xen_pgd_pin(vm_paddr_t ma)
474 op.cmd = MMUEXT_PIN_L2_TABLE;
475 op.arg1.mfn = ma >> PAGE_SHIFT;
477 PANIC_IF(HYPERVISOR_mmuext_op(&op, 1, NULL, DOMID_SELF) < 0);
/*
 * Issue MMUEXT_UNPIN_TABLE for the frame containing machine address ma
 * (the same command xen_pt_unpin() uses).  A negative hypercall result
 * trips PANIC_IF.
 */
481 xen_pgd_unpin(vm_paddr_t ma)
484 op.cmd = MMUEXT_UNPIN_TABLE;
485 op.arg1.mfn = ma >> PAGE_SHIFT;
487 PANIC_IF(HYPERVISOR_mmuext_op(&op, 1, NULL, DOMID_SELF) < 0);
/*
 * Issue MMUEXT_PIN_L1_TABLE for the frame containing machine address ma.
 * A negative hypercall result trips PANIC_IF.
 */
491 xen_pt_pin(vm_paddr_t ma)
494 op.cmd = MMUEXT_PIN_L1_TABLE;
495 op.arg1.mfn = ma >> PAGE_SHIFT;
497 PANIC_IF(HYPERVISOR_mmuext_op(&op, 1, NULL, DOMID_SELF) < 0);
/*
 * Issue MMUEXT_UNPIN_TABLE for the frame containing machine address ma.
 * A negative hypercall result trips PANIC_IF.
 */
501 xen_pt_unpin(vm_paddr_t ma)
504 op.cmd = MMUEXT_UNPIN_TABLE;
505 op.arg1.mfn = ma >> PAGE_SHIFT;
507 PANIC_IF(HYPERVISOR_mmuext_op(&op, 1, NULL, DOMID_SELF) < 0);
/*
 * Issue MMUEXT_SET_LDT with ptr as the linear address and len as the number
 * of entries.  A negative hypercall result trips PANIC_IF.
 */
511 xen_set_ldt(vm_paddr_t ptr, unsigned long len)
514 op.cmd = MMUEXT_SET_LDT;
515 op.arg1.linear_addr = ptr;
516 op.arg2.nr_ents = len;
518 PANIC_IF(HYPERVISOR_mmuext_op(&op, 1, NULL, DOMID_SELF) < 0);
/*
 * Issue MMUEXT_TLB_FLUSH_LOCAL.  A negative hypercall result trips PANIC_IF.
 */
521 void xen_tlb_flush(void)
524 op.cmd = MMUEXT_TLB_FLUSH_LOCAL;
526 PANIC_IF(HYPERVISOR_mmuext_op(&op, 1, NULL, DOMID_SELF) < 0);
/*
 * Ask Xen to write the 8-byte descriptor *entry at the location of table.
 * The target address is built from the PG_FRAME bits of the PTE that maps
 * table plus table's offset within its page.  Panics if the hypercall
 * returns non-zero.
 */
530 xen_update_descriptor(union descriptor *table, union descriptor *entry)
535 ptp = vtopte((vm_offset_t)table);
536 pa = (*ptp & PG_FRAME) | ((vm_offset_t)table & PAGE_MASK);
/* The descriptor is handed over as a single 64-bit value. */
537 if (HYPERVISOR_update_descriptor(pa, *(uint64_t *)entry))
538 panic("HYPERVISOR_update_descriptor failed\n");
544 * Bitmap is indexed by page number. If bit is set, the page is part of a
545 * xen_create_contiguous_region() area of memory.
547 unsigned long *contiguous_bitmap; /* one bit per page, BITS_PER_LONG pages per word; no allocation is visible in these lines */
/*
 * Set the bits for pages [first_page, first_page + nr_pages) in
 * contiguous_bitmap.  curr_idx/start_off locate the first bit, and
 * end_idx/end_off locate the bit one past the last.
 * NOTE(review): the branch structure between the statements below is not
 * fully visible.  As written, when the range spans several words and
 * end_off is 0, the final statement still reads and writes word end_idx
 * (ORing in 0), which is one word beyond the range - confirm the bitmap is
 * sized to allow that.
 */
550 contiguous_bitmap_set(unsigned long first_page, unsigned long nr_pages)
552 unsigned long start_off, end_off, curr_idx, end_idx;
554 curr_idx = first_page / BITS_PER_LONG;
555 start_off = first_page & (BITS_PER_LONG-1);
556 end_idx = (first_page + nr_pages) / BITS_PER_LONG;
557 end_off = (first_page + nr_pages) & (BITS_PER_LONG-1);
/* Range inside one word: bits below end_off AND bits at or above start_off. */
559 if (curr_idx == end_idx) {
560 contiguous_bitmap[curr_idx] |=
561 ((1UL<<end_off)-1) & -(1UL<<start_off);
/* Range spanning words: tail of the first word, whole middle words, head of the last word. */
563 contiguous_bitmap[curr_idx] |= -(1UL<<start_off);
564 while ( ++curr_idx < end_idx )
565 contiguous_bitmap[curr_idx] = ~0UL;
566 contiguous_bitmap[curr_idx] |= (1UL<<end_off)-1;
/*
 * Clear the bits for pages [first_page, first_page + nr_pages) in
 * contiguous_bitmap; the mirror image of contiguous_bitmap_set().
 * NOTE(review): the branch structure between the statements below is not
 * fully visible.  As written, when the range spans several words and
 * end_off is 0, the final statement still touches word end_idx (ANDing with
 * all ones).  The middle loop here uses != where the set routine uses <.
 */
571 contiguous_bitmap_clear(unsigned long first_page, unsigned long nr_pages)
573 unsigned long start_off, end_off, curr_idx, end_idx;
575 curr_idx = first_page / BITS_PER_LONG;
576 start_off = first_page & (BITS_PER_LONG-1);
577 end_idx = (first_page + nr_pages) / BITS_PER_LONG;
578 end_off = (first_page + nr_pages) & (BITS_PER_LONG-1);
/* Range inside one word: keep bits at or above end_off and bits below start_off. */
580 if (curr_idx == end_idx) {
581 contiguous_bitmap[curr_idx] &=
582 -(1UL<<end_off) | ((1UL<<start_off)-1);
/* Range spanning words: keep the low part of the first word, zero the middle, keep the high part of the last. */
584 contiguous_bitmap[curr_idx] &= (1UL<<start_off)-1;
585 while ( ++curr_idx != end_idx )
586 contiguous_bitmap[curr_idx] = 0;
587 contiguous_bitmap[curr_idx] &= -(1UL<<end_off);
592 /* Ensure multi-page extents are contiguous in machine memory. */
/*
 * Exchange the machine frames behind `pages` (npages must be a power of
 * two) for one machine-contiguous extent: each frame is released with
 * XENMEM_decrease_reservation, one extent of 2^order frames is requested
 * with XENMEM_increase_reservation, and the new frames are entered in the
 * machine-to-physical and physical-to-machine tables.  The lines after the
 * first balloon_unlock() re-populate the pages one frame at a time;
 * presumably that is the path taken when the contiguous request fails.
 * NOTE(review): several statements (lock acquisition, loading of mfn before
 * the release, queue flushes, return values) are not among the visible
 * lines.
 */
594 xen_create_contiguous_region(vm_page_t pages, int npages)
596 unsigned long mfn, i, flags;
598 struct xen_memory_reservation reservation = {
/* Every reservation hypercall below reads or writes the frame number through mfn. */
603 set_xen_guest_handle(reservation.extent_start, &mfn);
607 /* can currently only handle power of two allocation */
608 PANIC_IF(ffs(npages) != fls(npages));
610 /* 0. determine order */
/* After the PANIC_IF above, only the fls(npages) - 1 arm of this conditional can be taken. */
611 order = (ffs(npages) == fls(npages)) ? fls(npages) - 1 : fls(npages);
613 /* 1. give away machine pages. */
614 for (i = 0; i < (1 << order); i++) {
616 pfn = VM_PAGE_TO_PHYS(&pages[i]) >> PAGE_SHIFT;
618 PFNTOMFN(pfn) = INVALID_P2M_ENTRY;
619 PANIC_IF(HYPERVISOR_memory_op(XENMEM_decrease_reservation, &reservation) != 1);
623 /* 2. Get a new contiguous memory extent. */
624 reservation.extent_order = order;
625 /* xenlinux hardcodes this because of aacraid - maybe set to 0 if we're not
626 * running with a broxen driver XXXEN
628 reservation.address_bits = 31;
629 if (HYPERVISOR_memory_op(XENMEM_increase_reservation, &reservation) != 1)
632 /* 3. Map the new extent in place of old pages. */
633 for (i = 0; i < (1 << order); i++) {
635 pfn = VM_PAGE_TO_PHYS(&pages[i]) >> PAGE_SHIFT;
636 xen_machphys_update(mfn+i, pfn);
637 PFNTOMFN(pfn) = mfn+i;
643 contiguous_bitmap_set(VM_PAGE_TO_PHYS(&pages[0]) >> PAGE_SHIFT, 1UL << order);
646 balloon_unlock(flags);
651 reservation.extent_order = 0;
652 reservation.address_bits = 0;
654 for (i = 0; i < (1 << order); i++) {
656 pfn = VM_PAGE_TO_PHYS(&pages[i]) >> PAGE_SHIFT;
657 PANIC_IF(HYPERVISOR_memory_op(
658 XENMEM_increase_reservation, &reservation) != 1);
659 xen_machphys_update(mfn, pfn);
665 balloon_unlock(flags);
/*
 * Undo xen_create_contiguous_region() for the npages pages (a power of two)
 * mapped at addr: clear their bits in contiguous_bitmap, unmap each page
 * and give its frame back to Xen, then obtain one fresh frame per page, map
 * it with PG_KERNEL and record it in the machine-to-physical table.
 * NOTE(review): lock acquisition, the loading of mfn before the release and
 * some local declarations are not among the visible lines.
 */
671 xen_destroy_contiguous_region(void *addr, int npages)
673 unsigned long mfn, i, flags, order, pfn0;
674 struct xen_memory_reservation reservation = {
679 set_xen_guest_handle(reservation.extent_start, &mfn);
681 pfn0 = vtophys(addr) >> PAGE_SHIFT;
/*
 * NOTE(review): this call names vstart, which is not declared in the visible
 * lines, and reads order before it is assigned below; presumably it is
 * compiled out - confirm.
 */
683 scrub_pages(vstart, 1 << order);
685 /* can currently only handle power of two allocation */
686 PANIC_IF(ffs(npages) != fls(npages));
688 /* 0. determine order */
/* After the PANIC_IF above, only the fls(npages) - 1 arm of this conditional can be taken. */
689 order = (ffs(npages) == fls(npages)) ? fls(npages) - 1 : fls(npages);
694 contiguous_bitmap_clear(vtophys(addr) >> PAGE_SHIFT, 1UL << order);
697 /* 1. Zap current PTEs, giving away the underlying pages. */
698 for (i = 0; i < (1 << order); i++) {
700 uint64_t new_val = 0;
701 pfn = vtomach((char *)addr + i*PAGE_SIZE) >> PAGE_SHIFT;
703 PANIC_IF(HYPERVISOR_update_va_mapping((vm_offset_t)((char *)addr + (i * PAGE_SIZE)), new_val, 0));
704 PFNTOMFN(pfn) = INVALID_P2M_ENTRY;
705 PANIC_IF(HYPERVISOR_memory_op(
706 XENMEM_decrease_reservation, &reservation) != 1);
709 /* 2. Map new pages in place of old pages. */
710 for (i = 0; i < (1 << order); i++) {
714 PANIC_IF(HYPERVISOR_memory_op(XENMEM_increase_reservation, &reservation) != 1);
716 new_val = mfn << PAGE_SHIFT;
717 PANIC_IF(HYPERVISOR_update_va_mapping((vm_offset_t)addr + (i * PAGE_SIZE),
718 new_val, PG_KERNEL));
719 xen_machphys_update(mfn, pfn);
725 balloon_unlock(flags);
/* Boot-time addresses that initvalues() assigns (proc0kstack, vm86paddr). */
728 extern vm_offset_t proc0kstack;
729 extern int vm86paddr, vm86phystk;
/* Bounds and cursor of the bootmem pool that initvalues() reserves (four pages). */
730 char *bootmem_start, *bootmem_current, *bootmem_end;
732 pteinfo_t *pteinfo_list;
733 void initvalues(start_info_t *startinfo);
/* Only pointers to this type are used in this file, so a forward declaration is enough. */
735 struct xenstore_domain_interface;
736 extern struct xenstore_domain_interface *xen_store;
/*
 * Take size bytes from the bootmem pool by advancing bootmem_current.
 * retptr keeps the cursor value from before the advance (presumably the
 * return value).  PANIC_IF fires when the request would run past
 * bootmem_end.  No alignment is applied to size.
 */
741 bootmem_alloc(unsigned int size)
745 retptr = bootmem_current;
746 PANIC_IF(retptr + size > bootmem_end);
747 bootmem_current += size;
/*
 * Return size bytes to the bootmem pool by moving bootmem_current back.
 * Only the most recent allocation can be released: PANIC_IF fires unless
 * tptr (presumably ptr) equals bootmem_current - size, or if the cursor
 * would drop below bootmem_start.
 */
753 bootmem_free(void *ptr, unsigned int size)
758 PANIC_IF(tptr != bootmem_current - size ||
759 bootmem_current - size < bootmem_start);
761 bootmem_current -= size;
/*
 * Translate machine address mpa using machine_to_phys_mapping: the table
 * entry for frame mpa >> PAGE_SHIFT is shifted back up to an address and
 * the bits of mpa outside PG_FRAME are ORed back in.
 */
766 xpmap_mtop2(vm_paddr_t mpa)
768 return ((machine_to_phys_mapping[mpa >> PAGE_SHIFT] << PAGE_SHIFT)
769 ) | (mpa & ~PG_FRAME);
/*
 * Return the entry for va from the table at xen_start_info->pt_base,
 * indexed by va >> 22.
 */
773 xpmap_get_bootpde(vm_paddr_t va)
776 return ((pd_entry_t *)xen_start_info->pt_base)[va >> 22];
/*
 * Like xpmap_get_bootpde(), but with the frame part rewritten: for a valid
 * entry (PG_V set) the PG_FRAME bits are passed through xpmap_mtop2() and
 * offset by KERNBASE, while the non-frame bits are kept.  An entry without
 * PG_V yields only its non-frame bits.
 */
780 xpmap_get_vbootpde(vm_paddr_t va)
784 pde = xpmap_get_bootpde(va);
785 if ((pde & PG_V) == 0)
786 return (pde & ~PG_FRAME);
787 return (pde & ~PG_FRAME) |
788 (xpmap_mtop2(pde & PG_FRAME) + KERNBASE);
/*
 * Return a pointer to the page-table entry for va, found through the
 * directory entry from xpmap_get_vbootpde(): its PG_FRAME bits are used as
 * the table address and the PT_MASK bits of va select the slot.
 * NOTE(review): the statement taken when PG_V is clear is not among the
 * visible lines.
 */
792 xpmap_get_bootptep(vm_paddr_t va)
796 pde = xpmap_get_vbootpde(va);
797 if ((pde & PG_V) == 0)
799 #define PT_MASK 0x003ff000 /* page table address bits */
800 return &(((pt_entry_t *)(pde & PG_FRAME))[(va & PT_MASK) >> PAGE_SHIFT]);
/* Return the page-table entry that xpmap_get_bootptep(va) points to. */
804 xpmap_get_bootpte(vm_paddr_t va)
807 return xpmap_get_bootptep(va)[0];
/*
 * Rearrange the phys_machine table one page at a time with a scratch page
 * from bootmem_alloc(), then queue a machphys update for every entry and
 * fill the first page of the table using memset().  The closing #endif
 * shows this code belongs to an ADD_ISA_HOLE conditional.
 * NOTE(review): parts of the loop body are not among the visible lines, so
 * the exact direction of the shift cannot be confirmed from here.
 */
814 shift_phys_machine(unsigned long *phys_machine, int nr_pages)
817 unsigned long *tmp_page, *current_page, *next_page;
820 tmp_page = bootmem_alloc(PAGE_SIZE);
/* Start at the last page of the table; next_page is the page just below it. */
821 current_page = phys_machine + nr_pages - (PAGE_SIZE/sizeof(unsigned long));
822 next_page = current_page - (PAGE_SIZE/sizeof(unsigned long));
823 bcopy(phys_machine, tmp_page, PAGE_SIZE);
825 while (current_page > phys_machine) {
827 bcopy(next_page, tmp_page, PAGE_SIZE);
828 /* shift down page */
829 bcopy(current_page, next_page, PAGE_SIZE);
831 bcopy(tmp_page, current_page, PAGE_SIZE);
833 current_page -= (PAGE_SIZE/sizeof(unsigned long));
834 next_page -= (PAGE_SIZE/sizeof(unsigned long));
836 bootmem_free(tmp_page, PAGE_SIZE);
838 for (i = 0; i < nr_pages; i++) {
839 xen_machphys_update(phys_machine[i], i);
/*
 * NOTE(review): memset() stores its value argument as a single byte; this
 * only yields INVALID_P2M_ENTRY in every word if all bytes of that constant
 * are equal - confirm.
 */
841 memset(phys_machine, INVALID_P2M_ENTRY, PAGE_SIZE);
844 #endif /* ADD_ISA_HOLE */
847 * Build a directory of the pages that make up our Physical to Machine
848 * mapping table. The Xen suspend/restore code uses this to find our
852 init_frame_list_list(void *arg)
/*
 * Runs as a SYSINIT (see the registration at the end).  It allocates one
 * page for the top-level list and, each time (j & (FPP - 1)) is zero, a
 * further page for a frame list, recording that page's machine frame number
 * in the top-level list.  Each frame-list slot receives the machine frame
 * number of a piece of xen_phys_machine.  Finally max_pfn and the top-level
 * list's frame number are published in the shared-info page.
 * NOTE(review): the loop increment expression and some statements in the
 * loop body are not among the visible lines.
 */
854 unsigned long nr_pages = xen_start_info->nr_pages;
/* Number of xen_pfn_t entries that fit in one page. */
855 #define FPP (PAGE_SIZE/sizeof(xen_pfn_t))
858 xen_pfn_to_mfn_frame_list_list = malloc(PAGE_SIZE, M_DEVBUF, M_WAITOK);
859 for (i = 0, j = 0, k = -1; i < nr_pages;
861 if ((j & (FPP - 1)) == 0) {
863 xen_pfn_to_mfn_frame_list[k] =
864 malloc(PAGE_SIZE, M_DEVBUF, M_WAITOK);
865 xen_pfn_to_mfn_frame_list_list[k] =
866 VTOMFN(xen_pfn_to_mfn_frame_list[k]);
869 xen_pfn_to_mfn_frame_list[k][j] =
870 VTOMFN(&xen_phys_machine[i]);
873 HYPERVISOR_shared_info->arch.max_pfn = nr_pages;
874 HYPERVISOR_shared_info->arch.pfn_to_mfn_frame_list_list
875 = VTOMFN(xen_pfn_to_mfn_frame_list_list);
877 SYSINIT(init_fll, SI_SUB_DEVFS, SI_ORDER_ANY, init_frame_list_list, NULL);
/* initvalues() stores VTOP(cur_space), its final allocation cursor, here. */
879 extern unsigned long physfree;
/* Only the address of kernbase is used (as an unmap-loop bound in initvalues()). */
884 extern uint32_t kernbase;
/*
 * Early bootstrap for a Xen guest.  In the order visible below it: records
 * the start_info pointer and the physical-to-machine list; locates the
 * initial page directories; carves boot-time regions out of the address
 * space that follows the initial page tables (new PDPT and page
 * directories, proc0 kernel stack, vm86 area, bootmem pool, GDT, LDT);
 * switches to the new page tables with xen_load_cr3(); allocates and pins
 * the remaining kernel page-table pages; maps the shared-info, xenstore and
 * console pages; and finally publishes physfree and init_first.
 * NOTE(review): many statements, and what look like conditional-compilation
 * alternatives (for example two declarations of IdlePTDma), are not
 * distinguishable in the lines visible here; read the comments with that in
 * mind.
 */
887 initvalues(start_info_t *startinfo)
/* cur_space is the bump-allocation cursor used for every region carved out below. */
889 vm_offset_t cur_space, cur_space_pt;
890 struct physdev_set_iopl set_iopl;
892 int l3_pages, l2_pages, l1_pages, offset;
893 vm_paddr_t console_page_ma, xen_store_ma;
897 vm_paddr_t IdlePDPTma, IdlePDPTnewma;
898 vm_paddr_t IdlePTDnewma[4];
899 pd_entry_t *IdlePDPTnew, *IdlePTDnew;
900 vm_paddr_t IdlePTDma[4];
902 vm_paddr_t IdlePTDma[1];
909 max((startinfo->nr_pages >> NPGPTD_SHIFT), nkpt),
910 NPGPTD*NPDEPG - KPTDI),
911 (HYPERVISOR_VIRT_START - KERNBASE) >> PDRSHIFT);
913 HYPERVISOR_vm_assist(VMASST_CMD_enable, VMASST_TYPE_4gb_segments);
916 * need to install handler
918 HYPERVISOR_vm_assist(VMASST_CMD_enable, VMASST_TYPE_4gb_segments_notify);
920 xen_start_info = startinfo;
921 xen_phys_machine = (xen_pfn_t *)startinfo->mfn_list;
923 IdlePTD = (pd_entry_t *)((uint8_t *)startinfo->pt_base + PAGE_SIZE);
929 IdlePDPT = (pd_entry_t *)startinfo->pt_base;
930 IdlePDPTma = VTOM(startinfo->pt_base);
931 for (i = (KERNBASE >> 30);
932 (i < 4) && (IdlePDPT[i] != 0); i++)
935 * Note that only one page directory has been allocated at this point.
938 for (i = 0; i < l2_pages; i++)
939 IdlePTDma[i] = VTOM(IdlePTD + i*PAGE_SIZE);
941 l2_pages = (l2_pages == 0) ? 1 : l2_pages;
946 for (i = (((KERNBASE>>18) & PAGE_MASK)>>PAGE_SHIFT);
947 (i<l2_pages*NPDEPG) && (i<(VM_MAX_KERNEL_ADDRESS>>PDRSHIFT)); i++) {
954 /* number of pages allocated after the pts + 1*/;
955 cur_space = xen_start_info->pt_base +
956 (l3_pages + l2_pages + l1_pages + 1)*PAGE_SIZE;
958 printk("initvalues(): wooh - availmem=%x,%x\n", avail_space, cur_space);
960 printk("KERNBASE=%x,pt_base=%x, VTOPFN(base)=%x, nr_pt_frames=%x\n",
961 KERNBASE,xen_start_info->pt_base, VTOPFN(xen_start_info->pt_base),
962 xen_start_info->nr_pt_frames);
963 xendebug_flags = 0; /* 0xffffffff; */
966 shift_phys_machine(xen_phys_machine, xen_start_info->nr_pages);
968 XENPRINTF("IdlePTD %p\n", IdlePTD);
969 XENPRINTF("nr_pages: %ld shared_info: 0x%lx flags: 0x%lx pt_base: 0x%lx "
970 "mod_start: 0x%lx mod_len: 0x%lx\n",
971 xen_start_info->nr_pages, xen_start_info->shared_info,
972 xen_start_info->flags, xen_start_info->pt_base,
973 xen_start_info->mod_start, xen_start_info->mod_len);
/* One zeroed page for the new PDPT, then four zeroed pages for the new page directories. */
976 IdlePDPTnew = (pd_entry_t *)cur_space; cur_space += PAGE_SIZE;
977 bzero(IdlePDPTnew, PAGE_SIZE);
979 IdlePDPTnewma = VTOM(IdlePDPTnew);
980 IdlePTDnew = (pd_entry_t *)cur_space; cur_space += 4*PAGE_SIZE;
981 bzero(IdlePTDnew, 4*PAGE_SIZE);
983 for (i = 0; i < 4; i++)
984 IdlePTDnewma[i] = VTOM((uint8_t *)IdlePTDnew + i*PAGE_SIZE);
988 * Copy the 4 machine addresses of the new PTDs in to the PDPT
991 for (i = 0; i < 4; i++)
992 IdlePDPTnew[i] = IdlePTDnewma[i] | PG_V;
997 * re-map the new PDPT read-only
999 PT_SET_MA(IdlePDPTnew, IdlePDPTnewma | PG_V);
1002 * Unpin the current PDPT
1004 xen_pt_unpin(IdlePDPTma);
1008 /* Map proc0's KSTACK */
1009 proc0kstack = cur_space; cur_space += (KSTACK_PAGES * PAGE_SIZE);
1010 printk("proc0kstack=%u\n", proc0kstack);
1012 /* vm86/bios stack */
1013 cur_space += PAGE_SIZE;
1015 /* Map space for the vm86 region */
1016 vm86paddr = (vm_offset_t)cur_space;
1017 cur_space += (PAGE_SIZE * 3);
1019 /* allocate 4 pages for bootmem allocator */
1020 bootmem_start = bootmem_current = (char *)cur_space;
1021 cur_space += (4 * PAGE_SIZE);
1022 bootmem_end = (char *)cur_space;
1024 /* allocate pages for gdt */
1025 gdt = (union descriptor *)cur_space;
1026 cur_space += PAGE_SIZE*ncpus;
1028 /* allocate page for ldt */
1029 ldt = (union descriptor *)cur_space; cur_space += PAGE_SIZE;
1030 cur_space += PAGE_SIZE;
1032 /* unmap remaining pages from initial chunk
1035 for (tmpva = cur_space; tmpva < (((uint32_t)&kernbase) + (l1_pages<<PDRSHIFT));
1036 tmpva += PAGE_SIZE) {
1037 bzero((char *)tmpva, PAGE_SIZE);
1038 PT_SET_MA(tmpva, (vm_paddr_t)0);
1043 memcpy(((uint8_t *)IdlePTDnew) + ((unsigned int)(KERNBASE >> 18)),
1044 ((uint8_t *)IdlePTD) + ((KERNBASE >> 18) & PAGE_MASK),
1045 l1_pages*sizeof(pt_entry_t));
/* Remap each new page directory page with PG_V only before switching to the new tables. */
1047 for (i = 0; i < 4; i++) {
1048 PT_SET_MA((uint8_t *)IdlePTDnew + i*PAGE_SIZE,
1049 IdlePTDnewma[i] | PG_V);
1051 xen_load_cr3(VTOP(IdlePDPTnew));
1052 xen_pgdpt_pin(VTOM(IdlePDPTnew));
1054 /* allocate remainder of nkpt pages */
1055 cur_space_pt = cur_space;
1056 for (offset = (KERNBASE >> PDRSHIFT), i = l1_pages; i < nkpt;
1057 i++, cur_space += PAGE_SIZE) {
1058 pdir = (offset + i) / NPDEPG;
1059 curoffset = ((offset + i) % NPDEPG);
1060 if (((offset + i) << PDRSHIFT) == VM_MAX_KERNEL_ADDRESS)
1064 * make sure that all the initial page table pages
1067 PT_SET_MA(cur_space, VTOM(cur_space) | PG_V | PG_RW);
1068 bzero((char *)cur_space, PAGE_SIZE);
1069 PT_SET_MA(cur_space, (vm_paddr_t)0);
1070 xen_pt_pin(VTOM(cur_space));
1071 xen_queue_pt_update((vm_paddr_t)(IdlePTDnewma[pdir] +
1072 curoffset*sizeof(vm_paddr_t)),
1073 VTOM(cur_space) | PG_KERNEL);
/* Enter the four new page directory pages into the directory slots starting at PTDPTDI. */
1077 for (i = 0; i < 4; i++) {
1078 pdir = (PTDPTDI + i) / NPDEPG;
1079 curoffset = (PTDPTDI + i) % NPDEPG;
1081 xen_queue_pt_update((vm_paddr_t)(IdlePTDnewma[pdir] +
1082 curoffset*sizeof(vm_paddr_t)),
1083 IdlePTDnewma[i] | PG_V);
1088 IdlePTD = IdlePTDnew;
1089 IdlePDPT = IdlePDPTnew;
1090 IdlePDPTma = IdlePDPTnewma;
/* Reserve one page of address space each for shared info, xenstore and the console; they are mapped below. */
1092 HYPERVISOR_shared_info = (shared_info_t *)cur_space;
1093 cur_space += PAGE_SIZE;
1095 xen_store = (struct xenstore_domain_interface *)cur_space;
1096 cur_space += PAGE_SIZE;
1098 console_page = (char *)cur_space;
1099 cur_space += PAGE_SIZE;
1102 * shared_info is an unsigned long so this will randomly break if
1103 * it is allocated above 4GB - I guess people are used to that
1104 * sort of thing with Xen ... sigh
1106 shinfo = xen_start_info->shared_info;
1107 PT_SET_MA(HYPERVISOR_shared_info, shinfo | PG_KERNEL);
1111 xen_store_ma = (((vm_paddr_t)xen_start_info->store_mfn) << PAGE_SHIFT);
1112 PT_SET_MA(xen_store, xen_store_ma | PG_KERNEL);
1113 console_page_ma = (((vm_paddr_t)xen_start_info->console.domU.mfn) << PAGE_SHIFT);
1114 PT_SET_MA(console_page, console_page_ma | PG_KERNEL);
1119 PANIC_IF(HYPERVISOR_physdev_op(PHYSDEVOP_SET_IOPL, &set_iopl));
1122 /* add page table for KERNBASE */
1123 xen_queue_pt_update(IdlePTDma + KPTDI*sizeof(vm_paddr_t),
1124 VTOM(cur_space) | PG_KERNEL);
1127 xen_queue_pt_update(pdir_shadow_ma[3] + KPTDI*sizeof(vm_paddr_t),
1128 VTOM(cur_space) | PG_V | PG_A);
1130 xen_queue_pt_update(pdir_shadow_ma + KPTDI*sizeof(vm_paddr_t),
1131 VTOM(cur_space) | PG_V | PG_A);
1134 cur_space += PAGE_SIZE;
1138 if (xen_start_info->flags & SIF_INITDOMAIN) {
1139 /* Map first megabyte */
1140 for (i = 0; i < (256 << PAGE_SHIFT); i += PAGE_SIZE)
1141 PT_SET_MA(KERNBASE + i, i | PG_KERNEL | PG_NC_PCD);
1146 * re-map kernel text read-only
1149 for (i = (((vm_offset_t)&btext) & ~PAGE_MASK);
1150 i < (((vm_offset_t)&etext) & ~PAGE_MASK); i += PAGE_SIZE)
1151 PT_SET_MA(i, VTOM(i) | PG_V | PG_A);
/* Publish the first free physical address and its page number. */
1154 physfree = VTOP(cur_space);
1155 init_first = physfree >> PAGE_SHIFT;
/* From here on IdlePTD and IdlePDPT hold VTOP() results rather than virtual addresses. */
1156 IdlePTD = (pd_entry_t *)VTOP(IdlePTD);
1157 IdlePDPT = (pd_entry_t *)VTOP(IdlePDPT);
1158 setup_xen_features();
1159 printk("#8, proc0kstack=%u\n", proc0kstack);
/*
 * Trap handlers for this guest.  Each initializer gives, in order, a vector
 * number, a flags value, the kernel code selector and the handler address;
 * presumably these match the fields of Xen's trap_info_t - confirm against
 * the Xen interface header.  Vector 2 has no entry here although
 * IDTVEC(nmi) is declared above, and the vector 8 entry appears only as
 * commented-out text.
 * NOTE(review): the terminating entry and closing brace are not among the
 * visible lines.
 */
1163 trap_info_t trap_table[] = {
1164 { 0, 0, GSEL(GCODE_SEL, SEL_KPL), (unsigned long) &IDTVEC(div)},
1165 { 1, 0|4, GSEL(GCODE_SEL, SEL_KPL), (unsigned long) &IDTVEC(dbg)},
1166 { 3, 3|4, GSEL(GCODE_SEL, SEL_KPL), (unsigned long) &IDTVEC(bpt)},
1167 { 4, 3, GSEL(GCODE_SEL, SEL_KPL), (unsigned long) &IDTVEC(ofl)},
1168 /* This is UPL on Linux and KPL on BSD */
1169 { 5, 3, GSEL(GCODE_SEL, SEL_KPL), (unsigned long) &IDTVEC(bnd)},
1170 { 6, 0, GSEL(GCODE_SEL, SEL_KPL), (unsigned long) &IDTVEC(ill)},
1171 { 7, 0|4, GSEL(GCODE_SEL, SEL_KPL), (unsigned long) &IDTVEC(dna)},
1173 * { 8, 0, GSEL(GCODE_SEL, SEL_KPL), (unsigned long) &IDTVEC(XXX)},
1174 * no handler for double fault
1176 { 9, 0, GSEL(GCODE_SEL, SEL_KPL), (unsigned long) &IDTVEC(fpusegm)},
1177 {10, 0, GSEL(GCODE_SEL, SEL_KPL), (unsigned long) &IDTVEC(tss)},
1178 {11, 0, GSEL(GCODE_SEL, SEL_KPL), (unsigned long) &IDTVEC(missing)},
1179 {12, 0, GSEL(GCODE_SEL, SEL_KPL), (unsigned long) &IDTVEC(stk)},
1180 {13, 0, GSEL(GCODE_SEL, SEL_KPL), (unsigned long) &IDTVEC(prot)},
1181 {14, 0|4, GSEL(GCODE_SEL, SEL_KPL), (unsigned long) &IDTVEC(page)},
1182 {15, 0, GSEL(GCODE_SEL, SEL_KPL), (unsigned long) &IDTVEC(rsvd)},
1183 {16, 0, GSEL(GCODE_SEL, SEL_KPL), (unsigned long) &IDTVEC(fpu)},
1184 {17, 0, GSEL(GCODE_SEL, SEL_KPL), (unsigned long) &IDTVEC(align)},
1185 {18, 0, GSEL(GCODE_SEL, SEL_KPL), (unsigned long) &IDTVEC(mchk)},
1186 {19, 0, GSEL(GCODE_SEL, SEL_KPL), (unsigned long) &IDTVEC(xmm)},
1187 {0x80, 3, GSEL(GCODE_SEL, SEL_KPL), (unsigned long) &IDTVEC(int0x80_syscall)},
1191 /* Perform a multicall and check that individual calls succeeded. */
/*
 * call_list holds nr_calls entries.  PANIC_IF fires if the batch hypercall
 * itself returns non-zero.  Afterwards every entry's result field is
 * examined, and the function panics, reporting the count and the CPU, if
 * any entry has a negative result.
 * NOTE(review): the statement that counts failures and the return statement
 * are not among the visible lines.
 */
1193 HYPERVISOR_multicall(struct multicall_entry * call_list, int nr_calls)
1198 /* Perform the multicall. */
1199 PANIC_IF(_HYPERVISOR_multicall(call_list, nr_calls));
1201 /* Check the results of individual hypercalls. */
1202 for (i = 0; i < nr_calls; i++)
1203 if (unlikely(call_list[i].result < 0))
1205 if (unlikely(ret > 0))
1206 panic("%d multicall(s) failed: cpu %d\n",
1207 ret, smp_processor_id());
1209 /* If we didn't panic already, everything succeeded. */
1213 /********** CODE WORTH KEEPING ABOVE HERE *****************/
/* Handler that panics as soon as it is called. */
1215 void xen_failsafe_handler(void);
1218 xen_failsafe_handler(void)
1221 panic("xen_failsafe_handler called!\n");
1224 void xen_handle_thread_switch(struct pcb *pcb);
1226 /* This is called by cpu_switch() when switching threads. */
1227 /* The pcb arg refers to the process control block of the */
1228 /* next thread which is to run */
/*
 * Builds a batch of up to three hypercalls and submits it with
 * HYPERVISOR_multicall(): a stack switch that is always present, followed
 * by a descriptor update for each per-CPU GDT slot whose 8 bytes differ
 * from the descriptor saved in pcb.
 */
1230 xen_handle_thread_switch(struct pcb *pcb)
/* a: this CPU's current descriptor slot; b: the value the incoming thread wants there. */
1232 uint32_t *a = (uint32_t *)&PCPU_GET(fsgs_gdt)[0];
1233 uint32_t *b = (uint32_t *)&pcb->pcb_fsd;
1234 multicall_entry_t mcl[3];
1237 /* Notify Xen of task switch */
1238 mcl[i].op = __HYPERVISOR_stack_switch;
1239 mcl[i].args[0] = GSEL(GDATA_SEL, SEL_KPL);
1240 mcl[i++].args[1] = (unsigned long)pcb;
1242 /* Check for update of fsd */
/* The descriptor is compared as two 32-bit halves; the update is queued only when they differ. */
1243 if (*a != *b || *(a+1) != *(b+1)) {
1244 mcl[i].op = __HYPERVISOR_update_descriptor;
/* Both hypercall arguments are stored as 64-bit values, each spanning two args[] slots. */
1245 *(uint64_t *)&mcl[i].args[0] = vtomach((vm_offset_t)a);
1246 *(uint64_t *)&mcl[i++].args[2] = *(uint64_t *)b;
/* NOTE(review): the statements that re-point a and b at the gs descriptor are not among the visible lines. */
1252 /* Check for update of gsd */
1253 if (*a != *b || *(a+1) != *(b+1)) {
1254 mcl[i].op = __HYPERVISOR_update_descriptor;
1255 *(uint64_t *)&mcl[i].args[0] = vtomach((vm_offset_t)a);
1256 *(uint64_t *)&mcl[i++].args[2] = *(uint64_t *)b;
/* i is the number of entries actually filled in. */
1259 (void)HYPERVISOR_multicall(mcl, i);