/*
 *
 * Copyright (c) 2004 Christian Limpach.
 * Copyright (c) 2004-2006,2008 Kip Macy
 * All rights reserved.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 * 3. All advertising materials mentioning features or use of this software
 *    must display the following acknowledgement:
 *      This product includes software developed by Christian Limpach.
 * 4. The name of the author may not be used to endorse or promote products
 *    derived from this software without specific prior written permission.
 *
 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
 * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
 * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
 * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT,
 * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
 * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
 * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
 * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
 * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
 */

#include <sys/cdefs.h>
__FBSDID("$FreeBSD$");

#include <sys/param.h>
#include <sys/systm.h>
#include <sys/bus.h>
#include <sys/lock.h>
#include <sys/mount.h>
#include <sys/malloc.h>
#include <sys/mutex.h>
#include <sys/kernel.h>
#include <sys/reboot.h>
#include <sys/sysproto.h>

#include <machine/xen/xen-os.h>

#include <vm/vm.h>
#include <vm/pmap.h>
#include <machine/segments.h>
#include <machine/pcb.h>
#include <machine/stdarg.h>
#include <machine/vmparam.h>
#include <machine/cpu.h>
#include <machine/intr_machdep.h>
#include <machine/md_var.h>
#include <machine/asmacros.h>

#include <xen/hypervisor.h>
#include <machine/xen/xenvar.h>
#include <machine/xen/xenfunc.h>
#include <machine/xen/xenpmap.h>
#include <xen/interface/memory.h>
#include <machine/xen/features.h>
#ifdef SMP
#include <machine/privatespace.h>
#endif

#include <vm/vm_page.h>

#define IDTVEC(name)    __CONCAT(X,name)

extern inthand_t
IDTVEC(div), IDTVEC(dbg), IDTVEC(nmi), IDTVEC(bpt), IDTVEC(ofl),
        IDTVEC(bnd), IDTVEC(ill), IDTVEC(dna), IDTVEC(fpusegm),
        IDTVEC(tss), IDTVEC(missing), IDTVEC(stk), IDTVEC(prot),
        IDTVEC(page), IDTVEC(mchk), IDTVEC(rsvd), IDTVEC(fpu), IDTVEC(align),
        IDTVEC(xmm), IDTVEC(lcall_syscall), IDTVEC(int0x80_syscall);

int xendebug_flags;
start_info_t *xen_start_info;
shared_info_t *HYPERVISOR_shared_info;
xen_pfn_t *xen_machine_phys = machine_to_phys_mapping;
xen_pfn_t *xen_phys_machine;
xen_pfn_t *xen_pfn_to_mfn_frame_list[16];
xen_pfn_t *xen_pfn_to_mfn_frame_list_list;
int preemptable, init_first;
extern unsigned int avail_space;

void ni_cli(void);
void ni_sti(void);

void
ni_cli(void)
{
        __asm__("pushl %edx;"
                "pushl %eax;"
                );
        __cli();
        __asm__("popl %eax;"
                "popl %edx;"
                );
}

void
ni_sti(void)
{
        __asm__("pushl %edx;"
                "pushl %esi;"
                "pushl %eax;"
                );
        __sti();
        __asm__("popl %eax;"
                "popl %esi;"
                "popl %edx;"
                );
}

/*
 * Modify the cmd_line by converting ',' to NULs so that it is in a format
 * suitable for the static env vars.
 */
char *
xen_setbootenv(char *cmd_line)
{
        char *cmd_line_next;

        /* Skip leading spaces. */
        for (; *cmd_line == ' '; cmd_line++)
                ;

        printk("xen_setbootenv(): cmd_line='%s'\n", cmd_line);

        for (cmd_line_next = cmd_line; strsep(&cmd_line_next, ",") != NULL;)
                ;
        return cmd_line;
}
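
#if 0
/*
 * Illustrative sketch, not compiled: strsep() rewrites each ',' to a NUL
 * in place, so a Xen "extra" string becomes a run of adjacent
 * NUL-terminated kenv entries.  The names below are made up for the
 * example.
 */
static void
example_setbootenv(void)
{
        static char cmd[] = " boot_verbose,kern.hz=1000";
        char *env;

        env = xen_setbootenv(cmd);
        /* env now points at "boot_verbose\0kern.hz=1000\0" */
}
#endif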

static struct
{
        const char      *ev;
        int             mask;
} howto_names[] = {
        {"boot_askname",        RB_ASKNAME},
        {"boot_single", RB_SINGLE},
        {"boot_nosync", RB_NOSYNC},
        {"boot_halt",   RB_HALT},
        {"boot_serial", RB_SERIAL},
        {"boot_cdrom",  RB_CDROM},
        {"boot_gdb",    RB_GDB},
        {"boot_gdb_pause",      RB_RESERVED1},
        {"boot_verbose",        RB_VERBOSE},
        {"boot_multicons",      RB_MULTIPLE},
        {NULL,  0}
};

int
xen_boothowto(char *envp)
{
        int i, howto = 0;

        /* get equivalents from the environment */
        for (i = 0; howto_names[i].ev != NULL; i++)
                if (getenv(howto_names[i].ev) != NULL)
                        howto |= howto_names[i].mask;
        return howto;
}
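
#if 0
/*
 * Illustrative sketch, not compiled: with a domain config entry such as
 * extra="boot_verbose,boot_single", xen_setbootenv() leaves both names
 * in the static kenv and xen_boothowto() then returns
 * (RB_VERBOSE | RB_SINGLE).  The early i386/xen startup code would fold
 * that into the global boothowto along the lines of:
 */
        boothowto |= xen_boothowto(kern_envp);
#endif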

#define PRINTK_BUFSIZE 1024
void
printk(const char *fmt, ...)
{
        __va_list ap;
        int retval;
        static char buf[PRINTK_BUFSIZE];

        va_start(ap, fmt);
        retval = vsnprintf(buf, PRINTK_BUFSIZE - 1, fmt, ap);
        va_end(ap);
        /* vsnprintf() returns the untruncated length; clamp before indexing. */
        if (retval >= PRINTK_BUFSIZE - 1)
                retval = PRINTK_BUFSIZE - 2;
        buf[retval] = 0;
        (void)HYPERVISOR_console_write(buf, retval);
}

#define XPQUEUE_SIZE 128

struct mmu_log {
        char *file;
        int line;
};

#ifdef SMP
/* per-cpu queues and indices */
#ifdef INVARIANTS
static struct mmu_log xpq_queue_log[MAX_VIRT_CPUS][XPQUEUE_SIZE];
#endif

static int xpq_idx[MAX_VIRT_CPUS];
static mmu_update_t xpq_queue[MAX_VIRT_CPUS][XPQUEUE_SIZE];

#define XPQ_QUEUE_LOG xpq_queue_log[vcpu]
#define XPQ_QUEUE xpq_queue[vcpu]
#define XPQ_IDX xpq_idx[vcpu]
#define SET_VCPU() int vcpu = smp_processor_id()
#else

static mmu_update_t xpq_queue[XPQUEUE_SIZE];
static struct mmu_log xpq_queue_log[XPQUEUE_SIZE];
static int xpq_idx = 0;

#define XPQ_QUEUE_LOG xpq_queue_log
#define XPQ_QUEUE xpq_queue
#define XPQ_IDX xpq_idx
#define SET_VCPU()
#endif /* !SMP */

#define XPQ_IDX_INC atomic_add_int(&XPQ_IDX, 1);

#if 0
static void
xen_dump_queue(void)
{
        int _xpq_idx = XPQ_IDX;
        int i;

        if (_xpq_idx <= 1)
                return;

        printk("xen_dump_queue(): %u entries\n", _xpq_idx);
        for (i = 0; i < _xpq_idx; i++) {
                printk(" val: %llx ptr: %llx\n", XPQ_QUEUE[i].val, XPQ_QUEUE[i].ptr);
        }
}
#endif

static __inline void
_xen_flush_queue(void)
{
        SET_VCPU();
        int _xpq_idx = XPQ_IDX;
        int error, i;
        /* window of vulnerability here? */

        if (__predict_true(gdtset))
                critical_enter();
        XPQ_IDX = 0;
        /* Make sure index is cleared first to avoid double updates. */
        error = HYPERVISOR_mmu_update((mmu_update_t *)&XPQ_QUEUE,
                                      _xpq_idx, NULL, DOMID_SELF);

#if 0
        if (__predict_true(gdtset))
        for (i = _xpq_idx; i > 0;) {
                if (i >= 3) {
                        CTR6(KTR_PMAP, "mmu:val: %lx ptr: %lx val: %lx "
                            "ptr: %lx val: %lx ptr: %lx",
                            (XPQ_QUEUE[i-1].val & 0xffffffff),
                            (XPQ_QUEUE[i-1].ptr & 0xffffffff),
                            (XPQ_QUEUE[i-2].val & 0xffffffff),
                            (XPQ_QUEUE[i-2].ptr & 0xffffffff),
                            (XPQ_QUEUE[i-3].val & 0xffffffff),
                            (XPQ_QUEUE[i-3].ptr & 0xffffffff));
                            i -= 3;
                } else if (i == 2) {
                        CTR4(KTR_PMAP, "mmu: val: %lx ptr: %lx val: %lx ptr: %lx",
                            (XPQ_QUEUE[i-1].val & 0xffffffff),
                            (XPQ_QUEUE[i-1].ptr & 0xffffffff),
                            (XPQ_QUEUE[i-2].val & 0xffffffff),
                            (XPQ_QUEUE[i-2].ptr & 0xffffffff));
                        i = 0;
                } else {
                        CTR2(KTR_PMAP, "mmu: val: %lx ptr: %lx",
                            (XPQ_QUEUE[i-1].val & 0xffffffff),
                            (XPQ_QUEUE[i-1].ptr & 0xffffffff));
                        i = 0;
                }
        }
#endif
        if (__predict_true(gdtset))
                critical_exit();
        if (__predict_false(error < 0)) {
                for (i = 0; i < _xpq_idx; i++)
                        printf("val: %llx ptr: %llx\n",
                            XPQ_QUEUE[i].val, XPQ_QUEUE[i].ptr);
                panic("Failed to execute MMU updates: %d", error);
        }

}

void
xen_flush_queue(void)
{
        SET_VCPU();
        if (XPQ_IDX != 0) _xen_flush_queue();
}
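
#if 0
/*
 * Illustrative sketch, not compiled: the queue is drained either
 * implicitly, when xen_increment_idx() below wraps at XPQUEUE_SIZE, or
 * explicitly via xen_flush_queue().  Callers must flush before they rely
 * on a queued mapping actually being in effect; assuming the
 * xen_queue_pt_update() wrapper from xenpmap.h, usage looks like:
 */
static void
example_map_and_touch(vm_paddr_t pte_ma, vm_paddr_t new_pte, char *va)
{
        xen_queue_pt_update(pte_ma, new_pte);   /* queued, not yet applied */
        xen_flush_queue();                      /* one batched hypercall */
        *va = 0;                                /* safe: mapping is now live */
}
#endif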

static __inline void
xen_increment_idx(void)
{
        SET_VCPU();

        XPQ_IDX++;
        if (__predict_false(XPQ_IDX == XPQUEUE_SIZE))
                xen_flush_queue();
}

void
xen_check_queue(void)
{
#ifdef INVARIANTS
        SET_VCPU();

        KASSERT(XPQ_IDX == 0, ("pending operations XPQ_IDX=%d", XPQ_IDX));
#endif
}

void
xen_invlpg(vm_offset_t va)
{
        struct mmuext_op op;
        op.cmd = MMUEXT_INVLPG_ALL;
        op.arg1.linear_addr = va & ~PAGE_MASK;
        PANIC_IF(HYPERVISOR_mmuext_op(&op, 1, NULL, DOMID_SELF) < 0);
}

void
xen_load_cr3(u_int val)
{
        struct mmuext_op op;
#ifdef INVARIANTS
        SET_VCPU();

        KASSERT(XPQ_IDX == 0, ("pending operations XPQ_IDX=%d", XPQ_IDX));
#endif
        op.cmd = MMUEXT_NEW_BASEPTR;
        op.arg1.mfn = xpmap_ptom(val) >> PAGE_SHIFT;
        PANIC_IF(HYPERVISOR_mmuext_op(&op, 1, NULL, DOMID_SELF) < 0);
}

void
xen_restore_flags(u_int eflags)
{
        if (eflags > 1)
                eflags = ((eflags & PSL_I) == 0);

        __restore_flags(eflags);
}

int
xen_save_and_cli(void)
{
        int eflags;

        __save_and_cli(eflags);
        return (eflags);
}

void
xen_cli(void)
{
        __cli();
}

void
xen_sti(void)
{
        __sti();
}

u_int
xen_rcr2(void)
{

        return (HYPERVISOR_shared_info->vcpu_info[curcpu].arch.cr2);
}

void
_xen_machphys_update(vm_paddr_t mfn, vm_paddr_t pfn, char *file, int line)
{
        SET_VCPU();

        if (__predict_true(gdtset))
                critical_enter();
        XPQ_QUEUE[XPQ_IDX].ptr = (mfn << PAGE_SHIFT) | MMU_MACHPHYS_UPDATE;
        XPQ_QUEUE[XPQ_IDX].val = pfn;
#ifdef INVARIANTS
        XPQ_QUEUE_LOG[XPQ_IDX].file = file;
        XPQ_QUEUE_LOG[XPQ_IDX].line = line;
#endif
        xen_increment_idx();
        if (__predict_true(gdtset))
                critical_exit();
}

void
_xen_queue_pt_update(vm_paddr_t ptr, vm_paddr_t val, char *file, int line)
{
        SET_VCPU();

        if (__predict_true(gdtset))
                mtx_assert(&vm_page_queue_mtx, MA_OWNED);

        KASSERT((ptr & 7) == 0, ("misaligned update"));

        if (__predict_true(gdtset))
                critical_enter();

        XPQ_QUEUE[XPQ_IDX].ptr = ((uint64_t)ptr) | MMU_NORMAL_PT_UPDATE;
        XPQ_QUEUE[XPQ_IDX].val = (uint64_t)val;
#ifdef INVARIANTS
        XPQ_QUEUE_LOG[XPQ_IDX].file = file;
        XPQ_QUEUE_LOG[XPQ_IDX].line = line;
#endif
        xen_increment_idx();
        if (__predict_true(gdtset))
                critical_exit();
}
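
#if 0
/*
 * Illustrative sketch, not compiled: the two queueing primitives above
 * differ only in the command encoded in the low bits of 'ptr'.
 * MMU_NORMAL_PT_UPDATE rewrites a PTE/PDE at a machine address;
 * MMU_MACHPHYS_UPDATE rewrites a machine_to_phys[] slot.  Retargeting a
 * page at a new machine frame queues both, then flushes:
 */
static void
example_retarget_page(vm_paddr_t pte_ma, vm_paddr_t new_mfn, vm_paddr_t pfn)
{
        /* Point the PTE at the new machine frame... */
        xen_queue_pt_update(pte_ma, (new_mfn << PAGE_SHIFT) | PG_V | PG_RW);
        /* ...and keep the M2P table consistent with it. */
        xen_machphys_update(new_mfn, pfn);
        xen_flush_queue();
}
#endif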

void
xen_pgdpt_pin(vm_paddr_t ma)
{
        struct mmuext_op op;
        op.cmd = MMUEXT_PIN_L3_TABLE;
        op.arg1.mfn = ma >> PAGE_SHIFT;
        xen_flush_queue();
        PANIC_IF(HYPERVISOR_mmuext_op(&op, 1, NULL, DOMID_SELF) < 0);
}

void
xen_pgd_pin(vm_paddr_t ma)
{
        struct mmuext_op op;
        op.cmd = MMUEXT_PIN_L2_TABLE;
        op.arg1.mfn = ma >> PAGE_SHIFT;
        xen_flush_queue();
        PANIC_IF(HYPERVISOR_mmuext_op(&op, 1, NULL, DOMID_SELF) < 0);
}

void
xen_pgd_unpin(vm_paddr_t ma)
{
        struct mmuext_op op;
        op.cmd = MMUEXT_UNPIN_TABLE;
        op.arg1.mfn = ma >> PAGE_SHIFT;
        xen_flush_queue();
        PANIC_IF(HYPERVISOR_mmuext_op(&op, 1, NULL, DOMID_SELF) < 0);
}

void
xen_pt_pin(vm_paddr_t ma)
{
        struct mmuext_op op;
        op.cmd = MMUEXT_PIN_L1_TABLE;
        op.arg1.mfn = ma >> PAGE_SHIFT;
        printk("xen_pt_pin(): mfn=%x\n", op.arg1.mfn);
        xen_flush_queue();
        PANIC_IF(HYPERVISOR_mmuext_op(&op, 1, NULL, DOMID_SELF) < 0);
}

void
xen_pt_unpin(vm_paddr_t ma)
{
        struct mmuext_op op;
        op.cmd = MMUEXT_UNPIN_TABLE;
        op.arg1.mfn = ma >> PAGE_SHIFT;
        xen_flush_queue();
        PANIC_IF(HYPERVISOR_mmuext_op(&op, 1, NULL, DOMID_SELF) < 0);
}

void
xen_set_ldt(vm_paddr_t ptr, unsigned long len)
{
        struct mmuext_op op;
        op.cmd = MMUEXT_SET_LDT;
        op.arg1.linear_addr = ptr;
        op.arg2.nr_ents = len;
        xen_flush_queue();
        PANIC_IF(HYPERVISOR_mmuext_op(&op, 1, NULL, DOMID_SELF) < 0);
}
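
#if 0
/*
 * Illustrative sketch, not compiled: before a page may serve as a page
 * table its mapping must be made read-only and the page pinned at the
 * appropriate level; unpinning reverses that before the page returns to
 * general use.  PT_SET_MA() is the xenpmap.h helper used elsewhere in
 * this file.
 */
static void
example_make_l1_table(vm_offset_t va)
{
        vm_paddr_t ma = xpmap_ptom(VTOP(va));

        bzero((void *)va, PAGE_SIZE);   /* empty tables validate trivially */
        PT_SET_MA(va, ma | PG_V);       /* revoke our write access first */
        xen_pt_pin(ma);                 /* then pin it as an L1 table */
}
#endif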

void
xen_tlb_flush(void)
{
        struct mmuext_op op;
        op.cmd = MMUEXT_TLB_FLUSH_LOCAL;
        xen_flush_queue();
        PANIC_IF(HYPERVISOR_mmuext_op(&op, 1, NULL, DOMID_SELF) < 0);
}

void
xen_update_descriptor(union descriptor *table, union descriptor *entry)
{
        vm_paddr_t pa;
        pt_entry_t *ptp;

        ptp = vtopte((vm_offset_t)table);
        pa = (*ptp & PG_FRAME) | ((vm_offset_t)table & PAGE_MASK);
        if (HYPERVISOR_update_descriptor(pa, *(uint64_t *)entry))
                panic("HYPERVISOR_update_descriptor failed\n");
}
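
#if 0
/*
 * Illustrative sketch, not compiled: GDT/LDT pages are mapped read-only
 * under Xen, so descriptor slots are rewritten through the helper above
 * rather than by plain stores.  Updating one slot of a descriptor table
 * page would look like:
 */
static void
example_set_descriptor(union descriptor *table, int idx,
    union descriptor *newd)
{
        xen_update_descriptor(&table[idx], newd);
}
#endif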

#if 0
/*
 * Bitmap is indexed by page number. If bit is set, the page is part of a
 * xen_create_contiguous_region() area of memory.
 */
unsigned long *contiguous_bitmap;

static void
contiguous_bitmap_set(unsigned long first_page, unsigned long nr_pages)
{
        unsigned long start_off, end_off, curr_idx, end_idx;

        curr_idx  = first_page / BITS_PER_LONG;
        start_off = first_page & (BITS_PER_LONG-1);
        end_idx   = (first_page + nr_pages) / BITS_PER_LONG;
        end_off   = (first_page + nr_pages) & (BITS_PER_LONG-1);

        if (curr_idx == end_idx) {
                contiguous_bitmap[curr_idx] |=
                        ((1UL<<end_off)-1) & -(1UL<<start_off);
        } else {
                contiguous_bitmap[curr_idx] |= -(1UL<<start_off);
                while ( ++curr_idx < end_idx )
                        contiguous_bitmap[curr_idx] = ~0UL;
                contiguous_bitmap[curr_idx] |= (1UL<<end_off)-1;
        }
}

static void
contiguous_bitmap_clear(unsigned long first_page, unsigned long nr_pages)
{
        unsigned long start_off, end_off, curr_idx, end_idx;

        curr_idx  = first_page / BITS_PER_LONG;
        start_off = first_page & (BITS_PER_LONG-1);
        end_idx   = (first_page + nr_pages) / BITS_PER_LONG;
        end_off   = (first_page + nr_pages) & (BITS_PER_LONG-1);

        if (curr_idx == end_idx) {
                contiguous_bitmap[curr_idx] &=
                        -(1UL<<end_off) | ((1UL<<start_off)-1);
        } else {
                contiguous_bitmap[curr_idx] &= (1UL<<start_off)-1;
                while ( ++curr_idx != end_idx )
                        contiguous_bitmap[curr_idx] = 0;
                contiguous_bitmap[curr_idx] &= -(1UL<<end_off);
        }
}
#endif

/* Ensure multi-page extents are contiguous in machine memory. */
int
xen_create_contiguous_region(vm_page_t pages, int npages)
{
        unsigned long  mfn, i, flags;
        int order;
        struct xen_memory_reservation reservation = {
                .nr_extents   = 1,
                .extent_order = 0,
                .domid        = DOMID_SELF
        };
        set_xen_guest_handle(reservation.extent_start, &mfn);

        balloon_lock(flags);

        /* can currently only handle power of two allocation */
        PANIC_IF(ffs(npages) != fls(npages));

        /* 0. determine order */
        order = (ffs(npages) == fls(npages)) ? fls(npages) - 1 : fls(npages);

        /* 1. give away machine pages. */
        for (i = 0; i < (1 << order); i++) {
                int pfn;
                pfn = VM_PAGE_TO_PHYS(&pages[i]) >> PAGE_SHIFT;
                mfn = PFNTOMFN(pfn);
                PFNTOMFN(pfn) = INVALID_P2M_ENTRY;
                PANIC_IF(HYPERVISOR_memory_op(XENMEM_decrease_reservation, &reservation) != 1);
        }

        /* 2. Get a new contiguous memory extent. */
        reservation.extent_order = order;
        /* xenlinux hardcodes this because of aacraid - maybe set to 0 if we're
         * not running with a broken driver XXXEN
         */
        reservation.address_bits = 31;
        if (HYPERVISOR_memory_op(XENMEM_increase_reservation, &reservation) != 1)
                goto fail;

        /* 3. Map the new extent in place of old pages. */
        for (i = 0; i < (1 << order); i++) {
                int pfn;
                pfn = VM_PAGE_TO_PHYS(&pages[i]) >> PAGE_SHIFT;
                xen_machphys_update(mfn+i, pfn);
                PFNTOMFN(pfn) = mfn+i;
        }

        xen_tlb_flush();

#if 0
        contiguous_bitmap_set(VM_PAGE_TO_PHYS(&pages[0]) >> PAGE_SHIFT, 1UL << order);
#endif

        balloon_unlock(flags);

        return 0;

 fail:
        reservation.extent_order = 0;
        reservation.address_bits = 0;

        for (i = 0; i < (1 << order); i++) {
                int pfn;
                pfn = VM_PAGE_TO_PHYS(&pages[i]) >> PAGE_SHIFT;
                PANIC_IF(HYPERVISOR_memory_op(
                        XENMEM_increase_reservation, &reservation) != 1);
                xen_machphys_update(mfn, pfn);
                PFNTOMFN(pfn) = mfn;
        }

        xen_tlb_flush();

        balloon_unlock(flags);

        return ENOMEM;
}
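
#if 0
/*
 * Illustrative sketch, not compiled: a driver that needs a buffer which
 * is contiguous in *machine* memory (e.g. for DMA) allocates ordinary
 * pages first and then asks Xen to exchange them; npages must currently
 * be a power of two (see the PANIC_IF above).
 */
static int
example_alloc_dma_extent(vm_page_t pages, int npages)
{
        return (xen_create_contiguous_region(pages, npages));
}
#endif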

void
xen_destroy_contiguous_region(void *addr, int npages)
{
        unsigned long  mfn, i, flags, order, pfn0;
        struct xen_memory_reservation reservation = {
                .nr_extents   = 1,
                .extent_order = 0,
                .domid        = DOMID_SELF
        };
        set_xen_guest_handle(reservation.extent_start, &mfn);

        pfn0 = vtophys(addr) >> PAGE_SHIFT;
#if 0
        scrub_pages(vstart, 1 << order);
#endif
        /* can currently only handle power of two allocation */
        PANIC_IF(ffs(npages) != fls(npages));

        /* 0. determine order */
        order = (ffs(npages) == fls(npages)) ? fls(npages) - 1 : fls(npages);

        balloon_lock(flags);

#if 0
        contiguous_bitmap_clear(vtophys(addr) >> PAGE_SHIFT, 1UL << order);
#endif

        /* 1. Zap current PTEs, giving away the underlying pages. */
        for (i = 0; i < (1 << order); i++) {
                int pfn;
                uint64_t new_val = 0;

                /* The p2m table is indexed by pfn; mfn feeds the reservation. */
                pfn = vtophys((char *)addr + i*PAGE_SIZE) >> PAGE_SHIFT;
                mfn = PFNTOMFN(pfn);

                PANIC_IF(HYPERVISOR_update_va_mapping((vm_offset_t)((char *)addr + (i * PAGE_SIZE)), new_val, 0));
                PFNTOMFN(pfn) = INVALID_P2M_ENTRY;
                PANIC_IF(HYPERVISOR_memory_op(
                        XENMEM_decrease_reservation, &reservation) != 1);
        }

        /* 2. Map new pages in place of old pages. */
        for (i = 0; i < (1 << order); i++) {
                int pfn;
                uint64_t new_val;
                pfn = pfn0 + i;
                PANIC_IF(HYPERVISOR_memory_op(XENMEM_increase_reservation, &reservation) != 1);

                new_val = mfn << PAGE_SHIFT;
                PANIC_IF(HYPERVISOR_update_va_mapping((vm_offset_t)addr + (i * PAGE_SIZE),
                                                      new_val, PG_KERNEL));
                xen_machphys_update(mfn, pfn);
                PFNTOMFN(pfn) = mfn;
        }

        xen_tlb_flush();

        balloon_unlock(flags);
}

extern unsigned long cpu0prvpage;
extern unsigned long *SMPpt;
extern  struct user     *proc0uarea;
extern  vm_offset_t     proc0kstack;
extern int vm86paddr, vm86phystk;
char *bootmem_start, *bootmem_current, *bootmem_end;

pteinfo_t *pteinfo_list;
void initvalues(start_info_t *startinfo);

struct ringbuf_head *xen_store; /* XXX move me */
char *console_page;

void *
bootmem_alloc(unsigned int size)
{
        char *retptr;

        retptr = bootmem_current;
        PANIC_IF(retptr + size > bootmem_end);
        bootmem_current += size;

        return retptr;
}

void
bootmem_free(void *ptr, unsigned int size)
{
        char *tptr;

        tptr = ptr;
        PANIC_IF(tptr != bootmem_current - size ||
                bootmem_current - size < bootmem_start);

        bootmem_current -= size;
}
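
#if 0
/*
 * Illustrative sketch, not compiled: bootmem is a bump-pointer allocator
 * with strictly LIFO frees (enforced by the PANIC_IF above), so it only
 * suits early-boot scratch buffers with stack-like lifetimes:
 */
static void
example_bootmem_scratch(void)
{
        char *p = bootmem_alloc(PAGE_SIZE);

        /* ... use p ... */
        bootmem_free(p, PAGE_SIZE);     /* must be the most recent allocation */
}
#endif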

#if 0
static vm_paddr_t
xpmap_mtop2(vm_paddr_t mpa)
{
        return ((machine_to_phys_mapping[mpa >> PAGE_SHIFT] << PAGE_SHIFT)
            ) | (mpa & ~PG_FRAME);
}

static pd_entry_t
xpmap_get_bootpde(vm_paddr_t va)
{

        return ((pd_entry_t *)xen_start_info->pt_base)[va >> 22];
}

static pd_entry_t
xpmap_get_vbootpde(vm_paddr_t va)
{
        pd_entry_t pde;

        pde = xpmap_get_bootpde(va);
        if ((pde & PG_V) == 0)
                return (pde & ~PG_FRAME);
        return (pde & ~PG_FRAME) |
                (xpmap_mtop2(pde & PG_FRAME) + KERNBASE);
}

static pt_entry_t *
xpmap_get_bootptep(vm_paddr_t va)
{
        pd_entry_t pde;

        pde = xpmap_get_vbootpde(va);
        if ((pde & PG_V) == 0)
                return (void *)-1;
#define PT_MASK         0x003ff000      /* page table address bits */
        return &(((pt_entry_t *)(pde & PG_FRAME))[(va & PT_MASK) >> PAGE_SHIFT]);
}

static pt_entry_t
xpmap_get_bootpte(vm_paddr_t va)
{

        return xpmap_get_bootptep(va)[0];
}
#endif

#ifdef ADD_ISA_HOLE
static void
shift_phys_machine(unsigned long *phys_machine, int nr_pages)
{

        unsigned long *tmp_page, *current_page, *next_page;
        int i;

        tmp_page = bootmem_alloc(PAGE_SIZE);
        current_page = phys_machine + nr_pages - (PAGE_SIZE/sizeof(unsigned long));
        next_page = current_page - (PAGE_SIZE/sizeof(unsigned long));
        bcopy(phys_machine, tmp_page, PAGE_SIZE);

        while (current_page > phys_machine) {
                /* save next page */
                bcopy(next_page, tmp_page, PAGE_SIZE);
                /* shift down page */
                bcopy(current_page, next_page, PAGE_SIZE);
                /* finish swap */
                bcopy(tmp_page, current_page, PAGE_SIZE);

                current_page -= (PAGE_SIZE/sizeof(unsigned long));
                next_page -= (PAGE_SIZE/sizeof(unsigned long));
        }
        bootmem_free(tmp_page, PAGE_SIZE);

        for (i = 0; i < nr_pages; i++) {
                xen_machphys_update(phys_machine[i], i);
        }
        memset(phys_machine, INVALID_P2M_ENTRY, PAGE_SIZE);

}
#endif /* ADD_ISA_HOLE */

/*
 * Build a directory of the pages that make up our Physical to Machine
 * mapping table. The Xen suspend/restore code uses this to find our
 * mapping table.
 */
static void
init_frame_list_list(void *arg)
{
        unsigned long nr_pages = xen_start_info->nr_pages;
#define FPP     (PAGE_SIZE/sizeof(xen_pfn_t))
        int i, j, k;

        xen_pfn_to_mfn_frame_list_list = malloc(PAGE_SIZE, M_DEVBUF, M_WAITOK);
        for (i = 0, j = 0, k = -1; i < nr_pages;
             i += FPP, j++) {
                if ((j & (FPP - 1)) == 0) {
                        k++;
                        xen_pfn_to_mfn_frame_list[k] =
                                malloc(PAGE_SIZE, M_DEVBUF, M_WAITOK);
                        xen_pfn_to_mfn_frame_list_list[k] =
                                VTOMFN(xen_pfn_to_mfn_frame_list[k]);
                        j = 0;
                }
                xen_pfn_to_mfn_frame_list[k][j] =
                        VTOMFN(&xen_phys_machine[i]);
        }

        HYPERVISOR_shared_info->arch.max_pfn = nr_pages;
        HYPERVISOR_shared_info->arch.pfn_to_mfn_frame_list_list
                = VTOMFN(xen_pfn_to_mfn_frame_list_list);
}
SYSINIT(init_fll, SI_SUB_DEVFS, SI_ORDER_ANY, init_frame_list_list, NULL);
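
#if 0
/*
 * Illustrative sketch, not compiled: the result is a two-level directory
 * over the p2m table.  Walking it back down for a given pfn yields the
 * MFN of the p2m page holding that pfn's entry (not the pfn's own mfn):
 */
static xen_pfn_t
example_p2m_page_mfn(unsigned long pfn)
{
        unsigned long idx = pfn / FPP;  /* linear frame-list entry index */

        return (xen_pfn_to_mfn_frame_list[idx / FPP][idx % FPP]);
}
#endif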

extern unsigned long physfree;

int pdir, curoffset;
extern int nkpt;

void
initvalues(start_info_t *startinfo)
{
        int l3_pages, l2_pages, l1_pages, offset;
        vm_offset_t cur_space, cur_space_pt;
        struct physdev_set_iopl set_iopl;

        vm_paddr_t KPTphys, IdlePTDma;
        vm_paddr_t console_page_ma, xen_store_ma;
        vm_offset_t KPTphysoff, tmpva;
        vm_paddr_t shinfo;
#ifdef PAE
        vm_paddr_t IdlePDPTma, IdlePDPTnewma;
        vm_paddr_t IdlePTDnewma[4];
        pd_entry_t *IdlePDPTnew, *IdlePTDnew;
#else
        vm_paddr_t pdir_shadow_ma;
#endif
        unsigned long i;
        int ncpus = MAXCPU;

        nkpt = min(
                min(
                        max((startinfo->nr_pages >> NPGPTD_SHIFT), nkpt),
                    NPGPTD*NPDEPG - KPTDI),
                    (HYPERVISOR_VIRT_START - KERNBASE) >> PDRSHIFT);

        HYPERVISOR_vm_assist(VMASST_CMD_enable, VMASST_TYPE_4gb_segments);
#ifdef notyet
        /*
         * need to install handler
         */
        HYPERVISOR_vm_assist(VMASST_CMD_enable, VMASST_TYPE_4gb_segments_notify);
#endif
        xen_start_info = startinfo;
        xen_phys_machine = (xen_pfn_t *)startinfo->mfn_list;

        IdlePTD = (pd_entry_t *)((uint8_t *)startinfo->pt_base + PAGE_SIZE);
        l1_pages = 0;

#ifdef PAE
        l3_pages = 1;
        l2_pages = 0;
        IdlePDPT = (pd_entry_t *)startinfo->pt_base;
        IdlePDPTma = xpmap_ptom(VTOP(startinfo->pt_base));
        for (i = (KERNBASE >> 30);
             (i < 4) && (IdlePDPT[i] != 0); i++)
                        l2_pages++;
        /*
         * Note that only one page directory has been allocated at this point.
         * Thus, if KERNBASE
         */
#if 0
        for (i = 0; i < l2_pages; i++)
                IdlePTDma[i] = xpmap_ptom(VTOP(IdlePTD + i*PAGE_SIZE));
#endif

        l2_pages = (l2_pages == 0) ? 1 : l2_pages;
#else
        l3_pages = 0;
        l2_pages = 1;
#endif
        for (i = (((KERNBASE>>18) & PAGE_MASK)>>PAGE_SHIFT);
             (i<l2_pages*NPDEPG) && (i<(VM_MAX_KERNEL_ADDRESS>>PDRSHIFT)); i++) {

                if (IdlePTD[i] == 0)
                        break;
                l1_pages++;
        }

        /* number of pages allocated after the pts + 1 */
        cur_space = xen_start_info->pt_base +
            ((xen_start_info->nr_pt_frames) + 3 )*PAGE_SIZE;
        printk("initvalues(): wooh - availmem=%x,%x\n", avail_space, cur_space);

        printk("KERNBASE=%x,pt_base=%x, VTOPFN(base)=%x, nr_pt_frames=%x\n",
            KERNBASE,xen_start_info->pt_base, VTOPFN(xen_start_info->pt_base),
            xen_start_info->nr_pt_frames);
        xendebug_flags = 0; /* 0xffffffff; */

        /* allocate 4 pages for bootmem allocator */
        bootmem_start = bootmem_current = (char *)cur_space;
        cur_space += (4 * PAGE_SIZE);
        bootmem_end = (char *)cur_space;

        /* allocate page for gdt */
        gdt = (union descriptor *)cur_space;
        cur_space += PAGE_SIZE*ncpus;

        /* allocate page for ldt */
        ldt = (union descriptor *)cur_space; cur_space += PAGE_SIZE;
        cur_space += PAGE_SIZE;

        HYPERVISOR_shared_info = (shared_info_t *)cur_space;
        cur_space += PAGE_SIZE;

        xen_store = (struct ringbuf_head *)cur_space;
        cur_space += PAGE_SIZE;

        console_page = (char *)cur_space;
        cur_space += PAGE_SIZE;

#ifdef ADD_ISA_HOLE
        shift_phys_machine(xen_phys_machine, xen_start_info->nr_pages);
#endif
        /*
         * pre-zero unused mapped pages - mapped on 4MB boundary
         */
#ifdef PAE
        IdlePDPT = (pd_entry_t *)startinfo->pt_base;
        IdlePDPTma = xpmap_ptom(VTOP(startinfo->pt_base));
        /*
         * Note that only one page directory has been allocated at this point.
         * Thus, if KERNBASE
         */
        IdlePTD = (pd_entry_t *)((uint8_t *)startinfo->pt_base + PAGE_SIZE);
        IdlePTDma = xpmap_ptom(VTOP(IdlePTD));
        l3_pages = 1;
#else
        IdlePTD = (pd_entry_t *)startinfo->pt_base;
        IdlePTDma = xpmap_ptom(VTOP(startinfo->pt_base));
        l3_pages = 0;
#endif
        l2_pages = 1;
        l1_pages = xen_start_info->nr_pt_frames - l2_pages - l3_pages;

        KPTphysoff = (l2_pages + l3_pages)*PAGE_SIZE;

        KPTphys = xpmap_ptom(VTOP(startinfo->pt_base + KPTphysoff));
        XENPRINTF("IdlePTD %p\n", IdlePTD);
        XENPRINTF("nr_pages: %ld shared_info: 0x%lx flags: 0x%lx pt_base: 0x%lx "
                  "mod_start: 0x%lx mod_len: 0x%lx\n",
                  xen_start_info->nr_pages, xen_start_info->shared_info,
                  xen_start_info->flags, xen_start_info->pt_base,
                  xen_start_info->mod_start, xen_start_info->mod_len);
        /* Map proc0's KSTACK */

        proc0kstack = cur_space; cur_space += (KSTACK_PAGES * PAGE_SIZE);
        printk("proc0kstack=%u\n", proc0kstack);

        /* vm86/bios stack */
        cur_space += PAGE_SIZE;

        /* Map space for the vm86 region */
        vm86paddr = (vm_offset_t)cur_space;
        cur_space += (PAGE_SIZE * 3);

#ifdef PAE
        IdlePDPTnew = (pd_entry_t *)cur_space; cur_space += PAGE_SIZE;
        bzero(IdlePDPTnew, PAGE_SIZE);

        IdlePDPTnewma = xpmap_ptom(VTOP(IdlePDPTnew));
        IdlePTDnew = (pd_entry_t *)cur_space; cur_space += 4*PAGE_SIZE;
        bzero(IdlePTDnew, 4*PAGE_SIZE);

        for (i = 0; i < 4; i++)
                IdlePTDnewma[i] =
                    xpmap_ptom(VTOP((uint8_t *)IdlePTDnew + i*PAGE_SIZE));
        /*
         * L3
         *
         * Copy the 4 machine addresses of the new PTDs in to the PDPT
         *
         */
        for (i = 0; i < 4; i++)
                IdlePDPTnew[i] = IdlePTDnewma[i] | PG_V;

        __asm__("nop;");
        /*
         *
         * re-map the new PDPT read-only
         */
        PT_SET_MA(IdlePDPTnew, IdlePDPTnewma | PG_V);
        /*
         *
         * Unpin the current PDPT
         */
        xen_pt_unpin(IdlePDPTma);

        for (i = 0; i < 20; i++) {
                int startidx = ((KERNBASE >> 18) & PAGE_MASK) >> 3;

                if (IdlePTD[startidx + i] == 0) {
                        l1_pages = i;
                        break;
                }
        }

#endif  /* PAE */

        /*
         * unmap remaining pages from initial 4MB chunk
         */
        for (tmpva = cur_space; (tmpva & ((1<<22)-1)) != 0; tmpva += PAGE_SIZE) {
                bzero((char *)tmpva, PAGE_SIZE);
                PT_SET_MA(tmpva, (vm_paddr_t)0);
        }

        PT_UPDATES_FLUSH();

        memcpy(((uint8_t *)IdlePTDnew) + ((unsigned int)(KERNBASE >> 18)),
            ((uint8_t *)IdlePTD) + ((KERNBASE >> 18) & PAGE_MASK),
            l1_pages*sizeof(pt_entry_t));

        for (i = 0; i < 4; i++) {
                PT_SET_MA((uint8_t *)IdlePTDnew + i*PAGE_SIZE,
                    IdlePTDnewma[i] | PG_V);
        }
        xen_load_cr3(VTOP(IdlePDPTnew));
        xen_pgdpt_pin(xpmap_ptom(VTOP(IdlePDPTnew)));

        /* allocate remainder of nkpt pages */
        cur_space_pt = cur_space;
        for (offset = (KERNBASE >> PDRSHIFT), i = l1_pages; i < nkpt;
             i++, cur_space += PAGE_SIZE) {
                pdir = (offset + i) / NPDEPG;
                curoffset = ((offset + i) % NPDEPG);
                if (((offset + i) << PDRSHIFT) == VM_MAX_KERNEL_ADDRESS)
                        break;

                /*
                 * make sure that all the initial page table pages
                 * have been zeroed
                 */
                PT_SET_MA(cur_space_pt,
                    xpmap_ptom(VTOP(cur_space)) | PG_V | PG_RW);
                bzero((char *)cur_space_pt, PAGE_SIZE);
                PT_SET_MA(cur_space_pt, (vm_paddr_t)0);
                xen_pt_pin(xpmap_ptom(VTOP(cur_space)));
                xen_queue_pt_update((vm_paddr_t)(IdlePTDnewma[pdir] +
                        curoffset*sizeof(vm_paddr_t)),
                    xpmap_ptom(VTOP(cur_space)) | PG_KERNEL);
                PT_UPDATES_FLUSH();
        }

        for (i = 0; i < 4; i++) {
                pdir = (PTDPTDI + i) / NPDEPG;
                curoffset = (PTDPTDI + i) % NPDEPG;

                xen_queue_pt_update((vm_paddr_t)(IdlePTDnewma[pdir] +
                        curoffset*sizeof(vm_paddr_t)),
                    IdlePTDnewma[i] | PG_V);
        }

        PT_UPDATES_FLUSH();

        IdlePTD = IdlePTDnew;
        IdlePDPT = IdlePDPTnew;
        IdlePDPTma = IdlePDPTnewma;

        /*
         * shared_info is an unsigned long so this will randomly break if
         * it is allocated above 4GB - I guess people are used to that
         * sort of thing with Xen ... sigh
         */
        shinfo = xen_start_info->shared_info;
        PT_SET_MA(HYPERVISOR_shared_info, shinfo | PG_KERNEL);

        printk("#4\n");

        xen_store_ma = (((vm_paddr_t)xen_start_info->store_mfn) << PAGE_SHIFT);
        PT_SET_MA(xen_store, xen_store_ma | PG_KERNEL);
        console_page_ma = (((vm_paddr_t)xen_start_info->console.domU.mfn) << PAGE_SHIFT);
        PT_SET_MA(console_page, console_page_ma | PG_KERNEL);

        printk("#5\n");

        set_iopl.iopl = 1;
        PANIC_IF(HYPERVISOR_physdev_op(PHYSDEVOP_SET_IOPL, &set_iopl));
        printk("#6\n");
#if 0
        /* add page table for KERNBASE */
        xen_queue_pt_update(IdlePTDma + KPTDI*sizeof(vm_paddr_t),
                            xpmap_ptom(VTOP(cur_space) | PG_KERNEL));
        xen_flush_queue();
#ifdef PAE
        xen_queue_pt_update(pdir_shadow_ma[3] + KPTDI*sizeof(vm_paddr_t),
                            xpmap_ptom(VTOP(cur_space) | PG_V | PG_A));
#else
        xen_queue_pt_update(pdir_shadow_ma + KPTDI*sizeof(vm_paddr_t),
                            xpmap_ptom(VTOP(cur_space) | PG_V | PG_A));
#endif
        xen_flush_queue();
        cur_space += PAGE_SIZE;
        printk("#6\n");
#endif /* 0 */
#ifdef notyet
        if (xen_start_info->flags & SIF_INITDOMAIN) {
                /* Map first megabyte */
                for (i = 0; i < (256 << PAGE_SHIFT); i += PAGE_SIZE)
                        PT_SET_MA(KERNBASE + i, i | PG_KERNEL | PG_NC_PCD);
                xen_flush_queue();
        }
#endif
        /*
         * re-map kernel text read-only
         */
        for (i = (((vm_offset_t)&btext) & ~PAGE_MASK);
             i < (((vm_offset_t)&etext) & ~PAGE_MASK); i += PAGE_SIZE)
                PT_SET_MA(i, xpmap_ptom(VTOP(i)) | PG_V | PG_A);

        printk("#7\n");
        physfree = VTOP(cur_space);
        init_first = physfree >> PAGE_SHIFT;
        IdlePTD = (pd_entry_t *)VTOP(IdlePTD);
        IdlePDPT = (pd_entry_t *)VTOP(IdlePDPT);
        setup_xen_features();
        printk("#8, proc0kstack=%u\n", proc0kstack);
}

trap_info_t trap_table[] = {
        { 0,   0, GSEL(GCODE_SEL, SEL_KPL), (unsigned long) &IDTVEC(div)},
        { 1,   0|4, GSEL(GCODE_SEL, SEL_KPL), (unsigned long) &IDTVEC(dbg)},
        { 3,   3|4, GSEL(GCODE_SEL, SEL_KPL), (unsigned long) &IDTVEC(bpt)},
        { 4,   3, GSEL(GCODE_SEL, SEL_KPL), (unsigned long) &IDTVEC(ofl)},
        /* This is UPL on Linux and KPL on BSD */
        { 5,   3, GSEL(GCODE_SEL, SEL_KPL), (unsigned long) &IDTVEC(bnd)},
        { 6,   0, GSEL(GCODE_SEL, SEL_KPL), (unsigned long) &IDTVEC(ill)},
        { 7,   0|4, GSEL(GCODE_SEL, SEL_KPL), (unsigned long) &IDTVEC(dna)},
        /*
         * { 8,   0, GSEL(GCODE_SEL, SEL_KPL), (unsigned long) &IDTVEC(XXX)},
         *   no handler for double fault
         */
        { 9,   0, GSEL(GCODE_SEL, SEL_KPL), (unsigned long) &IDTVEC(fpusegm)},
        {10,   0, GSEL(GCODE_SEL, SEL_KPL), (unsigned long) &IDTVEC(tss)},
        {11,   0, GSEL(GCODE_SEL, SEL_KPL), (unsigned long) &IDTVEC(missing)},
        {12,   0, GSEL(GCODE_SEL, SEL_KPL), (unsigned long) &IDTVEC(stk)},
        {13,   0, GSEL(GCODE_SEL, SEL_KPL), (unsigned long) &IDTVEC(prot)},
        {14,   0|4, GSEL(GCODE_SEL, SEL_KPL), (unsigned long) &IDTVEC(page)},
        {15,   0, GSEL(GCODE_SEL, SEL_KPL), (unsigned long) &IDTVEC(rsvd)},
        {16,   0, GSEL(GCODE_SEL, SEL_KPL), (unsigned long) &IDTVEC(fpu)},
        {17,   0, GSEL(GCODE_SEL, SEL_KPL), (unsigned long) &IDTVEC(align)},
        {18,   0, GSEL(GCODE_SEL, SEL_KPL), (unsigned long) &IDTVEC(mchk)},
        {19,   0, GSEL(GCODE_SEL, SEL_KPL), (unsigned long) &IDTVEC(xmm)},
        {0x80, 3, GSEL(GCODE_SEL, SEL_KPL), (unsigned long) &IDTVEC(int0x80_syscall)},
        {  0, 0,           0, 0 }
};
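
#if 0
/*
 * Illustrative sketch, not compiled: a PV guest has no real IDT to lidt;
 * instead the table above is registered with the hypervisor.  The i386
 * Xen startup path does this with the set_trap_table hypercall, along
 * the lines of:
 */
        PANIC_IF(HYPERVISOR_set_trap_table(trap_table) != 0);
#endif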

/********** CODE WORTH KEEPING ABOVE HERE *****************/

void xen_failsafe_handler(void);

void
xen_failsafe_handler(void)
{

        panic("xen_failsafe_handler called!\n");
}

void xen_handle_thread_switch(struct pcb *pcb);

/*
 * This is called by cpu_switch() when switching threads.  The pcb arg
 * refers to the process control block of the next thread which is to run.
 */
void
xen_handle_thread_switch(struct pcb *pcb)
{
        uint32_t *a = (uint32_t *)&PCPU_GET(fsgs_gdt)[0];
        uint32_t *b = (uint32_t *)&pcb->pcb_fsd;
        multicall_entry_t mcl[3];
        int i = 0;

        /* Notify Xen of task switch */
        mcl[i].op = __HYPERVISOR_stack_switch;
        mcl[i].args[0] = GSEL(GDATA_SEL, SEL_KPL);
        mcl[i++].args[1] = (unsigned long)pcb;

        /* Check for update of fsd */
        if (*a != *b || *(a+1) != *(b+1)) {
                mcl[i].op = __HYPERVISOR_update_descriptor;
                *(uint64_t *)&mcl[i].args[0] = vtomach((vm_offset_t)a);
                *(uint64_t *)&mcl[i++].args[2] = *(uint64_t *)b;
        }

        a += 2;
        b += 2;

        /* Check for update of gsd */
        if (*a != *b || *(a+1) != *(b+1)) {
                mcl[i].op = __HYPERVISOR_update_descriptor;
                *(uint64_t *)&mcl[i].args[0] = vtomach((vm_offset_t)a);
                *(uint64_t *)&mcl[i++].args[2] = *(uint64_t *)b;
        }

        (void)HYPERVISOR_multicall(mcl, i);
}
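
#if 0
/*
 * Illustrative sketch, not compiled: the multicall above batches up to
 * three hypercalls into a single guest-to-hypervisor transition.  Issued
 * one at a time, the equivalent would be:
 */
static void
example_unbatched_switch(struct pcb *pcb, uint32_t *a, uint32_t *b)
{
        HYPERVISOR_stack_switch(GSEL(GDATA_SEL, SEL_KPL), (unsigned long)pcb);
        /* fsd slot, then gsd slot, each 8 bytes wide */
        HYPERVISOR_update_descriptor(vtomach((vm_offset_t)a),
            *(uint64_t *)b);
        HYPERVISOR_update_descriptor(vtomach((vm_offset_t)(a + 2)),
            *(uint64_t *)(b + 2));
}
#endif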