1 /*
2  *
3  * Copyright (c) 2004 Christian Limpach.
4  * Copyright (c) 2004-2006,2008 Kip Macy
5  * All rights reserved.
6  *
7  * Redistribution and use in source and binary forms, with or without
8  * modification, are permitted provided that the following conditions
9  * are met:
10  * 1. Redistributions of source code must retain the above copyright
11  *    notice, this list of conditions and the following disclaimer.
12  * 2. Redistributions in binary form must reproduce the above copyright
13  *    notice, this list of conditions and the following disclaimer in the
14  *    documentation and/or other materials provided with the distribution.
15  * 3. All advertising materials mentioning features or use of this software
16  *    must display the following acknowledgement:
17  *      This product includes software developed by Christian Limpach.
18  * 4. The name of the author may not be used to endorse or promote products
19  *    derived from this software without specific prior written permission.
20  *
21  * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
22  * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
23  * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
24  * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT,
25  * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
26  * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
27  * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
28  * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
29  * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
30  * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
31  */
32
33 #include <sys/cdefs.h>
34 __FBSDID("$FreeBSD$");
35
36 #include <sys/param.h>
37 #include <sys/systm.h>
38 #include <sys/bus.h>
39 #include <sys/ktr.h>
40 #include <sys/lock.h>
41 #include <sys/mount.h>
42 #include <sys/malloc.h>
43 #include <sys/mutex.h>
44 #include <sys/kernel.h>
45 #include <sys/reboot.h>
46 #include <sys/sysproto.h>
47
48 #include <machine/xen/xen-os.h>
49
50 #include <vm/vm.h>
51 #include <vm/pmap.h>
52 #include <machine/segments.h>
53 #include <machine/pcb.h>
54 #include <machine/stdarg.h>
55 #include <machine/vmparam.h>
56 #include <machine/cpu.h>
57 #include <machine/intr_machdep.h>
58 #include <machine/md_var.h>
59 #include <machine/asmacros.h>
60
61
62
63 #include <xen/hypervisor.h>
64 #include <machine/xen/xenvar.h>
65 #include <machine/xen/xenfunc.h>
66 #include <machine/xen/xenpmap.h>
67 #include <machine/xen/xenfunc.h>
68 #include <xen/interface/memory.h>
69 #include <machine/xen/features.h>
70 #ifdef SMP
71 #include <machine/privatespace.h>
72 #endif
73
74
75 #include <vm/vm_page.h>
76
77
78 #define IDTVEC(name)    __CONCAT(X,name)
79
80 extern inthand_t
81 IDTVEC(div), IDTVEC(dbg), IDTVEC(nmi), IDTVEC(bpt), IDTVEC(ofl),
82         IDTVEC(bnd), IDTVEC(ill), IDTVEC(dna), IDTVEC(fpusegm),
83         IDTVEC(tss), IDTVEC(missing), IDTVEC(stk), IDTVEC(prot),
84         IDTVEC(page), IDTVEC(mchk), IDTVEC(rsvd), IDTVEC(fpu), IDTVEC(align),
85         IDTVEC(xmm), IDTVEC(lcall_syscall), IDTVEC(int0x80_syscall);
86
87
88 int xendebug_flags; 
89 start_info_t *xen_start_info;
90 shared_info_t *HYPERVISOR_shared_info;
91 xen_pfn_t *xen_machine_phys = machine_to_phys_mapping;
92 xen_pfn_t *xen_phys_machine;
93 xen_pfn_t *xen_pfn_to_mfn_frame_list[16];
94 xen_pfn_t *xen_pfn_to_mfn_frame_list_list;
95 int preemptable, init_first;
96 extern unsigned int avail_space;
97
98 void ni_cli(void);
99 void ni_sti(void);
100
101
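/*
 * ni_cli() and ni_sti() wrap the paravirtualized interrupt disable/enable
 * primitives while saving and restoring the scratch registers that the
 * inlined __cli()/__sti() code may clobber; presumably intended for
 * callers (e.g. assembly stubs) that do not follow the normal C calling
 * convention.
 */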
102 void
103 ni_cli(void)
104 {
105         CTR0(KTR_SPARE2, "ni_cli disabling interrupts");
106         __asm__("pushl %edx;"
107                 "pushl %eax;"
108                 );
109         __cli();
110         __asm__("popl %eax;"
111                 "popl %edx;"
112                 );
113 }
114
115
116 void
117 ni_sti(void)
118 {
119         __asm__("pushl %edx;"
120                 "pushl %esi;"
121                 "pushl %eax;"
122                 );
123         __sti();
124         __asm__("popl %eax;"
125                 "popl %esi;"
126                 "popl %edx;"
127                 );
128 }
129
130 /*
131  * Modify the cmd_line in place, converting each ',' to a NUL so that it
132  * is in a format suitable for the kernel's static environment variables.
133  */
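/*
 * For example (illustrative values only), a cmd_line of
 * "boot_verbose,kern.hz=250" is rewritten in place to
 * "boot_verbose\0kern.hz=250\0", i.e. the NUL-separated entries that the
 * static environment code expects to walk.
 */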
134 char *
135 xen_setbootenv(char *cmd_line)
136 {
137         char *cmd_line_next;
138     
139         /* Skip leading spaces */
140         for (; *cmd_line == ' '; cmd_line++);
141
142         printk("xen_setbootenv(): cmd_line='%s'\n", cmd_line);
143
144         for (cmd_line_next = cmd_line; strsep(&cmd_line_next, ",") != NULL;);
145         return cmd_line;
146 }
147
148 static struct 
149 {
150         const char      *ev;
151         int             mask;
152 } howto_names[] = {
153         {"boot_askname",        RB_ASKNAME},
154         {"boot_single", RB_SINGLE},
155         {"boot_nosync", RB_NOSYNC},
156         {"boot_halt",   RB_HALT},
157         {"boot_serial", RB_SERIAL},
158         {"boot_cdrom",  RB_CDROM},
159         {"boot_gdb",    RB_GDB},
160         {"boot_gdb_pause",      RB_RESERVED1},
161         {"boot_verbose",        RB_VERBOSE},
162         {"boot_multicons",      RB_MULTIPLE},
163         {NULL,  0}
164 };
165
166 int 
167 xen_boothowto(char *envp)
168 {
169         int i, howto = 0;
170
171         /* get equivalents from the environment */
172         for (i = 0; howto_names[i].ev != NULL; i++)
173                 if (getenv(howto_names[i].ev) != NULL)
174                         howto |= howto_names[i].mask;
175         return howto;
176 }
177
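/*
 * printk() formats into a small static buffer and hands it to Xen's
 * emergency console via HYPERVISOR_console_write(), so it is usable
 * before (and independently of) the regular FreeBSD console.  Note that
 * the static buffer is not protected by any lock.
 */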
178 #define PRINTK_BUFSIZE 1024
179 void
180 printk(const char *fmt, ...)
181 {
182         __va_list ap;
183         int retval;
184         static char buf[PRINTK_BUFSIZE];
185
186         va_start(ap, fmt);
187         retval = vsnprintf(buf, PRINTK_BUFSIZE - 1, fmt, ap);
188         va_end(ap);
189         retval = imin(retval, PRINTK_BUFSIZE - 1); /* vsnprintf reports the untruncated length */
190         buf[retval] = 0;
191         (void)HYPERVISOR_console_write(buf, retval);
192 }
193
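/*
 * Under Xen, page-table updates are not made directly; they are queued
 * here as mmu_update_t entries and submitted in batches through a single
 * HYPERVISOR_mmu_update() hypercall (see _xen_flush_queue()), which
 * amortizes the hypercall cost.  With SMP each vcpu has its own queue,
 * and under INVARIANTS the file/line of every queued update is logged to
 * help diagnose a failed batch.
 */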
194 #define XPQUEUE_SIZE 128
195
196 struct mmu_log {
197         char *file;
198         int line;
199 };
200
201 #ifdef SMP
202 /* per-cpu queues and indices */
203 #ifdef INVARIANTS
204 static struct mmu_log xpq_queue_log[MAX_VIRT_CPUS][XPQUEUE_SIZE];
205 #endif
206
207 static int xpq_idx[MAX_VIRT_CPUS];  
208 static mmu_update_t xpq_queue[MAX_VIRT_CPUS][XPQUEUE_SIZE];
209
210 #define XPQ_QUEUE_LOG xpq_queue_log[vcpu]
211 #define XPQ_QUEUE xpq_queue[vcpu]
212 #define XPQ_IDX xpq_idx[vcpu]
213 #define SET_VCPU() int vcpu = smp_processor_id()
214 #else
215         
216 static mmu_update_t xpq_queue[XPQUEUE_SIZE];
217 static struct mmu_log xpq_queue_log[XPQUEUE_SIZE];
218 static int xpq_idx = 0;
219
220 #define XPQ_QUEUE_LOG xpq_queue_log
221 #define XPQ_QUEUE xpq_queue
222 #define XPQ_IDX xpq_idx
223 #define SET_VCPU()
224 #endif /* !SMP */
225
226 #define XPQ_IDX_INC atomic_add_int(&XPQ_IDX, 1);
227
228 #if 0
229 static void
230 xen_dump_queue(void)
231 {
232         int _xpq_idx = XPQ_IDX;
233         int i;
234
235         if (_xpq_idx <= 1)
236                 return;
237
238         printk("xen_dump_queue(): %u entries\n", _xpq_idx);
239         for (i = 0; i < _xpq_idx; i++) {
240                 printk(" val: %llx ptr: %llx\n", XPQ_QUEUE[i].val, XPQ_QUEUE[i].ptr);
241         }
242 }
243 #endif
244
245
246 static __inline void
247 _xen_flush_queue(void)
248 {
249         SET_VCPU();
250         int _xpq_idx = XPQ_IDX;
251         int error, i;
252         /* window of vulnerability here? */
253
254         if (__predict_true(gdtset))
255                 critical_enter();
256         XPQ_IDX = 0;
257         /* Make sure index is cleared first to avoid double updates. */
258         error = HYPERVISOR_mmu_update((mmu_update_t *)&XPQ_QUEUE,
259                                       _xpq_idx, NULL, DOMID_SELF);
260     
261 #if 0
262         if (__predict_true(gdtset))
263         for (i = _xpq_idx; i > 0;) {
264                 if (i >= 3) {
265                         CTR6(KTR_PMAP, "mmu:val: %lx ptr: %lx val: %lx "
266                             "ptr: %lx val: %lx ptr: %lx",
267                             (XPQ_QUEUE[i-1].val & 0xffffffff),
268                             (XPQ_QUEUE[i-1].ptr & 0xffffffff),
269                             (XPQ_QUEUE[i-2].val & 0xffffffff),
270                             (XPQ_QUEUE[i-2].ptr & 0xffffffff),
271                             (XPQ_QUEUE[i-3].val & 0xffffffff),
272                             (XPQ_QUEUE[i-3].ptr & 0xffffffff));
273                             i -= 3;
274                 } else if (i == 2) {
275                         CTR4(KTR_PMAP, "mmu: val: %lx ptr: %lx val: %lx ptr: %lx",
276                             (XPQ_QUEUE[i-1].val & 0xffffffff),
277                             (XPQ_QUEUE[i-1].ptr & 0xffffffff),
278                             (XPQ_QUEUE[i-2].val & 0xffffffff),
279                             (XPQ_QUEUE[i-2].ptr & 0xffffffff));
280                         i = 0;
281                 } else {
282                         CTR2(KTR_PMAP, "mmu: val: %lx ptr: %lx", 
283                             (XPQ_QUEUE[i-1].val & 0xffffffff),
284                             (XPQ_QUEUE[i-1].ptr & 0xffffffff));
285                         i = 0;
286                 }
287         }
288 #endif  
289         if (__predict_true(gdtset))
290                 critical_exit();
291         if (__predict_false(error < 0)) {
292                 for (i = 0; i < _xpq_idx; i++)
293                         printf("val: %llx ptr: %llx\n",
294                             XPQ_QUEUE[i].val, XPQ_QUEUE[i].ptr);
295                 panic("Failed to execute MMU updates: %d", error);
296         }
297
298 }
299
300 void
301 xen_flush_queue(void)
302 {
303         SET_VCPU();
304         if (XPQ_IDX != 0) _xen_flush_queue();
305 }
306
307 static __inline void
308 xen_increment_idx(void)
309 {
310         SET_VCPU();
311
312         XPQ_IDX++;
313         if (__predict_false(XPQ_IDX == XPQUEUE_SIZE))
314                 xen_flush_queue();
315 }
316
317 void
318 xen_check_queue(void)
319 {
320 #ifdef INVARIANTS
321         SET_VCPU();
322         
323         KASSERT(XPQ_IDX == 0, ("pending operations XPQ_IDX=%d", XPQ_IDX));
324 #endif
325 }
326
327 void
328 xen_invlpg(vm_offset_t va)
329 {
330         struct mmuext_op op;
331         op.cmd = MMUEXT_INVLPG_ALL;
332         op.arg1.linear_addr = va & ~PAGE_MASK;
333         PANIC_IF(HYPERVISOR_mmuext_op(&op, 1, NULL, DOMID_SELF) < 0);
334 }
335
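/*
 * A PV guest cannot load %cr3 directly; MMUEXT_NEW_BASEPTR asks the
 * hypervisor to validate and install the new page directory, identified
 * by its machine frame number.
 */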
336 void
337 xen_load_cr3(u_int val)
338 {
339         struct mmuext_op op;
340 #ifdef INVARIANTS
341         SET_VCPU();
342         
343         KASSERT(XPQ_IDX == 0, ("pending operations XPQ_IDX=%d", XPQ_IDX));
344 #endif
345         op.cmd = MMUEXT_NEW_BASEPTR;
346         op.arg1.mfn = xpmap_ptom(val) >> PAGE_SHIFT;
347         PANIC_IF(HYPERVISOR_mmuext_op(&op, 1, NULL, DOMID_SELF) < 0);
348 }
349
350 #ifdef KTR
351 static __inline u_int
352 rebp(void)
353 {
354         u_int   data;
355
356         __asm __volatile("movl 4(%%ebp),%0" : "=r" (data));     
357         return (data);
358 }
359 #endif
360
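/*
 * Under Xen the guest's virtual interrupt-enable state lives in the
 * per-vcpu evtchn_upcall_mask rather than in the real PSL_I flag, so
 * these replacements for read_eflags()/write_eflags() fold that mask
 * into the PSL_I bit seen by the rest of the kernel.
 */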
361 u_int
362 read_eflags(void)
363 {
364         vcpu_info_t *_vcpu;
365         u_int eflags;
366
367         eflags = _read_eflags();
368         _vcpu = &HYPERVISOR_shared_info->vcpu_info[smp_processor_id()]; 
369         if (_vcpu->evtchn_upcall_mask)
370                 eflags &= ~PSL_I;
371
372         return (eflags);
373 }
374
375 void
376 write_eflags(u_int eflags)
377 {
378         u_int intr;
379
380         CTR2(KTR_SPARE2, "%x xen_restore_flags eflags %x", rebp(), eflags);
381         intr = ((eflags & PSL_I) == 0);
382         __restore_flags(intr);
383         _write_eflags(eflags);
384 }
385
386 void
387 xen_cli(void)
388 {
389         CTR1(KTR_SPARE2, "%x xen_cli disabling interrupts", rebp());
390         __cli();
391 }
392
393 void
394 xen_sti(void)
395 {
396         CTR1(KTR_SPARE2, "%x xen_sti enabling interrupts", rebp());
397         __sti();
398 }
399
400 u_int
401 xen_rcr2(void)
402 {
403
404         return (HYPERVISOR_shared_info->vcpu_info[curcpu].arch.cr2);
405 }
406
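/*
 * _xen_machphys_update() queues an update of the machine-to-physical
 * translation for the given mfn, while _xen_queue_pt_update() queues an
 * ordinary PTE write at machine address ptr.  Both merely enqueue; the
 * hypercall happens when the queue is flushed.
 */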
407 void
408 _xen_machphys_update(vm_paddr_t mfn, vm_paddr_t pfn, char *file, int line)
409 {
410         SET_VCPU();
411         
412         if (__predict_true(gdtset))
413                 critical_enter();
414         XPQ_QUEUE[XPQ_IDX].ptr = (mfn << PAGE_SHIFT) | MMU_MACHPHYS_UPDATE;
415         XPQ_QUEUE[XPQ_IDX].val = pfn;
416 #ifdef INVARIANTS
417         XPQ_QUEUE_LOG[XPQ_IDX].file = file;
418         XPQ_QUEUE_LOG[XPQ_IDX].line = line;     
419 #endif          
420         xen_increment_idx();
421         if (__predict_true(gdtset))
422                 critical_exit();
423 }
424
425 void
426 _xen_queue_pt_update(vm_paddr_t ptr, vm_paddr_t val, char *file, int line)
427 {
428         SET_VCPU();
429
430         if (__predict_true(gdtset))     
431                 mtx_assert(&vm_page_queue_mtx, MA_OWNED);
432
433         KASSERT((ptr & 7) == 0, ("misaligned update"));
434         
435         if (__predict_true(gdtset))
436                 critical_enter();
437         
438         XPQ_QUEUE[XPQ_IDX].ptr = ((uint64_t)ptr) | MMU_NORMAL_PT_UPDATE;
439         XPQ_QUEUE[XPQ_IDX].val = (uint64_t)val;
440 #ifdef INVARIANTS
441         XPQ_QUEUE_LOG[XPQ_IDX].file = file;
442         XPQ_QUEUE_LOG[XPQ_IDX].line = line;     
443 #endif  
444         xen_increment_idx();
445         if (__predict_true(gdtset))
446                 critical_exit();
447 }
448
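/*
 * Page-table pages must be pinned (validated and type-locked by the
 * hypervisor, after which the guest may no longer write them directly)
 * before they can be installed, and unpinned before the memory can be
 * reused.  The L1/L2/L3 variants below correspond to page tables, page
 * directories and the PAE PDPT respectively; the update queue is flushed
 * first so that pending PTE writes are visible to Xen's validation.
 */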
449 void 
450 xen_pgdpt_pin(vm_paddr_t ma)
451 {
452         struct mmuext_op op;
453         op.cmd = MMUEXT_PIN_L3_TABLE;
454         op.arg1.mfn = ma >> PAGE_SHIFT;
455         xen_flush_queue();
456         PANIC_IF(HYPERVISOR_mmuext_op(&op, 1, NULL, DOMID_SELF) < 0);
457 }
458
459 void 
460 xen_pgd_pin(vm_paddr_t ma)
461 {
462         struct mmuext_op op;
463         op.cmd = MMUEXT_PIN_L2_TABLE;
464         op.arg1.mfn = ma >> PAGE_SHIFT;
465         xen_flush_queue();
466         PANIC_IF(HYPERVISOR_mmuext_op(&op, 1, NULL, DOMID_SELF) < 0);
467 }
468
469 void 
470 xen_pgd_unpin(vm_paddr_t ma)
471 {
472         struct mmuext_op op;
473         op.cmd = MMUEXT_UNPIN_TABLE;
474         op.arg1.mfn = ma >> PAGE_SHIFT;
475         xen_flush_queue();
476         PANIC_IF(HYPERVISOR_mmuext_op(&op, 1, NULL, DOMID_SELF) < 0);
477 }
478
479 void 
480 xen_pt_pin(vm_paddr_t ma)
481 {
482         struct mmuext_op op;
483         op.cmd = MMUEXT_PIN_L1_TABLE;
484         op.arg1.mfn = ma >> PAGE_SHIFT;
485         printk("xen_pt_pin(): mfn=%x\n", op.arg1.mfn);
486         xen_flush_queue();
487         PANIC_IF(HYPERVISOR_mmuext_op(&op, 1, NULL, DOMID_SELF) < 0);
488 }
489
490 void 
491 xen_pt_unpin(vm_paddr_t ma)
492 {
493         struct mmuext_op op;
494         op.cmd = MMUEXT_UNPIN_TABLE;
495         op.arg1.mfn = ma >> PAGE_SHIFT;
496         xen_flush_queue();
497         PANIC_IF(HYPERVISOR_mmuext_op(&op, 1, NULL, DOMID_SELF) < 0);
498 }
499
500 void 
501 xen_set_ldt(vm_paddr_t ptr, unsigned long len)
502 {
503         struct mmuext_op op;
504         op.cmd = MMUEXT_SET_LDT;
505         op.arg1.linear_addr = ptr;
506         op.arg2.nr_ents = len;
507         xen_flush_queue();
508         PANIC_IF(HYPERVISOR_mmuext_op(&op, 1, NULL, DOMID_SELF) < 0);
509 }
510
511 void xen_tlb_flush(void)
512 {
513         struct mmuext_op op;
514         op.cmd = MMUEXT_TLB_FLUSH_LOCAL;
515         xen_flush_queue();
516         PANIC_IF(HYPERVISOR_mmuext_op(&op, 1, NULL, DOMID_SELF) < 0);
517 }
518
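/*
 * GDT/LDT pages are read-only to a PV guest, so descriptor entries are
 * changed through HYPERVISOR_update_descriptor() using the machine
 * address of the slot being updated.
 */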
519 void
520 xen_update_descriptor(union descriptor *table, union descriptor *entry)
521 {
522         vm_paddr_t pa;
523         pt_entry_t *ptp;
524
525         ptp = vtopte((vm_offset_t)table);
526         pa = (*ptp & PG_FRAME) | ((vm_offset_t)table & PAGE_MASK);
527         if (HYPERVISOR_update_descriptor(pa, *(uint64_t *)entry))
528                 panic("HYPERVISOR_update_descriptor failed\n");
529 }
530
531
532 #if 0
533 /*
534  * Bitmap is indexed by page number. If bit is set, the page is part of a
535  * xen_create_contiguous_region() area of memory.
536  */
537 unsigned long *contiguous_bitmap;
538
539 static void 
540 contiguous_bitmap_set(unsigned long first_page, unsigned long nr_pages)
541 {
542         unsigned long start_off, end_off, curr_idx, end_idx;
543
544         curr_idx  = first_page / BITS_PER_LONG;
545         start_off = first_page & (BITS_PER_LONG-1);
546         end_idx   = (first_page + nr_pages) / BITS_PER_LONG;
547         end_off   = (first_page + nr_pages) & (BITS_PER_LONG-1);
548
549         if (curr_idx == end_idx) {
550                 contiguous_bitmap[curr_idx] |=
551                         ((1UL<<end_off)-1) & -(1UL<<start_off);
552         } else {
553                 contiguous_bitmap[curr_idx] |= -(1UL<<start_off);
554                 while ( ++curr_idx < end_idx )
555                         contiguous_bitmap[curr_idx] = ~0UL;
556                 contiguous_bitmap[curr_idx] |= (1UL<<end_off)-1;
557         }
558 }
559
560 static void 
561 contiguous_bitmap_clear(unsigned long first_page, unsigned long nr_pages)
562 {
563         unsigned long start_off, end_off, curr_idx, end_idx;
564
565         curr_idx  = first_page / BITS_PER_LONG;
566         start_off = first_page & (BITS_PER_LONG-1);
567         end_idx   = (first_page + nr_pages) / BITS_PER_LONG;
568         end_off   = (first_page + nr_pages) & (BITS_PER_LONG-1);
569
570         if (curr_idx == end_idx) {
571                 contiguous_bitmap[curr_idx] &=
572                         -(1UL<<end_off) | ((1UL<<start_off)-1);
573         } else {
574                 contiguous_bitmap[curr_idx] &= (1UL<<start_off)-1;
575                 while ( ++curr_idx != end_idx )
576                         contiguous_bitmap[curr_idx] = 0;
577                 contiguous_bitmap[curr_idx] &= -(1UL<<end_off);
578         }
579 }
580 #endif
581
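/*
 * To build a machine-contiguous extent, the existing pages are first
 * returned to the hypervisor (XENMEM_decrease_reservation), a single
 * higher-order extent is then requested (XENMEM_increase_reservation),
 * and finally the physical-to-machine map is rewired to the new mfns.
 * On failure, single pages are re-acquired to back the original pfns.
 */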
582 /* Ensure multi-page extents are contiguous in machine memory. */
583 int 
584 xen_create_contiguous_region(vm_page_t pages, int npages)
585 {
586         unsigned long  mfn, i, flags;
587         int order;
588         struct xen_memory_reservation reservation = {
589                 .nr_extents   = 1,
590                 .extent_order = 0,
591                 .domid        = DOMID_SELF
592         };
593         set_xen_guest_handle(reservation.extent_start, &mfn);
594         
595         balloon_lock(flags);
596
597         /* can currently only handle power of two allocation */
598         PANIC_IF(ffs(npages) != fls(npages));
599
600         /* 0. determine order */
601         order = (ffs(npages) == fls(npages)) ? fls(npages) - 1 : fls(npages);
602         
603         /* 1. give away machine pages. */
604         for (i = 0; i < (1 << order); i++) {
605                 int pfn;
606                 pfn = VM_PAGE_TO_PHYS(&pages[i]) >> PAGE_SHIFT;
607                 mfn = PFNTOMFN(pfn);
608                 PFNTOMFN(pfn) = INVALID_P2M_ENTRY;
609                 PANIC_IF(HYPERVISOR_memory_op(XENMEM_decrease_reservation, &reservation) != 1);
610         }
611
612
613         /* 2. Get a new contiguous memory extent. */
614         reservation.extent_order = order;
615         /* xenlinux hardcodes this because of aacraid - maybe set to 0 if we're not 
616          * running with a broken driver XXXEN
617          */
618         reservation.address_bits = 31; 
619         if (HYPERVISOR_memory_op(XENMEM_increase_reservation, &reservation) != 1)
620                 goto fail;
621
622         /* 3. Map the new extent in place of old pages. */
623         for (i = 0; i < (1 << order); i++) {
624                 int pfn;
625                 pfn = VM_PAGE_TO_PHYS(&pages[i]) >> PAGE_SHIFT;
626                 xen_machphys_update(mfn+i, pfn);
627                 PFNTOMFN(pfn) = mfn+i;
628         }
629
630         xen_tlb_flush();
631
632 #if 0
633         contiguous_bitmap_set(VM_PAGE_TO_PHYS(&pages[0]) >> PAGE_SHIFT, 1UL << order);
634 #endif
635
636         balloon_unlock(flags);
637
638         return 0;
639
640  fail:
641         reservation.extent_order = 0;
642         reservation.address_bits = 0;
643
644         for (i = 0; i < (1 << order); i++) {
645                 int pfn;
646                 pfn = VM_PAGE_TO_PHYS(&pages[i]) >> PAGE_SHIFT;
647                 PANIC_IF(HYPERVISOR_memory_op(
648                         XENMEM_increase_reservation, &reservation) != 1);
649                 xen_machphys_update(mfn, pfn);
650                 PFNTOMFN(pfn) = mfn;
651         }
652
653         xen_tlb_flush();
654
655         balloon_unlock(flags);
656
657         return ENOMEM;
658 }
659
660 void 
661 xen_destroy_contiguous_region(void *addr, int npages)
662 {
663         unsigned long  mfn, i, flags, order, pfn0;
664         struct xen_memory_reservation reservation = {
665                 .nr_extents   = 1,
666                 .extent_order = 0,
667                 .domid        = DOMID_SELF
668         };
669         set_xen_guest_handle(reservation.extent_start, &mfn);
670         
671         pfn0 = vtophys(addr) >> PAGE_SHIFT;
672 #if 0
673         scrub_pages(vstart, 1 << order);
674 #endif
675         /* can currently only handle power of two allocation */
676         PANIC_IF(ffs(npages) != fls(npages));
677
678         /* 0. determine order */
679         order = (ffs(npages) == fls(npages)) ? fls(npages) - 1 : fls(npages);
680
681         balloon_lock(flags);
682
683 #if 0
684         contiguous_bitmap_clear(vtophys(addr) >> PAGE_SHIFT, 1UL << order);
685 #endif
686
687         /* 1. Zap current PTEs, giving away the underlying pages. */
688         for (i = 0; i < (1 << order); i++) {
689                 int pfn;
690                 uint64_t new_val = 0;
691                 pfn = vtomach((char *)addr + i*PAGE_SIZE) >> PAGE_SHIFT;
692
693                 PANIC_IF(HYPERVISOR_update_va_mapping((vm_offset_t)((char *)addr + (i * PAGE_SIZE)), new_val, 0));
694                 PFNTOMFN(pfn) = INVALID_P2M_ENTRY;
695                 PANIC_IF(HYPERVISOR_memory_op(
696                         XENMEM_decrease_reservation, &reservation) != 1);
697         }
698
699         /* 2. Map new pages in place of old pages. */
700         for (i = 0; i < (1 << order); i++) {
701                 int pfn;
702                 uint64_t new_val;
703                 pfn = pfn0 + i;
704                 PANIC_IF(HYPERVISOR_memory_op(XENMEM_increase_reservation, &reservation) != 1);
705                 
706                 new_val = mfn << PAGE_SHIFT;
707                 PANIC_IF(HYPERVISOR_update_va_mapping((vm_offset_t)addr + (i * PAGE_SIZE), 
708                                                       new_val, PG_KERNEL));
709                 xen_machphys_update(mfn, pfn);
710                 PFNTOMFN(pfn) = mfn;
711         }
712
713         xen_tlb_flush();
714
715         balloon_unlock(flags);
716 }
717
718 extern unsigned long cpu0prvpage;
719 extern unsigned long *SMPpt;
720 extern  struct user     *proc0uarea;
721 extern  vm_offset_t     proc0kstack;
722 extern int vm86paddr, vm86phystk;
723 char *bootmem_start, *bootmem_current, *bootmem_end;
724
725 pteinfo_t *pteinfo_list;
726 void initvalues(start_info_t *startinfo);
727
728 struct ringbuf_head *xen_store; /* XXX move me */
729 char *console_page;
730
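/*
 * Minimal bump allocator over a small region reserved in initvalues();
 * bootmem_free() only supports releasing the most recent allocation.
 */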
731 void *
732 bootmem_alloc(unsigned int size) 
733 {
734         char *retptr;
735         
736         retptr = bootmem_current;
737         PANIC_IF(retptr + size > bootmem_end);
738         bootmem_current += size;
739
740         return retptr;
741 }
742
743 void 
744 bootmem_free(void *ptr, unsigned int size) 
745 {
746         char *tptr;
747         
748         tptr = ptr;
749         PANIC_IF(tptr != bootmem_current - size ||
750                 bootmem_current - size < bootmem_start);        
751
752         bootmem_current -= size;
753 }
754
755 #if 0
756 static vm_paddr_t
757 xpmap_mtop2(vm_paddr_t mpa)
758 {
759         return ((machine_to_phys_mapping[mpa >> PAGE_SHIFT] << PAGE_SHIFT)
760             ) | (mpa & ~PG_FRAME);
761 }
762
763 static pd_entry_t 
764 xpmap_get_bootpde(vm_paddr_t va)
765 {
766
767         return ((pd_entry_t *)xen_start_info->pt_base)[va >> 22];
768 }
769
770 static pd_entry_t
771 xpmap_get_vbootpde(vm_paddr_t va)
772 {
773         pd_entry_t pde;
774
775         pde = xpmap_get_bootpde(va);
776         if ((pde & PG_V) == 0)
777                 return (pde & ~PG_FRAME);
778         return (pde & ~PG_FRAME) |
779                 (xpmap_mtop2(pde & PG_FRAME) + KERNBASE);
780 }
781
782 static pt_entry_t *
783 xpmap_get_bootptep(vm_paddr_t va)
784 {
785         pd_entry_t pde;
786
787         pde = xpmap_get_vbootpde(va);
788         if ((pde & PG_V) == 0)
789                 return (void *)-1;
790 #define PT_MASK         0x003ff000      /* page table address bits */
791         return &(((pt_entry_t *)(pde & PG_FRAME))[(va & PT_MASK) >> PAGE_SHIFT]);
792 }
793
794 static pt_entry_t
795 xpmap_get_bootpte(vm_paddr_t va)
796 {
797
798         return xpmap_get_bootptep(va)[0];
799 }
800 #endif
801
802
803 #ifdef ADD_ISA_HOLE
804 static void
805 shift_phys_machine(unsigned long *phys_machine, int nr_pages)
806 {
807
808         unsigned long *tmp_page, *current_page, *next_page;
809         int i;
810
811         tmp_page = bootmem_alloc(PAGE_SIZE);
812         current_page = phys_machine + nr_pages - (PAGE_SIZE/sizeof(unsigned long));  
813         next_page = current_page - (PAGE_SIZE/sizeof(unsigned long));  
814         bcopy(phys_machine, tmp_page, PAGE_SIZE);
815
816         while (current_page > phys_machine) { 
817                 /*  save next page */
818                 bcopy(next_page, tmp_page, PAGE_SIZE);
819                 /* shift down page */
820                 bcopy(current_page, next_page, PAGE_SIZE);
821                 /*  finish swap */
822                 bcopy(tmp_page, current_page, PAGE_SIZE);
823           
824                 current_page -= (PAGE_SIZE/sizeof(unsigned long));
825                 next_page -= (PAGE_SIZE/sizeof(unsigned long));
826         }
827         bootmem_free(tmp_page, PAGE_SIZE);      
828         
829         for (i = 0; i < nr_pages; i++) {
830                 xen_machphys_update(phys_machine[i], i);
831         }
832         memset(phys_machine, INVALID_P2M_ENTRY, PAGE_SIZE);
833
834 }
835 #endif /* ADD_ISA_HOLE */
836
837 /*
838  * Build a directory of the pages that make up our Physical to Machine
839  * mapping table. The Xen suspend/restore code uses this to find our
840  * mapping table.
841  */
842 static void
843 init_frame_list_list(void *arg)
844 {
845         unsigned long nr_pages = xen_start_info->nr_pages;
846 #define FPP     (PAGE_SIZE/sizeof(xen_pfn_t))
847         int i, j, k;
848
849         xen_pfn_to_mfn_frame_list_list = malloc(PAGE_SIZE, M_DEVBUF, M_WAITOK);
850         for (i = 0, j = 0, k = -1; i < nr_pages;
851              i += FPP, j++) {
852                 if ((j & (FPP - 1)) == 0) {
853                         k++;
854                         xen_pfn_to_mfn_frame_list[k] =
855                                 malloc(PAGE_SIZE, M_DEVBUF, M_WAITOK);
856                         xen_pfn_to_mfn_frame_list_list[k] =
857                                 VTOMFN(xen_pfn_to_mfn_frame_list[k]);
858                         j = 0;
859                 }
860                 xen_pfn_to_mfn_frame_list[k][j] = 
861                         VTOMFN(&xen_phys_machine[i]);
862         }
863
864         HYPERVISOR_shared_info->arch.max_pfn = nr_pages;
865         HYPERVISOR_shared_info->arch.pfn_to_mfn_frame_list_list
866                 = VTOMFN(xen_pfn_to_mfn_frame_list_list);
867 }       
868 SYSINIT(init_fll, SI_SUB_DEVFS, SI_ORDER_ANY, init_frame_list_list, NULL);
869
870 extern unsigned long physfree;
871
872 int pdir, curoffset;
873 extern int nkpt;
874
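/*
 * initvalues() runs very early in boot, while the page tables built by
 * the domain builder are still live.  It carves boot-time allocations
 * (bootmem area, GDT/LDT pages, shared info, xenstore and console pages,
 * proc0's kstack, the vm86 area) out of the space following those page
 * tables, constructs and pins a new page directory set (PDPT/PTDs under
 * PAE), switches to it, maps the Xen-provided pages, and records
 * physfree/init_first so later code knows where free memory begins.
 */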
875 void
876 initvalues(start_info_t *startinfo)
877 {
878         int l3_pages, l2_pages, l1_pages, offset;
879         vm_offset_t cur_space, cur_space_pt;
880         struct physdev_set_iopl set_iopl;
881         
882         vm_paddr_t KPTphys, IdlePTDma;
883         vm_paddr_t console_page_ma, xen_store_ma;
884         vm_offset_t KPTphysoff, tmpva;
885         vm_paddr_t shinfo;
886 #ifdef PAE
887         vm_paddr_t IdlePDPTma, IdlePDPTnewma;
888         vm_paddr_t IdlePTDnewma[4];
889         pd_entry_t *IdlePDPTnew, *IdlePTDnew;
890 #else
891         vm_paddr_t pdir_shadow_ma;
892 #endif
893         unsigned long i;
894         int ncpus = MAXCPU;
895
896         nkpt = min(
897                 min(
898                         max((startinfo->nr_pages >> NPGPTD_SHIFT), nkpt),
899                     NPGPTD*NPDEPG - KPTDI),
900                     (HYPERVISOR_VIRT_START - KERNBASE) >> PDRSHIFT);
901
902         HYPERVISOR_vm_assist(VMASST_CMD_enable, VMASST_TYPE_4gb_segments);      
903 #ifdef notyet
904         /*
905          * need to install handler
906          */
907         HYPERVISOR_vm_assist(VMASST_CMD_enable, VMASST_TYPE_4gb_segments_notify);       
908 #endif  
909         xen_start_info = startinfo;
910         xen_phys_machine = (xen_pfn_t *)startinfo->mfn_list;
911
912         IdlePTD = (pd_entry_t *)((uint8_t *)startinfo->pt_base + PAGE_SIZE);
913         l1_pages = 0;
914         
915 #ifdef PAE
916         l3_pages = 1;
917         l2_pages = 0;
918         IdlePDPT = (pd_entry_t *)startinfo->pt_base;
919         IdlePDPTma = xpmap_ptom(VTOP(startinfo->pt_base));
920         for (i = (KERNBASE >> 30);
921              (i < 4) && (IdlePDPT[i] != 0); i++)
922                         l2_pages++;
923         /*
924          * Note that only one page directory has been allocated at this point.
925          * Thus, if KERNBASE
926          */
927 #if 0
928         for (i = 0; i < l2_pages; i++)
929                 IdlePTDma[i] = xpmap_ptom(VTOP(IdlePTD + i*PAGE_SIZE));
930 #endif
931         
932         l2_pages = (l2_pages == 0) ? 1 : l2_pages;
933 #else   
934         l3_pages = 0;
935         l2_pages = 1;
936 #endif
937         for (i = (((KERNBASE>>18) & PAGE_MASK)>>PAGE_SHIFT);
938              (i<l2_pages*NPDEPG) && (i<(VM_MAX_KERNEL_ADDRESS>>PDRSHIFT)); i++) {
939                 
940                 if (IdlePTD[i] == 0)
941                         break;
942                 l1_pages++;
943         }
944         
945         /* number of pages allocated after the page tables, plus 1 */
946         cur_space = xen_start_info->pt_base +
947             ((xen_start_info->nr_pt_frames) + 3 )*PAGE_SIZE;
948         printk("initvalues(): wooh - availmem=%x,%x\n", avail_space, cur_space);
949
950         printk("KERNBASE=%x,pt_base=%x, VTOPFN(base)=%x, nr_pt_frames=%x\n",
951             KERNBASE,xen_start_info->pt_base, VTOPFN(xen_start_info->pt_base),
952             xen_start_info->nr_pt_frames);
953         xendebug_flags = 0; /* 0xffffffff; */
954
955         /* allocate 4 pages for bootmem allocator */
956         bootmem_start = bootmem_current = (char *)cur_space;
957         cur_space += (4 * PAGE_SIZE);
958         bootmem_end = (char *)cur_space;
959
960         /* allocate page for gdt */
961         gdt = (union descriptor *)cur_space;
962         cur_space += PAGE_SIZE*ncpus;
963
964         /* allocate page for ldt */
965         ldt = (union descriptor *)cur_space; cur_space += PAGE_SIZE;
966         cur_space += PAGE_SIZE;
967         
968         HYPERVISOR_shared_info = (shared_info_t *)cur_space;
969         cur_space += PAGE_SIZE;
970
971         xen_store = (struct ringbuf_head *)cur_space;
972         cur_space += PAGE_SIZE;
973
974         console_page = (char *)cur_space;
975         cur_space += PAGE_SIZE;
976
977 #ifdef ADD_ISA_HOLE
978         shift_phys_machine(xen_phys_machine, xen_start_info->nr_pages);
979 #endif
980         /* 
981          * pre-zero unused mapped pages - mapped on 4MB boundary
982          */
983 #ifdef PAE
984         IdlePDPT = (pd_entry_t *)startinfo->pt_base;
985         IdlePDPTma = xpmap_ptom(VTOP(startinfo->pt_base));
986         /*
987          * Note that only one page directory has been allocated at this point.
988          * Thus, if KERNBASE
989          */
990         IdlePTD = (pd_entry_t *)((uint8_t *)startinfo->pt_base + PAGE_SIZE);
991         IdlePTDma = xpmap_ptom(VTOP(IdlePTD));
992         l3_pages = 1;
993 #else   
994         IdlePTD = (pd_entry_t *)startinfo->pt_base;
995         IdlePTDma = xpmap_ptom(VTOP(startinfo->pt_base));
996         l3_pages = 0;
997 #endif
998         l2_pages = 1;
999         l1_pages = xen_start_info->nr_pt_frames - l2_pages - l3_pages;
1000
1001         KPTphysoff = (l2_pages + l3_pages)*PAGE_SIZE;
1002
1003         KPTphys = xpmap_ptom(VTOP(startinfo->pt_base + KPTphysoff));
1004         XENPRINTF("IdlePTD %p\n", IdlePTD);
1005         XENPRINTF("nr_pages: %ld shared_info: 0x%lx flags: 0x%lx pt_base: 0x%lx "
1006                   "mod_start: 0x%lx mod_len: 0x%lx\n",
1007                   xen_start_info->nr_pages, xen_start_info->shared_info, 
1008                   xen_start_info->flags, xen_start_info->pt_base, 
1009                   xen_start_info->mod_start, xen_start_info->mod_len);
1010         /* Map proc0's KSTACK */
1011
1012         proc0kstack = cur_space; cur_space += (KSTACK_PAGES * PAGE_SIZE);
1013         printk("proc0kstack=%u\n", proc0kstack);
1014
1015         /* vm86/bios stack */
1016         cur_space += PAGE_SIZE;
1017
1018         /* Map space for the vm86 region */
1019         vm86paddr = (vm_offset_t)cur_space;
1020         cur_space += (PAGE_SIZE * 3);
1021
1022 #ifdef PAE
1023         IdlePDPTnew = (pd_entry_t *)cur_space; cur_space += PAGE_SIZE;
1024         bzero(IdlePDPTnew, PAGE_SIZE);
1025
1026         IdlePDPTnewma =  xpmap_ptom(VTOP(IdlePDPTnew));
1027         IdlePTDnew = (pd_entry_t *)cur_space; cur_space += 4*PAGE_SIZE;
1028         bzero(IdlePTDnew, 4*PAGE_SIZE);
1029
1030         for (i = 0; i < 4; i++) 
1031                 IdlePTDnewma[i] =
1032                     xpmap_ptom(VTOP((uint8_t *)IdlePTDnew + i*PAGE_SIZE));
1033         /*
1034          * L3
1035          *
1036          * Copy the 4 machine addresses of the new PTDs into the PDPT
1037          * 
1038          */
1039         for (i = 0; i < 4; i++)
1040                 IdlePDPTnew[i] = IdlePTDnewma[i] | PG_V;
1041
1042         __asm__("nop;");
1043         /*
1044          *
1045          * re-map the new PDPT read-only
1046          */
1047         PT_SET_MA(IdlePDPTnew, IdlePDPTnewma | PG_V);
1048         /*
1049          * 
1050          * Unpin the current PDPT
1051          */
1052         xen_pt_unpin(IdlePDPTma);
1053         
1054         for (i = 0; i < 20; i++) {
1055                 int startidx = ((KERNBASE >> 18) & PAGE_MASK) >> 3;
1056
1057                 if (IdlePTD[startidx + i] == 0) {
1058                         l1_pages = i;
1059                         break;
1060                 }       
1061         }
1062
1063 #endif  /* PAE */
1064         
1065         /* unmap remaining pages from initial 4MB chunk
1066          *
1067          */
1068         for (tmpva = cur_space; (tmpva & ((1<<22)-1)) != 0; tmpva += PAGE_SIZE) {
1069                 bzero((char *)tmpva, PAGE_SIZE);
1070                 PT_SET_MA(tmpva, (vm_paddr_t)0);
1071         }
1072         
1073         PT_UPDATES_FLUSH();
1074
1075         memcpy(((uint8_t *)IdlePTDnew) + ((unsigned int)(KERNBASE >> 18)),
1076             ((uint8_t *)IdlePTD) + ((KERNBASE >> 18) & PAGE_MASK),
1077             l1_pages*sizeof(pt_entry_t));
1078
1079         for (i = 0; i < 4; i++) {
1080                 PT_SET_MA((uint8_t *)IdlePTDnew + i*PAGE_SIZE,
1081                     IdlePTDnewma[i] | PG_V);
1082         }
1083         xen_load_cr3(VTOP(IdlePDPTnew));
1084         xen_pgdpt_pin(xpmap_ptom(VTOP(IdlePDPTnew)));
1085
1086         /* allocate remainder of nkpt pages */
1087         cur_space_pt = cur_space;
1088         for (offset = (KERNBASE >> PDRSHIFT), i = l1_pages; i < nkpt;
1089              i++, cur_space += PAGE_SIZE) {
1090                 pdir = (offset + i) / NPDEPG;
1091                 curoffset = ((offset + i) % NPDEPG);
1092                 if (((offset + i) << PDRSHIFT) == VM_MAX_KERNEL_ADDRESS)
1093                         break;
1094
1095                 /*
1096                  * make sure that all the initial page table pages
1097                  * have been zeroed
1098                  */
1099                 PT_SET_MA(cur_space_pt,
1100                     xpmap_ptom(VTOP(cur_space)) | PG_V | PG_RW);
1101                 bzero((char *)cur_space_pt, PAGE_SIZE);
1102                 PT_SET_MA(cur_space_pt, (vm_paddr_t)0);
1103                 xen_pt_pin(xpmap_ptom(VTOP(cur_space)));
1104                 xen_queue_pt_update((vm_paddr_t)(IdlePTDnewma[pdir] +
1105                         curoffset*sizeof(vm_paddr_t)), 
1106                     xpmap_ptom(VTOP(cur_space)) | PG_KERNEL);
1107                 PT_UPDATES_FLUSH();
1108         }
1109         
1110         for (i = 0; i < 4; i++) {
1111                 pdir = (PTDPTDI + i) / NPDEPG;
1112                 curoffset = (PTDPTDI + i) % NPDEPG;
1113
1114                 xen_queue_pt_update((vm_paddr_t)(IdlePTDnewma[pdir] +
1115                         curoffset*sizeof(vm_paddr_t)), 
1116                     IdlePTDnewma[i] | PG_V);
1117         }
1118
1119         PT_UPDATES_FLUSH();
1120         
1121         IdlePTD = IdlePTDnew;
1122         IdlePDPT = IdlePDPTnew;
1123         IdlePDPTma = IdlePDPTnewma;
1124         
1125         /*
1126          * shared_info is an unsigned long so this will randomly break if
1127          * it is allocated above 4GB - I guess people are used to that
1128          * sort of thing with Xen ... sigh
1129          */
1130         shinfo = xen_start_info->shared_info;
1131         PT_SET_MA(HYPERVISOR_shared_info, shinfo | PG_KERNEL);
1132         
1133         printk("#4\n");
1134
1135         xen_store_ma = (((vm_paddr_t)xen_start_info->store_mfn) << PAGE_SHIFT);
1136         PT_SET_MA(xen_store, xen_store_ma | PG_KERNEL);
1137         console_page_ma = (((vm_paddr_t)xen_start_info->console.domU.mfn) << PAGE_SHIFT);
1138         PT_SET_MA(console_page, console_page_ma | PG_KERNEL);
1139
1140         printk("#5\n");
1141
1142         set_iopl.iopl = 1;
1143         PANIC_IF(HYPERVISOR_physdev_op(PHYSDEVOP_SET_IOPL, &set_iopl));
1144         printk("#6\n");
1145 #if 0
1146         /* add page table for KERNBASE */
1147         xen_queue_pt_update(IdlePTDma + KPTDI*sizeof(vm_paddr_t), 
1148                             xpmap_ptom(VTOP(cur_space) | PG_KERNEL));
1149         xen_flush_queue();
1150 #ifdef PAE      
1151         xen_queue_pt_update(pdir_shadow_ma[3] + KPTDI*sizeof(vm_paddr_t), 
1152                             xpmap_ptom(VTOP(cur_space) | PG_V | PG_A));
1153 #else
1154         xen_queue_pt_update(pdir_shadow_ma + KPTDI*sizeof(vm_paddr_t), 
1155                             xpmap_ptom(VTOP(cur_space) | PG_V | PG_A));
1156 #endif  
1157         xen_flush_queue();
1158         cur_space += PAGE_SIZE;
1159         printk("#6\n");
1160 #endif /* 0 */  
1161 #ifdef notyet
1162         if (xen_start_info->flags & SIF_INITDOMAIN) {
1163                 /* Map first megabyte */
1164                 for (i = 0; i < (256 << PAGE_SHIFT); i += PAGE_SIZE) 
1165                         PT_SET_MA(KERNBASE + i, i | PG_KERNEL | PG_NC_PCD);
1166                 xen_flush_queue();
1167         }
1168 #endif
1169         /*
1170          * re-map kernel text read-only
1171          *
1172          */
1173         for (i = (((vm_offset_t)&btext) & ~PAGE_MASK);
1174              i < (((vm_offset_t)&etext) & ~PAGE_MASK); i += PAGE_SIZE)
1175                 PT_SET_MA(i, xpmap_ptom(VTOP(i)) | PG_V | PG_A);
1176         
1177         printk("#7\n");
1178         physfree = VTOP(cur_space);
1179         init_first = physfree >> PAGE_SHIFT;
1180         IdlePTD = (pd_entry_t *)VTOP(IdlePTD);
1181         IdlePDPT = (pd_entry_t *)VTOP(IdlePDPT);
1182         setup_xen_features();
1183         printk("#8, proc0kstack=%u\n", proc0kstack);
1184 }
1185
1186
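/*
 * Trap vectors are handed to the hypervisor (via
 * HYPERVISOR_set_trap_table()) instead of being installed in a real IDT.
 * The second field is the privilege level from which the trap may be
 * raised; the "|4" flag appears to request that event delivery be
 * disabled on entry, analogous to an interrupt gate.
 */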
1187 trap_info_t trap_table[] = {
1188         { 0,   0, GSEL(GCODE_SEL, SEL_KPL), (unsigned long) &IDTVEC(div)},
1189         { 1,   0|4, GSEL(GCODE_SEL, SEL_KPL), (unsigned long) &IDTVEC(dbg)},
1190         { 3,   3|4, GSEL(GCODE_SEL, SEL_KPL), (unsigned long) &IDTVEC(bpt)},
1191         { 4,   3, GSEL(GCODE_SEL, SEL_KPL), (unsigned long) &IDTVEC(ofl)},
1192         /* This is UPL on Linux and KPL on BSD */
1193         { 5,   3, GSEL(GCODE_SEL, SEL_KPL), (unsigned long) &IDTVEC(bnd)},
1194         { 6,   0, GSEL(GCODE_SEL, SEL_KPL), (unsigned long) &IDTVEC(ill)},
1195         { 7,   0|4, GSEL(GCODE_SEL, SEL_KPL), (unsigned long) &IDTVEC(dna)},
1196         /*
1197          * { 8,   0, GSEL(GCODE_SEL, SEL_KPL), (unsigned long) &IDTVEC(XXX)},
1198          *   no handler for double fault
1199          */
1200         { 9,   0, GSEL(GCODE_SEL, SEL_KPL), (unsigned long) &IDTVEC(fpusegm)},
1201         {10,   0, GSEL(GCODE_SEL, SEL_KPL), (unsigned long) &IDTVEC(tss)},
1202         {11,   0, GSEL(GCODE_SEL, SEL_KPL), (unsigned long) &IDTVEC(missing)},
1203         {12,   0, GSEL(GCODE_SEL, SEL_KPL), (unsigned long) &IDTVEC(stk)},
1204         {13,   0, GSEL(GCODE_SEL, SEL_KPL), (unsigned long) &IDTVEC(prot)},
1205         {14,   0|4, GSEL(GCODE_SEL, SEL_KPL), (unsigned long) &IDTVEC(page)},
1206         {15,   0, GSEL(GCODE_SEL, SEL_KPL), (unsigned long) &IDTVEC(rsvd)},
1207         {16,   0, GSEL(GCODE_SEL, SEL_KPL), (unsigned long) &IDTVEC(fpu)},
1208         {17,   0, GSEL(GCODE_SEL, SEL_KPL), (unsigned long) &IDTVEC(align)},
1209         {18,   0, GSEL(GCODE_SEL, SEL_KPL), (unsigned long) &IDTVEC(mchk)},
1210         {19,   0, GSEL(GCODE_SEL, SEL_KPL), (unsigned long) &IDTVEC(xmm)},
1211         {0x80, 3, GSEL(GCODE_SEL, SEL_KPL), (unsigned long) &IDTVEC(int0x80_syscall)},
1212         {  0, 0,           0, 0 }
1213 };
1214
1215
1216 /********** CODE WORTH KEEPING ABOVE HERE *****************/ 
1217
1218 void xen_failsafe_handler(void);
1219
1220 void
1221 xen_failsafe_handler(void)
1222 {
1223
1224         panic("xen_failsafe_handler called!\n");
1225 }
1226
1227 void xen_handle_thread_switch(struct pcb *pcb);
1228
1229 /* This is called by cpu_switch() when switching threads. */
1230 /* The pcb arg refers to the process control block of the */
1231 /* next thread which is to run */
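/*
 * The stack switch and any needed %fs/%gs descriptor updates are batched
 * into a single HYPERVISOR_multicall() so that a context switch does not
 * pay for several separate hypercall round trips.
 */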
1232 void
1233 xen_handle_thread_switch(struct pcb *pcb)
1234 {
1235     uint32_t *a = (uint32_t *)&PCPU_GET(fsgs_gdt)[0];
1236     uint32_t *b = (uint32_t *)&pcb->pcb_fsd;
1237     multicall_entry_t mcl[3];
1238     int i = 0;
1239
1240     /* Notify Xen of task switch */
1241     mcl[i].op = __HYPERVISOR_stack_switch;
1242     mcl[i].args[0] = GSEL(GDATA_SEL, SEL_KPL);
1243     mcl[i++].args[1] = (unsigned long)pcb;
1244
1245     /* Check for update of fsd */
1246     if (*a != *b || *(a+1) != *(b+1)) {
1247         mcl[i].op = __HYPERVISOR_update_descriptor;
1248         *(uint64_t *)&mcl[i].args[0] = vtomach((vm_offset_t)a);
1249         *(uint64_t *)&mcl[i++].args[2] = *(uint64_t *)b;
1250     }    
1251
1252     a += 2;
1253     b += 2;
1254
1255     /* Check for update of gsd */
1256     if (*a != *b || *(a+1) != *(b+1)) {
1257         mcl[i].op = __HYPERVISOR_update_descriptor;
1258         *(uint64_t *)&mcl[i].args[0] = vtomach((vm_offset_t)a);
1259         *(uint64_t *)&mcl[i++].args[2] = *(uint64_t *)b;
1260     }    
1261
1262     (void)HYPERVISOR_multicall(mcl, i);
1263 }