/*
 *
 * Copyright (c) 2004 Christian Limpach.
 * Copyright (c) 2004-2006,2008 Kip Macy
 * All rights reserved.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 * 3. All advertising materials mentioning features or use of this software
 *    must display the following acknowledgement:
 *      This product includes software developed by Christian Limpach.
 * 4. The name of the author may not be used to endorse or promote products
 *    derived from this software without specific prior written permission.
 *
 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
 * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
 * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
 * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT,
 * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
 * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
 * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
 * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
 * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
 */

#include <sys/cdefs.h>
__FBSDID("$FreeBSD$");

#include <sys/param.h>
#include <sys/systm.h>
#include <sys/bus.h>
#include <sys/ktr.h>
#include <sys/lock.h>
#include <sys/mount.h>
#include <sys/malloc.h>
#include <sys/mutex.h>
#include <sys/kernel.h>
#include <sys/proc.h>
#include <sys/reboot.h>
#include <sys/sysproto.h>

#include <machine/xen/xen-os.h>

#include <vm/vm.h>
#include <vm/pmap.h>
#include <machine/segments.h>
#include <machine/pcb.h>
#include <machine/stdarg.h>
#include <machine/vmparam.h>
#include <machine/cpu.h>
#include <machine/intr_machdep.h>
#include <machine/md_var.h>
#include <machine/asmacros.h>

#include <xen/hypervisor.h>
#include <machine/xen/xenvar.h>
#include <machine/xen/xenfunc.h>
#include <machine/xen/xenpmap.h>
#include <xen/interface/memory.h>
#include <machine/xen/features.h>
#ifdef SMP
#include <machine/privatespace.h>
#endif

#include <vm/vm_page.h>

#define IDTVEC(name)    __CONCAT(X,name)

extern inthand_t
IDTVEC(div), IDTVEC(dbg), IDTVEC(nmi), IDTVEC(bpt), IDTVEC(ofl),
        IDTVEC(bnd), IDTVEC(ill), IDTVEC(dna), IDTVEC(fpusegm),
        IDTVEC(tss), IDTVEC(missing), IDTVEC(stk), IDTVEC(prot),
        IDTVEC(page), IDTVEC(mchk), IDTVEC(rsvd), IDTVEC(fpu), IDTVEC(align),
        IDTVEC(xmm), IDTVEC(lcall_syscall), IDTVEC(int0x80_syscall);

int xendebug_flags;
start_info_t *xen_start_info;
shared_info_t *HYPERVISOR_shared_info;
xen_pfn_t *xen_machine_phys = machine_to_phys_mapping;
xen_pfn_t *xen_phys_machine;
xen_pfn_t *xen_pfn_to_mfn_frame_list[16];
xen_pfn_t *xen_pfn_to_mfn_frame_list_list;
int preemptable, init_first;
extern unsigned int avail_space;

void ni_cli(void);
void ni_sti(void);

void
ni_cli(void)
{
        CTR0(KTR_SPARE2, "ni_cli disabling interrupts");
        __asm__("pushl %edx;"
                "pushl %eax;"
                );
        __cli();
        __asm__("popl %eax;"
                "popl %edx;"
                );
}

void
ni_sti(void)
{
        __asm__("pushl %edx;"
                "pushl %esi;"
                "pushl %eax;"
                );
        __sti();
        __asm__("popl %eax;"
                "popl %esi;"
                "popl %edx;"
                );
}

/*
 * Modify cmd_line by converting ',' to NULs so that it is in a format
 * suitable for the static env vars.
 */
char *
xen_setbootenv(char *cmd_line)
{
        char *cmd_line_next;

        /* Skip leading spaces. */
        for (; *cmd_line == ' '; cmd_line++);

        printk("xen_setbootenv(): cmd_line='%s'\n", cmd_line);

        for (cmd_line_next = cmd_line; strsep(&cmd_line_next, ",") != NULL;);
        return cmd_line;
}

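/*
 * Example (hypothetical values): a Xen cmd_line of
 * "boot_verbose,kern.hz=100" is rewritten in place to
 * "boot_verbose\0kern.hz=100", i.e. each ',' becomes a NUL terminator,
 * which is the layout the static kernel environment expects.
 */
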
static struct
{
        const char      *ev;
        int             mask;
} howto_names[] = {
        {"boot_askname",        RB_ASKNAME},
        {"boot_single", RB_SINGLE},
        {"boot_nosync", RB_NOSYNC},
        {"boot_halt",   RB_HALT},
        {"boot_serial", RB_SERIAL},
        {"boot_cdrom",  RB_CDROM},
        {"boot_gdb",    RB_GDB},
        {"boot_gdb_pause",      RB_RESERVED1},
        {"boot_verbose",        RB_VERBOSE},
        {"boot_multicons",      RB_MULTIPLE},
        {NULL,  0}
};

int
xen_boothowto(char *envp)
{
        int i, howto = 0;

        /* Get equivalents from the environment. */
        for (i = 0; howto_names[i].ev != NULL; i++)
                if (getenv(howto_names[i].ev) != NULL)
                        howto |= howto_names[i].mask;
        return howto;
}

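/*
 * Usage sketch (the actual call site lives elsewhere in the MD startup
 * code): the flags derived here are meant to be folded into boothowto,
 * e.g.
 *
 *      boothowto |= xen_boothowto(kern_envp);
 *
 * so that setting "boot_verbose" in the Xen cmd_line acts like booting
 * with -v on bare metal.
 */
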
#define PRINTK_BUFSIZE 1024
void
printk(const char *fmt, ...)
{
        __va_list ap;
        int retval;
        static char buf[PRINTK_BUFSIZE];

        va_start(ap, fmt);
        retval = vsnprintf(buf, PRINTK_BUFSIZE - 1, fmt, ap);
        va_end(ap);
        /*
         * vsnprintf() returns the length the output would have had, which
         * can exceed what was actually stored; clamp before terminating.
         */
        if (retval > PRINTK_BUFSIZE - 2)
                retval = PRINTK_BUFSIZE - 2;
        buf[retval] = 0;
        (void)HYPERVISOR_console_write(buf, retval);
}

#define XPQUEUE_SIZE 128

struct mmu_log {
        char *file;
        int line;
};

#ifdef SMP
/* per-cpu queues and indices */
#ifdef INVARIANTS
static struct mmu_log xpq_queue_log[MAX_VIRT_CPUS][XPQUEUE_SIZE];
#endif

static int xpq_idx[MAX_VIRT_CPUS];
static mmu_update_t xpq_queue[MAX_VIRT_CPUS][XPQUEUE_SIZE];

#define XPQ_QUEUE_LOG xpq_queue_log[vcpu]
#define XPQ_QUEUE xpq_queue[vcpu]
#define XPQ_IDX xpq_idx[vcpu]
#define SET_VCPU() int vcpu = smp_processor_id()
#else

static mmu_update_t xpq_queue[XPQUEUE_SIZE];
#ifdef INVARIANTS
static struct mmu_log xpq_queue_log[XPQUEUE_SIZE];
#endif
static int xpq_idx = 0;

#define XPQ_QUEUE_LOG xpq_queue_log
#define XPQ_QUEUE xpq_queue
#define XPQ_IDX xpq_idx
#define SET_VCPU()
#endif /* !SMP */

#define XPQ_IDX_INC atomic_add_int(&XPQ_IDX, 1);

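/*
 * Sketch of the queueing discipline (hypothetical machine addresses
 * ptma0/ptma1): callers batch page-table writes and pay for a single
 * hypercall:
 *
 *      xen_queue_pt_update(ptma0, val0);       queued, XPQ_IDX -> 1
 *      xen_queue_pt_update(ptma1, val1);       queued, XPQ_IDX -> 2
 *      xen_flush_queue();                      one HYPERVISOR_mmu_update()
 *
 * xen_increment_idx() below also flushes automatically once XPQ_IDX
 * reaches XPQUEUE_SIZE.
 */
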
#if 0
static void
xen_dump_queue(void)
{
        int _xpq_idx = XPQ_IDX;
        int i;

        if (_xpq_idx <= 1)
                return;

        printk("xen_dump_queue(): %u entries\n", _xpq_idx);
        for (i = 0; i < _xpq_idx; i++) {
                printk(" val: %llx ptr: %llx\n", XPQ_QUEUE[i].val, XPQ_QUEUE[i].ptr);
        }
}
#endif

static __inline void
_xen_flush_queue(void)
{
        SET_VCPU();
        int _xpq_idx = XPQ_IDX;
        int error, i;

#ifdef INVARIANTS
        if (__predict_true(gdtset))
                CRITICAL_ASSERT(curthread);
#endif

        XPQ_IDX = 0;
        /* Make sure index is cleared first to avoid double updates. */
        error = HYPERVISOR_mmu_update((mmu_update_t *)&XPQ_QUEUE,
            _xpq_idx, NULL, DOMID_SELF);

#if 0
        if (__predict_true(gdtset))
        for (i = _xpq_idx; i > 0;) {
                if (i >= 3) {
                        CTR6(KTR_PMAP, "mmu:val: %lx ptr: %lx val: %lx "
                            "ptr: %lx val: %lx ptr: %lx",
                            (XPQ_QUEUE[i-1].val & 0xffffffff),
                            (XPQ_QUEUE[i-1].ptr & 0xffffffff),
                            (XPQ_QUEUE[i-2].val & 0xffffffff),
                            (XPQ_QUEUE[i-2].ptr & 0xffffffff),
                            (XPQ_QUEUE[i-3].val & 0xffffffff),
                            (XPQ_QUEUE[i-3].ptr & 0xffffffff));
                        i -= 3;
                } else if (i == 2) {
                        CTR4(KTR_PMAP, "mmu: val: %lx ptr: %lx val: %lx ptr: %lx",
                            (XPQ_QUEUE[i-1].val & 0xffffffff),
                            (XPQ_QUEUE[i-1].ptr & 0xffffffff),
                            (XPQ_QUEUE[i-2].val & 0xffffffff),
                            (XPQ_QUEUE[i-2].ptr & 0xffffffff));
                        i = 0;
                } else {
                        CTR2(KTR_PMAP, "mmu: val: %lx ptr: %lx",
                            (XPQ_QUEUE[i-1].val & 0xffffffff),
                            (XPQ_QUEUE[i-1].ptr & 0xffffffff));
                        i = 0;
                }
        }
#endif
        if (__predict_false(error < 0)) {
                for (i = 0; i < _xpq_idx; i++)
                        printf("val: %llx ptr: %llx\n",
                            XPQ_QUEUE[i].val, XPQ_QUEUE[i].ptr);
                panic("Failed to execute MMU updates: %d", error);
        }
}

void
xen_flush_queue(void)
{
        SET_VCPU();

        if (__predict_true(gdtset))
                critical_enter();
        if (XPQ_IDX != 0)
                _xen_flush_queue();
        if (__predict_true(gdtset))
                critical_exit();
}

static __inline void
xen_increment_idx(void)
{
        SET_VCPU();

        XPQ_IDX++;
        if (__predict_false(XPQ_IDX == XPQUEUE_SIZE))
                xen_flush_queue();
}

void
xen_check_queue(void)
{
#ifdef INVARIANTS
        SET_VCPU();

        KASSERT(XPQ_IDX == 0, ("pending operations XPQ_IDX=%d", XPQ_IDX));
#endif
}

void
xen_invlpg(vm_offset_t va)
{
        struct mmuext_op op;
        op.cmd = MMUEXT_INVLPG_ALL;
        op.arg1.linear_addr = va & ~PAGE_MASK;
        PANIC_IF(HYPERVISOR_mmuext_op(&op, 1, NULL, DOMID_SELF) < 0);
}

void
xen_load_cr3(u_int val)
{
        struct mmuext_op op;
#ifdef INVARIANTS
        SET_VCPU();

        KASSERT(XPQ_IDX == 0, ("pending operations XPQ_IDX=%d", XPQ_IDX));
#endif
        op.cmd = MMUEXT_NEW_BASEPTR;
        op.arg1.mfn = xpmap_ptom(val) >> PAGE_SHIFT;
        PANIC_IF(HYPERVISOR_mmuext_op(&op, 1, NULL, DOMID_SELF) < 0);
}

#ifdef KTR
static __inline u_int
rebp(void)
{
        u_int   data;

        /* Fetch the return address from the current frame, for KTR call-site logging. */
        __asm __volatile("movl 4(%%ebp),%0" : "=r" (data));
        return (data);
}
#endif

u_int
read_eflags(void)
{
        vcpu_info_t *_vcpu;
        u_int eflags;

        eflags = _read_eflags();
        _vcpu = &HYPERVISOR_shared_info->vcpu_info[smp_processor_id()];
        if (_vcpu->evtchn_upcall_mask)
                eflags &= ~PSL_I;

        return (eflags);
}

void
write_eflags(u_int eflags)
{
        u_int intr;

        CTR2(KTR_SPARE2, "%x xen_restore_flags eflags %x", rebp(), eflags);
        intr = ((eflags & PSL_I) == 0);
        __restore_flags(intr);
        _write_eflags(eflags);
}

void
xen_cli(void)
{
        CTR1(KTR_SPARE2, "%x xen_cli disabling interrupts", rebp());
        __cli();
}

void
xen_sti(void)
{
        CTR1(KTR_SPARE2, "%x xen_sti enabling interrupts", rebp());
        __sti();
}

u_int
xen_rcr2(void)
{

        return (HYPERVISOR_shared_info->vcpu_info[curcpu].arch.cr2);
}

void
_xen_machphys_update(vm_paddr_t mfn, vm_paddr_t pfn, char *file, int line)
{
        SET_VCPU();

        if (__predict_true(gdtset))
                critical_enter();
        XPQ_QUEUE[XPQ_IDX].ptr = (mfn << PAGE_SHIFT) | MMU_MACHPHYS_UPDATE;
        XPQ_QUEUE[XPQ_IDX].val = pfn;
#ifdef INVARIANTS
        XPQ_QUEUE_LOG[XPQ_IDX].file = file;
        XPQ_QUEUE_LOG[XPQ_IDX].line = line;
#endif
        xen_increment_idx();
        if (__predict_true(gdtset))
                critical_exit();
}

void
_xen_queue_pt_update(vm_paddr_t ptr, vm_paddr_t val, char *file, int line)
{
        SET_VCPU();

        if (__predict_true(gdtset))
                mtx_assert(&vm_page_queue_mtx, MA_OWNED);

        KASSERT((ptr & 7) == 0, ("misaligned update"));

        if (__predict_true(gdtset))
                critical_enter();

        XPQ_QUEUE[XPQ_IDX].ptr = ((uint64_t)ptr) | MMU_NORMAL_PT_UPDATE;
        XPQ_QUEUE[XPQ_IDX].val = (uint64_t)val;
#ifdef INVARIANTS
        XPQ_QUEUE_LOG[XPQ_IDX].file = file;
        XPQ_QUEUE_LOG[XPQ_IDX].line = line;
#endif
        xen_increment_idx();
        if (__predict_true(gdtset))
                critical_exit();
}

void
xen_pgdpt_pin(vm_paddr_t ma)
{
        struct mmuext_op op;
        op.cmd = MMUEXT_PIN_L3_TABLE;
        op.arg1.mfn = ma >> PAGE_SHIFT;
        xen_flush_queue();
        PANIC_IF(HYPERVISOR_mmuext_op(&op, 1, NULL, DOMID_SELF) < 0);
}

void
xen_pgd_pin(vm_paddr_t ma)
{
        struct mmuext_op op;
        op.cmd = MMUEXT_PIN_L2_TABLE;
        op.arg1.mfn = ma >> PAGE_SHIFT;
        xen_flush_queue();
        PANIC_IF(HYPERVISOR_mmuext_op(&op, 1, NULL, DOMID_SELF) < 0);
}

void
xen_pgd_unpin(vm_paddr_t ma)
{
        struct mmuext_op op;
        op.cmd = MMUEXT_UNPIN_TABLE;
        op.arg1.mfn = ma >> PAGE_SHIFT;
        xen_flush_queue();
        PANIC_IF(HYPERVISOR_mmuext_op(&op, 1, NULL, DOMID_SELF) < 0);
}

void
xen_pt_pin(vm_paddr_t ma)
{
        struct mmuext_op op;
        op.cmd = MMUEXT_PIN_L1_TABLE;
        op.arg1.mfn = ma >> PAGE_SHIFT;
        xen_flush_queue();
        PANIC_IF(HYPERVISOR_mmuext_op(&op, 1, NULL, DOMID_SELF) < 0);
}

void
xen_pt_unpin(vm_paddr_t ma)
{
        struct mmuext_op op;
        op.cmd = MMUEXT_UNPIN_TABLE;
        op.arg1.mfn = ma >> PAGE_SHIFT;
        xen_flush_queue();
        PANIC_IF(HYPERVISOR_mmuext_op(&op, 1, NULL, DOMID_SELF) < 0);
}

void
xen_set_ldt(vm_paddr_t ptr, unsigned long len)
{
        struct mmuext_op op;
        op.cmd = MMUEXT_SET_LDT;
        op.arg1.linear_addr = ptr;
        op.arg2.nr_ents = len;
        xen_flush_queue();
        PANIC_IF(HYPERVISOR_mmuext_op(&op, 1, NULL, DOMID_SELF) < 0);
}

void
xen_tlb_flush(void)
{
        struct mmuext_op op;
        op.cmd = MMUEXT_TLB_FLUSH_LOCAL;
        xen_flush_queue();
        PANIC_IF(HYPERVISOR_mmuext_op(&op, 1, NULL, DOMID_SELF) < 0);
}

void
xen_update_descriptor(union descriptor *table, union descriptor *entry)
{
        vm_paddr_t pa;
        pt_entry_t *ptp;

        /*
         * Under Xen the PTE holds a machine frame, so this yields the
         * machine address of the descriptor slot to be updated.
         */
        ptp = vtopte((vm_offset_t)table);
        pa = (*ptp & PG_FRAME) | ((vm_offset_t)table & PAGE_MASK);
        if (HYPERVISOR_update_descriptor(pa, *(uint64_t *)entry))
                panic("HYPERVISOR_update_descriptor failed\n");
}

#if 0
/*
 * Bitmap is indexed by page number. If bit is set, the page is part of a
 * xen_create_contiguous_region() area of memory.
 */
unsigned long *contiguous_bitmap;

static void
contiguous_bitmap_set(unsigned long first_page, unsigned long nr_pages)
{
        unsigned long start_off, end_off, curr_idx, end_idx;

        curr_idx  = first_page / BITS_PER_LONG;
        start_off = first_page & (BITS_PER_LONG-1);
        end_idx   = (first_page + nr_pages) / BITS_PER_LONG;
        end_off   = (first_page + nr_pages) & (BITS_PER_LONG-1);

        if (curr_idx == end_idx) {
                contiguous_bitmap[curr_idx] |=
                        ((1UL<<end_off)-1) & -(1UL<<start_off);
        } else {
                contiguous_bitmap[curr_idx] |= -(1UL<<start_off);
                while ( ++curr_idx < end_idx )
                        contiguous_bitmap[curr_idx] = ~0UL;
                contiguous_bitmap[curr_idx] |= (1UL<<end_off)-1;
        }
}

static void
contiguous_bitmap_clear(unsigned long first_page, unsigned long nr_pages)
{
        unsigned long start_off, end_off, curr_idx, end_idx;

        curr_idx  = first_page / BITS_PER_LONG;
        start_off = first_page & (BITS_PER_LONG-1);
        end_idx   = (first_page + nr_pages) / BITS_PER_LONG;
        end_off   = (first_page + nr_pages) & (BITS_PER_LONG-1);

        if (curr_idx == end_idx) {
                contiguous_bitmap[curr_idx] &=
                        -(1UL<<end_off) | ((1UL<<start_off)-1);
        } else {
                contiguous_bitmap[curr_idx] &= (1UL<<start_off)-1;
                while ( ++curr_idx != end_idx )
                        contiguous_bitmap[curr_idx] = 0;
                contiguous_bitmap[curr_idx] &= -(1UL<<end_off);
        }
}
#endif

/* Ensure multi-page extents are contiguous in machine memory. */
int
xen_create_contiguous_region(vm_page_t pages, int npages)
{
        unsigned long  mfn, i, flags;
        int order;
        struct xen_memory_reservation reservation = {
                .nr_extents   = 1,
                .extent_order = 0,
                .domid        = DOMID_SELF
        };
        set_xen_guest_handle(reservation.extent_start, &mfn);

        balloon_lock(flags);

        /* can currently only handle power of two allocation */
        PANIC_IF(ffs(npages) != fls(npages));

        /* 0. determine order */
        order = (ffs(npages) == fls(npages)) ? fls(npages) - 1 : fls(npages);

        /* 1. give away machine pages. */
        for (i = 0; i < (1 << order); i++) {
                int pfn;
                pfn = VM_PAGE_TO_PHYS(&pages[i]) >> PAGE_SHIFT;
                mfn = PFNTOMFN(pfn);
                PFNTOMFN(pfn) = INVALID_P2M_ENTRY;
                PANIC_IF(HYPERVISOR_memory_op(XENMEM_decrease_reservation, &reservation) != 1);
        }

        /* 2. Get a new contiguous memory extent. */
        reservation.extent_order = order;
        /*
         * xenlinux hardcodes this because of aacraid - maybe set to 0 if
         * we're not running with a broxen driver XXXEN
         */
        reservation.address_bits = 31;
        if (HYPERVISOR_memory_op(XENMEM_increase_reservation, &reservation) != 1)
                goto fail;

        /* 3. Map the new extent in place of old pages. */
        for (i = 0; i < (1 << order); i++) {
                int pfn;
                pfn = VM_PAGE_TO_PHYS(&pages[i]) >> PAGE_SHIFT;
                xen_machphys_update(mfn+i, pfn);
                PFNTOMFN(pfn) = mfn+i;
        }

        xen_tlb_flush();

#if 0
        contiguous_bitmap_set(VM_PAGE_TO_PHYS(&pages[0]) >> PAGE_SHIFT, 1UL << order);
#endif

        balloon_unlock(flags);

        return 0;

 fail:
        reservation.extent_order = 0;
        reservation.address_bits = 0;

        for (i = 0; i < (1 << order); i++) {
                int pfn;
                pfn = VM_PAGE_TO_PHYS(&pages[i]) >> PAGE_SHIFT;
                PANIC_IF(HYPERVISOR_memory_op(
                        XENMEM_increase_reservation, &reservation) != 1);
                xen_machphys_update(mfn, pfn);
                PFNTOMFN(pfn) = mfn;
        }

        xen_tlb_flush();

        balloon_unlock(flags);

        return ENOMEM;
}

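/*
 * Usage sketch (hypothetical caller): code that needs a buffer contiguous
 * in *machine* memory, e.g. for DMA, passes a power-of-two run of
 * vm_page_t's and later releases it:
 *
 *      if (xen_create_contiguous_region(pages, 4) != 0)        4 pages = 16K
 *              return (ENOMEM);
 *      ...
 *      xen_destroy_contiguous_region(addr, 4);
 *
 * On failure the original (possibly scattered) machine pages are
 * reclaimed before ENOMEM is returned.
 */
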
void
xen_destroy_contiguous_region(void *addr, int npages)
{
        unsigned long  mfn, i, flags, order, pfn0;
        struct xen_memory_reservation reservation = {
                .nr_extents   = 1,
                .extent_order = 0,
                .domid        = DOMID_SELF
        };
        set_xen_guest_handle(reservation.extent_start, &mfn);

        pfn0 = vtophys(addr) >> PAGE_SHIFT;
#if 0
        scrub_pages(vstart, 1 << order);
#endif
        /* can currently only handle power of two allocation */
        PANIC_IF(ffs(npages) != fls(npages));

        /* 0. determine order */
        order = (ffs(npages) == fls(npages)) ? fls(npages) - 1 : fls(npages);

        balloon_lock(flags);

#if 0
        contiguous_bitmap_clear(vtophys(addr) >> PAGE_SHIFT, 1UL << order);
#endif

        /* 1. Zap current PTEs, giving away the underlying pages. */
        for (i = 0; i < (1 << order); i++) {
                int pfn;
                uint64_t new_val = 0;
                pfn = vtomach((char *)addr + i*PAGE_SIZE) >> PAGE_SHIFT;

                PANIC_IF(HYPERVISOR_update_va_mapping((vm_offset_t)((char *)addr + (i * PAGE_SIZE)), new_val, 0));
                PFNTOMFN(pfn) = INVALID_P2M_ENTRY;
                PANIC_IF(HYPERVISOR_memory_op(
                        XENMEM_decrease_reservation, &reservation) != 1);
        }

        /* 2. Map new pages in place of old pages. */
        for (i = 0; i < (1 << order); i++) {
                int pfn;
                uint64_t new_val;
                pfn = pfn0 + i;
                PANIC_IF(HYPERVISOR_memory_op(XENMEM_increase_reservation, &reservation) != 1);

                new_val = mfn << PAGE_SHIFT;
                PANIC_IF(HYPERVISOR_update_va_mapping((vm_offset_t)addr + (i * PAGE_SIZE),
                    new_val, PG_KERNEL));
                xen_machphys_update(mfn, pfn);
                PFNTOMFN(pfn) = mfn;
        }

        xen_tlb_flush();

        balloon_unlock(flags);
}

extern vm_offset_t proc0kstack;
extern int vm86paddr, vm86phystk;
char *bootmem_start, *bootmem_current, *bootmem_end;

pteinfo_t *pteinfo_list;
void initvalues(start_info_t *startinfo);

struct xenstore_domain_interface;
extern struct xenstore_domain_interface *xen_store;

char *console_page;

void *
bootmem_alloc(unsigned int size)
{
        char *retptr;

        retptr = bootmem_current;
        PANIC_IF(retptr + size > bootmem_end);
        bootmem_current += size;

        return retptr;
}

void
bootmem_free(void *ptr, unsigned int size)
{
        char *tptr;

        tptr = ptr;
        PANIC_IF(tptr != bootmem_current - size ||
                bootmem_current - size < bootmem_start);

        bootmem_current -= size;
}

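/*
 * The bootmem allocator is strictly LIFO; for example (hypothetical
 * pointers a and b):
 *
 *      a = bootmem_alloc(PAGE_SIZE);
 *      b = bootmem_alloc(PAGE_SIZE);
 *      bootmem_free(b, PAGE_SIZE);     ok: b was the last allocation
 *      bootmem_free(a, PAGE_SIZE);     ok: a is now the most recent
 *
 * Freeing a before b would trip the PANIC_IF() in bootmem_free().
 */
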
#if 0
static vm_paddr_t
xpmap_mtop2(vm_paddr_t mpa)
{
        return ((machine_to_phys_mapping[mpa >> PAGE_SHIFT] << PAGE_SHIFT)
            ) | (mpa & ~PG_FRAME);
}

static pd_entry_t
xpmap_get_bootpde(vm_paddr_t va)
{

        return ((pd_entry_t *)xen_start_info->pt_base)[va >> 22];
}

static pd_entry_t
xpmap_get_vbootpde(vm_paddr_t va)
{
        pd_entry_t pde;

        pde = xpmap_get_bootpde(va);
        if ((pde & PG_V) == 0)
                return (pde & ~PG_FRAME);
        return (pde & ~PG_FRAME) |
                (xpmap_mtop2(pde & PG_FRAME) + KERNBASE);
}

static pt_entry_t *
xpmap_get_bootptep(vm_paddr_t va)
{
        pd_entry_t pde;

        pde = xpmap_get_vbootpde(va);
        if ((pde & PG_V) == 0)
                return (void *)-1;
#define PT_MASK         0x003ff000      /* page table address bits */
        return &(((pt_entry_t *)(pde & PG_FRAME))[(va & PT_MASK) >> PAGE_SHIFT]);
}

static pt_entry_t
xpmap_get_bootpte(vm_paddr_t va)
{

        return xpmap_get_bootptep(va)[0];
}
#endif

#ifdef ADD_ISA_HOLE
static void
shift_phys_machine(unsigned long *phys_machine, int nr_pages)
{

        unsigned long *tmp_page, *current_page, *next_page;
        int i;

        tmp_page = bootmem_alloc(PAGE_SIZE);
        current_page = phys_machine + nr_pages - (PAGE_SIZE/sizeof(unsigned long));
        next_page = current_page - (PAGE_SIZE/sizeof(unsigned long));
        bcopy(phys_machine, tmp_page, PAGE_SIZE);

        while (current_page > phys_machine) {
                /* save next page */
                bcopy(next_page, tmp_page, PAGE_SIZE);
                /* shift down page */
                bcopy(current_page, next_page, PAGE_SIZE);
                /* finish swap */
                bcopy(tmp_page, current_page, PAGE_SIZE);

                current_page -= (PAGE_SIZE/sizeof(unsigned long));
                next_page -= (PAGE_SIZE/sizeof(unsigned long));
        }
        bootmem_free(tmp_page, PAGE_SIZE);

        for (i = 0; i < nr_pages; i++) {
                xen_machphys_update(phys_machine[i], i);
        }
        memset(phys_machine, INVALID_P2M_ENTRY, PAGE_SIZE);
}
#endif /* ADD_ISA_HOLE */

/*
 * Build a directory of the pages that make up our Physical to Machine
 * mapping table. The Xen suspend/restore code uses this to find our
 * mapping table.
 */
static void
init_frame_list_list(void *arg)
{
        unsigned long nr_pages = xen_start_info->nr_pages;
#define FPP     (PAGE_SIZE/sizeof(xen_pfn_t))
        int i, j, k;

        xen_pfn_to_mfn_frame_list_list = malloc(PAGE_SIZE, M_DEVBUF, M_WAITOK);
        for (i = 0, j = 0, k = -1; i < nr_pages;
             i += FPP, j++) {
                if ((j & (FPP - 1)) == 0) {
                        k++;
                        xen_pfn_to_mfn_frame_list[k] =
                                malloc(PAGE_SIZE, M_DEVBUF, M_WAITOK);
                        xen_pfn_to_mfn_frame_list_list[k] =
                                VTOMFN(xen_pfn_to_mfn_frame_list[k]);
                        j = 0;
                }
                xen_pfn_to_mfn_frame_list[k][j] =
                        VTOMFN(&xen_phys_machine[i]);
        }

        HYPERVISOR_shared_info->arch.max_pfn = nr_pages;
        HYPERVISOR_shared_info->arch.pfn_to_mfn_frame_list_list
                = VTOMFN(xen_pfn_to_mfn_frame_list_list);
}
SYSINIT(init_fll, SI_SUB_DEVFS, SI_ORDER_ANY, init_frame_list_list, NULL);

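/*
 * Back-of-the-envelope check (assuming 4 KB pages and a 4-byte
 * xen_pfn_t): FPP = 4096 / 4 = 1024, so one page of xen_phys_machine[]
 * covers 1024 PFNs, each frame-list page indexes 1024 such pages
 * (4 GB of guest memory), and the 16-slot xen_pfn_to_mfn_frame_list[]
 * declared above accommodates domains of up to 64 GB.
 */
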
extern unsigned long physfree;

int pdir, curoffset;
extern int nkpt;

extern uint32_t kernbase;

void
initvalues(start_info_t *startinfo)
{
        vm_offset_t cur_space, cur_space_pt;
        struct physdev_set_iopl set_iopl;

        int l3_pages, l2_pages, l1_pages, offset;
        vm_paddr_t console_page_ma, xen_store_ma;
        vm_offset_t tmpva;
        vm_paddr_t shinfo;
#ifdef PAE
        vm_paddr_t IdlePDPTma, IdlePDPTnewma;
        vm_paddr_t IdlePTDnewma[4];
        pd_entry_t *IdlePDPTnew, *IdlePTDnew;
        vm_paddr_t IdlePTDma[4];
#else
        vm_paddr_t IdlePTDma[1];
#endif
        unsigned long i;
        int ncpus = MAXCPU;

        nkpt = min(
                min(
                        max((startinfo->nr_pages >> NPGPTD_SHIFT), nkpt),
                    NPGPTD*NPDEPG - KPTDI),
                    (HYPERVISOR_VIRT_START - KERNBASE) >> PDRSHIFT);

        HYPERVISOR_vm_assist(VMASST_CMD_enable, VMASST_TYPE_4gb_segments);
#ifdef notyet
        /*
         * need to install handler
         */
        HYPERVISOR_vm_assist(VMASST_CMD_enable, VMASST_TYPE_4gb_segments_notify);
#endif
        xen_start_info = startinfo;
        xen_phys_machine = (xen_pfn_t *)startinfo->mfn_list;

        IdlePTD = (pd_entry_t *)((uint8_t *)startinfo->pt_base + PAGE_SIZE);
        l1_pages = 0;

#ifdef PAE
        l3_pages = 1;
        l2_pages = 0;
        IdlePDPT = (pd_entry_t *)startinfo->pt_base;
        IdlePDPTma = VTOM(startinfo->pt_base);
        for (i = (KERNBASE >> 30);
             (i < 4) && (IdlePDPT[i] != 0); i++)
                        l2_pages++;
        /*
         * Note that only one page directory has been allocated at this point.
         * Thus, if KERNBASE
         */
        for (i = 0; i < l2_pages; i++)
                IdlePTDma[i] = VTOM(IdlePTD + i*PAGE_SIZE);

        l2_pages = (l2_pages == 0) ? 1 : l2_pages;
#else
        l3_pages = 0;
        l2_pages = 1;
#endif
        for (i = (((KERNBASE>>18) & PAGE_MASK)>>PAGE_SHIFT);
             (i<l2_pages*NPDEPG) && (i<(VM_MAX_KERNEL_ADDRESS>>PDRSHIFT)); i++) {

                if (IdlePTD[i] == 0)
                        break;
                l1_pages++;
        }

        /* number of pages allocated after the pts + 1 */
        cur_space = xen_start_info->pt_base +
            (l3_pages + l2_pages + l1_pages + 1)*PAGE_SIZE;

        printk("initvalues(): wooh - availmem=%x,%x\n", avail_space, cur_space);

        printk("KERNBASE=%x,pt_base=%x, VTOPFN(base)=%x, nr_pt_frames=%x\n",
            KERNBASE, xen_start_info->pt_base, VTOPFN(xen_start_info->pt_base),
            xen_start_info->nr_pt_frames);
        xendebug_flags = 0; /* 0xffffffff; */

#ifdef ADD_ISA_HOLE
        shift_phys_machine(xen_phys_machine, xen_start_info->nr_pages);
#endif
        XENPRINTF("IdlePTD %p\n", IdlePTD);
        XENPRINTF("nr_pages: %ld shared_info: 0x%lx flags: 0x%lx pt_base: 0x%lx "
                  "mod_start: 0x%lx mod_len: 0x%lx\n",
                  xen_start_info->nr_pages, xen_start_info->shared_info,
                  xen_start_info->flags, xen_start_info->pt_base,
                  xen_start_info->mod_start, xen_start_info->mod_len);

#ifdef PAE
        IdlePDPTnew = (pd_entry_t *)cur_space; cur_space += PAGE_SIZE;
        bzero(IdlePDPTnew, PAGE_SIZE);

        IdlePDPTnewma = VTOM(IdlePDPTnew);
        IdlePTDnew = (pd_entry_t *)cur_space; cur_space += 4*PAGE_SIZE;
        bzero(IdlePTDnew, 4*PAGE_SIZE);

        for (i = 0; i < 4; i++)
                IdlePTDnewma[i] = VTOM((uint8_t *)IdlePTDnew + i*PAGE_SIZE);
        /*
         * L3
         *
         * Copy the 4 machine addresses of the new PTDs in to the PDPT
         */
        for (i = 0; i < 4; i++)
                IdlePDPTnew[i] = IdlePTDnewma[i] | PG_V;

        __asm__("nop;");
        /*
         * re-map the new PDPT read-only
         */
        PT_SET_MA(IdlePDPTnew, IdlePDPTnewma | PG_V);
        /*
         * Unpin the current PDPT
         */
        xen_pt_unpin(IdlePDPTma);

#endif  /* PAE */

        /* Map proc0's KSTACK */
        proc0kstack = cur_space; cur_space += (KSTACK_PAGES * PAGE_SIZE);
        printk("proc0kstack=%u\n", proc0kstack);

        /* vm86/bios stack */
        cur_space += PAGE_SIZE;

        /* Map space for the vm86 region */
        vm86paddr = (vm_offset_t)cur_space;
        cur_space += (PAGE_SIZE * 3);

        /* allocate 4 pages for bootmem allocator */
        bootmem_start = bootmem_current = (char *)cur_space;
        cur_space += (4 * PAGE_SIZE);
        bootmem_end = (char *)cur_space;

        /* allocate pages for gdt */
        gdt = (union descriptor *)cur_space;
        cur_space += PAGE_SIZE*ncpus;

        /* allocate page for ldt */
        ldt = (union descriptor *)cur_space; cur_space += PAGE_SIZE;
        cur_space += PAGE_SIZE;

        /* unmap remaining pages from initial chunk */
        for (tmpva = cur_space; tmpva < (((uint32_t)&kernbase) + (l1_pages<<PDRSHIFT));
             tmpva += PAGE_SIZE) {
                bzero((char *)tmpva, PAGE_SIZE);
                PT_SET_MA(tmpva, (vm_paddr_t)0);
        }

        PT_UPDATES_FLUSH();

        memcpy(((uint8_t *)IdlePTDnew) + ((unsigned int)(KERNBASE >> 18)),
            ((uint8_t *)IdlePTD) + ((KERNBASE >> 18) & PAGE_MASK),
            l1_pages*sizeof(pt_entry_t));

        for (i = 0; i < 4; i++) {
                PT_SET_MA((uint8_t *)IdlePTDnew + i*PAGE_SIZE,
                    IdlePTDnewma[i] | PG_V);
        }
        xen_load_cr3(VTOP(IdlePDPTnew));
        xen_pgdpt_pin(VTOM(IdlePDPTnew));

        /* allocate remainder of nkpt pages */
        cur_space_pt = cur_space;
        for (offset = (KERNBASE >> PDRSHIFT), i = l1_pages; i < nkpt;
             i++, cur_space += PAGE_SIZE) {
                pdir = (offset + i) / NPDEPG;
                curoffset = ((offset + i) % NPDEPG);
                if (((offset + i) << PDRSHIFT) == VM_MAX_KERNEL_ADDRESS)
                        break;

                /*
                 * make sure that all the initial page table pages
                 * have been zeroed
                 */
                PT_SET_MA(cur_space, VTOM(cur_space) | PG_V | PG_RW);
                bzero((char *)cur_space, PAGE_SIZE);
                PT_SET_MA(cur_space, (vm_paddr_t)0);
                xen_pt_pin(VTOM(cur_space));
                xen_queue_pt_update((vm_paddr_t)(IdlePTDnewma[pdir] +
                        curoffset*sizeof(vm_paddr_t)),
                    VTOM(cur_space) | PG_KERNEL);
                PT_UPDATES_FLUSH();
        }

        for (i = 0; i < 4; i++) {
                pdir = (PTDPTDI + i) / NPDEPG;
                curoffset = (PTDPTDI + i) % NPDEPG;

                xen_queue_pt_update((vm_paddr_t)(IdlePTDnewma[pdir] +
                        curoffset*sizeof(vm_paddr_t)),
                    IdlePTDnewma[i] | PG_V);
        }

        PT_UPDATES_FLUSH();

        IdlePTD = IdlePTDnew;
        IdlePDPT = IdlePDPTnew;
        IdlePDPTma = IdlePDPTnewma;

        HYPERVISOR_shared_info = (shared_info_t *)cur_space;
        cur_space += PAGE_SIZE;

        xen_store = (struct xenstore_domain_interface *)cur_space;
        cur_space += PAGE_SIZE;

        console_page = (char *)cur_space;
        cur_space += PAGE_SIZE;

        /*
         * shared_info is an unsigned long so this will randomly break if
         * it is allocated above 4GB - I guess people are used to that
         * sort of thing with Xen ... sigh
         */
        shinfo = xen_start_info->shared_info;
        PT_SET_MA(HYPERVISOR_shared_info, shinfo | PG_KERNEL);

        printk("#4\n");

        xen_store_ma = (((vm_paddr_t)xen_start_info->store_mfn) << PAGE_SHIFT);
        PT_SET_MA(xen_store, xen_store_ma | PG_KERNEL);
        console_page_ma = (((vm_paddr_t)xen_start_info->console.domU.mfn) << PAGE_SHIFT);
        PT_SET_MA(console_page, console_page_ma | PG_KERNEL);

        printk("#5\n");

        set_iopl.iopl = 1;
        PANIC_IF(HYPERVISOR_physdev_op(PHYSDEVOP_SET_IOPL, &set_iopl));
        printk("#6\n");
#if 0
        /* add page table for KERNBASE */
        xen_queue_pt_update(IdlePTDma + KPTDI*sizeof(vm_paddr_t),
                            VTOM(cur_space) | PG_KERNEL);
        xen_flush_queue();
#ifdef PAE
        xen_queue_pt_update(pdir_shadow_ma[3] + KPTDI*sizeof(vm_paddr_t),
                            VTOM(cur_space) | PG_V | PG_A);
#else
        xen_queue_pt_update(pdir_shadow_ma + KPTDI*sizeof(vm_paddr_t),
                            VTOM(cur_space) | PG_V | PG_A);
#endif
        xen_flush_queue();
        cur_space += PAGE_SIZE;
        printk("#6\n");
#endif /* 0 */
#ifdef notyet
        if (xen_start_info->flags & SIF_INITDOMAIN) {
                /* Map first megabyte */
                for (i = 0; i < (256 << PAGE_SHIFT); i += PAGE_SIZE)
                        PT_SET_MA(KERNBASE + i, i | PG_KERNEL | PG_NC_PCD);
                xen_flush_queue();
        }
#endif
        /*
         * re-map kernel text read-only
         */
        for (i = (((vm_offset_t)&btext) & ~PAGE_MASK);
             i < (((vm_offset_t)&etext) & ~PAGE_MASK); i += PAGE_SIZE)
                PT_SET_MA(i, VTOM(i) | PG_V | PG_A);

        printk("#7\n");
        physfree = VTOP(cur_space);
        init_first = physfree >> PAGE_SHIFT;
        IdlePTD = (pd_entry_t *)VTOP(IdlePTD);
        IdlePDPT = (pd_entry_t *)VTOP(IdlePDPT);
        setup_xen_features();
        printk("#8, proc0kstack=%u\n", proc0kstack);
}

trap_info_t trap_table[] = {
        { 0,   0, GSEL(GCODE_SEL, SEL_KPL), (unsigned long) &IDTVEC(div)},
        { 1,   0|4, GSEL(GCODE_SEL, SEL_KPL), (unsigned long) &IDTVEC(dbg)},
        { 3,   3|4, GSEL(GCODE_SEL, SEL_KPL), (unsigned long) &IDTVEC(bpt)},
        { 4,   3, GSEL(GCODE_SEL, SEL_KPL), (unsigned long) &IDTVEC(ofl)},
        /* This is UPL on Linux and KPL on BSD */
        { 5,   3, GSEL(GCODE_SEL, SEL_KPL), (unsigned long) &IDTVEC(bnd)},
        { 6,   0, GSEL(GCODE_SEL, SEL_KPL), (unsigned long) &IDTVEC(ill)},
        { 7,   0|4, GSEL(GCODE_SEL, SEL_KPL), (unsigned long) &IDTVEC(dna)},
        /*
         * { 8,   0, GSEL(GCODE_SEL, SEL_KPL), (unsigned long) &IDTVEC(XXX)},
         *   no handler for double fault
         */
        { 9,   0, GSEL(GCODE_SEL, SEL_KPL), (unsigned long) &IDTVEC(fpusegm)},
        {10,   0, GSEL(GCODE_SEL, SEL_KPL), (unsigned long) &IDTVEC(tss)},
        {11,   0, GSEL(GCODE_SEL, SEL_KPL), (unsigned long) &IDTVEC(missing)},
        {12,   0, GSEL(GCODE_SEL, SEL_KPL), (unsigned long) &IDTVEC(stk)},
        {13,   0, GSEL(GCODE_SEL, SEL_KPL), (unsigned long) &IDTVEC(prot)},
        {14,   0|4, GSEL(GCODE_SEL, SEL_KPL), (unsigned long) &IDTVEC(page)},
        {15,   0, GSEL(GCODE_SEL, SEL_KPL), (unsigned long) &IDTVEC(rsvd)},
        {16,   0, GSEL(GCODE_SEL, SEL_KPL), (unsigned long) &IDTVEC(fpu)},
        {17,   0, GSEL(GCODE_SEL, SEL_KPL), (unsigned long) &IDTVEC(align)},
        {18,   0, GSEL(GCODE_SEL, SEL_KPL), (unsigned long) &IDTVEC(mchk)},
        {19,   0, GSEL(GCODE_SEL, SEL_KPL), (unsigned long) &IDTVEC(xmm)},
        {0x80, 3, GSEL(GCODE_SEL, SEL_KPL), (unsigned long) &IDTVEC(int0x80_syscall)},
        {  0, 0,           0, 0 }
};

/* Perform a multicall and check that individual calls succeeded. */
int
HYPERVISOR_multicall(struct multicall_entry *call_list, int nr_calls)
{
        int ret = 0;
        int i;

        /* Perform the multicall. */
        PANIC_IF(_HYPERVISOR_multicall(call_list, nr_calls));

        /* Check the results of individual hypercalls. */
        for (i = 0; i < nr_calls; i++)
                if (unlikely(call_list[i].result < 0))
                        ret++;
        if (unlikely(ret > 0))
                panic("%d multicall(s) failed: cpu %d\n",
                    ret, smp_processor_id());

        /* If we didn't panic already, everything succeeded. */
        return (0);
}

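/*
 * Usage sketch (hypothetical batch): two unrelated hypercalls can share
 * one guest/hypervisor transition:
 *
 *      multicall_entry_t mcl[2];
 *
 *      mcl[0].op = __HYPERVISOR_fpu_taskswitch;
 *      mcl[0].args[0] = 1;
 *      mcl[1].op = __HYPERVISOR_stack_switch;
 *      mcl[1].args[0] = GSEL(GDATA_SEL, SEL_KPL);
 *      mcl[1].args[1] = (unsigned long)pcb;
 *      (void)HYPERVISOR_multicall(mcl, 2);
 *
 * xen_handle_thread_switch() below builds this kind of batch for real.
 */
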
/********** CODE WORTH KEEPING ABOVE HERE *****************/

void xen_failsafe_handler(void);

void
xen_failsafe_handler(void)
{

        panic("xen_failsafe_handler called!\n");
}

void xen_handle_thread_switch(struct pcb *pcb);

/*
 * This is called by cpu_switch() when switching threads.  The pcb arg
 * refers to the process control block of the next thread which is to run.
 */
void
xen_handle_thread_switch(struct pcb *pcb)
{
        uint32_t *a = (uint32_t *)&PCPU_GET(fsgs_gdt)[0];
        uint32_t *b = (uint32_t *)&pcb->pcb_fsd;
        multicall_entry_t mcl[3];
        int i = 0;

        /* Notify Xen of task switch */
        mcl[i].op = __HYPERVISOR_stack_switch;
        mcl[i].args[0] = GSEL(GDATA_SEL, SEL_KPL);
        mcl[i++].args[1] = (unsigned long)pcb;

        /* Check for update of fsd */
        if (*a != *b || *(a+1) != *(b+1)) {
                mcl[i].op = __HYPERVISOR_update_descriptor;
                *(uint64_t *)&mcl[i].args[0] = vtomach((vm_offset_t)a);
                *(uint64_t *)&mcl[i++].args[2] = *(uint64_t *)b;
        }

        a += 2;
        b += 2;

        /* Check for update of gsd */
        if (*a != *b || *(a+1) != *(b+1)) {
                mcl[i].op = __HYPERVISOR_update_descriptor;
                *(uint64_t *)&mcl[i].args[0] = vtomach((vm_offset_t)a);
                *(uint64_t *)&mcl[i++].args[2] = *(uint64_t *)b;
        }

        (void)HYPERVISOR_multicall(mcl, i);
}
1257 }