1 /*
2  *
3  * Copyright (c) 2004 Christian Limpach.
4  * Copyright (c) 2004-2006,2008 Kip Macy
5  * All rights reserved.
6  *
7  * Redistribution and use in source and binary forms, with or without
8  * modification, are permitted provided that the following conditions
9  * are met:
10  * 1. Redistributions of source code must retain the above copyright
11  *    notice, this list of conditions and the following disclaimer.
12  * 2. Redistributions in binary form must reproduce the above copyright
13  *    notice, this list of conditions and the following disclaimer in the
14  *    documentation and/or other materials provided with the distribution.
15  * 3. All advertising materials mentioning features or use of this software
16  *    must display the following acknowledgement:
17  *      This product includes software developed by Christian Limpach.
18  * 4. The name of the author may not be used to endorse or promote products
19  *    derived from this software without specific prior written permission.
20  *
21  * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
22  * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
23  * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
24  * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT,
25  * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
26  * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
27  * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
28  * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
29  * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
30  * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
31  */
32
33 #include <sys/cdefs.h>
34 __FBSDID("$FreeBSD$");
35
36 #include <sys/param.h>
37 #include <sys/systm.h>
38 #include <sys/bus.h>
39 #include <sys/ktr.h>
40 #include <sys/lock.h>
41 #include <sys/mount.h>
42 #include <sys/malloc.h>
43 #include <sys/mutex.h>
44 #include <sys/kernel.h>
45 #include <sys/proc.h>
46 #include <sys/reboot.h>
47 #include <sys/rwlock.h>
48 #include <sys/sysproto.h>
49
50 #include <xen/xen-os.h>
51
52 #include <vm/vm.h>
53 #include <vm/pmap.h>
54 #include <machine/segments.h>
55 #include <machine/pcb.h>
56 #include <machine/stdarg.h>
57 #include <machine/vmparam.h>
58 #include <machine/cpu.h>
59 #include <machine/intr_machdep.h>
60 #include <machine/md_var.h>
61 #include <machine/asmacros.h>
62
63
64
65 #include <xen/hypervisor.h>
66 #include <machine/xen/xenvar.h>
67 #include <machine/xen/xenfunc.h>
68 #include <machine/xen/xenpmap.h>
70 #include <xen/interface/memory.h>
71 #include <machine/xen/features.h>
72 #ifdef SMP
73 #include <machine/privatespace.h>
74 #endif
75
76
77 #include <vm/vm_page.h>
78
79
80 #define IDTVEC(name)    __CONCAT(X,name)
81
82 extern inthand_t
83 IDTVEC(div), IDTVEC(dbg), IDTVEC(nmi), IDTVEC(bpt), IDTVEC(ofl),
84         IDTVEC(bnd), IDTVEC(ill), IDTVEC(dna), IDTVEC(fpusegm),
85         IDTVEC(tss), IDTVEC(missing), IDTVEC(stk), IDTVEC(prot),
86         IDTVEC(page), IDTVEC(mchk), IDTVEC(rsvd), IDTVEC(fpu), IDTVEC(align),
87         IDTVEC(xmm), IDTVEC(lcall_syscall), IDTVEC(int0x80_syscall);
88
89
90 int xendebug_flags; 
91 start_info_t *xen_start_info;
92 shared_info_t *HYPERVISOR_shared_info;
93 xen_pfn_t *xen_machine_phys = machine_to_phys_mapping;
94 xen_pfn_t *xen_phys_machine;
95 xen_pfn_t *xen_pfn_to_mfn_frame_list[16];
96 xen_pfn_t *xen_pfn_to_mfn_frame_list_list;
97 int preemptable, init_first;
98 extern unsigned int avail_space;
99 int xen_vector_callback_enabled = 0;
100 enum xen_domain_type xen_domain_type = XEN_PV_DOMAIN;
101
102 void ni_cli(void);
103 void ni_sti(void);
104
105
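/*
 * cli/sti wrappers that preserve the scratch registers which the
 * hypervisor-aware __cli()/__sti() sequences may clobber.
 */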
106 void
107 ni_cli(void)
108 {
109         CTR0(KTR_SPARE2, "ni_cli disabling interrupts");
110         __asm__("pushl %edx;"
111                 "pushl %eax;"
112                 );
113         __cli();
114         __asm__("popl %eax;"
115                 "popl %edx;"
116                 );
117 }
118
119
120 void
121 ni_sti(void)
122 {
123         __asm__("pushl %edx;"
124                 "pushl %esi;"
125                 "pushl %eax;"
126                 );
127         __sti();
128         __asm__("popl %eax;"
129                 "popl %esi;"
130                 "popl %edx;"
131                 );
132 }
133
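/*
 * Issue a harmless hypercall; returning from it gives the hypervisor an
 * opportunity to deliver any pending event-channel upcalls.
 */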
134 void
135 force_evtchn_callback(void)
136 {
137     (void)HYPERVISOR_xen_version(0, NULL);
138 }
139
140 /*
141  * Modify the cmd_line by converting ',' to NULs so that it is in a format
142  * suitable for the static env vars.
143  */
144 char *
145 xen_setbootenv(char *cmd_line)
146 {
147         char *cmd_line_next;
148     
149         /* Skip leading spaces */
150         for (; *cmd_line == ' '; cmd_line++);
151
152         xc_printf("xen_setbootenv(): cmd_line='%s'\n", cmd_line);
153
154         for (cmd_line_next = cmd_line; strsep(&cmd_line_next, ",") != NULL;);
155         return cmd_line;
156 }
157
158 static struct 
159 {
160         const char      *ev;
161         int             mask;
162 } howto_names[] = {
163         {"boot_askname",        RB_ASKNAME},
164         {"boot_single", RB_SINGLE},
165         {"boot_nosync", RB_NOSYNC},
166         {"boot_halt",   RB_HALT},
167         {"boot_serial", RB_SERIAL},
168         {"boot_cdrom",  RB_CDROM},
169         {"boot_gdb",    RB_GDB},
170         {"boot_gdb_pause",      RB_RESERVED1},
171         {"boot_verbose",        RB_VERBOSE},
172         {"boot_multicons",      RB_MULTIPLE},
173         {NULL,  0}
174 };
175
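/*
 * Translate any boot_* variables found in the kernel environment into the
 * corresponding RB_* boothowto flags.
 */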
176 int 
177 xen_boothowto(char *envp)
178 {
179         int i, howto = 0;
180
181         /* get equivalents from the environment */
182         for (i = 0; howto_names[i].ev != NULL; i++)
183                 if (getenv(howto_names[i].ev) != NULL)
184                         howto |= howto_names[i].mask;
185         return howto;
186 }
187
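/*
 * printf()-style diagnostics written to the hypervisor console via
 * HYPERVISOR_console_write().  The static buffer is not locked, so
 * concurrent callers may interleave output.
 */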
188 #define XC_PRINTF_BUFSIZE 1024
189 void
190 xc_printf(const char *fmt, ...)
191 {
192         __va_list ap;
193         int retval;
194         static char buf[XC_PRINTF_BUFSIZE];
195
196         va_start(ap, fmt);
197         retval = vsnprintf(buf, XC_PRINTF_BUFSIZE - 1, fmt, ap);
198         va_end(ap);
        /* vsnprintf() returns the untruncated length; clamp before terminating. */
        if (retval > XC_PRINTF_BUFSIZE - 2)
                retval = XC_PRINTF_BUFSIZE - 2;
199         buf[retval] = 0;
200         (void)HYPERVISOR_console_write(buf, retval);
201 }
202
203
204 #define XPQUEUE_SIZE 128
205
206 struct mmu_log {
207         char *file;
208         int line;
209 };
210
211 #ifdef SMP
212 /* per-cpu queues and indices */
213 #ifdef INVARIANTS
214 static struct mmu_log xpq_queue_log[XEN_LEGACY_MAX_VCPUS][XPQUEUE_SIZE];
215 #endif
216
217 static int xpq_idx[XEN_LEGACY_MAX_VCPUS];
218 static mmu_update_t xpq_queue[XEN_LEGACY_MAX_VCPUS][XPQUEUE_SIZE];
219
220 #define XPQ_QUEUE_LOG xpq_queue_log[vcpu]
221 #define XPQ_QUEUE xpq_queue[vcpu]
222 #define XPQ_IDX xpq_idx[vcpu]
223 #define SET_VCPU() int vcpu = smp_processor_id()
224 #else
225         
226 static mmu_update_t xpq_queue[XPQUEUE_SIZE];
227 #ifdef INVARIANTS
228 static struct mmu_log xpq_queue_log[XPQUEUE_SIZE];
229 #endif
230 static int xpq_idx = 0;
231
232 #define XPQ_QUEUE_LOG xpq_queue_log
233 #define XPQ_QUEUE xpq_queue
234 #define XPQ_IDX xpq_idx
235 #define SET_VCPU()
236 #endif /* !SMP */
237
238 #define XPQ_IDX_INC atomic_add_int(&XPQ_IDX, 1);
239
240 #if 0
241 static void
242 xen_dump_queue(void)
243 {
244         int _xpq_idx = XPQ_IDX;
245         int i;
246
247         if (_xpq_idx <= 1)
248                 return;
249
250         xc_printf("xen_dump_queue(): %u entries\n", _xpq_idx);
251         for (i = 0; i < _xpq_idx; i++) {
252                 xc_printf(" val: %llx ptr: %llx\n", XPQ_QUEUE[i].val,
253                     XPQ_QUEUE[i].ptr);
254         }
255 }
256 #endif
257
258
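/*
 * Submit all queued mmu_update requests to the hypervisor in a single
 * HYPERVISOR_mmu_update() call.  The queue index is cleared before the
 * hypercall is issued, and callers are expected to be in a critical
 * section once the GDT has been set up.
 */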
259 static __inline void
260 _xen_flush_queue(void)
261 {
262         SET_VCPU();
263         int _xpq_idx = XPQ_IDX;
264         int error, i;
265
266 #ifdef INVARIANTS
267         if (__predict_true(gdtset))
268                 CRITICAL_ASSERT(curthread);
269 #endif
270
271         XPQ_IDX = 0;
272         /* Make sure index is cleared first to avoid double updates. */
273         error = HYPERVISOR_mmu_update((mmu_update_t *)&XPQ_QUEUE,
274                                       _xpq_idx, NULL, DOMID_SELF);
275     
276 #if 0
277         if (__predict_true(gdtset))
278         for (i = _xpq_idx; i > 0;) {
279                 if (i >= 3) {
280                         CTR6(KTR_PMAP, "mmu:val: %lx ptr: %lx val: %lx "
281                             "ptr: %lx val: %lx ptr: %lx",
282                             (XPQ_QUEUE[i-1].val & 0xffffffff),
283                             (XPQ_QUEUE[i-1].ptr & 0xffffffff),
284                             (XPQ_QUEUE[i-2].val & 0xffffffff),
285                             (XPQ_QUEUE[i-2].ptr & 0xffffffff),
286                             (XPQ_QUEUE[i-3].val & 0xffffffff),
287                             (XPQ_QUEUE[i-3].ptr & 0xffffffff));
288                             i -= 3;
289                 } else if (i == 2) {
290                         CTR4(KTR_PMAP, "mmu: val: %lx ptr: %lx val: %lx ptr: %lx",
291                             (XPQ_QUEUE[i-1].val & 0xffffffff),
292                             (XPQ_QUEUE[i-1].ptr & 0xffffffff),
293                             (XPQ_QUEUE[i-2].val & 0xffffffff),
294                             (XPQ_QUEUE[i-2].ptr & 0xffffffff));
295                         i = 0;
296                 } else {
297                         CTR2(KTR_PMAP, "mmu: val: %lx ptr: %lx", 
298                             (XPQ_QUEUE[i-1].val & 0xffffffff),
299                             (XPQ_QUEUE[i-1].ptr & 0xffffffff));
300                         i = 0;
301                 }
302         }
303 #endif  
304         if (__predict_false(error < 0)) {
305                 for (i = 0; i < _xpq_idx; i++)
306                         printf("val: %llx ptr: %llx\n",
307                             XPQ_QUEUE[i].val, XPQ_QUEUE[i].ptr);
308                 panic("Failed to execute MMU updates: %d", error);
309         }
310
311 }
312
313 void
314 xen_flush_queue(void)
315 {
316         SET_VCPU();
317
318         if (__predict_true(gdtset))
319                 critical_enter();
320         if (XPQ_IDX != 0) _xen_flush_queue();
321         if (__predict_true(gdtset))
322                 critical_exit();
323 }
324
325 static __inline void
326 xen_increment_idx(void)
327 {
328         SET_VCPU();
329
330         XPQ_IDX++;
331         if (__predict_false(XPQ_IDX == XPQUEUE_SIZE))
332                 xen_flush_queue();
333 }
334
335 void
336 xen_check_queue(void)
337 {
338 #ifdef INVARIANTS
339         SET_VCPU();
340         
341         KASSERT(XPQ_IDX == 0, ("pending operations XPQ_IDX=%d", XPQ_IDX));
342 #endif
343 }
344
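/*
 * Invalidate the TLB entry for va; MMUEXT_INVLPG_ALL flushes the mapping
 * in all address spaces rather than just the current one.
 */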
345 void
346 xen_invlpg(vm_offset_t va)
347 {
348         struct mmuext_op op;
349         op.cmd = MMUEXT_INVLPG_ALL;
350         op.arg1.linear_addr = va & ~PAGE_MASK;
351         PANIC_IF(HYPERVISOR_mmuext_op(&op, 1, NULL, DOMID_SELF) < 0);
352 }
353
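/*
 * Install a new page-directory base: translate the physical address to its
 * machine frame and hand it to the hypervisor via MMUEXT_NEW_BASEPTR.
 */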
354 void
355 xen_load_cr3(u_int val)
356 {
357         struct mmuext_op op;
358 #ifdef INVARIANTS
359         SET_VCPU();
360         
361         KASSERT(XPQ_IDX == 0, ("pending operations XPQ_IDX=%d", XPQ_IDX));
362 #endif
363         op.cmd = MMUEXT_NEW_BASEPTR;
364         op.arg1.mfn = xpmap_ptom(val) >> PAGE_SHIFT;
365         PANIC_IF(HYPERVISOR_mmuext_op(&op, 1, NULL, DOMID_SELF) < 0);
366 }
367
368 #ifdef KTR
369 static __inline u_int
370 rebp(void)
371 {
372         u_int   data;
373
374         __asm __volatile("movl 4(%%ebp),%0" : "=r" (data));     
375         return (data);
376 }
377 #endif
378
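/*
 * Under PV Xen the interrupt flag in the hardware eflags does not track the
 * virtual interrupt state, so fold the per-VCPU evtchn_upcall_mask into the
 * PSL_I bit that callers see.
 */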
379 u_int
380 read_eflags(void)
381 {
382         vcpu_info_t *_vcpu;
383         u_int eflags;
384
385         eflags = _read_eflags();
386         _vcpu = &HYPERVISOR_shared_info->vcpu_info[smp_processor_id()]; 
387         if (_vcpu->evtchn_upcall_mask)
388                 eflags &= ~PSL_I;
389
390         return (eflags);
391 }
392
393 void
394 write_eflags(u_int eflags)
395 {
396         u_int intr;
397
398         CTR2(KTR_SPARE2, "%x xen_restore_flags eflags %x", rebp(), eflags);
399         intr = ((eflags & PSL_I) == 0);
400         __restore_flags(intr);
401         _write_eflags(eflags);
402 }
403
404 void
405 xen_cli(void)
406 {
407         CTR1(KTR_SPARE2, "%x xen_cli disabling interrupts", rebp());
408         __cli();
409 }
410
411 void
412 xen_sti(void)
413 {
414         CTR1(KTR_SPARE2, "%x xen_sti enabling interrupts", rebp());
415         __sti();
416 }
417
418 u_int
419 xen_rcr2(void)
420 {
421
422         return (HYPERVISOR_shared_info->vcpu_info[curcpu].arch.cr2);
423 }
424
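/*
 * Queue a machine-to-physical translation update (MMU_MACHPHYS_UPDATE) for
 * a single frame; it is issued when the per-CPU queue is flushed.
 */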
425 void
426 _xen_machphys_update(vm_paddr_t mfn, vm_paddr_t pfn, char *file, int line)
427 {
428         SET_VCPU();
429         
430         if (__predict_true(gdtset))
431                 critical_enter();
432         XPQ_QUEUE[XPQ_IDX].ptr = (mfn << PAGE_SHIFT) | MMU_MACHPHYS_UPDATE;
433         XPQ_QUEUE[XPQ_IDX].val = pfn;
434 #ifdef INVARIANTS
435         XPQ_QUEUE_LOG[XPQ_IDX].file = file;
436         XPQ_QUEUE_LOG[XPQ_IDX].line = line;     
437 #endif          
438         xen_increment_idx();
439         if (__predict_true(gdtset))
440                 critical_exit();
441 }
442
443 extern struct rwlock pvh_global_lock;
444
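/*
 * Queue a page-table entry write (MMU_NORMAL_PT_UPDATE) at the given
 * machine address.  Once the GDT is set up the pvh_global_lock must be
 * write-held, and the update is staged inside a critical section.
 */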
445 void
446 _xen_queue_pt_update(vm_paddr_t ptr, vm_paddr_t val, char *file, int line)
447 {
448         SET_VCPU();
449
450         if (__predict_true(gdtset))     
451                 rw_assert(&pvh_global_lock, RA_WLOCKED);
452
453         KASSERT((ptr & 7) == 0, ("misaligned update"));
454         
455         if (__predict_true(gdtset))
456                 critical_enter();
457         
458         XPQ_QUEUE[XPQ_IDX].ptr = ((uint64_t)ptr) | MMU_NORMAL_PT_UPDATE;
459         XPQ_QUEUE[XPQ_IDX].val = (uint64_t)val;
460 #ifdef INVARIANTS
461         XPQ_QUEUE_LOG[XPQ_IDX].file = file;
462         XPQ_QUEUE_LOG[XPQ_IDX].line = line;     
463 #endif  
464         xen_increment_idx();
465         if (__predict_true(gdtset))
466                 critical_exit();
467 }
468
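/*
 * Pin/unpin helpers: flush any queued updates first, then ask the
 * hypervisor to (un)pin the page-table page at the given machine address.
 */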
469 void 
470 xen_pgdpt_pin(vm_paddr_t ma)
471 {
472         struct mmuext_op op;
473         op.cmd = MMUEXT_PIN_L3_TABLE;
474         op.arg1.mfn = ma >> PAGE_SHIFT;
475         xen_flush_queue();
476         PANIC_IF(HYPERVISOR_mmuext_op(&op, 1, NULL, DOMID_SELF) < 0);
477 }
478
479 void 
480 xen_pgd_pin(vm_paddr_t ma)
481 {
482         struct mmuext_op op;
483         op.cmd = MMUEXT_PIN_L2_TABLE;
484         op.arg1.mfn = ma >> PAGE_SHIFT;
485         xen_flush_queue();
486         PANIC_IF(HYPERVISOR_mmuext_op(&op, 1, NULL, DOMID_SELF) < 0);
487 }
488
489 void 
490 xen_pgd_unpin(vm_paddr_t ma)
491 {
492         struct mmuext_op op;
493         op.cmd = MMUEXT_UNPIN_TABLE;
494         op.arg1.mfn = ma >> PAGE_SHIFT;
495         xen_flush_queue();
496         PANIC_IF(HYPERVISOR_mmuext_op(&op, 1, NULL, DOMID_SELF) < 0);
497 }
498
499 void 
500 xen_pt_pin(vm_paddr_t ma)
501 {
502         struct mmuext_op op;
503         op.cmd = MMUEXT_PIN_L1_TABLE;
504         op.arg1.mfn = ma >> PAGE_SHIFT;
505         xen_flush_queue();
506         PANIC_IF(HYPERVISOR_mmuext_op(&op, 1, NULL, DOMID_SELF) < 0);
507 }
508
509 void 
510 xen_pt_unpin(vm_paddr_t ma)
511 {
512         struct mmuext_op op;
513         op.cmd = MMUEXT_UNPIN_TABLE;
514         op.arg1.mfn = ma >> PAGE_SHIFT;
515         xen_flush_queue();
516         PANIC_IF(HYPERVISOR_mmuext_op(&op, 1, NULL, DOMID_SELF) < 0);
517 }
518
519 void 
520 xen_set_ldt(vm_paddr_t ptr, unsigned long len)
521 {
522         struct mmuext_op op;
523         op.cmd = MMUEXT_SET_LDT;
524         op.arg1.linear_addr = ptr;
525         op.arg2.nr_ents = len;
526         xen_flush_queue();
527         PANIC_IF(HYPERVISOR_mmuext_op(&op, 1, NULL, DOMID_SELF) < 0);
528 }
529
530 void xen_tlb_flush(void)
531 {
532         struct mmuext_op op;
533         op.cmd = MMUEXT_TLB_FLUSH_LOCAL;
534         xen_flush_queue();
535         PANIC_IF(HYPERVISOR_mmuext_op(&op, 1, NULL, DOMID_SELF) < 0);
536 }
537
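/*
 * Descriptor tables are mapped read-only under PV Xen, so compute the
 * machine address of the descriptor slot and let the hypervisor write it.
 */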
538 void
539 xen_update_descriptor(union descriptor *table, union descriptor *entry)
540 {
541         vm_paddr_t pa;
542         pt_entry_t *ptp;
543
544         ptp = vtopte((vm_offset_t)table);
545         pa = (*ptp & PG_FRAME) | ((vm_offset_t)table & PAGE_MASK);
546         if (HYPERVISOR_update_descriptor(pa, *(uint64_t *)entry))
547                 panic("HYPERVISOR_update_descriptor failed\n");
548 }
549
550
551 #if 0
552 /*
553  * Bitmap is indexed by page number. If bit is set, the page is part of a
554  * xen_create_contiguous_region() area of memory.
555  */
556 unsigned long *contiguous_bitmap;
557
558 static void 
559 contiguous_bitmap_set(unsigned long first_page, unsigned long nr_pages)
560 {
561         unsigned long start_off, end_off, curr_idx, end_idx;
562
563         curr_idx  = first_page / BITS_PER_LONG;
564         start_off = first_page & (BITS_PER_LONG-1);
565         end_idx   = (first_page + nr_pages) / BITS_PER_LONG;
566         end_off   = (first_page + nr_pages) & (BITS_PER_LONG-1);
567
568         if (curr_idx == end_idx) {
569                 contiguous_bitmap[curr_idx] |=
570                         ((1UL<<end_off)-1) & -(1UL<<start_off);
571         } else {
572                 contiguous_bitmap[curr_idx] |= -(1UL<<start_off);
573                 while ( ++curr_idx < end_idx )
574                         contiguous_bitmap[curr_idx] = ~0UL;
575                 contiguous_bitmap[curr_idx] |= (1UL<<end_off)-1;
576         }
577 }
578
579 static void 
580 contiguous_bitmap_clear(unsigned long first_page, unsigned long nr_pages)
581 {
582         unsigned long start_off, end_off, curr_idx, end_idx;
583
584         curr_idx  = first_page / BITS_PER_LONG;
585         start_off = first_page & (BITS_PER_LONG-1);
586         end_idx   = (first_page + nr_pages) / BITS_PER_LONG;
587         end_off   = (first_page + nr_pages) & (BITS_PER_LONG-1);
588
589         if (curr_idx == end_idx) {
590                 contiguous_bitmap[curr_idx] &=
591                         -(1UL<<end_off) | ((1UL<<start_off)-1);
592         } else {
593                 contiguous_bitmap[curr_idx] &= (1UL<<start_off)-1;
594                 while ( ++curr_idx != end_idx )
595                         contiguous_bitmap[curr_idx] = 0;
596                 contiguous_bitmap[curr_idx] &= -(1UL<<end_off);
597         }
598 }
599 #endif
600
601 /* Ensure multi-page extents are contiguous in machine memory. */
602 int 
603 xen_create_contiguous_region(vm_page_t pages, int npages)
604 {
605         unsigned long  mfn, i, flags;
606         int order;
607         struct xen_memory_reservation reservation = {
608                 .nr_extents   = 1,
609                 .extent_order = 0,
610                 .domid        = DOMID_SELF
611         };
612         set_xen_guest_handle(reservation.extent_start, &mfn);
613         
614         balloon_lock(flags);
615
616         /* can currently only handle power of two allocation */
617         PANIC_IF(ffs(npages) != fls(npages));
618
619         /* 0. determine order */
620         order = (ffs(npages) == fls(npages)) ? fls(npages) - 1 : fls(npages);
621         
622         /* 1. give away machine pages. */
623         for (i = 0; i < (1 << order); i++) {
624                 int pfn;
625                 pfn = VM_PAGE_TO_PHYS(&pages[i]) >> PAGE_SHIFT;
626                 mfn = PFNTOMFN(pfn);
627                 PFNTOMFN(pfn) = INVALID_P2M_ENTRY;
628                 PANIC_IF(HYPERVISOR_memory_op(XENMEM_decrease_reservation, &reservation) != 1);
629         }
630
631
632         /* 2. Get a new contiguous memory extent. */
633         reservation.extent_order = order;
634         /* xenlinux hardcodes this because of aacraid - maybe set to 0 if we're not 
635          * running with a broken driver XXXEN
636          */
637         reservation.address_bits = 31; 
638         if (HYPERVISOR_memory_op(XENMEM_increase_reservation, &reservation) != 1)
639                 goto fail;
640
641         /* 3. Map the new extent in place of old pages. */
642         for (i = 0; i < (1 << order); i++) {
643                 int pfn;
644                 pfn = VM_PAGE_TO_PHYS(&pages[i]) >> PAGE_SHIFT;
645                 xen_machphys_update(mfn+i, pfn);
646                 PFNTOMFN(pfn) = mfn+i;
647         }
648
649         xen_tlb_flush();
650
651 #if 0
652         contiguous_bitmap_set(VM_PAGE_TO_PHYS(&pages[0]) >> PAGE_SHIFT, 1UL << order);
653 #endif
654
655         balloon_unlock(flags);
656
657         return 0;
658
659  fail:
660         reservation.extent_order = 0;
661         reservation.address_bits = 0;
662
663         for (i = 0; i < (1 << order); i++) {
664                 int pfn;
665                 pfn = VM_PAGE_TO_PHYS(&pages[i]) >> PAGE_SHIFT;
666                 PANIC_IF(HYPERVISOR_memory_op(
667                         XENMEM_increase_reservation, &reservation) != 1);
668                 xen_machphys_update(mfn, pfn);
669                 PFNTOMFN(pfn) = mfn;
670         }
671
672         xen_tlb_flush();
673
674         balloon_unlock(flags);
675
676         return ENOMEM;
677 }
678
679 void 
680 xen_destroy_contiguous_region(void *addr, int npages)
681 {
682         unsigned long  mfn, i, flags, order, pfn0;
683         struct xen_memory_reservation reservation = {
684                 .nr_extents   = 1,
685                 .extent_order = 0,
686                 .domid        = DOMID_SELF
687         };
688         set_xen_guest_handle(reservation.extent_start, &mfn);
689         
690         pfn0 = vtophys(addr) >> PAGE_SHIFT;
691 #if 0
692         scrub_pages(vstart, 1 << order);
693 #endif
694         /* can currently only handle power of two allocation */
695         PANIC_IF(ffs(npages) != fls(npages));
696
697         /* 0. determine order */
698         order = (ffs(npages) == fls(npages)) ? fls(npages) - 1 : fls(npages);
699
700         balloon_lock(flags);
701
702 #if 0
703         contiguous_bitmap_clear(vtophys(addr) >> PAGE_SHIFT, 1UL << order);
704 #endif
705
706         /* 1. Zap current PTEs, giving away the underlying pages. */
707         for (i = 0; i < (1 << order); i++) {
708                 int pfn;
709                 uint64_t new_val = 0;
710                 pfn = vtomach((char *)addr + i*PAGE_SIZE) >> PAGE_SHIFT;
711
712                 PANIC_IF(HYPERVISOR_update_va_mapping((vm_offset_t)((char *)addr + (i * PAGE_SIZE)), new_val, 0));
713                 PFNTOMFN(pfn) = INVALID_P2M_ENTRY;
714                 PANIC_IF(HYPERVISOR_memory_op(
715                         XENMEM_decrease_reservation, &reservation) != 1);
716         }
717
718         /* 2. Map new pages in place of old pages. */
719         for (i = 0; i < (1 << order); i++) {
720                 int pfn;
721                 uint64_t new_val;
722                 pfn = pfn0 + i;
723                 PANIC_IF(HYPERVISOR_memory_op(XENMEM_increase_reservation, &reservation) != 1);
724                 
725                 new_val = mfn << PAGE_SHIFT;
726                 PANIC_IF(HYPERVISOR_update_va_mapping((vm_offset_t)addr + (i * PAGE_SIZE), 
727                                                       new_val, PG_KERNEL));
728                 xen_machphys_update(mfn, pfn);
729                 PFNTOMFN(pfn) = mfn;
730         }
731
732         xen_tlb_flush();
733
734         balloon_unlock(flags);
735 }
736
737 extern  vm_offset_t     proc0kstack;
738 extern int vm86paddr, vm86phystk;
739 char *bootmem_start, *bootmem_current, *bootmem_end;
740
741 pteinfo_t *pteinfo_list;
742 void initvalues(start_info_t *startinfo);
743
744 struct xenstore_domain_interface;
745 extern struct xenstore_domain_interface *xen_store;
746
747 char *console_page;
748
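/*
 * Minimal bump allocator over the boot-time region reserved in
 * initvalues(); bootmem_free() only accepts the most recent allocation.
 */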
749 void *
750 bootmem_alloc(unsigned int size) 
751 {
752         char *retptr;
753         
754         retptr = bootmem_current;
755         PANIC_IF(retptr + size > bootmem_end);
756         bootmem_current += size;
757
758         return retptr;
759 }
760
761 void 
762 bootmem_free(void *ptr, unsigned int size) 
763 {
764         char *tptr;
765         
766         tptr = ptr;
767         PANIC_IF(tptr != bootmem_current - size ||
768                 bootmem_current - size < bootmem_start);        
769
770         bootmem_current -= size;
771 }
772
773 #if 0
774 static vm_paddr_t
775 xpmap_mtop2(vm_paddr_t mpa)
776 {
777         return ((machine_to_phys_mapping[mpa >> PAGE_SHIFT] << PAGE_SHIFT)
778             ) | (mpa & ~PG_FRAME);
779 }
780
781 static pd_entry_t 
782 xpmap_get_bootpde(vm_paddr_t va)
783 {
784
785         return ((pd_entry_t *)xen_start_info->pt_base)[va >> 22];
786 }
787
788 static pd_entry_t
789 xpmap_get_vbootpde(vm_paddr_t va)
790 {
791         pd_entry_t pde;
792
793         pde = xpmap_get_bootpde(va);
794         if ((pde & PG_V) == 0)
795                 return (pde & ~PG_FRAME);
796         return (pde & ~PG_FRAME) |
797                 (xpmap_mtop2(pde & PG_FRAME) + KERNBASE);
798 }
799
800 static pt_entry_t *
801 xpmap_get_bootptep(vm_paddr_t va)
802 {
803         pd_entry_t pde;
804
805         pde = xpmap_get_vbootpde(va);
806         if ((pde & PG_V) == 0)
807                 return (void *)-1;
808 #define PT_MASK         0x003ff000      /* page table address bits */
809         return &(((pt_entry_t *)(pde & PG_FRAME))[(va & PT_MASK) >> PAGE_SHIFT]);
810 }
811
812 static pt_entry_t
813 xpmap_get_bootpte(vm_paddr_t va)
814 {
815
816         return xpmap_get_bootptep(va)[0];
817 }
818 #endif
819
820
821 #ifdef ADD_ISA_HOLE
822 static void
823 shift_phys_machine(unsigned long *phys_machine, int nr_pages)
824 {
825
826         unsigned long *tmp_page, *current_page, *next_page;
827         int i;
828
829         tmp_page = bootmem_alloc(PAGE_SIZE);
830         current_page = phys_machine + nr_pages - (PAGE_SIZE/sizeof(unsigned long));  
831         next_page = current_page - (PAGE_SIZE/sizeof(unsigned long));  
832         bcopy(phys_machine, tmp_page, PAGE_SIZE);
833
834         while (current_page > phys_machine) { 
835                 /*  save next page */
836                 bcopy(next_page, tmp_page, PAGE_SIZE);
837                 /* shift down page */
838                 bcopy(current_page, next_page, PAGE_SIZE);
839                 /*  finish swap */
840                 bcopy(tmp_page, current_page, PAGE_SIZE);
841           
842                 current_page -= (PAGE_SIZE/sizeof(unsigned long));
843                 next_page -= (PAGE_SIZE/sizeof(unsigned long));
844         }
845         bootmem_free(tmp_page, PAGE_SIZE);      
846         
847         for (i = 0; i < nr_pages; i++) {
848                 xen_machphys_update(phys_machine[i], i);
849         }
850         memset(phys_machine, INVALID_P2M_ENTRY, PAGE_SIZE);
851
852 }
853 #endif /* ADD_ISA_HOLE */
854
855 /*
856  * Build a directory of the pages that make up our Physical to Machine
857  * mapping table. The Xen suspend/restore code uses this to find our
858  * mapping table.
859  */
860 static void
861 init_frame_list_list(void *arg)
862 {
863         unsigned long nr_pages = xen_start_info->nr_pages;
864 #define FPP     (PAGE_SIZE/sizeof(xen_pfn_t))
865         int i, j, k;
866
867         xen_pfn_to_mfn_frame_list_list = malloc(PAGE_SIZE, M_DEVBUF, M_WAITOK);
868         for (i = 0, j = 0, k = -1; i < nr_pages;
869              i += FPP, j++) {
870                 if ((j & (FPP - 1)) == 0) {
871                         k++;
872                         xen_pfn_to_mfn_frame_list[k] =
873                                 malloc(PAGE_SIZE, M_DEVBUF, M_WAITOK);
874                         xen_pfn_to_mfn_frame_list_list[k] =
875                                 VTOMFN(xen_pfn_to_mfn_frame_list[k]);
876                         j = 0;
877                 }
878                 xen_pfn_to_mfn_frame_list[k][j] = 
879                         VTOMFN(&xen_phys_machine[i]);
880         }
881
882         HYPERVISOR_shared_info->arch.max_pfn = nr_pages;
883         HYPERVISOR_shared_info->arch.pfn_to_mfn_frame_list_list
884                 = VTOMFN(xen_pfn_to_mfn_frame_list_list);
885 }       
886 SYSINIT(init_fll, SI_SUB_DEVFS, SI_ORDER_ANY, init_frame_list_list, NULL);
887
888 extern unsigned long physfree;
889
890 int pdir, curoffset;
891 extern int nkpt;
892
893 extern uint32_t kernbase;
894
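/*
 * Early PV bootstrap: consume the start_info page, rebuild the initial
 * page tables in pages we own, map the shared-info, xenstore and console
 * frames, and record where usable physical memory begins (physfree).
 */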
895 void
896 initvalues(start_info_t *startinfo)
897 {
898         vm_offset_t cur_space, cur_space_pt;
899         struct physdev_set_iopl set_iopl;
900         
901         int l3_pages, l2_pages, l1_pages, offset;
902         vm_paddr_t console_page_ma, xen_store_ma;
903         vm_offset_t tmpva;
904         vm_paddr_t shinfo;
905 #ifdef PAE
906         vm_paddr_t IdlePDPTma, IdlePDPTnewma;
907         vm_paddr_t IdlePTDnewma[4];
908         pd_entry_t *IdlePDPTnew, *IdlePTDnew;
909         vm_paddr_t IdlePTDma[4];
910 #else
911         vm_paddr_t IdlePTDma[1];
912 #endif
913         unsigned long i;
914         int ncpus = MAXCPU;
915
916         nkpt = min(
917                 min(
918                         max((startinfo->nr_pages >> NPGPTD_SHIFT), nkpt),
919                     NPGPTD*NPDEPG - KPTDI),
920                     (HYPERVISOR_VIRT_START - KERNBASE) >> PDRSHIFT);
921
922         HYPERVISOR_vm_assist(VMASST_CMD_enable, VMASST_TYPE_4gb_segments);      
923 #ifdef notyet
924         /*
925          * need to install handler
926          */
927         HYPERVISOR_vm_assist(VMASST_CMD_enable, VMASST_TYPE_4gb_segments_notify);       
928 #endif  
929         xen_start_info = startinfo;
930         xen_phys_machine = (xen_pfn_t *)startinfo->mfn_list;
931
932         IdlePTD = (pd_entry_t *)((uint8_t *)startinfo->pt_base + PAGE_SIZE);
933         l1_pages = 0;
934         
935 #ifdef PAE
936         l3_pages = 1;
937         l2_pages = 0;
938         IdlePDPT = (pd_entry_t *)startinfo->pt_base;
939         IdlePDPTma = VTOM(startinfo->pt_base);
940         for (i = (KERNBASE >> 30);
941              (i < 4) && (IdlePDPT[i] != 0); i++)
942                         l2_pages++;
943         /*
944          * Note that only one page directory has been allocated at this point.
945          * Thus, if KERNBASE
946          */
947         for (i = 0; i < l2_pages; i++)
948                 IdlePTDma[i] = VTOM(IdlePTD + i*PAGE_SIZE);
949
950         l2_pages = (l2_pages == 0) ? 1 : l2_pages;
951 #else   
952         l3_pages = 0;
953         l2_pages = 1;
954 #endif
955         for (i = (((KERNBASE>>18) & PAGE_MASK)>>PAGE_SHIFT);
956              (i<l2_pages*NPDEPG) && (i<(VM_MAX_KERNEL_ADDRESS>>PDRSHIFT)); i++) {
957                 
958                 if (IdlePTD[i] == 0)
959                         break;
960                 l1_pages++;
961         }
962
963         /* number of pages allocated after the pts + 1*/;
964         cur_space = xen_start_info->pt_base +
965             (l3_pages + l2_pages + l1_pages + 1)*PAGE_SIZE;
966
967         xc_printf("initvalues(): wooh - availmem=%x,%x\n", avail_space,
968             cur_space);
969
970         xc_printf("KERNBASE=%x,pt_base=%x, VTOPFN(base)=%x, nr_pt_frames=%x\n",
971             KERNBASE,xen_start_info->pt_base, VTOPFN(xen_start_info->pt_base),
972             xen_start_info->nr_pt_frames);
973         xendebug_flags = 0; /* 0xffffffff; */
974
975 #ifdef ADD_ISA_HOLE
976         shift_phys_machine(xen_phys_machine, xen_start_info->nr_pages);
977 #endif
978         XENPRINTF("IdlePTD %p\n", IdlePTD);
979         XENPRINTF("nr_pages: %ld shared_info: 0x%lx flags: 0x%lx pt_base: 0x%lx "
980                   "mod_start: 0x%lx mod_len: 0x%lx\n",
981                   xen_start_info->nr_pages, xen_start_info->shared_info, 
982                   xen_start_info->flags, xen_start_info->pt_base, 
983                   xen_start_info->mod_start, xen_start_info->mod_len);
984
985 #ifdef PAE
986         IdlePDPTnew = (pd_entry_t *)cur_space; cur_space += PAGE_SIZE;
987         bzero(IdlePDPTnew, PAGE_SIZE);
988
989         IdlePDPTnewma =  VTOM(IdlePDPTnew);
990         IdlePTDnew = (pd_entry_t *)cur_space; cur_space += 4*PAGE_SIZE;
991         bzero(IdlePTDnew, 4*PAGE_SIZE);
992
993         for (i = 0; i < 4; i++) 
994                 IdlePTDnewma[i] = VTOM((uint8_t *)IdlePTDnew + i*PAGE_SIZE);
995         /*
996          * L3
997          *
998          * Copy the 4 machine addresses of the new PTDs in to the PDPT
999          * 
1000          */
1001         for (i = 0; i < 4; i++)
1002                 IdlePDPTnew[i] = IdlePTDnewma[i] | PG_V;
1003
1004         __asm__("nop;");
1005         /*
1006          *
1007          * re-map the new PDPT read-only
1008          */
1009         PT_SET_MA(IdlePDPTnew, IdlePDPTnewma | PG_V);
1010         /*
1011          * 
1012          * Unpin the current PDPT
1013          */
1014         xen_pt_unpin(IdlePDPTma);
1015
1016 #endif  /* PAE */
1017
1018         /* Map proc0's KSTACK */
1019         proc0kstack = cur_space; cur_space += (KSTACK_PAGES * PAGE_SIZE);
1020         xc_printf("proc0kstack=%u\n", proc0kstack);
1021
1022         /* vm86/bios stack */
1023         cur_space += PAGE_SIZE;
1024
1025         /* Map space for the vm86 region */
1026         vm86paddr = (vm_offset_t)cur_space;
1027         cur_space += (PAGE_SIZE * 3);
1028
1029         /* allocate 4 pages for bootmem allocator */
1030         bootmem_start = bootmem_current = (char *)cur_space;
1031         cur_space += (4 * PAGE_SIZE);
1032         bootmem_end = (char *)cur_space;
1033         
1034         /* allocate pages for gdt */
1035         gdt = (union descriptor *)cur_space;
1036         cur_space += PAGE_SIZE*ncpus;
1037
1038         /* allocate page for ldt */
1039         ldt = (union descriptor *)cur_space; cur_space += PAGE_SIZE;
1040         cur_space += PAGE_SIZE;
1041         
1042         /* unmap remaining pages from initial chunk
1043          *
1044          */
1045         for (tmpva = cur_space; tmpva < (((uint32_t)&kernbase) + (l1_pages<<PDRSHIFT));
1046              tmpva += PAGE_SIZE) {
1047                 bzero((char *)tmpva, PAGE_SIZE);
1048                 PT_SET_MA(tmpva, (vm_paddr_t)0);
1049         }
1050
1051         PT_UPDATES_FLUSH();
1052
1053         memcpy(((uint8_t *)IdlePTDnew) + ((unsigned int)(KERNBASE >> 18)),
1054             ((uint8_t *)IdlePTD) + ((KERNBASE >> 18) & PAGE_MASK),
1055             l1_pages*sizeof(pt_entry_t));
1056
1057         for (i = 0; i < 4; i++) {
1058                 PT_SET_MA((uint8_t *)IdlePTDnew + i*PAGE_SIZE,
1059                     IdlePTDnewma[i] | PG_V);
1060         }
1061         xen_load_cr3(VTOP(IdlePDPTnew));
1062         xen_pgdpt_pin(VTOM(IdlePDPTnew));
1063
1064         /* allocate remainder of nkpt pages */
1065         cur_space_pt = cur_space;
1066         for (offset = (KERNBASE >> PDRSHIFT), i = l1_pages; i < nkpt;
1067              i++, cur_space += PAGE_SIZE) {
1068                 pdir = (offset + i) / NPDEPG;
1069                 curoffset = ((offset + i) % NPDEPG);
1070                 if (((offset + i) << PDRSHIFT) == VM_MAX_KERNEL_ADDRESS)
1071                         break;
1072
1073                 /*
1074                  * make sure that all the initial page table pages
1075                  * have been zeroed
1076                  */
1077                 PT_SET_MA(cur_space, VTOM(cur_space) | PG_V | PG_RW);
1078                 bzero((char *)cur_space, PAGE_SIZE);
1079                 PT_SET_MA(cur_space, (vm_paddr_t)0);
1080                 xen_pt_pin(VTOM(cur_space));
1081                 xen_queue_pt_update((vm_paddr_t)(IdlePTDnewma[pdir] +
1082                         curoffset*sizeof(vm_paddr_t)), 
1083                     VTOM(cur_space) | PG_KERNEL);
1084                 PT_UPDATES_FLUSH();
1085         }
1086         
1087         for (i = 0; i < 4; i++) {
1088                 pdir = (PTDPTDI + i) / NPDEPG;
1089                 curoffset = (PTDPTDI + i) % NPDEPG;
1090
1091                 xen_queue_pt_update((vm_paddr_t)(IdlePTDnewma[pdir] +
1092                         curoffset*sizeof(vm_paddr_t)), 
1093                     IdlePTDnewma[i] | PG_V);
1094         }
1095
1096         PT_UPDATES_FLUSH();
1097         
1098         IdlePTD = IdlePTDnew;
1099         IdlePDPT = IdlePDPTnew;
1100         IdlePDPTma = IdlePDPTnewma;
1101         
1102         HYPERVISOR_shared_info = (shared_info_t *)cur_space;
1103         cur_space += PAGE_SIZE;
1104
1105         xen_store = (struct xenstore_domain_interface *)cur_space;
1106         cur_space += PAGE_SIZE;
1107
1108         console_page = (char *)cur_space;
1109         cur_space += PAGE_SIZE;
1110         
1111         /*
1112          * shared_info is an unsigned long so this will randomly break if
1113          * it is allocated above 4GB - I guess people are used to that
1114          * sort of thing with Xen ... sigh
1115          */
1116         shinfo = xen_start_info->shared_info;
1117         PT_SET_MA(HYPERVISOR_shared_info, shinfo | PG_KERNEL);
1118         
1119         xc_printf("#4\n");
1120
1121         xen_store_ma = (((vm_paddr_t)xen_start_info->store_mfn) << PAGE_SHIFT);
1122         PT_SET_MA(xen_store, xen_store_ma | PG_KERNEL);
1123         console_page_ma = (((vm_paddr_t)xen_start_info->console.domU.mfn) << PAGE_SHIFT);
1124         PT_SET_MA(console_page, console_page_ma | PG_KERNEL);
1125
1126         xc_printf("#5\n");
1127
1128         set_iopl.iopl = 1;
1129         PANIC_IF(HYPERVISOR_physdev_op(PHYSDEVOP_SET_IOPL, &set_iopl));
1130         xc_printf("#6\n");
1131 #if 0
1132         /* add page table for KERNBASE */
1133         xen_queue_pt_update(IdlePTDma + KPTDI*sizeof(vm_paddr_t), 
1134                             VTOM(cur_space) | PG_KERNEL);
1135         xen_flush_queue();
1136 #ifdef PAE      
1137         xen_queue_pt_update(pdir_shadow_ma[3] + KPTDI*sizeof(vm_paddr_t), 
1138                             VTOM(cur_space) | PG_V | PG_A);
1139 #else
1140         xen_queue_pt_update(pdir_shadow_ma + KPTDI*sizeof(vm_paddr_t), 
1141                             VTOM(cur_space) | PG_V | PG_A);
1142 #endif  
1143         xen_flush_queue();
1144         cur_space += PAGE_SIZE;
1145         xc_printf("#6\n");
1146 #endif /* 0 */  
1147 #ifdef notyet
1148         if (xen_start_info->flags & SIF_INITDOMAIN) {
1149                 /* Map first megabyte */
1150                 for (i = 0; i < (256 << PAGE_SHIFT); i += PAGE_SIZE) 
1151                         PT_SET_MA(KERNBASE + i, i | PG_KERNEL | PG_NC_PCD);
1152                 xen_flush_queue();
1153         }
1154 #endif
1155         /*
1156          * re-map kernel text read-only
1157          *
1158          */
1159         for (i = (((vm_offset_t)&btext) & ~PAGE_MASK);
1160              i < (((vm_offset_t)&etext) & ~PAGE_MASK); i += PAGE_SIZE)
1161                 PT_SET_MA(i, VTOM(i) | PG_V | PG_A);
1162         
1163         xc_printf("#7\n");
1164         physfree = VTOP(cur_space);
1165         init_first = physfree >> PAGE_SHIFT;
1166         IdlePTD = (pd_entry_t *)VTOP(IdlePTD);
1167         IdlePDPT = (pd_entry_t *)VTOP(IdlePDPT);
1168         setup_xen_features();
1169         xc_printf("#8, proc0kstack=%u\n", proc0kstack);
1170 }
1171
1172
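/*
 * Virtual trap table handed to Xen (presumably via HYPERVISOR_set_trap_table()
 * during early startup): the low two flag bits give the privilege level from
 * which the trap may be raised, and bit 2 requests that event delivery be
 * disabled on entry to the handler.
 */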
1173 trap_info_t trap_table[] = {
1174         { 0,   0, GSEL(GCODE_SEL, SEL_KPL), (unsigned long) &IDTVEC(div)},
1175         { 1,   0|4, GSEL(GCODE_SEL, SEL_KPL), (unsigned long) &IDTVEC(dbg)},
1176         { 3,   3|4, GSEL(GCODE_SEL, SEL_KPL), (unsigned long) &IDTVEC(bpt)},
1177         { 4,   3, GSEL(GCODE_SEL, SEL_KPL), (unsigned long) &IDTVEC(ofl)},
1178         /* This is UPL on Linux and KPL on BSD */
1179         { 5,   3, GSEL(GCODE_SEL, SEL_KPL), (unsigned long) &IDTVEC(bnd)},
1180         { 6,   0, GSEL(GCODE_SEL, SEL_KPL), (unsigned long) &IDTVEC(ill)},
1181         { 7,   0|4, GSEL(GCODE_SEL, SEL_KPL), (unsigned long) &IDTVEC(dna)},
1182         /*
1183          * { 8,   0, GSEL(GCODE_SEL, SEL_KPL), (unsigned long) &IDTVEC(XXX)},
1184          *   no handler for double fault
1185          */
1186         { 9,   0, GSEL(GCODE_SEL, SEL_KPL), (unsigned long) &IDTVEC(fpusegm)},
1187         {10,   0, GSEL(GCODE_SEL, SEL_KPL), (unsigned long) &IDTVEC(tss)},
1188         {11,   0, GSEL(GCODE_SEL, SEL_KPL), (unsigned long) &IDTVEC(missing)},
1189         {12,   0, GSEL(GCODE_SEL, SEL_KPL), (unsigned long) &IDTVEC(stk)},
1190         {13,   0, GSEL(GCODE_SEL, SEL_KPL), (unsigned long) &IDTVEC(prot)},
1191         {14,   0|4, GSEL(GCODE_SEL, SEL_KPL), (unsigned long) &IDTVEC(page)},
1192         {15,   0, GSEL(GCODE_SEL, SEL_KPL), (unsigned long) &IDTVEC(rsvd)},
1193         {16,   0, GSEL(GCODE_SEL, SEL_KPL), (unsigned long) &IDTVEC(fpu)},
1194         {17,   0, GSEL(GCODE_SEL, SEL_KPL), (unsigned long) &IDTVEC(align)},
1195         {18,   0, GSEL(GCODE_SEL, SEL_KPL), (unsigned long) &IDTVEC(mchk)},
1196         {19,   0, GSEL(GCODE_SEL, SEL_KPL), (unsigned long) &IDTVEC(xmm)},
1197         {0x80, 3, GSEL(GCODE_SEL, SEL_KPL), (unsigned long) &IDTVEC(int0x80_syscall)},
1198         {  0, 0,           0, 0 }
1199 };
1200
1201 /* Perform a multicall and check that individual calls succeeded. */
1202 int
1203 HYPERVISOR_multicall(struct multicall_entry * call_list, int nr_calls)
1204 {
1205         int ret = 0;
1206         int i;
1207
1208         /* Perform the multicall. */
1209         PANIC_IF(_HYPERVISOR_multicall(call_list, nr_calls));
1210
1211         /* Check the results of individual hypercalls. */
1212         for (i = 0; i < nr_calls; i++)
1213                 if (__predict_false(call_list[i].result < 0))
1214                         ret++;
1215         if (__predict_false(ret > 0))
1216                 panic("%d multicall(s) failed: cpu %d\n",
1217                     ret, smp_processor_id());
1218
1219         /* If we didn't panic already, everything succeeded. */
1220         return (0);
1221 }
1222
1223 /********** CODE WORTH KEEPING ABOVE HERE *****************/ 
1224
1225 void xen_failsafe_handler(void);
1226
1227 void
1228 xen_failsafe_handler(void)
1229 {
1230
1231         panic("xen_failsafe_handler called!\n");
1232 }
1233
1234 void xen_handle_thread_switch(struct pcb *pcb);
1235
1236 /* This is called by cpu_switch() when switching threads. */
1237 /* The pcb arg refers to the process control block of the */
1238 /* next thread which is to run */
1239 void
1240 xen_handle_thread_switch(struct pcb *pcb)
1241 {
1242     uint32_t *a = (uint32_t *)&PCPU_GET(fsgs_gdt)[0];
1243     uint32_t *b = (uint32_t *)&pcb->pcb_fsd;
1244     multicall_entry_t mcl[3];
1245     int i = 0;
1246
1247     /* Notify Xen of task switch */
1248     mcl[i].op = __HYPERVISOR_stack_switch;
1249     mcl[i].args[0] = GSEL(GDATA_SEL, SEL_KPL);
1250     mcl[i++].args[1] = (unsigned long)pcb;
1251
1252     /* Check for update of fsd */
1253     if (*a != *b || *(a+1) != *(b+1)) {
1254         mcl[i].op = __HYPERVISOR_update_descriptor;
1255         *(uint64_t *)&mcl[i].args[0] = vtomach((vm_offset_t)a);
1256         *(uint64_t *)&mcl[i++].args[2] = *(uint64_t *)b;
1257     }    
1258
1259     a += 2;
1260     b += 2;
1261
1262     /* Check for update of gsd */
1263     if (*a != *b || *(a+1) != *(b+1)) {
1264         mcl[i].op = __HYPERVISOR_update_descriptor;
1265         *(uint64_t *)&mcl[i].args[0] = vtomach((vm_offset_t)a);
1266         *(uint64_t *)&mcl[i++].args[2] = *(uint64_t *)b;
1267     }    
1268
1269     (void)HYPERVISOR_multicall(mcl, i);
1270 }