/*
 *
 * Copyright (c) 2004 Christian Limpach.
 * Copyright (c) 2004-2006,2008 Kip Macy
 * All rights reserved.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 * 3. All advertising materials mentioning features or use of this software
 *    must display the following acknowledgement:
 *      This product includes software developed by Christian Limpach.
 * 4. The name of the author may not be used to endorse or promote products
 *    derived from this software without specific prior written permission.
 *
 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
 * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
 * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
 * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT,
 * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
 * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
 * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
 * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
 * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
 */

#include <sys/cdefs.h>
__FBSDID("$FreeBSD$");

#include <sys/param.h>
#include <sys/systm.h>
#include <sys/bus.h>
#include <sys/ktr.h>
#include <sys/lock.h>
#include <sys/mount.h>
#include <sys/malloc.h>
#include <sys/mutex.h>
#include <sys/kernel.h>
#include <sys/proc.h>
#include <sys/reboot.h>
#include <sys/rwlock.h>
#include <sys/sysproto.h>

#include <machine/xen/xen-os.h>

#include <vm/vm.h>
#include <vm/pmap.h>
#include <machine/segments.h>
#include <machine/pcb.h>
#include <machine/stdarg.h>
#include <machine/vmparam.h>
#include <machine/cpu.h>
#include <machine/intr_machdep.h>
#include <machine/md_var.h>
#include <machine/asmacros.h>

#include <xen/hypervisor.h>
#include <machine/xen/xenvar.h>
#include <machine/xen/xenfunc.h>
#include <machine/xen/xenpmap.h>
#include <xen/interface/memory.h>
#include <machine/xen/features.h>
#ifdef SMP
#include <machine/privatespace.h>
#endif

#include <vm/vm_page.h>

#define IDTVEC(name)    __CONCAT(X,name)

extern inthand_t
IDTVEC(div), IDTVEC(dbg), IDTVEC(nmi), IDTVEC(bpt), IDTVEC(ofl),
        IDTVEC(bnd), IDTVEC(ill), IDTVEC(dna), IDTVEC(fpusegm),
        IDTVEC(tss), IDTVEC(missing), IDTVEC(stk), IDTVEC(prot),
        IDTVEC(page), IDTVEC(mchk), IDTVEC(rsvd), IDTVEC(fpu), IDTVEC(align),
        IDTVEC(xmm), IDTVEC(lcall_syscall), IDTVEC(int0x80_syscall);

int xendebug_flags;
start_info_t *xen_start_info;
shared_info_t *HYPERVISOR_shared_info;
xen_pfn_t *xen_machine_phys = machine_to_phys_mapping;
xen_pfn_t *xen_phys_machine;
xen_pfn_t *xen_pfn_to_mfn_frame_list[16];
xen_pfn_t *xen_pfn_to_mfn_frame_list_list;
int preemptable, init_first;
extern unsigned int avail_space;

void ni_cli(void);
void ni_sti(void);

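/*
 * ni_cli()/ni_sti() bracket the paravirtualized cli/sti with explicit
 * saves of the caller's scratch registers; presumably they exist for
 * callers (e.g. assembly stubs) that do not follow the C calling
 * convention.
 */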
void
ni_cli(void)
{
        CTR0(KTR_SPARE2, "ni_cli disabling interrupts");
        __asm__("pushl %edx;"
                "pushl %eax;"
                );
        __cli();
        __asm__("popl %eax;"
                "popl %edx;"
                );
}

void
ni_sti(void)
{
        __asm__("pushl %edx;"
                "pushl %esi;"
                "pushl %eax;"
                );
        __sti();
        __asm__("popl %eax;"
                "popl %esi;"
                "popl %edx;"
                );
}

/*
 * Modify cmd_line by converting ',' to NUL bytes so that it is in a
 * format suitable for the static environment variables.
 */
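/*
 * For example (hypothetical command line), "boot_verbose,kern.hz=100"
 * becomes "boot_verbose\0kern.hz=100\0": a run of NUL-terminated "name"
 * and "name=value" strings, which is the layout the static kernel
 * environment expects.
 */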
char *
xen_setbootenv(char *cmd_line)
{
        char *cmd_line_next;

        /* Skip leading spaces. */
        for (; *cmd_line == ' '; cmd_line++);

        printk("xen_setbootenv(): cmd_line='%s'\n", cmd_line);

        for (cmd_line_next = cmd_line; strsep(&cmd_line_next, ",") != NULL;);
        return cmd_line;
}

static struct
{
        const char      *ev;
        int             mask;
} howto_names[] = {
        {"boot_askname",        RB_ASKNAME},
        {"boot_single", RB_SINGLE},
        {"boot_nosync", RB_NOSYNC},
        {"boot_halt",   RB_HALT},
        {"boot_serial", RB_SERIAL},
        {"boot_cdrom",  RB_CDROM},
        {"boot_gdb",    RB_GDB},
        {"boot_gdb_pause",      RB_RESERVED1},
        {"boot_verbose",        RB_VERBOSE},
        {"boot_multicons",      RB_MULTIPLE},
        {NULL,  0}
};

int
xen_boothowto(char *envp)
{
        int i, howto = 0;

        /* Get the equivalents from the environment. */
        for (i = 0; howto_names[i].ev != NULL; i++)
                if (getenv(howto_names[i].ev) != NULL)
                        howto |= howto_names[i].mask;
        return howto;
}

#define PRINTK_BUFSIZE 1024
void
printk(const char *fmt, ...)
{
        __va_list ap;
        int retval;
        static char buf[PRINTK_BUFSIZE];

        va_start(ap, fmt);
        retval = vsnprintf(buf, PRINTK_BUFSIZE - 1, fmt, ap);
        va_end(ap);
        /*
         * vsnprintf() returns the length the formatted string would have
         * had; clamp it so that truncated output cannot index past the
         * end of the buffer.
         */
        if (retval >= PRINTK_BUFSIZE - 1)
                retval = PRINTK_BUFSIZE - 2;
        buf[retval] = 0;
        (void)HYPERVISOR_console_write(buf, retval);
}

#define XPQUEUE_SIZE 128

struct mmu_log {
        char *file;
        int line;
};

#ifdef SMP
/* Per-CPU queues and indices. */
#ifdef INVARIANTS
static struct mmu_log xpq_queue_log[XEN_LEGACY_MAX_VCPUS][XPQUEUE_SIZE];
#endif

static int xpq_idx[XEN_LEGACY_MAX_VCPUS];
static mmu_update_t xpq_queue[XEN_LEGACY_MAX_VCPUS][XPQUEUE_SIZE];

#define XPQ_QUEUE_LOG xpq_queue_log[vcpu]
#define XPQ_QUEUE xpq_queue[vcpu]
#define XPQ_IDX xpq_idx[vcpu]
#define SET_VCPU() int vcpu = smp_processor_id()
#else

static mmu_update_t xpq_queue[XPQUEUE_SIZE];
#ifdef INVARIANTS
static struct mmu_log xpq_queue_log[XPQUEUE_SIZE];
#endif
static int xpq_idx = 0;

#define XPQ_QUEUE_LOG xpq_queue_log
#define XPQ_QUEUE xpq_queue
#define XPQ_IDX xpq_idx
#define SET_VCPU()
#endif /* !SMP */

#define XPQ_IDX_INC atomic_add_int(&XPQ_IDX, 1);

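/*
 * Usage sketch (illustrative only, not a new interface): callers batch
 * page table writes into the per-CPU queue and let a single
 * HYPERVISOR_mmu_update hypercall apply the whole batch, e.g.
 *
 *      xen_queue_pt_update(ptma, ma | PG_V | PG_RW);
 *      ...
 *      xen_flush_queue();
 *
 * where "ptma" and "ma" stand for a hypothetical page-table-entry machine
 * address and page machine address, respectively.
 */
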
#if 0
static void
xen_dump_queue(void)
{
        int _xpq_idx = XPQ_IDX;
        int i;

        if (_xpq_idx <= 1)
                return;

        printk("xen_dump_queue(): %u entries\n", _xpq_idx);
        for (i = 0; i < _xpq_idx; i++) {
                printk(" val: %llx ptr: %llx\n", XPQ_QUEUE[i].val, XPQ_QUEUE[i].ptr);
        }
}
#endif

static __inline void
_xen_flush_queue(void)
{
        SET_VCPU();
        int _xpq_idx = XPQ_IDX;
        int error, i;

#ifdef INVARIANTS
        if (__predict_true(gdtset))
                CRITICAL_ASSERT(curthread);
#endif

        /* Make sure the index is cleared first to avoid double updates. */
        XPQ_IDX = 0;
        error = HYPERVISOR_mmu_update((mmu_update_t *)&XPQ_QUEUE,
                                      _xpq_idx, NULL, DOMID_SELF);

#if 0
        if (__predict_true(gdtset))
        for (i = _xpq_idx; i > 0;) {
                if (i >= 3) {
                        CTR6(KTR_PMAP, "mmu:val: %lx ptr: %lx val: %lx "
                            "ptr: %lx val: %lx ptr: %lx",
                            (XPQ_QUEUE[i-1].val & 0xffffffff),
                            (XPQ_QUEUE[i-1].ptr & 0xffffffff),
                            (XPQ_QUEUE[i-2].val & 0xffffffff),
                            (XPQ_QUEUE[i-2].ptr & 0xffffffff),
                            (XPQ_QUEUE[i-3].val & 0xffffffff),
                            (XPQ_QUEUE[i-3].ptr & 0xffffffff));
                        i -= 3;
                } else if (i == 2) {
                        CTR4(KTR_PMAP, "mmu: val: %lx ptr: %lx val: %lx ptr: %lx",
                            (XPQ_QUEUE[i-1].val & 0xffffffff),
                            (XPQ_QUEUE[i-1].ptr & 0xffffffff),
                            (XPQ_QUEUE[i-2].val & 0xffffffff),
                            (XPQ_QUEUE[i-2].ptr & 0xffffffff));
                        i = 0;
                } else {
                        CTR2(KTR_PMAP, "mmu: val: %lx ptr: %lx",
                            (XPQ_QUEUE[i-1].val & 0xffffffff),
                            (XPQ_QUEUE[i-1].ptr & 0xffffffff));
                        i = 0;
                }
        }
#endif
        if (__predict_false(error < 0)) {
                for (i = 0; i < _xpq_idx; i++)
                        printf("val: %llx ptr: %llx\n",
                            XPQ_QUEUE[i].val, XPQ_QUEUE[i].ptr);
                panic("Failed to execute MMU updates: %d", error);
        }
}

void
xen_flush_queue(void)
{
        SET_VCPU();

        if (__predict_true(gdtset))
                critical_enter();
        if (XPQ_IDX != 0)
                _xen_flush_queue();
        if (__predict_true(gdtset))
                critical_exit();
}

static __inline void
xen_increment_idx(void)
{
        SET_VCPU();

        XPQ_IDX++;
        if (__predict_false(XPQ_IDX == XPQUEUE_SIZE))
                xen_flush_queue();
}

void
xen_check_queue(void)
{
#ifdef INVARIANTS
        SET_VCPU();

        KASSERT(XPQ_IDX == 0, ("pending operations XPQ_IDX=%d", XPQ_IDX));
#endif
}

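/*
 * Note that MMUEXT_INVLPG_ALL invalidates the address in every VCPU's TLB,
 * not just the calling one's.
 */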
void
xen_invlpg(vm_offset_t va)
{
        struct mmuext_op op;
        op.cmd = MMUEXT_INVLPG_ALL;
        op.arg1.linear_addr = va & ~PAGE_MASK;
        PANIC_IF(HYPERVISOR_mmuext_op(&op, 1, NULL, DOMID_SELF) < 0);
}

void
xen_load_cr3(u_int val)
{
        struct mmuext_op op;
#ifdef INVARIANTS
        SET_VCPU();

        KASSERT(XPQ_IDX == 0, ("pending operations XPQ_IDX=%d", XPQ_IDX));
#endif
        op.cmd = MMUEXT_NEW_BASEPTR;
        op.arg1.mfn = xpmap_ptom(val) >> PAGE_SHIFT;
        PANIC_IF(HYPERVISOR_mmuext_op(&op, 1, NULL, DOMID_SELF) < 0);
}

#ifdef KTR
static __inline u_int
rebp(void)
{
        u_int   data;

        __asm __volatile("movl 4(%%ebp),%0" : "=r" (data));
        return (data);
}
#endif

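/*
 * Under a PV kernel the hardware interrupt flag is not authoritative:
 * event delivery is gated by the per-VCPU evtchn_upcall_mask.
 * read_eflags() therefore folds the virtual mask into PSL_I, and
 * write_eflags() propagates PSL_I back into the mask before restoring
 * the flags.
 */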
u_int
read_eflags(void)
{
        vcpu_info_t *_vcpu;
        u_int eflags;

        eflags = _read_eflags();
        _vcpu = &HYPERVISOR_shared_info->vcpu_info[smp_processor_id()];
        if (_vcpu->evtchn_upcall_mask)
                eflags &= ~PSL_I;

        return (eflags);
}

void
write_eflags(u_int eflags)
{
        u_int intr;

        CTR2(KTR_SPARE2, "%x xen_restore_flags eflags %x", rebp(), eflags);
        intr = ((eflags & PSL_I) == 0);
        __restore_flags(intr);
        _write_eflags(eflags);
}

void
xen_cli(void)
{
        CTR1(KTR_SPARE2, "%x xen_cli disabling interrupts", rebp());
        __cli();
}

void
xen_sti(void)
{
        CTR1(KTR_SPARE2, "%x xen_sti enabling interrupts", rebp());
        __sti();
}

u_int
xen_rcr2(void)
{

        return (HYPERVISOR_shared_info->vcpu_info[curcpu].arch.cr2);
}

void
_xen_machphys_update(vm_paddr_t mfn, vm_paddr_t pfn, char *file, int line)
{
        SET_VCPU();

        if (__predict_true(gdtset))
                critical_enter();
        XPQ_QUEUE[XPQ_IDX].ptr = (mfn << PAGE_SHIFT) | MMU_MACHPHYS_UPDATE;
        XPQ_QUEUE[XPQ_IDX].val = pfn;
#ifdef INVARIANTS
        XPQ_QUEUE_LOG[XPQ_IDX].file = file;
        XPQ_QUEUE_LOG[XPQ_IDX].line = line;
#endif
        xen_increment_idx();
        if (__predict_true(gdtset))
                critical_exit();
}

extern struct rwlock pvh_global_lock;

void
_xen_queue_pt_update(vm_paddr_t ptr, vm_paddr_t val, char *file, int line)
{
        SET_VCPU();

        if (__predict_true(gdtset))
                rw_assert(&pvh_global_lock, RA_WLOCKED);

        KASSERT((ptr & 7) == 0, ("misaligned update"));

        if (__predict_true(gdtset))
                critical_enter();

        XPQ_QUEUE[XPQ_IDX].ptr = ((uint64_t)ptr) | MMU_NORMAL_PT_UPDATE;
        XPQ_QUEUE[XPQ_IDX].val = (uint64_t)val;
#ifdef INVARIANTS
        XPQ_QUEUE_LOG[XPQ_IDX].file = file;
        XPQ_QUEUE_LOG[XPQ_IDX].line = line;
#endif
        xen_increment_idx();
        if (__predict_true(gdtset))
                critical_exit();
}

void
xen_pgdpt_pin(vm_paddr_t ma)
{
        struct mmuext_op op;
        op.cmd = MMUEXT_PIN_L3_TABLE;
        op.arg1.mfn = ma >> PAGE_SHIFT;
        xen_flush_queue();
        PANIC_IF(HYPERVISOR_mmuext_op(&op, 1, NULL, DOMID_SELF) < 0);
}

void
xen_pgd_pin(vm_paddr_t ma)
{
        struct mmuext_op op;
        op.cmd = MMUEXT_PIN_L2_TABLE;
        op.arg1.mfn = ma >> PAGE_SHIFT;
        xen_flush_queue();
        PANIC_IF(HYPERVISOR_mmuext_op(&op, 1, NULL, DOMID_SELF) < 0);
}

void
xen_pgd_unpin(vm_paddr_t ma)
{
        struct mmuext_op op;
        op.cmd = MMUEXT_UNPIN_TABLE;
        op.arg1.mfn = ma >> PAGE_SHIFT;
        xen_flush_queue();
        PANIC_IF(HYPERVISOR_mmuext_op(&op, 1, NULL, DOMID_SELF) < 0);
}

void
xen_pt_pin(vm_paddr_t ma)
{
        struct mmuext_op op;
        op.cmd = MMUEXT_PIN_L1_TABLE;
        op.arg1.mfn = ma >> PAGE_SHIFT;
        xen_flush_queue();
        PANIC_IF(HYPERVISOR_mmuext_op(&op, 1, NULL, DOMID_SELF) < 0);
}

void
xen_pt_unpin(vm_paddr_t ma)
{
        struct mmuext_op op;
        op.cmd = MMUEXT_UNPIN_TABLE;
        op.arg1.mfn = ma >> PAGE_SHIFT;
        xen_flush_queue();
        PANIC_IF(HYPERVISOR_mmuext_op(&op, 1, NULL, DOMID_SELF) < 0);
}

void
xen_set_ldt(vm_paddr_t ptr, unsigned long len)
{
        struct mmuext_op op;
        op.cmd = MMUEXT_SET_LDT;
        op.arg1.linear_addr = ptr;
        op.arg2.nr_ents = len;
        xen_flush_queue();
        PANIC_IF(HYPERVISOR_mmuext_op(&op, 1, NULL, DOMID_SELF) < 0);
}

void
xen_tlb_flush(void)
{
        struct mmuext_op op;
        op.cmd = MMUEXT_TLB_FLUSH_LOCAL;
        xen_flush_queue();
        PANIC_IF(HYPERVISOR_mmuext_op(&op, 1, NULL, DOMID_SELF) < 0);
}

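/*
 * HYPERVISOR_update_descriptor() takes a machine address.  Under PV the
 * PTE already holds a machine frame, so (*ptp & PG_FRAME) plus the page
 * offset of the descriptor is exactly the address the hypercall expects.
 */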
void
xen_update_descriptor(union descriptor *table, union descriptor *entry)
{
        vm_paddr_t pa;
        pt_entry_t *ptp;

        ptp = vtopte((vm_offset_t)table);
        pa = (*ptp & PG_FRAME) | ((vm_offset_t)table & PAGE_MASK);
        if (HYPERVISOR_update_descriptor(pa, *(uint64_t *)entry))
                panic("HYPERVISOR_update_descriptor failed\n");
}

#if 0
/*
 * Bitmap is indexed by page number. If bit is set, the page is part of a
 * xen_create_contiguous_region() area of memory.
 */
unsigned long *contiguous_bitmap;

static void
contiguous_bitmap_set(unsigned long first_page, unsigned long nr_pages)
{
        unsigned long start_off, end_off, curr_idx, end_idx;

        curr_idx  = first_page / BITS_PER_LONG;
        start_off = first_page & (BITS_PER_LONG-1);
        end_idx   = (first_page + nr_pages) / BITS_PER_LONG;
        end_off   = (first_page + nr_pages) & (BITS_PER_LONG-1);

        if (curr_idx == end_idx) {
                contiguous_bitmap[curr_idx] |=
                        ((1UL<<end_off)-1) & -(1UL<<start_off);
        } else {
                contiguous_bitmap[curr_idx] |= -(1UL<<start_off);
                while ( ++curr_idx < end_idx )
                        contiguous_bitmap[curr_idx] = ~0UL;
                contiguous_bitmap[curr_idx] |= (1UL<<end_off)-1;
        }
}
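
/*
 * Worked example (assuming BITS_PER_LONG == 32): first_page = 3 and
 * nr_pages = 2 give start_off = 3 and end_off = 5 in the same word, so
 * the mask is ((1UL<<5)-1) & -(1UL<<3) == 0x18, setting bits 3 and 4.
 */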

static void
contiguous_bitmap_clear(unsigned long first_page, unsigned long nr_pages)
{
        unsigned long start_off, end_off, curr_idx, end_idx;

        curr_idx  = first_page / BITS_PER_LONG;
        start_off = first_page & (BITS_PER_LONG-1);
        end_idx   = (first_page + nr_pages) / BITS_PER_LONG;
        end_off   = (first_page + nr_pages) & (BITS_PER_LONG-1);

        if (curr_idx == end_idx) {
                contiguous_bitmap[curr_idx] &=
                        -(1UL<<end_off) | ((1UL<<start_off)-1);
        } else {
                contiguous_bitmap[curr_idx] &= (1UL<<start_off)-1;
                while ( ++curr_idx != end_idx )
                        contiguous_bitmap[curr_idx] = 0;
                contiguous_bitmap[curr_idx] &= -(1UL<<end_off);
        }
}
#endif

/* Ensure multi-page extents are contiguous in machine memory. */
int
xen_create_contiguous_region(vm_page_t pages, int npages)
{
        unsigned long  mfn, i, flags;
        int order;
        struct xen_memory_reservation reservation = {
                .nr_extents   = 1,
                .extent_order = 0,
                .domid        = DOMID_SELF
        };
        set_xen_guest_handle(reservation.extent_start, &mfn);

        balloon_lock(flags);

        /* We can currently only handle power-of-two allocations. */
        PANIC_IF(ffs(npages) != fls(npages));

        /* 0. Determine the order. */
        order = (ffs(npages) == fls(npages)) ? fls(npages) - 1 : fls(npages);
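        /* E.g. npages = 8: ffs(8) == fls(8) == 4, so order = 3 (2^3 pages). */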

        /* 1. Give away the machine pages. */
        for (i = 0; i < (1 << order); i++) {
                int pfn;
                pfn = VM_PAGE_TO_PHYS(&pages[i]) >> PAGE_SHIFT;
                mfn = PFNTOMFN(pfn);
                PFNTOMFN(pfn) = INVALID_P2M_ENTRY;
                PANIC_IF(HYPERVISOR_memory_op(XENMEM_decrease_reservation, &reservation) != 1);
        }

        /* 2. Get a new contiguous memory extent. */
        reservation.extent_order = order;
        /*
         * xenlinux hardcodes this because of aacraid - maybe set to 0
         * if we're not running with a broken driver XXXEN
         */
        reservation.address_bits = 31;
        if (HYPERVISOR_memory_op(XENMEM_increase_reservation, &reservation) != 1)
                goto fail;

        /* 3. Map the new extent in place of the old pages. */
        for (i = 0; i < (1 << order); i++) {
                int pfn;
                pfn = VM_PAGE_TO_PHYS(&pages[i]) >> PAGE_SHIFT;
                xen_machphys_update(mfn+i, pfn);
                PFNTOMFN(pfn) = mfn+i;
        }

        xen_tlb_flush();

#if 0
        contiguous_bitmap_set(VM_PAGE_TO_PHYS(&pages[0]) >> PAGE_SHIFT, 1UL << order);
#endif

        balloon_unlock(flags);

        return 0;

 fail:
        reservation.extent_order = 0;
        reservation.address_bits = 0;

        for (i = 0; i < (1 << order); i++) {
                int pfn;
                pfn = VM_PAGE_TO_PHYS(&pages[i]) >> PAGE_SHIFT;
                PANIC_IF(HYPERVISOR_memory_op(
                        XENMEM_increase_reservation, &reservation) != 1);
                xen_machphys_update(mfn, pfn);
                PFNTOMFN(pfn) = mfn;
        }

        xen_tlb_flush();

        balloon_unlock(flags);

        return ENOMEM;
}
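
/*
 * Illustrative (hypothetical) use: make a small power-of-two run of pages
 * machine-contiguous, e.g. for a device that needs a physically contiguous
 * DMA buffer, and undo it when done:
 *
 *      if (xen_create_contiguous_region(pages, 4) != 0)
 *              ... fall back to single-page segments ...
 *      ...
 *      xen_destroy_contiguous_region(va, 4);   (va maps pages[0..3])
 */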

void
xen_destroy_contiguous_region(void *addr, int npages)
{
        unsigned long  mfn, i, flags, order, pfn0;
        struct xen_memory_reservation reservation = {
                .nr_extents   = 1,
                .extent_order = 0,
                .domid        = DOMID_SELF
        };
        set_xen_guest_handle(reservation.extent_start, &mfn);

        pfn0 = vtophys(addr) >> PAGE_SHIFT;
#if 0
        scrub_pages(vstart, 1 << order);
#endif
        /* We can currently only handle power-of-two allocations. */
        PANIC_IF(ffs(npages) != fls(npages));

        /* 0. Determine the order. */
        order = (ffs(npages) == fls(npages)) ? fls(npages) - 1 : fls(npages);

        balloon_lock(flags);

#if 0
        contiguous_bitmap_clear(vtophys(addr) >> PAGE_SHIFT, 1UL << order);
#endif

        /* 1. Zap the current PTEs, giving away the underlying pages. */
        for (i = 0; i < (1 << order); i++) {
                int pfn;
                uint64_t new_val = 0;
                pfn = vtomach((char *)addr + i*PAGE_SIZE) >> PAGE_SHIFT;

                PANIC_IF(HYPERVISOR_update_va_mapping((vm_offset_t)((char *)addr + (i * PAGE_SIZE)), new_val, 0));
                PFNTOMFN(pfn) = INVALID_P2M_ENTRY;
                PANIC_IF(HYPERVISOR_memory_op(
                        XENMEM_decrease_reservation, &reservation) != 1);
        }

        /* 2. Map new pages in place of the old pages. */
        for (i = 0; i < (1 << order); i++) {
                int pfn;
                uint64_t new_val;
                pfn = pfn0 + i;
                PANIC_IF(HYPERVISOR_memory_op(XENMEM_increase_reservation, &reservation) != 1);

                new_val = mfn << PAGE_SHIFT;
                PANIC_IF(HYPERVISOR_update_va_mapping((vm_offset_t)addr + (i * PAGE_SIZE),
                                                      new_val, PG_KERNEL));
                xen_machphys_update(mfn, pfn);
                PFNTOMFN(pfn) = mfn;
        }

        xen_tlb_flush();

        balloon_unlock(flags);
}

extern  vm_offset_t     proc0kstack;
extern int vm86paddr, vm86phystk;
char *bootmem_start, *bootmem_current, *bootmem_end;

pteinfo_t *pteinfo_list;
void initvalues(start_info_t *startinfo);

struct xenstore_domain_interface;
extern struct xenstore_domain_interface *xen_store;

char *console_page;

void *
bootmem_alloc(unsigned int size)
{
        char *retptr;

        retptr = bootmem_current;
        PANIC_IF(retptr + size > bootmem_end);
        bootmem_current += size;

        return retptr;
}

void
bootmem_free(void *ptr, unsigned int size)
{
        char *tptr;

        tptr = ptr;
        PANIC_IF(tptr != bootmem_current - size ||
                bootmem_current - size < bootmem_start);

        bootmem_current -= size;
}

#if 0
static vm_paddr_t
xpmap_mtop2(vm_paddr_t mpa)
{
        return ((machine_to_phys_mapping[mpa >> PAGE_SHIFT] << PAGE_SHIFT)
            ) | (mpa & ~PG_FRAME);
}

static pd_entry_t
xpmap_get_bootpde(vm_paddr_t va)
{

        return ((pd_entry_t *)xen_start_info->pt_base)[va >> 22];
}

static pd_entry_t
xpmap_get_vbootpde(vm_paddr_t va)
{
        pd_entry_t pde;

        pde = xpmap_get_bootpde(va);
        if ((pde & PG_V) == 0)
                return (pde & ~PG_FRAME);
        return (pde & ~PG_FRAME) |
                (xpmap_mtop2(pde & PG_FRAME) + KERNBASE);
}
static pt_entry_t *
xpmap_get_bootptep(vm_paddr_t va)
{
        pd_entry_t pde;

        pde = xpmap_get_vbootpde(va);
        if ((pde & PG_V) == 0)
                return (void *)-1;
#define PT_MASK         0x003ff000      /* page table address bits */
        return &(((pt_entry_t *)(pde & PG_FRAME))[(va & PT_MASK) >> PAGE_SHIFT]);
}

static pt_entry_t
xpmap_get_bootpte(vm_paddr_t va)
{

        return xpmap_get_bootptep(va)[0];
}
#endif

#ifdef ADD_ISA_HOLE
static void
shift_phys_machine(unsigned long *phys_machine, int nr_pages)
{

        unsigned long *tmp_page, *current_page, *next_page;
        int i;

        tmp_page = bootmem_alloc(PAGE_SIZE);
        current_page = phys_machine + nr_pages - (PAGE_SIZE/sizeof(unsigned long));
        next_page = current_page - (PAGE_SIZE/sizeof(unsigned long));
        bcopy(phys_machine, tmp_page, PAGE_SIZE);

        while (current_page > phys_machine) {
                /* Save the next page. */
                bcopy(next_page, tmp_page, PAGE_SIZE);
                /* Shift down the page. */
                bcopy(current_page, next_page, PAGE_SIZE);
                /* Finish the swap. */
                bcopy(tmp_page, current_page, PAGE_SIZE);

                current_page -= (PAGE_SIZE/sizeof(unsigned long));
                next_page -= (PAGE_SIZE/sizeof(unsigned long));
        }
        bootmem_free(tmp_page, PAGE_SIZE);

        for (i = 0; i < nr_pages; i++) {
                xen_machphys_update(phys_machine[i], i);
        }
        memset(phys_machine, INVALID_P2M_ENTRY, PAGE_SIZE);
}
#endif /* ADD_ISA_HOLE */

/*
 * Build a directory of the pages that make up our Physical to Machine
 * mapping table. The Xen suspend/restore code uses this to find our
 * mapping table.
 */
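/*
 * With 4 KB pages and 4-byte xen_pfn_t entries, FPP is 1024: each
 * frame-list page indexes 1024 P2M pages (4 GB worth of pseudo-physical
 * memory), so the 16-entry xen_pfn_to_mfn_frame_list[] directory covers
 * up to 64 GB, comfortably more than an i386 PV domain can use.
 */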
static void
init_frame_list_list(void *arg)
{
        unsigned long nr_pages = xen_start_info->nr_pages;
#define FPP     (PAGE_SIZE/sizeof(xen_pfn_t))
        int i, j, k;

        xen_pfn_to_mfn_frame_list_list = malloc(PAGE_SIZE, M_DEVBUF, M_WAITOK);
        for (i = 0, j = 0, k = -1; i < nr_pages;
             i += FPP, j++) {
                if ((j & (FPP - 1)) == 0) {
                        k++;
                        xen_pfn_to_mfn_frame_list[k] =
                                malloc(PAGE_SIZE, M_DEVBUF, M_WAITOK);
                        xen_pfn_to_mfn_frame_list_list[k] =
                                VTOMFN(xen_pfn_to_mfn_frame_list[k]);
                        j = 0;
                }
                xen_pfn_to_mfn_frame_list[k][j] =
                        VTOMFN(&xen_phys_machine[i]);
        }

        HYPERVISOR_shared_info->arch.max_pfn = nr_pages;
        HYPERVISOR_shared_info->arch.pfn_to_mfn_frame_list_list
                = VTOMFN(xen_pfn_to_mfn_frame_list_list);
}
SYSINIT(init_fll, SI_SUB_DEVFS, SI_ORDER_ANY, init_frame_list_list, NULL);

extern unsigned long physfree;

int pdir, curoffset;
extern int nkpt;

extern uint32_t kernbase;

void
initvalues(start_info_t *startinfo)
{
        vm_offset_t cur_space, cur_space_pt;
        struct physdev_set_iopl set_iopl;

        int l3_pages, l2_pages, l1_pages, offset;
        vm_paddr_t console_page_ma, xen_store_ma;
        vm_offset_t tmpva;
        vm_paddr_t shinfo;
#ifdef PAE
        vm_paddr_t IdlePDPTma, IdlePDPTnewma;
        vm_paddr_t IdlePTDnewma[4];
        pd_entry_t *IdlePDPTnew, *IdlePTDnew;
        vm_paddr_t IdlePTDma[4];
#else
        vm_paddr_t IdlePTDma[1];
#endif
        unsigned long i;
        int ncpus = MAXCPU;

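        /*
         * Size the initial kernel page table: enough to map the pages the
         * domain builder handed us, but never past the end of the kernel
         * PTD span and never into the hypervisor hole that starts at
         * HYPERVISOR_VIRT_START.
         */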
        nkpt = min(
                min(
                        max((startinfo->nr_pages >> NPGPTD_SHIFT), nkpt),
                    NPGPTD*NPDEPG - KPTDI),
                    (HYPERVISOR_VIRT_START - KERNBASE) >> PDRSHIFT);

        HYPERVISOR_vm_assist(VMASST_CMD_enable, VMASST_TYPE_4gb_segments);
#ifdef notyet
        /*
         * need to install handler
         */
        HYPERVISOR_vm_assist(VMASST_CMD_enable, VMASST_TYPE_4gb_segments_notify);
#endif
        xen_start_info = startinfo;
        xen_phys_machine = (xen_pfn_t *)startinfo->mfn_list;

        IdlePTD = (pd_entry_t *)((uint8_t *)startinfo->pt_base + PAGE_SIZE);
        l1_pages = 0;

#ifdef PAE
        l3_pages = 1;
        l2_pages = 0;
        IdlePDPT = (pd_entry_t *)startinfo->pt_base;
        IdlePDPTma = VTOM(startinfo->pt_base);
        for (i = (KERNBASE >> 30);
             (i < 4) && (IdlePDPT[i] != 0); i++)
                l2_pages++;
        /*
         * Note that only one page directory has been allocated at this
         * point.  Thus, if KERNBASE's PDPT slot turned out to be empty,
         * l2_pages would still be zero; it is clamped to one just below.
         */
        for (i = 0; i < l2_pages; i++)
                IdlePTDma[i] = VTOM(IdlePTD + i*PAGE_SIZE);

        l2_pages = (l2_pages == 0) ? 1 : l2_pages;
#else
        l3_pages = 0;
        l2_pages = 1;
#endif
        for (i = (((KERNBASE>>18) & PAGE_MASK)>>PAGE_SHIFT);
             (i<l2_pages*NPDEPG) && (i<(VM_MAX_KERNEL_ADDRESS>>PDRSHIFT)); i++) {

                if (IdlePTD[i] == 0)
                        break;
                l1_pages++;
        }

        /* number of pages allocated after the pts + 1 */
        cur_space = xen_start_info->pt_base +
            (l3_pages + l2_pages + l1_pages + 1)*PAGE_SIZE;

        printk("initvalues(): wooh - availmem=%x,%x\n", avail_space, cur_space);

        printk("KERNBASE=%x,pt_base=%x, VTOPFN(base)=%x, nr_pt_frames=%x\n",
            KERNBASE,xen_start_info->pt_base, VTOPFN(xen_start_info->pt_base),
            xen_start_info->nr_pt_frames);
        xendebug_flags = 0; /* 0xffffffff; */

#ifdef ADD_ISA_HOLE
        shift_phys_machine(xen_phys_machine, xen_start_info->nr_pages);
#endif
        XENPRINTF("IdlePTD %p\n", IdlePTD);
        XENPRINTF("nr_pages: %ld shared_info: 0x%lx flags: 0x%lx pt_base: 0x%lx "
                  "mod_start: 0x%lx mod_len: 0x%lx\n",
                  xen_start_info->nr_pages, xen_start_info->shared_info,
                  xen_start_info->flags, xen_start_info->pt_base,
                  xen_start_info->mod_start, xen_start_info->mod_len);

#ifdef PAE
        IdlePDPTnew = (pd_entry_t *)cur_space; cur_space += PAGE_SIZE;
        bzero(IdlePDPTnew, PAGE_SIZE);

        IdlePDPTnewma = VTOM(IdlePDPTnew);
        IdlePTDnew = (pd_entry_t *)cur_space; cur_space += 4*PAGE_SIZE;
        bzero(IdlePTDnew, 4*PAGE_SIZE);

        for (i = 0; i < 4; i++)
                IdlePTDnewma[i] = VTOM((uint8_t *)IdlePTDnew + i*PAGE_SIZE);
        /*
         * L3: copy the four machine addresses of the new PTDs into the
         * PDPT.
         */
        for (i = 0; i < 4; i++)
                IdlePDPTnew[i] = IdlePTDnewma[i] | PG_V;

        __asm__("nop;");
        /* Re-map the new PDPT read-only. */
        PT_SET_MA(IdlePDPTnew, IdlePDPTnewma | PG_V);
        /* Unpin the current PDPT. */
        xen_pt_unpin(IdlePDPTma);

#endif  /* PAE */

        /* Map proc0's KSTACK. */
        proc0kstack = cur_space; cur_space += (KSTACK_PAGES * PAGE_SIZE);
        printk("proc0kstack=%u\n", proc0kstack);

        /* vm86/bios stack */
        cur_space += PAGE_SIZE;

        /* Map space for the vm86 region. */
        vm86paddr = (vm_offset_t)cur_space;
        cur_space += (PAGE_SIZE * 3);

        /* Allocate 4 pages for the bootmem allocator. */
        bootmem_start = bootmem_current = (char *)cur_space;
        cur_space += (4 * PAGE_SIZE);
        bootmem_end = (char *)cur_space;

        /* Allocate pages for the GDTs. */
        gdt = (union descriptor *)cur_space;
        cur_space += PAGE_SIZE*ncpus;

        /* Allocate a page for the LDT. */
        ldt = (union descriptor *)cur_space; cur_space += PAGE_SIZE;
        cur_space += PAGE_SIZE;

        /* Unmap the remaining pages from the initial chunk. */
        for (tmpva = cur_space; tmpva < (((uint32_t)&kernbase) + (l1_pages<<PDRSHIFT));
             tmpva += PAGE_SIZE) {
                bzero((char *)tmpva, PAGE_SIZE);
                PT_SET_MA(tmpva, (vm_paddr_t)0);
        }

        PT_UPDATES_FLUSH();

        memcpy(((uint8_t *)IdlePTDnew) + ((unsigned int)(KERNBASE >> 18)),
            ((uint8_t *)IdlePTD) + ((KERNBASE >> 18) & PAGE_MASK),
            l1_pages*sizeof(pt_entry_t));

        for (i = 0; i < 4; i++) {
                PT_SET_MA((uint8_t *)IdlePTDnew + i*PAGE_SIZE,
                    IdlePTDnewma[i] | PG_V);
        }
        xen_load_cr3(VTOP(IdlePDPTnew));
        xen_pgdpt_pin(VTOM(IdlePDPTnew));

        /* Allocate the remainder of the nkpt pages. */
        cur_space_pt = cur_space;
        for (offset = (KERNBASE >> PDRSHIFT), i = l1_pages; i < nkpt;
             i++, cur_space += PAGE_SIZE) {
                pdir = (offset + i) / NPDEPG;
                curoffset = ((offset + i) % NPDEPG);
                if (((offset + i) << PDRSHIFT) == VM_MAX_KERNEL_ADDRESS)
                        break;

                /*
                 * Make sure that all of the initial page table pages
                 * have been zeroed.
                 */
                PT_SET_MA(cur_space, VTOM(cur_space) | PG_V | PG_RW);
                bzero((char *)cur_space, PAGE_SIZE);
                PT_SET_MA(cur_space, (vm_paddr_t)0);
                xen_pt_pin(VTOM(cur_space));
                xen_queue_pt_update((vm_paddr_t)(IdlePTDnewma[pdir] +
                        curoffset*sizeof(vm_paddr_t)),
                    VTOM(cur_space) | PG_KERNEL);
                PT_UPDATES_FLUSH();
        }

        for (i = 0; i < 4; i++) {
                pdir = (PTDPTDI + i) / NPDEPG;
                curoffset = (PTDPTDI + i) % NPDEPG;

                xen_queue_pt_update((vm_paddr_t)(IdlePTDnewma[pdir] +
                        curoffset*sizeof(vm_paddr_t)),
                    IdlePTDnewma[i] | PG_V);
        }

        PT_UPDATES_FLUSH();

        IdlePTD = IdlePTDnew;
        IdlePDPT = IdlePDPTnew;
        IdlePDPTma = IdlePDPTnewma;

        HYPERVISOR_shared_info = (shared_info_t *)cur_space;
        cur_space += PAGE_SIZE;

        xen_store = (struct xenstore_domain_interface *)cur_space;
        cur_space += PAGE_SIZE;

        console_page = (char *)cur_space;
        cur_space += PAGE_SIZE;

        /*
         * shared_info is an unsigned long, so this will randomly break if
         * it is allocated above 4GB - I guess people are used to that
         * sort of thing with Xen ... sigh.
         */
        shinfo = xen_start_info->shared_info;
        PT_SET_MA(HYPERVISOR_shared_info, shinfo | PG_KERNEL);

        printk("#4\n");

        xen_store_ma = (((vm_paddr_t)xen_start_info->store_mfn) << PAGE_SHIFT);
        PT_SET_MA(xen_store, xen_store_ma | PG_KERNEL);
        console_page_ma = (((vm_paddr_t)xen_start_info->console.domU.mfn) << PAGE_SHIFT);
        PT_SET_MA(console_page, console_page_ma | PG_KERNEL);

        printk("#5\n");

        set_iopl.iopl = 1;
        PANIC_IF(HYPERVISOR_physdev_op(PHYSDEVOP_SET_IOPL, &set_iopl));
        printk("#6\n");
#if 0
        /* Add a page table for KERNBASE. */
        xen_queue_pt_update(IdlePTDma + KPTDI*sizeof(vm_paddr_t),
                            VTOM(cur_space) | PG_KERNEL);
        xen_flush_queue();
#ifdef PAE
        xen_queue_pt_update(pdir_shadow_ma[3] + KPTDI*sizeof(vm_paddr_t),
                            VTOM(cur_space) | PG_V | PG_A);
#else
        xen_queue_pt_update(pdir_shadow_ma + KPTDI*sizeof(vm_paddr_t),
                            VTOM(cur_space) | PG_V | PG_A);
#endif
        xen_flush_queue();
        cur_space += PAGE_SIZE;
        printk("#6\n");
#endif /* 0 */
#ifdef notyet
        if (xen_start_info->flags & SIF_INITDOMAIN) {
                /* Map the first megabyte. */
                for (i = 0; i < (256 << PAGE_SHIFT); i += PAGE_SIZE)
                        PT_SET_MA(KERNBASE + i, i | PG_KERNEL | PG_NC_PCD);
                xen_flush_queue();
        }
#endif
        /* Re-map the kernel text read-only. */
        for (i = (((vm_offset_t)&btext) & ~PAGE_MASK);
             i < (((vm_offset_t)&etext) & ~PAGE_MASK); i += PAGE_SIZE)
                PT_SET_MA(i, VTOM(i) | PG_V | PG_A);

        printk("#7\n");
        physfree = VTOP(cur_space);
        init_first = physfree >> PAGE_SHIFT;
        IdlePTD = (pd_entry_t *)VTOP(IdlePTD);
        IdlePDPT = (pd_entry_t *)VTOP(IdlePDPT);
        setup_xen_features();
        printk("#8, proc0kstack=%u\n", proc0kstack);
}

trap_info_t trap_table[] = {
        { 0,   0, GSEL(GCODE_SEL, SEL_KPL), (unsigned long) &IDTVEC(div)},
        { 1,   0|4, GSEL(GCODE_SEL, SEL_KPL), (unsigned long) &IDTVEC(dbg)},
        { 3,   3|4, GSEL(GCODE_SEL, SEL_KPL), (unsigned long) &IDTVEC(bpt)},
        { 4,   3, GSEL(GCODE_SEL, SEL_KPL), (unsigned long) &IDTVEC(ofl)},
        /* This is UPL on Linux and KPL on BSD. */
        { 5,   3, GSEL(GCODE_SEL, SEL_KPL), (unsigned long) &IDTVEC(bnd)},
        { 6,   0, GSEL(GCODE_SEL, SEL_KPL), (unsigned long) &IDTVEC(ill)},
        { 7,   0|4, GSEL(GCODE_SEL, SEL_KPL), (unsigned long) &IDTVEC(dna)},
        /*
         * { 8,   0, GSEL(GCODE_SEL, SEL_KPL), (unsigned long) &IDTVEC(XXX)},
         *   no handler for double fault
         */
        { 9,   0, GSEL(GCODE_SEL, SEL_KPL), (unsigned long) &IDTVEC(fpusegm)},
        {10,   0, GSEL(GCODE_SEL, SEL_KPL), (unsigned long) &IDTVEC(tss)},
        {11,   0, GSEL(GCODE_SEL, SEL_KPL), (unsigned long) &IDTVEC(missing)},
        {12,   0, GSEL(GCODE_SEL, SEL_KPL), (unsigned long) &IDTVEC(stk)},
        {13,   0, GSEL(GCODE_SEL, SEL_KPL), (unsigned long) &IDTVEC(prot)},
        {14,   0|4, GSEL(GCODE_SEL, SEL_KPL), (unsigned long) &IDTVEC(page)},
        {15,   0, GSEL(GCODE_SEL, SEL_KPL), (unsigned long) &IDTVEC(rsvd)},
        {16,   0, GSEL(GCODE_SEL, SEL_KPL), (unsigned long) &IDTVEC(fpu)},
        {17,   0, GSEL(GCODE_SEL, SEL_KPL), (unsigned long) &IDTVEC(align)},
        {18,   0, GSEL(GCODE_SEL, SEL_KPL), (unsigned long) &IDTVEC(mchk)},
        {19,   0, GSEL(GCODE_SEL, SEL_KPL), (unsigned long) &IDTVEC(xmm)},
        {0x80, 3, GSEL(GCODE_SEL, SEL_KPL), (unsigned long) &IDTVEC(int0x80_syscall)},
        {  0, 0,           0, 0 }
};

/* Perform a multicall and check that the individual calls succeeded. */
int
HYPERVISOR_multicall(struct multicall_entry *call_list, int nr_calls)
{
        int ret = 0;
        int i;

        /* Perform the multicall. */
        PANIC_IF(_HYPERVISOR_multicall(call_list, nr_calls));

        /* Check the results of the individual hypercalls. */
        for (i = 0; i < nr_calls; i++)
                if (unlikely(call_list[i].result < 0))
                        ret++;
        if (unlikely(ret > 0))
                panic("%d multicall(s) failed: cpu %d\n",
                    ret, smp_processor_id());

        /* If we didn't panic already, everything succeeded. */
        return (0);
}
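
/*
 * For a real multicall batch, see xen_handle_thread_switch() below, which
 * folds a stack switch and up to two descriptor updates into a single
 * hypercall.
 */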

/********** CODE WORTH KEEPING ABOVE HERE *****************/

void xen_failsafe_handler(void);

void
xen_failsafe_handler(void)
{

        panic("xen_failsafe_handler called!\n");
}

void xen_handle_thread_switch(struct pcb *pcb);

/*
 * This is called by cpu_switch() when switching threads.  The pcb arg
 * refers to the process control block of the next thread which is to run.
 */
void
xen_handle_thread_switch(struct pcb *pcb)
{
        uint32_t *a = (uint32_t *)&PCPU_GET(fsgs_gdt)[0];
        uint32_t *b = (uint32_t *)&pcb->pcb_fsd;
        multicall_entry_t mcl[3];
        int i = 0;

        /* Notify Xen of the task switch. */
        mcl[i].op = __HYPERVISOR_stack_switch;
        mcl[i].args[0] = GSEL(GDATA_SEL, SEL_KPL);
        mcl[i++].args[1] = (unsigned long)pcb;

        /* Check for an update of the %fs descriptor. */
        if (*a != *b || *(a+1) != *(b+1)) {
                mcl[i].op = __HYPERVISOR_update_descriptor;
                *(uint64_t *)&mcl[i].args[0] = vtomach((vm_offset_t)a);
                *(uint64_t *)&mcl[i++].args[2] = *(uint64_t *)b;
        }

        a += 2;
        b += 2;

        /* Check for an update of the %gs descriptor. */
        if (*a != *b || *(a+1) != *(b+1)) {
                mcl[i].op = __HYPERVISOR_update_descriptor;
                *(uint64_t *)&mcl[i].args[0] = vtomach((vm_offset_t)a);
                *(uint64_t *)&mcl[i++].args[2] = *(uint64_t *)b;
        }

        (void)HYPERVISOR_multicall(mcl, i);
}