/*
 *
 * Copyright (c) 2004 Christian Limpach.
 * Copyright (c) 2004-2006,2008 Kip Macy
 * All rights reserved.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 * 3. All advertising materials mentioning features or use of this software
 *    must display the following acknowledgement:
 *      This product includes software developed by Christian Limpach.
 * 4. The name of the author may not be used to endorse or promote products
 *    derived from this software without specific prior written permission.
 *
 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
 * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
 * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
 * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT,
 * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
 * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
 * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
 * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
 * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
 */

#include <sys/cdefs.h>
__FBSDID("$FreeBSD$");

#include <sys/param.h>
#include <sys/systm.h>
#include <sys/bus.h>
#include <sys/ktr.h>
#include <sys/lock.h>
#include <sys/mount.h>
#include <sys/malloc.h>
#include <sys/mutex.h>
#include <sys/kernel.h>
#include <sys/proc.h>
#include <sys/reboot.h>
#include <sys/rwlock.h>
#include <sys/sysproto.h>
#include <sys/boot.h>

#include <xen/xen-os.h>

#include <vm/vm.h>
#include <vm/pmap.h>
#include <machine/segments.h>
#include <machine/pcb.h>
#include <machine/stdarg.h>
#include <machine/vmparam.h>
#include <machine/cpu.h>
#include <machine/intr_machdep.h>
#include <machine/md_var.h>
#include <machine/asmacros.h>

#include <xen/hypervisor.h>
#include <machine/xen/xenvar.h>
#include <machine/xen/xenfunc.h>
#include <machine/xen/xenpmap.h>
#include <xen/interface/memory.h>
#include <machine/xen/features.h>
#ifdef SMP
#include <machine/privatespace.h>
#endif

#include <vm/vm_page.h>

#define IDTVEC(name)    __CONCAT(X,name)

extern inthand_t
IDTVEC(div), IDTVEC(dbg), IDTVEC(nmi), IDTVEC(bpt), IDTVEC(ofl),
        IDTVEC(bnd), IDTVEC(ill), IDTVEC(dna), IDTVEC(fpusegm),
        IDTVEC(tss), IDTVEC(missing), IDTVEC(stk), IDTVEC(prot),
        IDTVEC(page), IDTVEC(mchk), IDTVEC(rsvd), IDTVEC(fpu), IDTVEC(align),
        IDTVEC(xmm), IDTVEC(lcall_syscall), IDTVEC(int0x80_syscall);

int xendebug_flags;
start_info_t *xen_start_info;
shared_info_t *HYPERVISOR_shared_info;
xen_pfn_t *xen_machine_phys = machine_to_phys_mapping;
xen_pfn_t *xen_phys_machine;
xen_pfn_t *xen_pfn_to_mfn_frame_list[16];
xen_pfn_t *xen_pfn_to_mfn_frame_list_list;
int preemptable, init_first;
extern unsigned int avail_space;
int xen_vector_callback_enabled = 0;
enum xen_domain_type xen_domain_type = XEN_PV_DOMAIN;

void ni_cli(void);
void ni_sti(void);

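/*
 * Interrupt disable/enable wrappers that save and restore the scratch
 * registers clobbered by __cli()/__sti(), presumably so they can be
 * called from assembly paths that expect those registers preserved.
 */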
void
ni_cli(void)
{
        CTR0(KTR_SPARE2, "ni_cli disabling interrupts");
        __asm__("pushl %edx;"
                "pushl %eax;"
                );
        __cli();
        __asm__("popl %eax;"
                "popl %edx;"
                );
}

void
ni_sti(void)
{
        __asm__("pushl %edx;"
                "pushl %esi;"
                "pushl %eax;"
                );
        __sti();
        __asm__("popl %eax;"
                "popl %esi;"
                "popl %edx;"
                );
}

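/*
 * Performing any hypercall gives the hypervisor an opportunity to
 * deliver pending event-channel upcalls; a no-op xen_version query is
 * the conventional cheap way to force that.
 */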
void
force_evtchn_callback(void)
{
        (void)HYPERVISOR_xen_version(0, NULL);
}

/*
 * Modify cmd_line in place, converting each ',' to a NUL so that the
 * result is a sequence of NUL-separated strings in the format expected
 * for the static kernel environment (e.g. "boot_verbose,kern.hz=100"
 * becomes "boot_verbose\0kern.hz=100\0").
 */
char *
xen_setbootenv(char *cmd_line)
{
        char *cmd_line_next;

        /* Skip leading spaces. */
        for (; *cmd_line == ' '; cmd_line++);

        xc_printf("xen_setbootenv(): cmd_line='%s'\n", cmd_line);

        for (cmd_line_next = cmd_line; strsep(&cmd_line_next, ",") != NULL;);
        return cmd_line;
}

int
xen_boothowto(char *envp)
{
        int i, howto = 0;

        /* Get equivalents from the environment. */
        for (i = 0; howto_names[i].ev != NULL; i++)
                if (getenv(howto_names[i].ev) != NULL)
                        howto |= howto_names[i].mask;
        return howto;
}

#define XC_PRINTF_BUFSIZE 1024
void
xc_printf(const char *fmt, ...)
{
        __va_list ap;
        int retval;
        static char buf[XC_PRINTF_BUFSIZE];

        va_start(ap, fmt);
        retval = vsnprintf(buf, XC_PRINTF_BUFSIZE, fmt, ap);
        va_end(ap);
        /*
         * vsnprintf() returns the length the formatted string would
         * have had, which can exceed the buffer; clamp it before
         * handing the string to the console.
         */
        if (retval >= XC_PRINTF_BUFSIZE)
                retval = XC_PRINTF_BUFSIZE - 1;
        (void)HYPERVISOR_console_write(buf, retval);
}

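/*
 * MMU update queue.  Page-table and machphys writes are batched here
 * and handed to the hypervisor in a single HYPERVISOR_mmu_update()
 * call, amortizing the hypercall cost.  With SMP each VCPU has its own
 * queue; a queue is flushed when it fills up or when the updates must
 * become visible.
 */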
#define XPQUEUE_SIZE 128

struct mmu_log {
        char *file;
        int line;
};

#ifdef SMP
/* Per-cpu queues and indices. */
#ifdef INVARIANTS
static struct mmu_log xpq_queue_log[XEN_LEGACY_MAX_VCPUS][XPQUEUE_SIZE];
#endif

static int xpq_idx[XEN_LEGACY_MAX_VCPUS];
static mmu_update_t xpq_queue[XEN_LEGACY_MAX_VCPUS][XPQUEUE_SIZE];

#define XPQ_QUEUE_LOG xpq_queue_log[vcpu]
#define XPQ_QUEUE xpq_queue[vcpu]
#define XPQ_IDX xpq_idx[vcpu]
#define SET_VCPU() int vcpu = smp_processor_id()
#else

static mmu_update_t xpq_queue[XPQUEUE_SIZE];
#ifdef INVARIANTS
static struct mmu_log xpq_queue_log[XPQUEUE_SIZE];
#endif
static int xpq_idx = 0;

#define XPQ_QUEUE_LOG xpq_queue_log
#define XPQ_QUEUE xpq_queue
#define XPQ_IDX xpq_idx
#define SET_VCPU()
#endif /* !SMP */

#define XPQ_IDX_INC atomic_add_int(&XPQ_IDX, 1);

#if 0
static void
xen_dump_queue(void)
{
        int _xpq_idx = XPQ_IDX;
        int i;

        if (_xpq_idx <= 1)
                return;

        xc_printf("xen_dump_queue(): %u entries\n", _xpq_idx);
        for (i = 0; i < _xpq_idx; i++) {
                xc_printf(" val: %llx ptr: %llx\n", XPQ_QUEUE[i].val,
                    XPQ_QUEUE[i].ptr);
        }
}
#endif

static __inline void
_xen_flush_queue(void)
{
        SET_VCPU();
        int _xpq_idx = XPQ_IDX;
        int error, i;

#ifdef INVARIANTS
        if (__predict_true(gdtset))
                CRITICAL_ASSERT(curthread);
#endif

        /* Clear the index first to avoid double-submitting updates. */
        XPQ_IDX = 0;
        error = HYPERVISOR_mmu_update((mmu_update_t *)&XPQ_QUEUE,
                                      _xpq_idx, NULL, DOMID_SELF);

#if 0
        if (__predict_true(gdtset))
        for (i = _xpq_idx; i > 0;) {
                if (i >= 3) {
                        CTR6(KTR_PMAP, "mmu:val: %lx ptr: %lx val: %lx "
                            "ptr: %lx val: %lx ptr: %lx",
                            (XPQ_QUEUE[i-1].val & 0xffffffff),
                            (XPQ_QUEUE[i-1].ptr & 0xffffffff),
                            (XPQ_QUEUE[i-2].val & 0xffffffff),
                            (XPQ_QUEUE[i-2].ptr & 0xffffffff),
                            (XPQ_QUEUE[i-3].val & 0xffffffff),
                            (XPQ_QUEUE[i-3].ptr & 0xffffffff));
                            i -= 3;
                } else if (i == 2) {
                        CTR4(KTR_PMAP, "mmu: val: %lx ptr: %lx val: %lx ptr: %lx",
                            (XPQ_QUEUE[i-1].val & 0xffffffff),
                            (XPQ_QUEUE[i-1].ptr & 0xffffffff),
                            (XPQ_QUEUE[i-2].val & 0xffffffff),
                            (XPQ_QUEUE[i-2].ptr & 0xffffffff));
                        i = 0;
                } else {
                        CTR2(KTR_PMAP, "mmu: val: %lx ptr: %lx",
                            (XPQ_QUEUE[i-1].val & 0xffffffff),
                            (XPQ_QUEUE[i-1].ptr & 0xffffffff));
                        i = 0;
                }
        }
#endif
        if (__predict_false(error < 0)) {
                for (i = 0; i < _xpq_idx; i++)
                        printf("val: %llx ptr: %llx\n",
                            XPQ_QUEUE[i].val, XPQ_QUEUE[i].ptr);
                panic("Failed to execute MMU updates: %d", error);
        }
}

void
xen_flush_queue(void)
{
        SET_VCPU();

        if (__predict_true(gdtset))
                critical_enter();
        if (XPQ_IDX != 0)
                _xen_flush_queue();
        if (__predict_true(gdtset))
                critical_exit();
}

static __inline void
xen_increment_idx(void)
{
        SET_VCPU();

        XPQ_IDX++;
        if (__predict_false(XPQ_IDX == XPQUEUE_SIZE))
                xen_flush_queue();
}

void
xen_check_queue(void)
{
#ifdef INVARIANTS
        SET_VCPU();

        KASSERT(XPQ_IDX == 0, ("pending operations XPQ_IDX=%d", XPQ_IDX));
#endif
}

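/*
 * Note: MMUEXT_INVLPG_ALL flushes the linear address from every VCPU's
 * TLB, so this is a global shootdown rather than a local invlpg.
 */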
void
xen_invlpg(vm_offset_t va)
{
        struct mmuext_op op;
        op.cmd = MMUEXT_INVLPG_ALL;
        op.arg1.linear_addr = va & ~PAGE_MASK;
        PANIC_IF(HYPERVISOR_mmuext_op(&op, 1, NULL, DOMID_SELF) < 0);
}

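/*
 * A PV guest cannot load %cr3 directly; the new page-table base is
 * passed to the hypervisor as a machine frame number.  Any queued
 * updates must have been flushed first, hence the KASSERT.
 */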
void
xen_load_cr3(u_int val)
{
        struct mmuext_op op;
#ifdef INVARIANTS
        SET_VCPU();

        KASSERT(XPQ_IDX == 0, ("pending operations XPQ_IDX=%d", XPQ_IDX));
#endif
        op.cmd = MMUEXT_NEW_BASEPTR;
        op.arg1.mfn = xpmap_ptom(val) >> PAGE_SHIFT;
        PANIC_IF(HYPERVISOR_mmuext_op(&op, 1, NULL, DOMID_SELF) < 0);
}

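/*
 * Return the caller's return address (saved at 4(%ebp)); used only to
 * tag KTR trace entries with the call site.
 */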
#ifdef KTR
static __inline u_int
rebp(void)
{
        u_int   data;

        __asm __volatile("movl 4(%%ebp),%0" : "=r" (data));
        return (data);
}
#endif

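/*
 * Under Xen the virtual interrupt-enable state lives in the per-VCPU
 * evtchn_upcall_mask rather than in the real EFLAGS register, so the
 * accessors below fold that mask in and out of the PSL_I bit.
 */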
u_int
read_eflags(void)
{
        vcpu_info_t *_vcpu;
        u_int eflags;

        eflags = _read_eflags();
        _vcpu = &HYPERVISOR_shared_info->vcpu_info[smp_processor_id()];
        if (_vcpu->evtchn_upcall_mask)
                eflags &= ~PSL_I;

        return (eflags);
}

void
write_eflags(u_int eflags)
{
        u_int intr;

        CTR2(KTR_SPARE2, "%x xen_restore_flags eflags %x", rebp(), eflags);
        intr = ((eflags & PSL_I) == 0);
        __restore_flags(intr);
        _write_eflags(eflags);
}

void
xen_cli(void)
{
        CTR1(KTR_SPARE2, "%x xen_cli disabling interrupts", rebp());
        __cli();
}

void
xen_sti(void)
{
        CTR1(KTR_SPARE2, "%x xen_sti enabling interrupts", rebp());
        __sti();
}

u_int
xen_rcr2(void)
{

        return (HYPERVISOR_shared_info->vcpu_info[curcpu].arch.cr2);
}

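/*
 * Queue an update of the hypervisor-maintained machine-to-physical
 * table: the MMU_MACHPHYS_UPDATE tag in the low bits of the machine
 * address marks the entry as an M2P update, with val the new PFN.
 */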
void
_xen_machphys_update(vm_paddr_t mfn, vm_paddr_t pfn, char *file, int line)
{
        SET_VCPU();

        if (__predict_true(gdtset))
                critical_enter();
        XPQ_QUEUE[XPQ_IDX].ptr = (mfn << PAGE_SHIFT) | MMU_MACHPHYS_UPDATE;
        XPQ_QUEUE[XPQ_IDX].val = pfn;
#ifdef INVARIANTS
        XPQ_QUEUE_LOG[XPQ_IDX].file = file;
        XPQ_QUEUE_LOG[XPQ_IDX].line = line;
#endif
        xen_increment_idx();
        if (__predict_true(gdtset))
                critical_exit();
}

extern struct rwlock pvh_global_lock;

void
_xen_queue_pt_update(vm_paddr_t ptr, vm_paddr_t val, char *file, int line)
{
        SET_VCPU();

        if (__predict_true(gdtset))
                rw_assert(&pvh_global_lock, RA_WLOCKED);

        KASSERT((ptr & 7) == 0, ("misaligned update"));

        if (__predict_true(gdtset))
                critical_enter();

        XPQ_QUEUE[XPQ_IDX].ptr = ((uint64_t)ptr) | MMU_NORMAL_PT_UPDATE;
        XPQ_QUEUE[XPQ_IDX].val = (uint64_t)val;
#ifdef INVARIANTS
        XPQ_QUEUE_LOG[XPQ_IDX].file = file;
        XPQ_QUEUE_LOG[XPQ_IDX].line = line;
#endif
        xen_increment_idx();
        if (__predict_true(gdtset))
                critical_exit();
}

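/*
 * Page-table pinning: a pin operation asks the hypervisor to validate
 * the page as a page table of the given level and to retain that type
 * while pinned; unpinning releases it again.  The update queue is
 * flushed first so any pending writes to the page are applied before
 * validation.
 */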
void
xen_pgdpt_pin(vm_paddr_t ma)
{
        struct mmuext_op op;
        op.cmd = MMUEXT_PIN_L3_TABLE;
        op.arg1.mfn = ma >> PAGE_SHIFT;
        xen_flush_queue();
        PANIC_IF(HYPERVISOR_mmuext_op(&op, 1, NULL, DOMID_SELF) < 0);
}

void
xen_pgd_pin(vm_paddr_t ma)
{
        struct mmuext_op op;
        op.cmd = MMUEXT_PIN_L2_TABLE;
        op.arg1.mfn = ma >> PAGE_SHIFT;
        xen_flush_queue();
        PANIC_IF(HYPERVISOR_mmuext_op(&op, 1, NULL, DOMID_SELF) < 0);
}

void
xen_pgd_unpin(vm_paddr_t ma)
{
        struct mmuext_op op;
        op.cmd = MMUEXT_UNPIN_TABLE;
        op.arg1.mfn = ma >> PAGE_SHIFT;
        xen_flush_queue();
        PANIC_IF(HYPERVISOR_mmuext_op(&op, 1, NULL, DOMID_SELF) < 0);
}

void
xen_pt_pin(vm_paddr_t ma)
{
        struct mmuext_op op;
        op.cmd = MMUEXT_PIN_L1_TABLE;
        op.arg1.mfn = ma >> PAGE_SHIFT;
        xen_flush_queue();
        PANIC_IF(HYPERVISOR_mmuext_op(&op, 1, NULL, DOMID_SELF) < 0);
}

void
xen_pt_unpin(vm_paddr_t ma)
{
        struct mmuext_op op;
        op.cmd = MMUEXT_UNPIN_TABLE;
        op.arg1.mfn = ma >> PAGE_SHIFT;
        xen_flush_queue();
        PANIC_IF(HYPERVISOR_mmuext_op(&op, 1, NULL, DOMID_SELF) < 0);
}

void
xen_set_ldt(vm_paddr_t ptr, unsigned long len)
{
        struct mmuext_op op;
        op.cmd = MMUEXT_SET_LDT;
        op.arg1.linear_addr = ptr;
        op.arg2.nr_ents = len;
        xen_flush_queue();
        PANIC_IF(HYPERVISOR_mmuext_op(&op, 1, NULL, DOMID_SELF) < 0);
}

void
xen_tlb_flush(void)
{
        struct mmuext_op op;
        op.cmd = MMUEXT_TLB_FLUSH_LOCAL;
        xen_flush_queue();
        PANIC_IF(HYPERVISOR_mmuext_op(&op, 1, NULL, DOMID_SELF) < 0);
}

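/*
 * GDT/LDT entries cannot be written directly by a PV guest; translate
 * the descriptor's virtual address to a machine address and have the
 * hypervisor perform the (validated) write.
 */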
void
xen_update_descriptor(union descriptor *table, union descriptor *entry)
{
        vm_paddr_t pa;
        pt_entry_t *ptp;

        ptp = vtopte((vm_offset_t)table);
        pa = (*ptp & PG_FRAME) | ((vm_offset_t)table & PAGE_MASK);
        if (HYPERVISOR_update_descriptor(pa, *(uint64_t *)entry))
                panic("HYPERVISOR_update_descriptor failed\n");
}

#if 0
/*
 * Bitmap is indexed by page number. If bit is set, the page is part of a
 * xen_create_contiguous_region() area of memory.
 */
unsigned long *contiguous_bitmap;

static void
contiguous_bitmap_set(unsigned long first_page, unsigned long nr_pages)
{
        unsigned long start_off, end_off, curr_idx, end_idx;

        curr_idx  = first_page / BITS_PER_LONG;
        start_off = first_page & (BITS_PER_LONG-1);
        end_idx   = (first_page + nr_pages) / BITS_PER_LONG;
        end_off   = (first_page + nr_pages) & (BITS_PER_LONG-1);

        if (curr_idx == end_idx) {
                contiguous_bitmap[curr_idx] |=
                        ((1UL<<end_off)-1) & -(1UL<<start_off);
        } else {
                contiguous_bitmap[curr_idx] |= -(1UL<<start_off);
                while ( ++curr_idx < end_idx )
                        contiguous_bitmap[curr_idx] = ~0UL;
                contiguous_bitmap[curr_idx] |= (1UL<<end_off)-1;
        }
}

static void
contiguous_bitmap_clear(unsigned long first_page, unsigned long nr_pages)
{
        unsigned long start_off, end_off, curr_idx, end_idx;

        curr_idx  = first_page / BITS_PER_LONG;
        start_off = first_page & (BITS_PER_LONG-1);
        end_idx   = (first_page + nr_pages) / BITS_PER_LONG;
        end_off   = (first_page + nr_pages) & (BITS_PER_LONG-1);

        if (curr_idx == end_idx) {
                contiguous_bitmap[curr_idx] &=
                        -(1UL<<end_off) | ((1UL<<start_off)-1);
        } else {
                contiguous_bitmap[curr_idx] &= (1UL<<start_off)-1;
                while ( ++curr_idx != end_idx )
                        contiguous_bitmap[curr_idx] = 0;
                contiguous_bitmap[curr_idx] &= -(1UL<<end_off);
        }
}
#endif

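/*
 * Strategy: hand the machine pages backing the range back to Xen, ask
 * for a single machine-contiguous extent of the same total size, and
 * rewire the P2M entries of the original PFNs to the new MFNs.  On
 * failure the pages are reclaimed one at a time so the guest does not
 * lose memory.
 */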
/* Ensure multi-page extents are contiguous in machine memory. */
int
xen_create_contiguous_region(vm_page_t pages, int npages)
{
        unsigned long  mfn, i, flags;
        int order;
        struct xen_memory_reservation reservation = {
                .nr_extents   = 1,
                .extent_order = 0,
                .domid        = DOMID_SELF
        };
        set_xen_guest_handle(reservation.extent_start, &mfn);

        balloon_lock(flags);

        /* Can currently only handle power-of-two allocations. */
        PANIC_IF(ffs(npages) != fls(npages));

        /*
         * 0. Determine order.  The PANIC_IF above guarantees npages is
         * a power of two, so this reduces to fls(npages) - 1.
         */
        order = (ffs(npages) == fls(npages)) ? fls(npages) - 1 : fls(npages);

        /* 1. Give away machine pages. */
        for (i = 0; i < (1 << order); i++) {
                int pfn;
                pfn = VM_PAGE_TO_PHYS(&pages[i]) >> PAGE_SHIFT;
                mfn = PFNTOMFN(pfn);
                PFNTOMFN(pfn) = INVALID_P2M_ENTRY;
                PANIC_IF(HYPERVISOR_memory_op(XENMEM_decrease_reservation,
                    &reservation) != 1);
        }

        /* 2. Get a new contiguous memory extent. */
        reservation.extent_order = order;
        /*
         * xenlinux hardcodes this because of aacraid - maybe set to 0
         * if we're not running with a broken driver XXXEN
         */
        reservation.address_bits = 31;
        if (HYPERVISOR_memory_op(XENMEM_increase_reservation,
            &reservation) != 1)
                goto fail;

        /* 3. Map the new extent in place of old pages. */
        for (i = 0; i < (1 << order); i++) {
                int pfn;
                pfn = VM_PAGE_TO_PHYS(&pages[i]) >> PAGE_SHIFT;
                xen_machphys_update(mfn+i, pfn);
                PFNTOMFN(pfn) = mfn+i;
        }

        xen_tlb_flush();

#if 0
        contiguous_bitmap_set(VM_PAGE_TO_PHYS(&pages[0]) >> PAGE_SHIFT,
            1UL << order);
#endif

        balloon_unlock(flags);

        return 0;

 fail:
        reservation.extent_order = 0;
        reservation.address_bits = 0;

        for (i = 0; i < (1 << order); i++) {
                int pfn;
                pfn = VM_PAGE_TO_PHYS(&pages[i]) >> PAGE_SHIFT;
                PANIC_IF(HYPERVISOR_memory_op(
                        XENMEM_increase_reservation, &reservation) != 1);
                xen_machphys_update(mfn, pfn);
                PFNTOMFN(pfn) = mfn;
        }

        xen_tlb_flush();

        balloon_unlock(flags);

        return ENOMEM;
}

void
xen_destroy_contiguous_region(void *addr, int npages)
{
        unsigned long  mfn, i, flags, order, pfn0;
        struct xen_memory_reservation reservation = {
                .nr_extents   = 1,
                .extent_order = 0,
                .domid        = DOMID_SELF
        };
        set_xen_guest_handle(reservation.extent_start, &mfn);

        pfn0 = vtophys(addr) >> PAGE_SHIFT;
#if 0
        scrub_pages(vstart, 1 << order);
#endif
        /* Can currently only handle power-of-two allocations. */
        PANIC_IF(ffs(npages) != fls(npages));

        /* 0. Determine order. */
        order = (ffs(npages) == fls(npages)) ? fls(npages) - 1 : fls(npages);

        balloon_lock(flags);

#if 0
        contiguous_bitmap_clear(vtophys(addr) >> PAGE_SHIFT, 1UL << order);
#endif

        /* 1. Zap current PTEs, giving away the underlying pages. */
        for (i = 0; i < (1 << order); i++) {
                int pfn;
                uint64_t new_val = 0;
                pfn = vtomach((char *)addr + i*PAGE_SIZE) >> PAGE_SHIFT;

                PANIC_IF(HYPERVISOR_update_va_mapping(
                    (vm_offset_t)((char *)addr + (i * PAGE_SIZE)),
                    new_val, 0));
                PFNTOMFN(pfn) = INVALID_P2M_ENTRY;
                PANIC_IF(HYPERVISOR_memory_op(
                        XENMEM_decrease_reservation, &reservation) != 1);
        }

        /* 2. Map new pages in place of old pages. */
        for (i = 0; i < (1 << order); i++) {
                int pfn;
                uint64_t new_val;
                pfn = pfn0 + i;
                PANIC_IF(HYPERVISOR_memory_op(XENMEM_increase_reservation,
                    &reservation) != 1);

                new_val = mfn << PAGE_SHIFT;
                PANIC_IF(HYPERVISOR_update_va_mapping(
                    (vm_offset_t)addr + (i * PAGE_SIZE),
                    new_val, PG_KERNEL));
                xen_machphys_update(mfn, pfn);
                PFNTOMFN(pfn) = mfn;
        }

        xen_tlb_flush();

        balloon_unlock(flags);
}

extern vm_offset_t      proc0kstack;
extern int vm86paddr, vm86phystk;
char *bootmem_start, *bootmem_current, *bootmem_end;

pteinfo_t *pteinfo_list;
void initvalues(start_info_t *startinfo);

struct xenstore_domain_interface;
extern struct xenstore_domain_interface *xen_store;

char *console_page;

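/*
 * Trivial bump allocator over the boot-time window carved out in
 * initvalues().  Frees must be LIFO: only the most recent allocation
 * can be released, which the PANIC_IF in bootmem_free() enforces.
 */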
void *
bootmem_alloc(unsigned int size)
{
        char *retptr;

        retptr = bootmem_current;
        PANIC_IF(retptr + size > bootmem_end);
        bootmem_current += size;

        return retptr;
}

void
bootmem_free(void *ptr, unsigned int size)
{
        char *tptr;

        tptr = ptr;
        PANIC_IF(tptr != bootmem_current - size ||
                bootmem_current - size < bootmem_start);

        bootmem_current -= size;
}

#if 0
static vm_paddr_t
xpmap_mtop2(vm_paddr_t mpa)
{
        return ((machine_to_phys_mapping[mpa >> PAGE_SHIFT] << PAGE_SHIFT)
            ) | (mpa & ~PG_FRAME);
}

static pd_entry_t
xpmap_get_bootpde(vm_paddr_t va)
{

        return ((pd_entry_t *)xen_start_info->pt_base)[va >> 22];
}

static pd_entry_t
xpmap_get_vbootpde(vm_paddr_t va)
{
        pd_entry_t pde;

        pde = xpmap_get_bootpde(va);
        if ((pde & PG_V) == 0)
                return (pde & ~PG_FRAME);
        return (pde & ~PG_FRAME) |
                (xpmap_mtop2(pde & PG_FRAME) + KERNBASE);
}

static pt_entry_t *
xpmap_get_bootptep(vm_paddr_t va)
{
        pd_entry_t pde;

        pde = xpmap_get_vbootpde(va);
        if ((pde & PG_V) == 0)
                return (void *)-1;
#define PT_MASK         0x003ff000      /* page table address bits */
        return &(((pt_entry_t *)(pde & PG_FRAME))[(va & PT_MASK) >> PAGE_SHIFT]);
}

static pt_entry_t
xpmap_get_bootpte(vm_paddr_t va)
{

        return xpmap_get_bootptep(va)[0];
}
#endif

#ifdef ADD_ISA_HOLE
static void
shift_phys_machine(unsigned long *phys_machine, int nr_pages)
{

        unsigned long *tmp_page, *current_page, *next_page;
        int i;

        tmp_page = bootmem_alloc(PAGE_SIZE);
        current_page = phys_machine + nr_pages -
            (PAGE_SIZE/sizeof(unsigned long));
        next_page = current_page - (PAGE_SIZE/sizeof(unsigned long));
        bcopy(phys_machine, tmp_page, PAGE_SIZE);

        while (current_page > phys_machine) {
                /* Save next page. */
                bcopy(next_page, tmp_page, PAGE_SIZE);
                /* Shift down page. */
                bcopy(current_page, next_page, PAGE_SIZE);
                /* Finish swap. */
                bcopy(tmp_page, current_page, PAGE_SIZE);

                current_page -= (PAGE_SIZE/sizeof(unsigned long));
                next_page -= (PAGE_SIZE/sizeof(unsigned long));
        }
        bootmem_free(tmp_page, PAGE_SIZE);

        for (i = 0; i < nr_pages; i++) {
                xen_machphys_update(phys_machine[i], i);
        }
        memset(phys_machine, INVALID_P2M_ENTRY, PAGE_SIZE);
}
#endif /* ADD_ISA_HOLE */

/*
 * Build a directory of the pages that make up our Physical to Machine
 * mapping table. The Xen suspend/restore code uses this to find our
 * mapping table.
 */
static void
init_frame_list_list(void *arg)
{
        unsigned long nr_pages = xen_start_info->nr_pages;
#define FPP     (PAGE_SIZE/sizeof(xen_pfn_t))
        int i, j, k;

        xen_pfn_to_mfn_frame_list_list = malloc(PAGE_SIZE, M_DEVBUF, M_WAITOK);
        for (i = 0, j = 0, k = -1; i < nr_pages;
             i += FPP, j++) {
                if ((j & (FPP - 1)) == 0) {
                        k++;
                        xen_pfn_to_mfn_frame_list[k] =
                                malloc(PAGE_SIZE, M_DEVBUF, M_WAITOK);
                        xen_pfn_to_mfn_frame_list_list[k] =
                                VTOMFN(xen_pfn_to_mfn_frame_list[k]);
                        j = 0;
                }
                xen_pfn_to_mfn_frame_list[k][j] =
                        VTOMFN(&xen_phys_machine[i]);
        }

        HYPERVISOR_shared_info->arch.max_pfn = nr_pages;
        HYPERVISOR_shared_info->arch.pfn_to_mfn_frame_list_list
                = VTOMFN(xen_pfn_to_mfn_frame_list_list);
}
SYSINIT(init_fll, SI_SUB_DEVFS, SI_ORDER_ANY, init_frame_list_list, NULL);

extern unsigned long physfree;

int pdir, curoffset;
extern int nkpt;

extern uint32_t kernbase;

void
initvalues(start_info_t *startinfo)
{
        vm_offset_t cur_space, cur_space_pt;
        struct physdev_set_iopl set_iopl;

        int l3_pages, l2_pages, l1_pages, offset;
        vm_paddr_t console_page_ma, xen_store_ma;
        vm_offset_t tmpva;
        vm_paddr_t shinfo;
#ifdef PAE
        vm_paddr_t IdlePDPTma, IdlePDPTnewma;
        vm_paddr_t IdlePTDnewma[4];
        pd_entry_t *IdlePDPTnew, *IdlePTDnew;
        vm_paddr_t IdlePTDma[4];
#else
        vm_paddr_t IdlePTDma[1];
#endif
        unsigned long i;
        int ncpus = MAXCPU;

        nkpt = min(
            min(
                max((startinfo->nr_pages >> NPGPTD_SHIFT), nkpt),
                NPGPTD*NPDEPG - KPTDI),
            (HYPERVISOR_VIRT_START - KERNBASE) >> PDRSHIFT);

        HYPERVISOR_vm_assist(VMASST_CMD_enable, VMASST_TYPE_4gb_segments);
#ifdef notyet
        /*
         * need to install handler
         */
        HYPERVISOR_vm_assist(VMASST_CMD_enable,
            VMASST_TYPE_4gb_segments_notify);
#endif
        xen_start_info = startinfo;
        xen_phys_machine = (xen_pfn_t *)startinfo->mfn_list;

        IdlePTD = (pd_entry_t *)((uint8_t *)startinfo->pt_base + PAGE_SIZE);
        l1_pages = 0;

#ifdef PAE
        l3_pages = 1;
        l2_pages = 0;
        IdlePDPT = (pd_entry_t *)startinfo->pt_base;
        IdlePDPTma = VTOM(startinfo->pt_base);
        for (i = (KERNBASE >> 30);
             (i < 4) && (IdlePDPT[i] != 0); i++)
                l2_pages++;
        /*
         * Note that only one page directory has been allocated at this
         * point.  Thus, if KERNBASE
         */
        for (i = 0; i < l2_pages; i++)
                IdlePTDma[i] = VTOM(IdlePTD + i*PAGE_SIZE);

        l2_pages = (l2_pages == 0) ? 1 : l2_pages;
#else
        l3_pages = 0;
        l2_pages = 1;
#endif
        for (i = (((KERNBASE>>18) & PAGE_MASK)>>PAGE_SHIFT);
             (i<l2_pages*NPDEPG) && (i<(VM_MAX_KERNEL_ADDRESS>>PDRSHIFT)); i++) {

                if (IdlePTD[i] == 0)
                        break;
                l1_pages++;
        }

        /* Number of pages allocated after the page tables, plus one. */
        cur_space = xen_start_info->pt_base +
            (l3_pages + l2_pages + l1_pages + 1)*PAGE_SIZE;

        xc_printf("initvalues(): wooh - availmem=%x,%x\n", avail_space,
            cur_space);

        xc_printf("KERNBASE=%x,pt_base=%x, VTOPFN(base)=%x, nr_pt_frames=%x\n",
            KERNBASE, xen_start_info->pt_base, VTOPFN(xen_start_info->pt_base),
            xen_start_info->nr_pt_frames);
        xendebug_flags = 0; /* 0xffffffff; */

#ifdef ADD_ISA_HOLE
        shift_phys_machine(xen_phys_machine, xen_start_info->nr_pages);
#endif
        XENPRINTF("IdlePTD %p\n", IdlePTD);
        XENPRINTF("nr_pages: %ld shared_info: 0x%lx flags: 0x%lx pt_base: 0x%lx "
                  "mod_start: 0x%lx mod_len: 0x%lx\n",
                  xen_start_info->nr_pages, xen_start_info->shared_info,
                  xen_start_info->flags, xen_start_info->pt_base,
                  xen_start_info->mod_start, xen_start_info->mod_len);

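        /*
         * Under PAE, the boot page tables Xen handed us are replaced
         * below: allocate a fresh PDPT and four page directories, copy
         * the live kernel mappings across, remap the new tables
         * read-only, and pin the new tree so the hypervisor accepts it
         * as a page-table base.
         */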
#ifdef PAE
        IdlePDPTnew = (pd_entry_t *)cur_space; cur_space += PAGE_SIZE;
        bzero(IdlePDPTnew, PAGE_SIZE);

        IdlePDPTnewma = VTOM(IdlePDPTnew);
        IdlePTDnew = (pd_entry_t *)cur_space; cur_space += 4*PAGE_SIZE;
        bzero(IdlePTDnew, 4*PAGE_SIZE);

        for (i = 0; i < 4; i++)
                IdlePTDnewma[i] = VTOM((uint8_t *)IdlePTDnew + i*PAGE_SIZE);
        /*
         * L3: copy the 4 machine addresses of the new PTDs into the
         * PDPT.
         */
        for (i = 0; i < 4; i++)
                IdlePDPTnew[i] = IdlePTDnewma[i] | PG_V;

        __asm__("nop;");
        /*
         * Re-map the new PDPT read-only.
         */
        PT_SET_MA(IdlePDPTnew, IdlePDPTnewma | PG_V);
        /*
         * Unpin the current PDPT.
         */
        xen_pt_unpin(IdlePDPTma);

#endif  /* PAE */

        /* Map proc0's KSTACK. */
        proc0kstack = cur_space; cur_space += (KSTACK_PAGES * PAGE_SIZE);
        xc_printf("proc0kstack=%u\n", proc0kstack);

        /* vm86/bios stack */
        cur_space += PAGE_SIZE;

        /* Map space for the vm86 region. */
        vm86paddr = (vm_offset_t)cur_space;
        cur_space += (PAGE_SIZE * 3);

        /* Allocate 4 pages for the bootmem allocator. */
        bootmem_start = bootmem_current = (char *)cur_space;
        cur_space += (4 * PAGE_SIZE);
        bootmem_end = (char *)cur_space;

        /* Allocate pages for the gdt. */
        gdt = (union descriptor *)cur_space;
        cur_space += PAGE_SIZE*ncpus;

        /* Allocate a page for the ldt. */
        ldt = (union descriptor *)cur_space; cur_space += PAGE_SIZE;
        cur_space += PAGE_SIZE;

        /* Unmap the remaining pages from the initial chunk. */
        for (tmpva = cur_space;
             tmpva < (((uint32_t)&kernbase) + (l1_pages<<PDRSHIFT));
             tmpva += PAGE_SIZE) {
                bzero((char *)tmpva, PAGE_SIZE);
                PT_SET_MA(tmpva, (vm_paddr_t)0);
        }

        PT_UPDATES_FLUSH();

        memcpy(((uint8_t *)IdlePTDnew) + ((unsigned int)(KERNBASE >> 18)),
            ((uint8_t *)IdlePTD) + ((KERNBASE >> 18) & PAGE_MASK),
            l1_pages*sizeof(pt_entry_t));

        for (i = 0; i < 4; i++) {
                PT_SET_MA((uint8_t *)IdlePTDnew + i*PAGE_SIZE,
                    IdlePTDnewma[i] | PG_V);
        }
        xen_load_cr3(VTOP(IdlePDPTnew));
        xen_pgdpt_pin(VTOM(IdlePDPTnew));

        /* Allocate the remainder of the nkpt pages. */
        cur_space_pt = cur_space;
        for (offset = (KERNBASE >> PDRSHIFT), i = l1_pages; i < nkpt;
             i++, cur_space += PAGE_SIZE) {
                pdir = (offset + i) / NPDEPG;
                curoffset = ((offset + i) % NPDEPG);
                if (((offset + i) << PDRSHIFT) == VM_MAX_KERNEL_ADDRESS)
                        break;

                /*
                 * Make sure that all the initial page table pages
                 * have been zeroed.
                 */
                PT_SET_MA(cur_space, VTOM(cur_space) | PG_V | PG_RW);
                bzero((char *)cur_space, PAGE_SIZE);
                PT_SET_MA(cur_space, (vm_paddr_t)0);
                xen_pt_pin(VTOM(cur_space));
                xen_queue_pt_update((vm_paddr_t)(IdlePTDnewma[pdir] +
                        curoffset*sizeof(vm_paddr_t)),
                    VTOM(cur_space) | PG_KERNEL);
                PT_UPDATES_FLUSH();
        }

        /* Install the recursive page-directory mappings at PTDPTDI. */
        for (i = 0; i < 4; i++) {
                pdir = (PTDPTDI + i) / NPDEPG;
                curoffset = (PTDPTDI + i) % NPDEPG;

                xen_queue_pt_update((vm_paddr_t)(IdlePTDnewma[pdir] +
                        curoffset*sizeof(vm_paddr_t)),
                    IdlePTDnewma[i] | PG_V);
        }

        PT_UPDATES_FLUSH();

        IdlePTD = IdlePTDnew;
        IdlePDPT = IdlePDPTnew;
        IdlePDPTma = IdlePDPTnewma;

        HYPERVISOR_shared_info = (shared_info_t *)cur_space;
        cur_space += PAGE_SIZE;

        xen_store = (struct xenstore_domain_interface *)cur_space;
        cur_space += PAGE_SIZE;

        console_page = (char *)cur_space;
        cur_space += PAGE_SIZE;

        /*
         * shared_info is an unsigned long so this will randomly break if
         * it is allocated above 4GB - I guess people are used to that
         * sort of thing with Xen ... sigh
         */
        shinfo = xen_start_info->shared_info;
        PT_SET_MA(HYPERVISOR_shared_info, shinfo | PG_KERNEL);

1102         xc_printf("#4\n");
1103
1104         xen_store_ma = (((vm_paddr_t)xen_start_info->store_mfn) << PAGE_SHIFT);
1105         PT_SET_MA(xen_store, xen_store_ma | PG_KERNEL);
1106         console_page_ma = (((vm_paddr_t)xen_start_info->console.domU.mfn) << PAGE_SHIFT);
1107         PT_SET_MA(console_page, console_page_ma | PG_KERNEL);
1108
1109         xc_printf("#5\n");
1110
1111         set_iopl.iopl = 1;
1112         PANIC_IF(HYPERVISOR_physdev_op(PHYSDEVOP_SET_IOPL, &set_iopl));
1113         xc_printf("#6\n");
1114 #if 0
1115         /* add page table for KERNBASE */
1116         xen_queue_pt_update(IdlePTDma + KPTDI*sizeof(vm_paddr_t), 
1117                             VTOM(cur_space) | PG_KERNEL);
1118         xen_flush_queue();
1119 #ifdef PAE      
1120         xen_queue_pt_update(pdir_shadow_ma[3] + KPTDI*sizeof(vm_paddr_t), 
1121                             VTOM(cur_space) | PG_V | PG_A);
1122 #else
1123         xen_queue_pt_update(pdir_shadow_ma + KPTDI*sizeof(vm_paddr_t), 
1124                             VTOM(cur_space) | PG_V | PG_A);
1125 #endif  
1126         xen_flush_queue();
1127         cur_space += PAGE_SIZE;
1128         xc_printf("#6\n");
1129 #endif /* 0 */  
1130 #ifdef notyet
1131         if (xen_start_info->flags & SIF_INITDOMAIN) {
1132                 /* Map first megabyte */
1133                 for (i = 0; i < (256 << PAGE_SHIFT); i += PAGE_SIZE) 
1134                         PT_SET_MA(KERNBASE + i, i | PG_KERNEL | PG_NC_PCD);
1135                 xen_flush_queue();
1136         }
1137 #endif
1138         /*
1139          * re-map kernel text read-only
1140          *
1141          */
1142         for (i = (((vm_offset_t)&btext) & ~PAGE_MASK);
1143              i < (((vm_offset_t)&etext) & ~PAGE_MASK); i += PAGE_SIZE)
1144                 PT_SET_MA(i, VTOM(i) | PG_V | PG_A);
1145         
1146         xc_printf("#7\n");
1147         physfree = VTOP(cur_space);
1148         init_first = physfree >> PAGE_SHIFT;
1149         IdlePTD = (pd_entry_t *)VTOP(IdlePTD);
1150         IdlePDPT = (pd_entry_t *)VTOP(IdlePDPT);
1151         setup_xen_features();
1152         xc_printf("#8, proc0kstack=%u\n", proc0kstack);
1153 }
1154
1155
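/*
 * Xen trap_info_t flags: bits 0-1 give the most privileged ring that
 * may invoke the vector from software; bit 2 ("|4" below) requests
 * that event delivery be disabled on entry, i.e. interrupt-gate
 * semantics.
 */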
trap_info_t trap_table[] = {
        { 0,   0, GSEL(GCODE_SEL, SEL_KPL), (unsigned long) &IDTVEC(div)},
        { 1,   0|4, GSEL(GCODE_SEL, SEL_KPL), (unsigned long) &IDTVEC(dbg)},
        { 3,   3|4, GSEL(GCODE_SEL, SEL_KPL), (unsigned long) &IDTVEC(bpt)},
        { 4,   3, GSEL(GCODE_SEL, SEL_KPL), (unsigned long) &IDTVEC(ofl)},
        /* This is UPL on Linux and KPL on BSD */
        { 5,   3, GSEL(GCODE_SEL, SEL_KPL), (unsigned long) &IDTVEC(bnd)},
        { 6,   0, GSEL(GCODE_SEL, SEL_KPL), (unsigned long) &IDTVEC(ill)},
        { 7,   0|4, GSEL(GCODE_SEL, SEL_KPL), (unsigned long) &IDTVEC(dna)},
        /*
         * { 8,   0, GSEL(GCODE_SEL, SEL_KPL), (unsigned long) &IDTVEC(XXX)},
         *   no handler for double fault
         */
        { 9,   0, GSEL(GCODE_SEL, SEL_KPL), (unsigned long) &IDTVEC(fpusegm)},
        {10,   0, GSEL(GCODE_SEL, SEL_KPL), (unsigned long) &IDTVEC(tss)},
        {11,   0, GSEL(GCODE_SEL, SEL_KPL), (unsigned long) &IDTVEC(missing)},
        {12,   0, GSEL(GCODE_SEL, SEL_KPL), (unsigned long) &IDTVEC(stk)},
        {13,   0, GSEL(GCODE_SEL, SEL_KPL), (unsigned long) &IDTVEC(prot)},
        {14,   0|4, GSEL(GCODE_SEL, SEL_KPL), (unsigned long) &IDTVEC(page)},
        {15,   0, GSEL(GCODE_SEL, SEL_KPL), (unsigned long) &IDTVEC(rsvd)},
        {16,   0, GSEL(GCODE_SEL, SEL_KPL), (unsigned long) &IDTVEC(fpu)},
        {17,   0, GSEL(GCODE_SEL, SEL_KPL), (unsigned long) &IDTVEC(align)},
        {18,   0, GSEL(GCODE_SEL, SEL_KPL), (unsigned long) &IDTVEC(mchk)},
        {19,   0, GSEL(GCODE_SEL, SEL_KPL), (unsigned long) &IDTVEC(xmm)},
        {0x80, 3, GSEL(GCODE_SEL, SEL_KPL), (unsigned long) &IDTVEC(int0x80_syscall)},
        {  0, 0,           0, 0 }
};

/* Perform a multicall and check that individual calls succeeded. */
int
HYPERVISOR_multicall(struct multicall_entry *call_list, int nr_calls)
{
        int ret = 0;
        int i;

        /* Perform the multicall. */
        PANIC_IF(_HYPERVISOR_multicall(call_list, nr_calls));

        /* Check the results of individual hypercalls. */
        for (i = 0; i < nr_calls; i++)
                if (__predict_false(call_list[i].result < 0))
                        ret++;
        if (__predict_false(ret > 0))
                panic("%d multicall(s) failed: cpu %d\n",
                    ret, smp_processor_id());

        /* If we didn't panic already, everything succeeded. */
        return (0);
}

/********** CODE WORTH KEEPING ABOVE HERE *****************/

void xen_failsafe_handler(void);

void
xen_failsafe_handler(void)
{

        panic("xen_failsafe_handler called!\n");
}

void xen_handle_thread_switch(struct pcb *pcb);

/*
 * This is called by cpu_switch() when switching threads.  The pcb arg
 * refers to the process control block of the next thread to run.
 */
void
xen_handle_thread_switch(struct pcb *pcb)
{
    uint32_t *a = (uint32_t *)&PCPU_GET(fsgs_gdt)[0];
    uint32_t *b = (uint32_t *)&pcb->pcb_fsd;
    multicall_entry_t mcl[3];
    int i = 0;

    /* Notify Xen of task switch. */
    mcl[i].op = __HYPERVISOR_stack_switch;
    mcl[i].args[0] = GSEL(GDATA_SEL, SEL_KPL);
    mcl[i++].args[1] = (unsigned long)pcb;

    /* Check for update of fsd. */
    if (*a != *b || *(a+1) != *(b+1)) {
        mcl[i].op = __HYPERVISOR_update_descriptor;
        *(uint64_t *)&mcl[i].args[0] = vtomach((vm_offset_t)a);
        *(uint64_t *)&mcl[i++].args[2] = *(uint64_t *)b;
    }

    a += 2;
    b += 2;

    /* Check for update of gsd. */
    if (*a != *b || *(a+1) != *(b+1)) {
        mcl[i].op = __HYPERVISOR_update_descriptor;
        *(uint64_t *)&mcl[i].args[0] = vtomach((vm_offset_t)a);
        *(uint64_t *)&mcl[i++].args[2] = *(uint64_t *)b;
    }

    (void)HYPERVISOR_multicall(mcl, i);
}