/*
 *
 * Copyright (c) 2004 Christian Limpach.
 * Copyright (c) 2004-2006,2008 Kip Macy
 * All rights reserved.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 * 3. All advertising materials mentioning features or use of this software
 *    must display the following acknowledgement:
 *      This product includes software developed by Christian Limpach.
 * 4. The name of the author may not be used to endorse or promote products
 *    derived from this software without specific prior written permission.
 *
 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
 * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
 * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
 * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT,
 * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
 * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
 * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
 * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
 * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
 */

#include <sys/cdefs.h>
__FBSDID("$FreeBSD$");

#include <sys/param.h>
#include <sys/systm.h>
#include <sys/bus.h>
#include <sys/ktr.h>
#include <sys/lock.h>
#include <sys/mount.h>
#include <sys/malloc.h>
#include <sys/mutex.h>
#include <sys/kernel.h>
#include <sys/proc.h>
#include <sys/reboot.h>
#include <sys/rwlock.h>
#include <sys/sysproto.h>
#include <sys/boot.h>

#include <xen/xen-os.h>

#include <vm/vm.h>
#include <vm/pmap.h>
#include <machine/segments.h>
#include <machine/pcb.h>
#include <machine/stdarg.h>
#include <machine/vmparam.h>
#include <machine/cpu.h>
#include <machine/intr_machdep.h>
#include <machine/md_var.h>
#include <machine/asmacros.h>

#include <xen/hypervisor.h>
#include <machine/xen/xenvar.h>
#include <machine/xen/xenfunc.h>
#include <machine/xen/xenpmap.h>
#include <xen/interface/memory.h>
#include <machine/xen/features.h>
#ifdef SMP
#include <machine/privatespace.h>
#endif

#include <vm/vm_page.h>

#define IDTVEC(name)    __CONCAT(X,name)

extern inthand_t
IDTVEC(div), IDTVEC(dbg), IDTVEC(nmi), IDTVEC(bpt), IDTVEC(ofl),
        IDTVEC(bnd), IDTVEC(ill), IDTVEC(dna), IDTVEC(fpusegm),
        IDTVEC(tss), IDTVEC(missing), IDTVEC(stk), IDTVEC(prot),
        IDTVEC(page), IDTVEC(mchk), IDTVEC(rsvd), IDTVEC(fpu), IDTVEC(align),
        IDTVEC(xmm), IDTVEC(lcall_syscall), IDTVEC(int0x80_syscall);

int xendebug_flags;
start_info_t *xen_start_info;
start_info_t *HYPERVISOR_start_info;
shared_info_t *HYPERVISOR_shared_info;
xen_pfn_t *xen_machine_phys = machine_to_phys_mapping;
xen_pfn_t *xen_phys_machine;
xen_pfn_t *xen_pfn_to_mfn_frame_list[16];
xen_pfn_t *xen_pfn_to_mfn_frame_list_list;
int preemptable, init_first;
extern unsigned int avail_space;
int xen_vector_callback_enabled = 0;
enum xen_domain_type xen_domain_type = XEN_PV_DOMAIN;

void ni_cli(void);
void ni_sti(void);

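/*
 * Interrupt disable/enable helpers that save and restore the caller's
 * scratch registers around the PV __cli()/__sti() operations, presumably
 * so they can be invoked from paths (e.g. assembly stubs) that expect
 * those registers to survive the call.
 */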
void
ni_cli(void)
{
        CTR0(KTR_SPARE2, "ni_cli disabling interrupts");
        __asm__("pushl %edx;"
                "pushl %eax;"
                );
        __cli();
        __asm__("popl %eax;"
                "popl %edx;"
                );
}

void
ni_sti(void)
{
        __asm__("pushl %edx;"
                "pushl %esi;"
                "pushl %eax;"
                );
        __sti();
        __asm__("popl %eax;"
                "popl %esi;"
                "popl %edx;"
                );
}

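/*
 * Issue a harmless hypercall; taking the hypercall path forces any pending
 * event-channel upcall to be delivered on return to the guest.
 */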
void
force_evtchn_callback(void)
{
        (void)HYPERVISOR_xen_version(0, NULL);
}

/*
 * Modify the cmd_line by converting ',' to NULs so that it is in a format
 * suitable for the static env vars.
 */
char *
xen_setbootenv(char *cmd_line)
{
        char *cmd_line_next;

        /* Skip leading spaces */
        for (; *cmd_line == ' '; cmd_line++);

        xc_printf("xen_setbootenv(): cmd_line='%s'\n", cmd_line);

        for (cmd_line_next = cmd_line; strsep(&cmd_line_next, ",") != NULL;);
        return cmd_line;
}

int
xen_boothowto(char *envp)
{
        int i, howto = 0;

        /* get equivalents from the environment */
        for (i = 0; howto_names[i].ev != NULL; i++)
                if (getenv(howto_names[i].ev) != NULL)
                        howto |= howto_names[i].mask;
        return howto;
}

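/*
 * Batched MMU updates: pmap operations are staged in xpq_queue and pushed
 * to the hypervisor in a single HYPERVISOR_mmu_update() call when the
 * queue is flushed.  With SMP each VCPU has its own queue and index.
 */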
#define XPQUEUE_SIZE 128

struct mmu_log {
        char *file;
        int line;
};

#ifdef SMP
/* per-cpu queues and indices */
#ifdef INVARIANTS
static struct mmu_log xpq_queue_log[XEN_LEGACY_MAX_VCPUS][XPQUEUE_SIZE];
#endif

static int xpq_idx[XEN_LEGACY_MAX_VCPUS];
static mmu_update_t xpq_queue[XEN_LEGACY_MAX_VCPUS][XPQUEUE_SIZE];

#define XPQ_QUEUE_LOG xpq_queue_log[vcpu]
#define XPQ_QUEUE xpq_queue[vcpu]
#define XPQ_IDX xpq_idx[vcpu]
#define SET_VCPU() int vcpu = smp_processor_id()
#else

static mmu_update_t xpq_queue[XPQUEUE_SIZE];
#ifdef INVARIANTS
static struct mmu_log xpq_queue_log[XPQUEUE_SIZE];
#endif
static int xpq_idx = 0;

#define XPQ_QUEUE_LOG xpq_queue_log
#define XPQ_QUEUE xpq_queue
#define XPQ_IDX xpq_idx
#define SET_VCPU()
#endif /* !SMP */

#define XPQ_IDX_INC atomic_add_int(&XPQ_IDX, 1);

#if 0
static void
xen_dump_queue(void)
{
        int _xpq_idx = XPQ_IDX;
        int i;

        if (_xpq_idx <= 1)
                return;

        xc_printf("xen_dump_queue(): %u entries\n", _xpq_idx);
        for (i = 0; i < _xpq_idx; i++) {
                xc_printf(" val: %llx ptr: %llx\n", XPQ_QUEUE[i].val,
                    XPQ_QUEUE[i].ptr);
        }
}
#endif

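/*
 * Push all queued MMU updates to the hypervisor.  The queue index is reset
 * before the hypercall is issued so that a nested update cannot replay the
 * entries being submitted.
 */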
static __inline void
_xen_flush_queue(void)
{
        SET_VCPU();
        int _xpq_idx = XPQ_IDX;
        int error, i;

#ifdef INVARIANTS
        if (__predict_true(gdtset))
                CRITICAL_ASSERT(curthread);
#endif

        XPQ_IDX = 0;
        /* Make sure index is cleared first to avoid double updates. */
        error = HYPERVISOR_mmu_update((mmu_update_t *)&XPQ_QUEUE,
                                      _xpq_idx, NULL, DOMID_SELF);

#if 0
        if (__predict_true(gdtset))
        for (i = _xpq_idx; i > 0;) {
                if (i >= 3) {
                        CTR6(KTR_PMAP, "mmu:val: %lx ptr: %lx val: %lx "
                            "ptr: %lx val: %lx ptr: %lx",
                            (XPQ_QUEUE[i-1].val & 0xffffffff),
                            (XPQ_QUEUE[i-1].ptr & 0xffffffff),
                            (XPQ_QUEUE[i-2].val & 0xffffffff),
                            (XPQ_QUEUE[i-2].ptr & 0xffffffff),
                            (XPQ_QUEUE[i-3].val & 0xffffffff),
                            (XPQ_QUEUE[i-3].ptr & 0xffffffff));
                            i -= 3;
                } else if (i == 2) {
                        CTR4(KTR_PMAP, "mmu: val: %lx ptr: %lx val: %lx ptr: %lx",
                            (XPQ_QUEUE[i-1].val & 0xffffffff),
                            (XPQ_QUEUE[i-1].ptr & 0xffffffff),
                            (XPQ_QUEUE[i-2].val & 0xffffffff),
                            (XPQ_QUEUE[i-2].ptr & 0xffffffff));
                        i = 0;
                } else {
                        CTR2(KTR_PMAP, "mmu: val: %lx ptr: %lx",
                            (XPQ_QUEUE[i-1].val & 0xffffffff),
                            (XPQ_QUEUE[i-1].ptr & 0xffffffff));
                        i = 0;
                }
        }
#endif
        if (__predict_false(error < 0)) {
                for (i = 0; i < _xpq_idx; i++)
                        printf("val: %llx ptr: %llx\n",
                            XPQ_QUEUE[i].val, XPQ_QUEUE[i].ptr);
                panic("Failed to execute MMU updates: %d", error);
        }

}

void
xen_flush_queue(void)
{
        SET_VCPU();

        if (__predict_true(gdtset))
                critical_enter();
        if (XPQ_IDX != 0) _xen_flush_queue();
        if (__predict_true(gdtset))
                critical_exit();
}

static __inline void
xen_increment_idx(void)
{
        SET_VCPU();

        XPQ_IDX++;
        if (__predict_false(XPQ_IDX == XPQUEUE_SIZE))
                xen_flush_queue();
}

void
xen_check_queue(void)
{
#ifdef INVARIANTS
        SET_VCPU();

        KASSERT(XPQ_IDX == 0, ("pending operations XPQ_IDX=%d", XPQ_IDX));
#endif
}

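/*
 * Ask the hypervisor to invalidate the TLB entry for va; MMUEXT_INVLPG_ALL
 * flushes the address from the TLBs of all of the domain's VCPUs.
 */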
void
xen_invlpg(vm_offset_t va)
{
        struct mmuext_op op;
        op.cmd = MMUEXT_INVLPG_ALL;
        op.arg1.linear_addr = va & ~PAGE_MASK;
        PANIC_IF(HYPERVISOR_mmuext_op(&op, 1, NULL, DOMID_SELF) < 0);
}

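/*
 * Install a new page-directory base: convert the physical address to a
 * machine frame number and hand it to the hypervisor via MMUEXT_NEW_BASEPTR.
 * Any queued page-table updates must already have been flushed.
 */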
void
xen_load_cr3(u_int val)
{
        struct mmuext_op op;
#ifdef INVARIANTS
        SET_VCPU();

        KASSERT(XPQ_IDX == 0, ("pending operations XPQ_IDX=%d", XPQ_IDX));
#endif
        op.cmd = MMUEXT_NEW_BASEPTR;
        op.arg1.mfn = xpmap_ptom(val) >> PAGE_SHIFT;
        PANIC_IF(HYPERVISOR_mmuext_op(&op, 1, NULL, DOMID_SELF) < 0);
}

#ifdef KTR
static __inline u_int
rebp(void)
{
        u_int   data;

        __asm __volatile("movl 4(%%ebp),%0" : "=r" (data));
        return (data);
}
#endif

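/*
 * PV versions of the eflags accessors: under Xen the interrupt-enable state
 * is tracked by the per-VCPU event-channel mask rather than the hardware
 * PSL_I bit, so it is folded into the read and propagated back on write.
 */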
u_int
read_eflags(void)
{
        vcpu_info_t *_vcpu;
        u_int eflags;

        eflags = _read_eflags();
        _vcpu = &HYPERVISOR_shared_info->vcpu_info[smp_processor_id()];
        if (_vcpu->evtchn_upcall_mask)
                eflags &= ~PSL_I;

        return (eflags);
}

void
write_eflags(u_int eflags)
{
        u_int intr;

        CTR2(KTR_SPARE2, "%x xen_restore_flags eflags %x", rebp(), eflags);
        intr = ((eflags & PSL_I) == 0);
        __restore_flags(intr);
        _write_eflags(eflags);
}

void
xen_cli(void)
{
        CTR1(KTR_SPARE2, "%x xen_cli disabling interrupts", rebp());
        __cli();
}

void
xen_sti(void)
{
        CTR1(KTR_SPARE2, "%x xen_sti enabling interrupts", rebp());
        __sti();
}

u_int
xen_rcr2(void)
{

        return (HYPERVISOR_shared_info->vcpu_info[curcpu].arch.cr2);
}

void
_xen_machphys_update(vm_paddr_t mfn, vm_paddr_t pfn, char *file, int line)
{
        SET_VCPU();

        if (__predict_true(gdtset))
                critical_enter();
        XPQ_QUEUE[XPQ_IDX].ptr = (mfn << PAGE_SHIFT) | MMU_MACHPHYS_UPDATE;
        XPQ_QUEUE[XPQ_IDX].val = pfn;
#ifdef INVARIANTS
        XPQ_QUEUE_LOG[XPQ_IDX].file = file;
        XPQ_QUEUE_LOG[XPQ_IDX].line = line;
#endif
        xen_increment_idx();
        if (__predict_true(gdtset))
                critical_exit();
}

extern struct rwlock pvh_global_lock;

void
_xen_queue_pt_update(vm_paddr_t ptr, vm_paddr_t val, char *file, int line)
{
        SET_VCPU();

        if (__predict_true(gdtset))
                rw_assert(&pvh_global_lock, RA_WLOCKED);

        KASSERT((ptr & 7) == 0, ("misaligned update"));

        if (__predict_true(gdtset))
                critical_enter();

        XPQ_QUEUE[XPQ_IDX].ptr = ((uint64_t)ptr) | MMU_NORMAL_PT_UPDATE;
        XPQ_QUEUE[XPQ_IDX].val = (uint64_t)val;
#ifdef INVARIANTS
        XPQ_QUEUE_LOG[XPQ_IDX].file = file;
        XPQ_QUEUE_LOG[XPQ_IDX].line = line;
#endif
        xen_increment_idx();
        if (__predict_true(gdtset))
                critical_exit();
}

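/*
 * Page-table pin/unpin and related mmuext helpers.  Pinning registers the
 * frame with the hypervisor as a validated page table of the given level;
 * the update queue is flushed first so that pending entries are applied
 * before the operation.
 */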
void
xen_pgdpt_pin(vm_paddr_t ma)
{
        struct mmuext_op op;
        op.cmd = MMUEXT_PIN_L3_TABLE;
        op.arg1.mfn = ma >> PAGE_SHIFT;
        xen_flush_queue();
        PANIC_IF(HYPERVISOR_mmuext_op(&op, 1, NULL, DOMID_SELF) < 0);
}

void
xen_pgd_pin(vm_paddr_t ma)
{
        struct mmuext_op op;
        op.cmd = MMUEXT_PIN_L2_TABLE;
        op.arg1.mfn = ma >> PAGE_SHIFT;
        xen_flush_queue();
        PANIC_IF(HYPERVISOR_mmuext_op(&op, 1, NULL, DOMID_SELF) < 0);
}

void
xen_pgd_unpin(vm_paddr_t ma)
{
        struct mmuext_op op;
        op.cmd = MMUEXT_UNPIN_TABLE;
        op.arg1.mfn = ma >> PAGE_SHIFT;
        xen_flush_queue();
        PANIC_IF(HYPERVISOR_mmuext_op(&op, 1, NULL, DOMID_SELF) < 0);
}

void
xen_pt_pin(vm_paddr_t ma)
{
        struct mmuext_op op;
        op.cmd = MMUEXT_PIN_L1_TABLE;
        op.arg1.mfn = ma >> PAGE_SHIFT;
        xen_flush_queue();
        PANIC_IF(HYPERVISOR_mmuext_op(&op, 1, NULL, DOMID_SELF) < 0);
}

void
xen_pt_unpin(vm_paddr_t ma)
{
        struct mmuext_op op;
        op.cmd = MMUEXT_UNPIN_TABLE;
        op.arg1.mfn = ma >> PAGE_SHIFT;
        xen_flush_queue();
        PANIC_IF(HYPERVISOR_mmuext_op(&op, 1, NULL, DOMID_SELF) < 0);
}

void
xen_set_ldt(vm_paddr_t ptr, unsigned long len)
{
        struct mmuext_op op;
        op.cmd = MMUEXT_SET_LDT;
        op.arg1.linear_addr = ptr;
        op.arg2.nr_ents = len;
        xen_flush_queue();
        PANIC_IF(HYPERVISOR_mmuext_op(&op, 1, NULL, DOMID_SELF) < 0);
}

void
xen_tlb_flush(void)
{
        struct mmuext_op op;
        op.cmd = MMUEXT_TLB_FLUSH_LOCAL;
        xen_flush_queue();
        PANIC_IF(HYPERVISOR_mmuext_op(&op, 1, NULL, DOMID_SELF) < 0);
}

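/*
 * Update a GDT or LDT entry through the hypervisor.
 * HYPERVISOR_update_descriptor() takes the machine address of the
 * descriptor slot, which is derived here from the PTE mapping the table.
 */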
void
xen_update_descriptor(union descriptor *table, union descriptor *entry)
{
        vm_paddr_t pa;
        pt_entry_t *ptp;

        ptp = vtopte((vm_offset_t)table);
        pa = (*ptp & PG_FRAME) | ((vm_offset_t)table & PAGE_MASK);
        if (HYPERVISOR_update_descriptor(pa, *(uint64_t *)entry))
                panic("HYPERVISOR_update_descriptor failed\n");
}

#if 0
/*
 * Bitmap is indexed by page number. If bit is set, the page is part of a
 * xen_create_contiguous_region() area of memory.
 */
unsigned long *contiguous_bitmap;

static void
contiguous_bitmap_set(unsigned long first_page, unsigned long nr_pages)
{
        unsigned long start_off, end_off, curr_idx, end_idx;

        curr_idx  = first_page / BITS_PER_LONG;
        start_off = first_page & (BITS_PER_LONG-1);
        end_idx   = (first_page + nr_pages) / BITS_PER_LONG;
        end_off   = (first_page + nr_pages) & (BITS_PER_LONG-1);

        if (curr_idx == end_idx) {
                contiguous_bitmap[curr_idx] |=
                        ((1UL<<end_off)-1) & -(1UL<<start_off);
        } else {
                contiguous_bitmap[curr_idx] |= -(1UL<<start_off);
                while ( ++curr_idx < end_idx )
                        contiguous_bitmap[curr_idx] = ~0UL;
                contiguous_bitmap[curr_idx] |= (1UL<<end_off)-1;
        }
}

static void
contiguous_bitmap_clear(unsigned long first_page, unsigned long nr_pages)
{
        unsigned long start_off, end_off, curr_idx, end_idx;

        curr_idx  = first_page / BITS_PER_LONG;
        start_off = first_page & (BITS_PER_LONG-1);
        end_idx   = (first_page + nr_pages) / BITS_PER_LONG;
        end_off   = (first_page + nr_pages) & (BITS_PER_LONG-1);

        if (curr_idx == end_idx) {
                contiguous_bitmap[curr_idx] &=
                        -(1UL<<end_off) | ((1UL<<start_off)-1);
        } else {
                contiguous_bitmap[curr_idx] &= (1UL<<start_off)-1;
                while ( ++curr_idx != end_idx )
                        contiguous_bitmap[curr_idx] = 0;
                contiguous_bitmap[curr_idx] &= -(1UL<<end_off);
        }
}
#endif

/* Ensure multi-page extents are contiguous in machine memory. */
int
xen_create_contiguous_region(vm_page_t pages, int npages)
{
        unsigned long  mfn, i, flags;
        int order;
        struct xen_memory_reservation reservation = {
                .nr_extents   = 1,
                .extent_order = 0,
                .domid        = DOMID_SELF
        };
        set_xen_guest_handle(reservation.extent_start, &mfn);

        balloon_lock(flags);

        /* can currently only handle power of two allocation */
        PANIC_IF(ffs(npages) != fls(npages));

        /* 0. determine order */
        order = (ffs(npages) == fls(npages)) ? fls(npages) - 1 : fls(npages);

        /* 1. give away machine pages. */
        for (i = 0; i < (1 << order); i++) {
                int pfn;
                pfn = VM_PAGE_TO_PHYS(&pages[i]) >> PAGE_SHIFT;
                mfn = PFNTOMFN(pfn);
                PFNTOMFN(pfn) = INVALID_P2M_ENTRY;
                PANIC_IF(HYPERVISOR_memory_op(XENMEM_decrease_reservation, &reservation) != 1);
        }

        /* 2. Get a new contiguous memory extent. */
        reservation.extent_order = order;
        /*
         * xenlinux hardcodes this because of aacraid - maybe set to 0 if
         * we're not running with a broken driver XXXEN
         */
        reservation.address_bits = 31;
        if (HYPERVISOR_memory_op(XENMEM_increase_reservation, &reservation) != 1)
                goto fail;

        /* 3. Map the new extent in place of old pages. */
        for (i = 0; i < (1 << order); i++) {
                int pfn;
                pfn = VM_PAGE_TO_PHYS(&pages[i]) >> PAGE_SHIFT;
                xen_machphys_update(mfn+i, pfn);
                PFNTOMFN(pfn) = mfn+i;
        }

        xen_tlb_flush();

#if 0
        contiguous_bitmap_set(VM_PAGE_TO_PHYS(&pages[0]) >> PAGE_SHIFT, 1UL << order);
#endif

        balloon_unlock(flags);

        return 0;

 fail:
        reservation.extent_order = 0;
        reservation.address_bits = 0;

        for (i = 0; i < (1 << order); i++) {
                int pfn;
                pfn = VM_PAGE_TO_PHYS(&pages[i]) >> PAGE_SHIFT;
                PANIC_IF(HYPERVISOR_memory_op(
                        XENMEM_increase_reservation, &reservation) != 1);
                xen_machphys_update(mfn, pfn);
                PFNTOMFN(pfn) = mfn;
        }

        xen_tlb_flush();

        balloon_unlock(flags);

        return ENOMEM;
}

void
xen_destroy_contiguous_region(void *addr, int npages)
{
        unsigned long  mfn, i, flags, order, pfn0;
        struct xen_memory_reservation reservation = {
                .nr_extents   = 1,
                .extent_order = 0,
                .domid        = DOMID_SELF
        };
        set_xen_guest_handle(reservation.extent_start, &mfn);

        pfn0 = vtophys(addr) >> PAGE_SHIFT;
#if 0
        scrub_pages(vstart, 1 << order);
#endif
        /* can currently only handle power of two allocation */
        PANIC_IF(ffs(npages) != fls(npages));

        /* 0. determine order */
        order = (ffs(npages) == fls(npages)) ? fls(npages) - 1 : fls(npages);

        balloon_lock(flags);

#if 0
        contiguous_bitmap_clear(vtophys(addr) >> PAGE_SHIFT, 1UL << order);
#endif

        /* 1. Zap current PTEs, giving away the underlying pages. */
        for (i = 0; i < (1 << order); i++) {
                int pfn;
                uint64_t new_val = 0;
                pfn = vtomach((char *)addr + i*PAGE_SIZE) >> PAGE_SHIFT;

                PANIC_IF(HYPERVISOR_update_va_mapping((vm_offset_t)((char *)addr + (i * PAGE_SIZE)), new_val, 0));
                PFNTOMFN(pfn) = INVALID_P2M_ENTRY;
                PANIC_IF(HYPERVISOR_memory_op(
                        XENMEM_decrease_reservation, &reservation) != 1);
        }

        /* 2. Map new pages in place of old pages. */
        for (i = 0; i < (1 << order); i++) {
                int pfn;
                uint64_t new_val;
                pfn = pfn0 + i;
                PANIC_IF(HYPERVISOR_memory_op(XENMEM_increase_reservation, &reservation) != 1);

                new_val = mfn << PAGE_SHIFT;
                PANIC_IF(HYPERVISOR_update_va_mapping((vm_offset_t)addr + (i * PAGE_SIZE),
                                                      new_val, PG_KERNEL));
                xen_machphys_update(mfn, pfn);
                PFNTOMFN(pfn) = mfn;
        }

        xen_tlb_flush();

        balloon_unlock(flags);
}

extern  vm_offset_t     proc0kstack;
extern int vm86paddr, vm86phystk;
char *bootmem_start, *bootmem_current, *bootmem_end;

pteinfo_t *pteinfo_list;
void initvalues(start_info_t *startinfo);

struct xenstore_domain_interface;
extern struct xenstore_domain_interface *xen_store;

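/*
 * Minimal bump allocator over the boot-time scratch region reserved in
 * initvalues().  Frees must come back in strict LIFO order.
 */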
void *
bootmem_alloc(unsigned int size)
{
        char *retptr;

        retptr = bootmem_current;
        PANIC_IF(retptr + size > bootmem_end);
        bootmem_current += size;

        return retptr;
}

void
bootmem_free(void *ptr, unsigned int size)
{
        char *tptr;

        tptr = ptr;
        PANIC_IF(tptr != bootmem_current - size ||
                bootmem_current - size < bootmem_start);

        bootmem_current -= size;
}

#if 0
static vm_paddr_t
xpmap_mtop2(vm_paddr_t mpa)
{
        return ((machine_to_phys_mapping[mpa >> PAGE_SHIFT] << PAGE_SHIFT)
            ) | (mpa & ~PG_FRAME);
}

static pd_entry_t
xpmap_get_bootpde(vm_paddr_t va)
{

        return ((pd_entry_t *)xen_start_info->pt_base)[va >> 22];
}

static pd_entry_t
xpmap_get_vbootpde(vm_paddr_t va)
{
        pd_entry_t pde;

        pde = xpmap_get_bootpde(va);
        if ((pde & PG_V) == 0)
                return (pde & ~PG_FRAME);
        return (pde & ~PG_FRAME) |
                (xpmap_mtop2(pde & PG_FRAME) + KERNBASE);
}

static pt_entry_t *
xpmap_get_bootptep(vm_paddr_t va)
{
        pd_entry_t pde;

        pde = xpmap_get_vbootpde(va);
        if ((pde & PG_V) == 0)
                return (void *)-1;
#define PT_MASK         0x003ff000      /* page table address bits */
        return &(((pt_entry_t *)(pde & PG_FRAME))[(va & PT_MASK) >> PAGE_SHIFT]);
}

static pt_entry_t
xpmap_get_bootpte(vm_paddr_t va)
{

        return xpmap_get_bootptep(va)[0];
}
#endif

#ifdef ADD_ISA_HOLE
static void
shift_phys_machine(unsigned long *phys_machine, int nr_pages)
{

        unsigned long *tmp_page, *current_page, *next_page;
        int i;

        tmp_page = bootmem_alloc(PAGE_SIZE);
        current_page = phys_machine + nr_pages - (PAGE_SIZE/sizeof(unsigned long));
        next_page = current_page - (PAGE_SIZE/sizeof(unsigned long));
        bcopy(phys_machine, tmp_page, PAGE_SIZE);

        while (current_page > phys_machine) {
                /*  save next page */
                bcopy(next_page, tmp_page, PAGE_SIZE);
                /* shift down page */
                bcopy(current_page, next_page, PAGE_SIZE);
                /*  finish swap */
                bcopy(tmp_page, current_page, PAGE_SIZE);

                current_page -= (PAGE_SIZE/sizeof(unsigned long));
                next_page -= (PAGE_SIZE/sizeof(unsigned long));
        }
        bootmem_free(tmp_page, PAGE_SIZE);

        for (i = 0; i < nr_pages; i++) {
                xen_machphys_update(phys_machine[i], i);
        }
        memset(phys_machine, INVALID_P2M_ENTRY, PAGE_SIZE);

}
#endif /* ADD_ISA_HOLE */

/*
 * Build a directory of the pages that make up our Physical to Machine
 * mapping table. The Xen suspend/restore code uses this to find our
 * mapping table.
 */
static void
init_frame_list_list(void *arg)
{
        unsigned long nr_pages = xen_start_info->nr_pages;
#define FPP     (PAGE_SIZE/sizeof(xen_pfn_t))
        int i, j, k;

        xen_pfn_to_mfn_frame_list_list = malloc(PAGE_SIZE, M_DEVBUF, M_WAITOK);
        for (i = 0, j = 0, k = -1; i < nr_pages;
             i += FPP, j++) {
                if ((j & (FPP - 1)) == 0) {
                        k++;
                        xen_pfn_to_mfn_frame_list[k] =
                                malloc(PAGE_SIZE, M_DEVBUF, M_WAITOK);
                        xen_pfn_to_mfn_frame_list_list[k] =
                                VTOMFN(xen_pfn_to_mfn_frame_list[k]);
                        j = 0;
                }
                xen_pfn_to_mfn_frame_list[k][j] =
                        VTOMFN(&xen_phys_machine[i]);
        }

        HYPERVISOR_shared_info->arch.max_pfn = nr_pages;
        HYPERVISOR_shared_info->arch.pfn_to_mfn_frame_list_list
                = VTOMFN(xen_pfn_to_mfn_frame_list_list);
}
SYSINIT(init_fll, SI_SUB_DEVFS, SI_ORDER_ANY, init_frame_list_list, NULL);

extern unsigned long physfree;

int pdir, curoffset;
extern int nkpt;

extern uint32_t kernbase;

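/*
 * Early PV bootstrap.  Starting from the page tables and start_info handed
 * to us by the hypervisor, this sizes nkpt, allocates and pins a fresh set
 * of page tables (PAE PDPT plus page directories), remaps kernel text
 * read-only, maps the shared_info page, the xenstore ring and the console
 * ring, reserves the proc0 kstack and vm86 areas, and sets up the boot-time
 * bump allocator before handing the rest of memory to the VM system.
 */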
void
initvalues(start_info_t *startinfo)
{
        vm_offset_t cur_space, cur_space_pt;
        struct physdev_set_iopl set_iopl;

        int l3_pages, l2_pages, l1_pages, offset;
        vm_paddr_t console_page_ma, xen_store_ma;
        vm_offset_t tmpva;
        vm_paddr_t shinfo;
#ifdef PAE
        vm_paddr_t IdlePDPTma, IdlePDPTnewma;
        vm_paddr_t IdlePTDnewma[4];
        pd_entry_t *IdlePDPTnew, *IdlePTDnew;
        vm_paddr_t IdlePTDma[4];
#else
        vm_paddr_t IdlePTDma[1];
#endif
        unsigned long i;
        int ncpus = MAXCPU;

        nkpt = min(
                min(
                        max((startinfo->nr_pages >> NPGPTD_SHIFT), nkpt),
                    NPGPTD*NPDEPG - KPTDI),
                    (HYPERVISOR_VIRT_START - KERNBASE) >> PDRSHIFT);

        HYPERVISOR_vm_assist(VMASST_CMD_enable, VMASST_TYPE_4gb_segments);
#ifdef notyet
        /*
         * need to install handler
         */
        HYPERVISOR_vm_assist(VMASST_CMD_enable, VMASST_TYPE_4gb_segments_notify);
#endif
        xen_start_info = startinfo;
        HYPERVISOR_start_info = startinfo;
        xen_phys_machine = (xen_pfn_t *)startinfo->mfn_list;

        IdlePTD = (pd_entry_t *)((uint8_t *)startinfo->pt_base + PAGE_SIZE);
        l1_pages = 0;

#ifdef PAE
        l3_pages = 1;
        l2_pages = 0;
        IdlePDPT = (pd_entry_t *)startinfo->pt_base;
        IdlePDPTma = VTOM(startinfo->pt_base);
        for (i = (KERNBASE >> 30);
             (i < 4) && (IdlePDPT[i] != 0); i++)
                        l2_pages++;
        /*
         * Note that only one page directory has been allocated at this point.
         * Thus, if KERNBASE
         */
        for (i = 0; i < l2_pages; i++)
                IdlePTDma[i] = VTOM(IdlePTD + i*PAGE_SIZE);

        l2_pages = (l2_pages == 0) ? 1 : l2_pages;
#else
        l3_pages = 0;
        l2_pages = 1;
#endif
        for (i = (((KERNBASE>>18) & PAGE_MASK)>>PAGE_SHIFT);
             (i<l2_pages*NPDEPG) && (i<(VM_MAX_KERNEL_ADDRESS>>PDRSHIFT)); i++) {

                if (IdlePTD[i] == 0)
                        break;
                l1_pages++;
        }

        /* number of pages allocated after the pts + 1 */
        cur_space = xen_start_info->pt_base +
            (l3_pages + l2_pages + l1_pages + 1)*PAGE_SIZE;

        xc_printf("initvalues(): wooh - availmem=%x,%x\n", avail_space,
            cur_space);

        xc_printf("KERNBASE=%x,pt_base=%lx, VTOPFN(base)=%x, nr_pt_frames=%lx\n",
            KERNBASE,xen_start_info->pt_base, VTOPFN(xen_start_info->pt_base),
            xen_start_info->nr_pt_frames);
        xendebug_flags = 0; /* 0xffffffff; */

#ifdef ADD_ISA_HOLE
        shift_phys_machine(xen_phys_machine, xen_start_info->nr_pages);
#endif
        XENPRINTF("IdlePTD %p\n", IdlePTD);
        XENPRINTF("nr_pages: %ld shared_info: 0x%lx flags: 0x%x pt_base: 0x%lx "
                  "mod_start: 0x%lx mod_len: 0x%lx\n",
                  xen_start_info->nr_pages, xen_start_info->shared_info,
                  xen_start_info->flags, xen_start_info->pt_base,
                  xen_start_info->mod_start, xen_start_info->mod_len);

#ifdef PAE
        IdlePDPTnew = (pd_entry_t *)cur_space; cur_space += PAGE_SIZE;
        bzero(IdlePDPTnew, PAGE_SIZE);

        IdlePDPTnewma =  VTOM(IdlePDPTnew);
        IdlePTDnew = (pd_entry_t *)cur_space; cur_space += 4*PAGE_SIZE;
        bzero(IdlePTDnew, 4*PAGE_SIZE);

        for (i = 0; i < 4; i++)
                IdlePTDnewma[i] = VTOM((uint8_t *)IdlePTDnew + i*PAGE_SIZE);
        /*
         * L3
         *
         * Copy the 4 machine addresses of the new PTDs in to the PDPT
         *
         */
        for (i = 0; i < 4; i++)
                IdlePDPTnew[i] = IdlePTDnewma[i] | PG_V;

        __asm__("nop;");
        /*
         *
         * re-map the new PDPT read-only
         */
        PT_SET_MA(IdlePDPTnew, IdlePDPTnewma | PG_V);
        /*
         *
         * Unpin the current PDPT
         */
        xen_pt_unpin(IdlePDPTma);

#endif  /* PAE */

        /* Map proc0's KSTACK */
        proc0kstack = cur_space; cur_space += (KSTACK_PAGES * PAGE_SIZE);
        xc_printf("proc0kstack=%u\n", proc0kstack);

        /* vm86/bios stack */
        cur_space += PAGE_SIZE;

        /* Map space for the vm86 region */
        vm86paddr = (vm_offset_t)cur_space;
        cur_space += (PAGE_SIZE * 3);

        /* allocate 4 pages for bootmem allocator */
        bootmem_start = bootmem_current = (char *)cur_space;
        cur_space += (4 * PAGE_SIZE);
        bootmem_end = (char *)cur_space;

        /* allocate pages for gdt */
        gdt = (union descriptor *)cur_space;
        cur_space += PAGE_SIZE*ncpus;

        /* allocate page for ldt */
        ldt = (union descriptor *)cur_space; cur_space += PAGE_SIZE;
        cur_space += PAGE_SIZE;

        /*
         * unmap remaining pages from initial chunk
         */
        for (tmpva = cur_space; tmpva < (((uint32_t)&kernbase) + (l1_pages<<PDRSHIFT));
             tmpva += PAGE_SIZE) {
                bzero((char *)tmpva, PAGE_SIZE);
                PT_SET_MA(tmpva, (vm_paddr_t)0);
        }

        PT_UPDATES_FLUSH();

        memcpy(((uint8_t *)IdlePTDnew) + ((unsigned int)(KERNBASE >> 18)),
            ((uint8_t *)IdlePTD) + ((KERNBASE >> 18) & PAGE_MASK),
            l1_pages*sizeof(pt_entry_t));

        for (i = 0; i < 4; i++) {
                PT_SET_MA((uint8_t *)IdlePTDnew + i*PAGE_SIZE,
                    IdlePTDnewma[i] | PG_V);
        }
        xen_load_cr3(VTOP(IdlePDPTnew));
        xen_pgdpt_pin(VTOM(IdlePDPTnew));

        /* allocate remainder of nkpt pages */
        cur_space_pt = cur_space;
        for (offset = (KERNBASE >> PDRSHIFT), i = l1_pages; i < nkpt;
             i++, cur_space += PAGE_SIZE) {
                pdir = (offset + i) / NPDEPG;
                curoffset = ((offset + i) % NPDEPG);
                if (((offset + i) << PDRSHIFT) == VM_MAX_KERNEL_ADDRESS)
                        break;

                /*
                 * make sure that all the initial page table pages
                 * have been zeroed
                 */
                PT_SET_MA(cur_space, VTOM(cur_space) | PG_V | PG_RW);
                bzero((char *)cur_space, PAGE_SIZE);
                PT_SET_MA(cur_space, (vm_paddr_t)0);
                xen_pt_pin(VTOM(cur_space));
                xen_queue_pt_update((vm_paddr_t)(IdlePTDnewma[pdir] +
                        curoffset*sizeof(vm_paddr_t)),
                    VTOM(cur_space) | PG_KERNEL);
                PT_UPDATES_FLUSH();
        }

        for (i = 0; i < 4; i++) {
                pdir = (PTDPTDI + i) / NPDEPG;
                curoffset = (PTDPTDI + i) % NPDEPG;

                xen_queue_pt_update((vm_paddr_t)(IdlePTDnewma[pdir] +
                        curoffset*sizeof(vm_paddr_t)),
                    IdlePTDnewma[i] | PG_V);
        }

        PT_UPDATES_FLUSH();

        IdlePTD = IdlePTDnew;
        IdlePDPT = IdlePDPTnew;
        IdlePDPTma = IdlePDPTnewma;

        HYPERVISOR_shared_info = (shared_info_t *)cur_space;
        cur_space += PAGE_SIZE;

        xen_store = (struct xenstore_domain_interface *)cur_space;
        cur_space += PAGE_SIZE;

        console_page = (char *)cur_space;
        cur_space += PAGE_SIZE;

        /*
         * shared_info is an unsigned long so this will randomly break if
         * it is allocated above 4GB - I guess people are used to that
         * sort of thing with Xen ... sigh
         */
        shinfo = xen_start_info->shared_info;
        PT_SET_MA(HYPERVISOR_shared_info, shinfo | PG_KERNEL);

        xc_printf("#4\n");

        xen_store_ma = (((vm_paddr_t)xen_start_info->store_mfn) << PAGE_SHIFT);
        PT_SET_MA(xen_store, xen_store_ma | PG_KERNEL);
        console_page_ma = (((vm_paddr_t)xen_start_info->console.domU.mfn) << PAGE_SHIFT);
        PT_SET_MA(console_page, console_page_ma | PG_KERNEL);

        xc_printf("#5\n");

        set_iopl.iopl = 1;
        PANIC_IF(HYPERVISOR_physdev_op(PHYSDEVOP_SET_IOPL, &set_iopl));
        xc_printf("#6\n");
#if 0
        /* add page table for KERNBASE */
        xen_queue_pt_update(IdlePTDma + KPTDI*sizeof(vm_paddr_t),
                            VTOM(cur_space) | PG_KERNEL);
        xen_flush_queue();
#ifdef PAE
        xen_queue_pt_update(pdir_shadow_ma[3] + KPTDI*sizeof(vm_paddr_t),
                            VTOM(cur_space) | PG_V | PG_A);
#else
        xen_queue_pt_update(pdir_shadow_ma + KPTDI*sizeof(vm_paddr_t),
                            VTOM(cur_space) | PG_V | PG_A);
#endif
        xen_flush_queue();
        cur_space += PAGE_SIZE;
        xc_printf("#6\n");
#endif /* 0 */
#ifdef notyet
        if (xen_start_info->flags & SIF_INITDOMAIN) {
                /* Map first megabyte */
                for (i = 0; i < (256 << PAGE_SHIFT); i += PAGE_SIZE)
                        PT_SET_MA(KERNBASE + i, i | PG_KERNEL | PG_NC_PCD);
                xen_flush_queue();
        }
#endif
        /*
         * re-map kernel text read-only
         */
        for (i = (((vm_offset_t)&btext) & ~PAGE_MASK);
             i < (((vm_offset_t)&etext) & ~PAGE_MASK); i += PAGE_SIZE)
                PT_SET_MA(i, VTOM(i) | PG_V | PG_A);

        xc_printf("#7\n");
        physfree = VTOP(cur_space);
        init_first = physfree >> PAGE_SHIFT;
        IdlePTD = (pd_entry_t *)VTOP(IdlePTD);
        IdlePDPT = (pd_entry_t *)VTOP(IdlePDPT);
        setup_xen_features();
        xc_printf("#8, proc0kstack=%u\n", proc0kstack);
}

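/*
 * Trap table handed to the hypervisor in place of a native IDT; entries
 * reuse the bare-metal handlers.  In the second field the low two bits give
 * the privilege level allowed to invoke the vector and the 4 bit appears to
 * request interrupt-gate behaviour (event delivery masked on entry).
 */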
trap_info_t trap_table[] = {
        { 0,   0, GSEL(GCODE_SEL, SEL_KPL), (unsigned long) &IDTVEC(div)},
        { 1,   0|4, GSEL(GCODE_SEL, SEL_KPL), (unsigned long) &IDTVEC(dbg)},
        { 3,   3|4, GSEL(GCODE_SEL, SEL_KPL), (unsigned long) &IDTVEC(bpt)},
        { 4,   3, GSEL(GCODE_SEL, SEL_KPL), (unsigned long) &IDTVEC(ofl)},
        /* This is UPL on Linux and KPL on BSD */
        { 5,   3, GSEL(GCODE_SEL, SEL_KPL), (unsigned long) &IDTVEC(bnd)},
        { 6,   0, GSEL(GCODE_SEL, SEL_KPL), (unsigned long) &IDTVEC(ill)},
        { 7,   0|4, GSEL(GCODE_SEL, SEL_KPL), (unsigned long) &IDTVEC(dna)},
        /*
         * { 8,   0, GSEL(GCODE_SEL, SEL_KPL), (unsigned long) &IDTVEC(XXX)},
         *   no handler for double fault
         */
        { 9,   0, GSEL(GCODE_SEL, SEL_KPL), (unsigned long) &IDTVEC(fpusegm)},
        {10,   0, GSEL(GCODE_SEL, SEL_KPL), (unsigned long) &IDTVEC(tss)},
        {11,   0, GSEL(GCODE_SEL, SEL_KPL), (unsigned long) &IDTVEC(missing)},
        {12,   0, GSEL(GCODE_SEL, SEL_KPL), (unsigned long) &IDTVEC(stk)},
        {13,   0, GSEL(GCODE_SEL, SEL_KPL), (unsigned long) &IDTVEC(prot)},
        {14,   0|4, GSEL(GCODE_SEL, SEL_KPL), (unsigned long) &IDTVEC(page)},
        {15,   0, GSEL(GCODE_SEL, SEL_KPL), (unsigned long) &IDTVEC(rsvd)},
        {16,   0, GSEL(GCODE_SEL, SEL_KPL), (unsigned long) &IDTVEC(fpu)},
        {17,   0, GSEL(GCODE_SEL, SEL_KPL), (unsigned long) &IDTVEC(align)},
        {18,   0, GSEL(GCODE_SEL, SEL_KPL), (unsigned long) &IDTVEC(mchk)},
        {19,   0, GSEL(GCODE_SEL, SEL_KPL), (unsigned long) &IDTVEC(xmm)},
        {0x80, 3, GSEL(GCODE_SEL, SEL_KPL), (unsigned long) &IDTVEC(int0x80_syscall)},
        {  0, 0,           0, 0 }
};

/* Perform a multicall and check that individual calls succeeded. */
int
HYPERVISOR_multicall(struct multicall_entry * call_list, int nr_calls)
{
        int ret = 0;
        int i;

        /* Perform the multicall. */
        PANIC_IF(_HYPERVISOR_multicall(call_list, nr_calls));

        /* Check the results of individual hypercalls. */
        for (i = 0; i < nr_calls; i++)
                if (__predict_false(call_list[i].result < 0))
                        ret++;
        if (__predict_false(ret > 0))
                panic("%d multicall(s) failed: cpu %d\n",
                    ret, smp_processor_id());

        /* If we didn't panic already, everything succeeded. */
        return (0);
}

/********** CODE WORTH KEEPING ABOVE HERE *****************/

void xen_failsafe_handler(void);

void
xen_failsafe_handler(void)
{

        panic("xen_failsafe_handler called!\n");
}

void xen_handle_thread_switch(struct pcb *pcb);

/*
 * This is called by cpu_switch() when switching threads.
 * The pcb arg refers to the process control block of the
 * next thread which is to run.
 */
void
xen_handle_thread_switch(struct pcb *pcb)
{
    uint32_t *a = (uint32_t *)&PCPU_GET(fsgs_gdt)[0];
    uint32_t *b = (uint32_t *)&pcb->pcb_fsd;
    multicall_entry_t mcl[3];
    int i = 0;

    /* Notify Xen of task switch */
    mcl[i].op = __HYPERVISOR_stack_switch;
    mcl[i].args[0] = GSEL(GDATA_SEL, SEL_KPL);
    mcl[i++].args[1] = (unsigned long)pcb;

    /* Check for update of fsd */
    if (*a != *b || *(a+1) != *(b+1)) {
        mcl[i].op = __HYPERVISOR_update_descriptor;
        *(uint64_t *)&mcl[i].args[0] = vtomach((vm_offset_t)a);
        *(uint64_t *)&mcl[i++].args[2] = *(uint64_t *)b;
    }

    a += 2;
    b += 2;

    /* Check for update of gsd */
    if (*a != *b || *(a+1) != *(b+1)) {
        mcl[i].op = __HYPERVISOR_update_descriptor;
        *(uint64_t *)&mcl[i].args[0] = vtomach((vm_offset_t)a);
        *(uint64_t *)&mcl[i++].args[2] = *(uint64_t *)b;
    }

    (void)HYPERVISOR_multicall(mcl, i);
}