/*-
 * SPDX-License-Identifier: BSD-2-Clause
 *
 * Copyright (c) 1997 Jonathan Lemon
 * All rights reserved.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 *
 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
 * ARE DISCLAIMED.  IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
 * SUCH DAMAGE.
 */

#include <sys/cdefs.h>
#include <sys/param.h>
#include <sys/systm.h>
#include <sys/priv.h>
#include <sys/proc.h>
#include <sys/lock.h>
#include <sys/malloc.h>
#include <sys/mutex.h>

#include <vm/vm.h>
#include <vm/pmap.h>
#include <vm/vm_map.h>
#include <vm/vm_page.h>

#include <machine/md_var.h>
#include <machine/pcb.h>
#include <machine/pcb_ext.h>
#include <machine/psl.h>
#include <machine/specialreg.h>
#include <machine/sysarch.h>

extern int vm86pa;
extern struct pcb *vm86pcb;

static struct mtx vm86_lock;

extern int vm86_bioscall(struct vm86frame *);
extern void vm86_biosret(struct vm86frame *);

void vm86_prepcall(struct vm86frame *);

struct system_map {
	int		type;
	vm_offset_t	start;
	vm_offset_t	end;
};

#define HLT	0xf4
#define CLI	0xfa
#define STI	0xfb
#define PUSHF	0x9c
#define POPF	0x9d
#define INTn	0xcd
#define IRET	0xcf
#define CALLm	0xff
#define OPERAND_SIZE_PREFIX	0x66
#define ADDRESS_SIZE_PREFIX	0x67
#define PUSH_MASK	~(PSL_VM | PSL_RF | PSL_I)
#define POP_MASK	~(PSL_VIP | PSL_VIF | PSL_VM | PSL_RF | PSL_IOPL)
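
/*
 * PUSH_MASK strips the bits that must not appear in the flags image a
 * vm86 guest sees on its stack (VM, RF, and the real PSL_I, which is
 * re-derived from the virtual interrupt flag); POP_MASK keeps a POPF or
 * IRET from changing VIP, VIF, VM, RF, or IOPL in the live flags.
 */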

/*
 * When the thread is in a critical section (as it is for the duration of
 * a vm86 BIOS call), these helpers access the vm86 memory directly;
 * otherwise they go through the fault-catching fu*()/su*() primitives.
 */
static int
vm86_suword16(volatile void *base, int word)
{

	if (curthread->td_critnest != 0) {
		*(volatile uint16_t *)base = word;
		return (0);
	}
	return (suword16(base, word));
}

static int
vm86_suword(volatile void *base, long word)
{

	if (curthread->td_critnest != 0) {
		*(volatile long *)base = word;
		return (0);
	}
	return (suword(base, word));
}

static int
vm86_fubyte(volatile const void *base)
{

	if (curthread->td_critnest != 0)
		return (*(volatile const u_char *)base);
	return (fubyte(base));
}

static int
vm86_fuword16(volatile const void *base)
{

	if (curthread->td_critnest != 0)
		return (*(volatile const uint16_t *)base);
	return (fuword16(base));
}

static long
vm86_fuword(volatile const void *base)
{

	if (curthread->td_critnest != 0)
		return (*(volatile const long *)base);
	return (fuword(base));
}

static __inline caddr_t
MAKE_ADDR(u_short sel, u_short off)
{
	return ((caddr_t)((sel << 4) + off));
}

static __inline void
GET_VEC(u_int vec, u_short *sel, u_short *off)
{
	*sel = vec >> 16;
	*off = vec & 0xffff;
}

static __inline u_int
MAKE_VEC(u_short sel, u_short off)
{
	return ((sel << 16) | off);
}
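
/*
 * For illustration: a real-mode pointer is a 16-bit segment:offset pair.
 * MAKE_ADDR(0xc000, 0x0010) yields linear address 0xc0010 (segment * 16
 * plus offset), while MAKE_VEC(0xc000, 0x0010) packs the same pair into
 * the 0xc0000010 layout used by interrupt-vector-table entries; GET_VEC
 * is its inverse.
 */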

static __inline void
PUSH(u_short x, struct vm86frame *vmf)
{
	vmf->vmf_sp -= 2;
	vm86_suword16(MAKE_ADDR(vmf->vmf_ss, vmf->vmf_sp), x);
}

static __inline void
PUSHL(u_int x, struct vm86frame *vmf)
{
	vmf->vmf_sp -= 4;
	vm86_suword(MAKE_ADDR(vmf->vmf_ss, vmf->vmf_sp), x);
}

static __inline u_short
POP(struct vm86frame *vmf)
{
	u_short x = vm86_fuword16(MAKE_ADDR(vmf->vmf_ss, vmf->vmf_sp));

	vmf->vmf_sp += 2;
	return (x);
}

static __inline u_int
POPL(struct vm86frame *vmf)
{
	u_int x = vm86_fuword(MAKE_ADDR(vmf->vmf_ss, vmf->vmf_sp));

	vmf->vmf_sp += 4;
	return (x);
}
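
/*
 * Emulate the privileged instruction that trapped while the CPU was in
 * vm86 mode.  The flag-manipulating instructions (CLI, STI, PUSHF, POPF,
 * INTn, IRET) are handled here so that real-mode code never sees or
 * modifies the real PSL_I.  Returns 0 (or SIGTRAP when single-stepping)
 * if the instruction was emulated and execution may resume, or a signal
 * number to deliver otherwise.
 */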
int
vm86_emulate(struct vm86frame *vmf)
{
	struct vm86_kernel *vm86;
	caddr_t addr;
	u_char i_byte;
	u_int temp_flags;
	int inc_ip = 1;
	int retcode = 0;

	/*
	 * pcb_ext contains the address of the extension area, or zero if
	 * the extension is not present.  (This check should not be needed,
	 * as we can't enter vm86 mode until we set up an extension area.)
	 */
	if (curpcb->pcb_ext == 0)
		return (SIGBUS);
	vm86 = &curpcb->pcb_ext->ext_vm86;

	if (vmf->vmf_eflags & PSL_T)
		retcode = SIGTRAP;

	addr = MAKE_ADDR(vmf->vmf_cs, vmf->vmf_ip);
	i_byte = vm86_fubyte(addr);
	if (i_byte == ADDRESS_SIZE_PREFIX) {
		i_byte = vm86_fubyte(++addr);
		inc_ip++;
	}

	if (vm86->vm86_has_vme) {
		switch (i_byte) {
		case OPERAND_SIZE_PREFIX:
			i_byte = vm86_fubyte(++addr);
			inc_ip++;
			switch (i_byte) {
			case PUSHF:
				if (vmf->vmf_eflags & PSL_VIF)
					PUSHL((vmf->vmf_eflags & PUSH_MASK)
					    | PSL_IOPL | PSL_I, vmf);
				else
					PUSHL((vmf->vmf_eflags & PUSH_MASK)
					    | PSL_IOPL, vmf);
				vmf->vmf_ip += inc_ip;
				return (retcode);

			case POPF:
				temp_flags = POPL(vmf) & POP_MASK;
				vmf->vmf_eflags = (vmf->vmf_eflags & ~POP_MASK)
				    | temp_flags | PSL_VM | PSL_I;
				vmf->vmf_ip += inc_ip;
				if (temp_flags & PSL_I) {
					vmf->vmf_eflags |= PSL_VIF;
					if (vmf->vmf_eflags & PSL_VIP)
						break;
				} else {
					vmf->vmf_eflags &= ~PSL_VIF;
				}
				return (retcode);
			}
			break;

		/* VME faults here if VIP is set, but does not set VIF. */
		case STI:
			vmf->vmf_eflags |= PSL_VIF;
			vmf->vmf_ip += inc_ip;
			if ((vmf->vmf_eflags & PSL_VIP) == 0) {
				uprintf("fatal sti\n");
				return (SIGKILL);
			}
			break;

		/* VME if no redirection support */
		case INTn:
			break;

		/* VME if trying to set PSL_T, or PSL_I when VIP is set */
		case POPF:
			temp_flags = POP(vmf) & POP_MASK;
			vmf->vmf_flags = (vmf->vmf_flags & ~POP_MASK)
			    | temp_flags | PSL_VM | PSL_I;
			vmf->vmf_ip += inc_ip;
			if (temp_flags & PSL_I) {
				vmf->vmf_eflags |= PSL_VIF;
				if (vmf->vmf_eflags & PSL_VIP)
					break;
			} else {
				vmf->vmf_eflags &= ~PSL_VIF;
			}
			return (retcode);

		/* VME if trying to set PSL_T, or PSL_I when VIP is set */
		case IRET:
			vmf->vmf_ip = POP(vmf);
			vmf->vmf_cs = POP(vmf);
			temp_flags = POP(vmf) & POP_MASK;
			vmf->vmf_flags = (vmf->vmf_flags & ~POP_MASK)
			    | temp_flags | PSL_VM | PSL_I;
			if (temp_flags & PSL_I) {
				vmf->vmf_eflags |= PSL_VIF;
				if (vmf->vmf_eflags & PSL_VIP)
					break;
			} else {
				vmf->vmf_eflags &= ~PSL_VIF;
			}
			return (retcode);
		}
		return (SIGBUS);
	}
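
	/*
	 * No VME support: emulate the interrupt-flag instructions entirely
	 * in software, tracking the virtual VIF/VIP state in
	 * vm86->vm86_eflags instead of the hardware flags register.
	 */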
	switch (i_byte) {
	case OPERAND_SIZE_PREFIX:
		i_byte = vm86_fubyte(++addr);
		inc_ip++;
		switch (i_byte) {
		case PUSHF:
			if (vm86->vm86_eflags & PSL_VIF)
				PUSHL((vmf->vmf_flags & PUSH_MASK)
				    | PSL_IOPL | PSL_I, vmf);
			else
				PUSHL((vmf->vmf_flags & PUSH_MASK)
				    | PSL_IOPL, vmf);
			vmf->vmf_ip += inc_ip;
			return (retcode);

		case POPF:
			temp_flags = POPL(vmf) & POP_MASK;
			vmf->vmf_eflags = (vmf->vmf_eflags & ~POP_MASK)
			    | temp_flags | PSL_VM | PSL_I;
			vmf->vmf_ip += inc_ip;
			if (temp_flags & PSL_I) {
				vm86->vm86_eflags |= PSL_VIF;
				if (vm86->vm86_eflags & PSL_VIP)
					break;
			} else {
				vm86->vm86_eflags &= ~PSL_VIF;
			}
			return (retcode);
		}
		return (SIGBUS);

	case CLI:
		vm86->vm86_eflags &= ~PSL_VIF;
		vmf->vmf_ip += inc_ip;
		return (retcode);

	case STI:
		/* if there is a pending interrupt, go to the emulator */
		vm86->vm86_eflags |= PSL_VIF;
		vmf->vmf_ip += inc_ip;
		if (vm86->vm86_eflags & PSL_VIP)
			break;
		return (retcode);

	case PUSHF:
		if (vm86->vm86_eflags & PSL_VIF)
			PUSH((vmf->vmf_flags & PUSH_MASK)
			    | PSL_IOPL | PSL_I, vmf);
		else
			PUSH((vmf->vmf_flags & PUSH_MASK) | PSL_IOPL, vmf);
		vmf->vmf_ip += inc_ip;
		return (retcode);

	case INTn:
		i_byte = vm86_fubyte(addr + 1);
		if ((vm86->vm86_intmap[i_byte >> 3] & (1 << (i_byte & 7))) != 0)
			break;
		if (vm86->vm86_eflags & PSL_VIF)
			PUSH((vmf->vmf_flags & PUSH_MASK)
			    | PSL_IOPL | PSL_I, vmf);
		else
			PUSH((vmf->vmf_flags & PUSH_MASK) | PSL_IOPL, vmf);
		PUSH(vmf->vmf_cs, vmf);
		PUSH(vmf->vmf_ip + inc_ip + 1, vmf);	/* return address */
		GET_VEC(vm86_fuword((caddr_t)(i_byte * 4)),
		    &vmf->vmf_cs, &vmf->vmf_ip);
		vmf->vmf_flags &= ~PSL_T;
		vm86->vm86_eflags &= ~PSL_VIF;
		return (retcode);

	case IRET:
		vmf->vmf_ip = POP(vmf);
		vmf->vmf_cs = POP(vmf);
		temp_flags = POP(vmf) & POP_MASK;
		vmf->vmf_flags = (vmf->vmf_flags & ~POP_MASK)
		    | temp_flags | PSL_VM | PSL_I;
		if (temp_flags & PSL_I) {
			vm86->vm86_eflags |= PSL_VIF;
			if (vm86->vm86_eflags & PSL_VIP)
				break;
		} else {
			vm86->vm86_eflags &= ~PSL_VIF;
		}
		return (retcode);

	case POPF:
		temp_flags = POP(vmf) & POP_MASK;
		vmf->vmf_flags = (vmf->vmf_flags & ~POP_MASK)
		    | temp_flags | PSL_VM | PSL_I;
		vmf->vmf_ip += inc_ip;
		if (temp_flags & PSL_I) {
			vm86->vm86_eflags |= PSL_VIF;
			if (vm86->vm86_eflags & PSL_VIP)
				break;
		} else {
			vm86->vm86_eflags &= ~PSL_VIF;
		}
		return (retcode);
	}
	return (SIGBUS);
}

#define PGTABLE_SIZE	((1024 + 64) * 1024 / PAGE_SIZE)
#define INTMAP_SIZE	32
#define IOMAP_SIZE	ctob(IOPAGES)
#define TSS_SIZE \
	(sizeof(struct pcb_ext) - sizeof(struct segment_descriptor) + \
	 INTMAP_SIZE + IOMAP_SIZE + 1)

struct vm86_layout_pae {
	uint64_t	vml_pgtbl[PGTABLE_SIZE];
	struct	pcb vml_pcb;
	struct	pcb_ext vml_ext;
	char	vml_intmap[INTMAP_SIZE];
	char	vml_iomap[IOMAP_SIZE];
	char	vml_iomap_trailer;
};

struct vm86_layout_nopae {
	uint32_t	vml_pgtbl[PGTABLE_SIZE];
	struct	pcb vml_pcb;
	struct	pcb_ext vml_ext;
	char	vml_intmap[INTMAP_SIZE];
	char	vml_iomap[IOMAP_SIZE];
	char	vml_iomap_trailer;
};

_Static_assert(sizeof(struct vm86_layout_pae) <= ctob(3),
    "struct vm86_layout_pae exceeds space allocated in locore.s");
_Static_assert(sizeof(struct vm86_layout_nopae) <= ctob(3),
    "struct vm86_layout_nopae exceeds space allocated in locore.s");

static void
vm86_initialize_pae(void)
{
	int i;
	u_int *addr;
	struct vm86_layout_pae *vml;
	struct pcb *pcb;
	struct pcb_ext *ext;
	struct soft_segment_descriptor ssd = {
		0,			/* segment base address (overwritten) */
		0,			/* length (overwritten) */
		SDT_SYS386TSS,		/* segment type */
		0,			/* privilege level */
		1,			/* descriptor present */
		0, 0,
		0,			/* default 16 size */
		0			/* granularity */
	};

	/*
	 * Below is the memory layout that we use for the vm86 region.
	 *
	 * +--------+
	 * |        |
	 * |        |
	 * | page 0 |
	 * |        | +--------+
	 * |        | | stack  |
	 * +--------+ +--------+ <--------- vm86paddr
	 * |        | |Page Tbl| 1M + 64K = 272 entries = 1088 bytes
	 * |        | +--------+
	 * |        | |  PCB   | size: ~240 bytes
	 * | page 1 | |PCB Ext | size: ~140 bytes (includes TSS)
	 * |        | +--------+
	 * |        | |int map |
	 * |        | +--------+
	 * +--------+ |        |
	 * | page 2 | |  I/O   |
	 * +--------+ | bitmap |
	 * | page 3 | |        |
	 * |        | +--------+
	 * +--------+
	 */

	/*
	 * A rudimentary PCB must be installed, in order to get to the
	 * PCB extension area.  We use the PCB area as a scratchpad for
	 * data storage, the layout of which is shown below.
	 *
	 * pcb_esi	= new PTD entry 0
	 * pcb_ebp	= pointer to frame on vm86 stack
	 * pcb_esp	=    stack frame pointer at time of switch
	 * pcb_ebx	= va of vm86 page table
	 * pcb_eip	=    argument pointer to initial call
	 * pcb_vm86[0]	=    saved TSS descriptor, word 0
	 * pcb_vm86[1]	=    saved TSS descriptor, word 1
	 */
#define new_ptd		pcb_esi
#define vm86_frame	pcb_ebp
#define pgtable_va	pcb_ebx

	vml = (struct vm86_layout_pae *)vm86paddr;
	pcb = &vml->vml_pcb;
	ext = &vml->vml_ext;

	mtx_init(&vm86_lock, "vm86 lock", NULL, MTX_DEF);

	bzero(pcb, sizeof(struct pcb));
	pcb->new_ptd = vm86pa | PG_V | PG_RW | PG_U;
	pcb->vm86_frame = vm86paddr - sizeof(struct vm86frame);
	pcb->pgtable_va = vm86paddr;
	pcb->pcb_flags = PCB_VM86CALL;
	pcb->pcb_ext = ext;

	bzero(ext, sizeof(struct pcb_ext));
	ext->ext_tss.tss_esp0 = vm86paddr;
	ext->ext_tss.tss_ss0 = GSEL(GDATA_SEL, SEL_KPL);
	ext->ext_tss.tss_ioopt =
		((u_int)vml->vml_iomap - (u_int)&ext->ext_tss) << 16;
	ext->ext_iomap = vml->vml_iomap;
	ext->ext_vm86.vm86_intmap = vml->vml_intmap;

	if (cpu_feature & CPUID_VME)
		ext->ext_vm86.vm86_has_vme = (rcr4() & CR4_VME ? 1 : 0);

	addr = (u_int *)ext->ext_vm86.vm86_intmap;
	for (i = 0; i < (INTMAP_SIZE + IOMAP_SIZE) / sizeof(u_int); i++)
		*addr++ = 0;
	vml->vml_iomap_trailer = 0xff;

	ssd.ssd_base = (u_int)&ext->ext_tss;
	ssd.ssd_limit = TSS_SIZE - 1;
	ssdtosd(&ssd, &ext->ext_tssd);

	vm86pcb = pcb;

#if 0
	/*
	 * use whatever is leftover of the vm86 page layout as a
	 * message buffer so we can capture early output.
	 */
	msgbufinit((vm_offset_t)vm86paddr + sizeof(struct vm86_layout_pae),
	    ctob(3) - sizeof(struct vm86_layout_pae));
#endif
}

static void
vm86_initialize_nopae(void)
{
	int i;
	u_int *addr;
	struct vm86_layout_nopae *vml;
	struct pcb *pcb;
	struct pcb_ext *ext;
	struct soft_segment_descriptor ssd = {
		0,			/* segment base address (overwritten) */
		0,			/* length (overwritten) */
		SDT_SYS386TSS,		/* segment type */
		0,			/* privilege level */
		1,			/* descriptor present */
		0, 0,
		0,			/* default 16 size */
		0			/* granularity */
	};

	vml = (struct vm86_layout_nopae *)vm86paddr;
	pcb = &vml->vml_pcb;
	ext = &vml->vml_ext;

	mtx_init(&vm86_lock, "vm86 lock", NULL, MTX_DEF);

	bzero(pcb, sizeof(struct pcb));
	pcb->new_ptd = vm86pa | PG_V | PG_RW | PG_U;
	pcb->vm86_frame = vm86paddr - sizeof(struct vm86frame);
	pcb->pgtable_va = vm86paddr;
	pcb->pcb_flags = PCB_VM86CALL;
	pcb->pcb_ext = ext;

	bzero(ext, sizeof(struct pcb_ext));
	ext->ext_tss.tss_esp0 = vm86paddr;
	ext->ext_tss.tss_ss0 = GSEL(GDATA_SEL, SEL_KPL);
	ext->ext_tss.tss_ioopt =
		((u_int)vml->vml_iomap - (u_int)&ext->ext_tss) << 16;
	ext->ext_iomap = vml->vml_iomap;
	ext->ext_vm86.vm86_intmap = vml->vml_intmap;

	if (cpu_feature & CPUID_VME)
		ext->ext_vm86.vm86_has_vme = (rcr4() & CR4_VME ? 1 : 0);

	addr = (u_int *)ext->ext_vm86.vm86_intmap;
	for (i = 0; i < (INTMAP_SIZE + IOMAP_SIZE) / sizeof(u_int); i++)
		*addr++ = 0;
	vml->vml_iomap_trailer = 0xff;

	ssd.ssd_base = (u_int)&ext->ext_tss;
	ssd.ssd_limit = TSS_SIZE - 1;
	ssdtosd(&ssd, &ext->ext_tssd);

	vm86pcb = pcb;

#if 0
	/*
	 * use whatever is leftover of the vm86 page layout as a
	 * message buffer so we can capture early output.
	 */
	msgbufinit((vm_offset_t)vm86paddr + sizeof(struct vm86_layout_nopae),
	    ctob(3) - sizeof(struct vm86_layout_nopae));
#endif
}

void
vm86_initialize(void)
{

	if (pae_mode)
		vm86_initialize_pae();
	else
		vm86_initialize_nopae();
}

vm_offset_t
vm86_getpage(struct vm86context *vmc, int pagenum)
{
	int i;

	for (i = 0; i < vmc->npages; i++)
		if (vmc->pmap[i].pte_num == pagenum)
			return (vmc->pmap[i].kva);
	return (0);
}

vm_offset_t
vm86_addpage(struct vm86context *vmc, int pagenum, vm_offset_t kva)
{
	int i, flags = 0;

	for (i = 0; i < vmc->npages; i++)
		if (vmc->pmap[i].pte_num == pagenum)
			goto overlap;

	if (vmc->npages == VM86_PMAPSIZE)
		goto full;			/* XXX grow map? */

	if (kva == 0) {
		kva = (vm_offset_t)malloc(PAGE_SIZE, M_TEMP, M_WAITOK);
		flags = VMAP_MALLOC;
	}

	i = vmc->npages++;
	vmc->pmap[i].flags = flags;
	vmc->pmap[i].kva = kva;
	vmc->pmap[i].pte_num = pagenum;
	return (kva);
overlap:
	panic("vm86_addpage: overlap");
full:
	panic("vm86_addpage: not enough room");
}

/*
 * Called from vm86_bioscall(), while already in the vm86 address space,
 * to finalize setup: build a tiny trampoline in the scratch page (either
 * "int $nn; hlt" for an interrupt call, or a bare "hlt" whose seg:off is
 * pushed as the return vector for the caller-supplied cs:ip routine) and
 * point the frame's stack and flags at it.
 */
void
vm86_prepcall(struct vm86frame *vmf)
{
	struct vm86_kernel *vm86;
	uint32_t *stack;
	uint8_t *code;

	code = (void *)0xa00;
	stack = (void *)(0x1000 - 2);	/* keep aligned */
	if ((vmf->vmf_trapno & PAGE_MASK) <= 0xff) {
		/* interrupt call requested */
		code[0] = INTn;
		code[1] = vmf->vmf_trapno & 0xff;
		code[2] = HLT;
		vmf->vmf_ip = (uintptr_t)code;
		vmf->vmf_cs = 0;
	} else {
		code[0] = HLT;
		stack--;
		stack[0] = MAKE_VEC(0, (uintptr_t)code);
	}
	vmf->vmf_sp = (uintptr_t)stack;
	vmf->vmf_ss = 0;
	vmf->kernel_fs = vmf->kernel_es = vmf->kernel_ds = 0;
	vmf->vmf_eflags = PSL_VIF | PSL_VM | PSL_USER;

	vm86 = &curpcb->pcb_ext->ext_vm86;
	if (!vm86->vm86_has_vme)
		vm86->vm86_eflags = vmf->vmf_eflags;	/* save VIF, VIP */
}

/*
 * vm86 trap handler; determines whether routine succeeded or not.
 * Called while in vm86 space, returns to calling process.
 */
void
vm86_trap(struct vm86frame *vmf)
{
	void (*p)(struct vm86frame *);
	caddr_t addr;

	/* "should not happen" */
	if ((vmf->vmf_eflags & PSL_VM) == 0)
		panic("vm86_trap called, but not in vm86 mode");

	addr = MAKE_ADDR(vmf->vmf_cs, vmf->vmf_ip);
	if (*(u_char *)addr == HLT)
		vmf->vmf_trapno = vmf->vmf_eflags & PSL_C;
	else
		vmf->vmf_trapno = vmf->vmf_trapno << 16;

	p = (void (*)(struct vm86frame *))((uintptr_t)vm86_biosret +
	    setidt_disp);
	p(vmf);
}

int
vm86_intcall(int intnum, struct vm86frame *vmf)
{
	int (*p)(struct vm86frame *);
	int retval;

	if (intnum < 0 || intnum > 0xff)
		return (EINVAL);

	vmf->vmf_trapno = intnum;
	p = (int (*)(struct vm86frame *))((uintptr_t)vm86_bioscall +
	    setidt_disp);
	mtx_lock(&vm86_lock);
	critical_enter();
	retval = p(vmf);
	critical_exit();
	mtx_unlock(&vm86_lock);
	return (retval);
}
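
#if 0
/*
 * Usage sketch (not compiled): a caller zeroes a vm86frame, loads the
 * register image, and issues a BIOS interrupt.  This mirrors the way the
 * i386 startup code queries base memory with BIOS int 0x12, which returns
 * the size in KB in %ax; the helper name and details are illustrative.
 */
static int
example_basemem(void)
{
	struct vm86frame vmf;

	bzero(&vmf, sizeof(vmf));
	if (vm86_intcall(0x12, &vmf) != 0)
		return (0);
	return (vmf.vmf_ax);			/* base memory, in KB */
}
#endif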

/*
 * struct vm86context contains the page table to use when making
 * vm86 calls.  If intnum is a valid interrupt number (0-255), then
 * the "interrupt trampoline" will be used, otherwise we use the
 * caller's cs:ip routine.
 */
int
vm86_datacall(int intnum, struct vm86frame *vmf, struct vm86context *vmc)
{
	uint64_t *pte_pae;
	uint32_t *pte_nopae;
	int (*p)(struct vm86frame *);
	vm_paddr_t page;
	int i, entry, retval;

	mtx_lock(&vm86_lock);
	if (pae_mode) {
		pte_pae = (uint64_t *)vm86paddr;
		for (i = 0; i < vmc->npages; i++) {
			page = vtophys(vmc->pmap[i].kva & PG_FRAME_PAE);
			entry = vmc->pmap[i].pte_num;
			vmc->pmap[i].old_pte = pte_pae[entry];
			pte_pae[entry] = page | PG_V | PG_RW | PG_U;
			pmap_invalidate_page(kernel_pmap, vmc->pmap[i].kva);
		}
	} else {
		pte_nopae = (uint32_t *)vm86paddr;
		for (i = 0; i < vmc->npages; i++) {
			page = vtophys(vmc->pmap[i].kva & PG_FRAME_NOPAE);
			entry = vmc->pmap[i].pte_num;
			vmc->pmap[i].old_pte = pte_nopae[entry];
			pte_nopae[entry] = page | PG_V | PG_RW | PG_U;
			pmap_invalidate_page(kernel_pmap, vmc->pmap[i].kva);
		}
	}

	vmf->vmf_trapno = intnum;
	p = (int (*)(struct vm86frame *))((uintptr_t)vm86_bioscall +
	    setidt_disp);
	critical_enter();
	retval = p(vmf);
	critical_exit();

	if (pae_mode) {
		for (i = 0; i < vmc->npages; i++) {
			entry = vmc->pmap[i].pte_num;
			pte_pae[entry] = vmc->pmap[i].old_pte;
			pmap_invalidate_page(kernel_pmap, vmc->pmap[i].kva);
		}
	} else {
		for (i = 0; i < vmc->npages; i++) {
			entry = vmc->pmap[i].pte_num;
			pte_nopae[entry] = vmc->pmap[i].old_pte;
			pmap_invalidate_page(kernel_pmap, vmc->pmap[i].kva);
		}
	}
	mtx_unlock(&vm86_lock);

	return (retval);
}
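
#if 0
/*
 * Usage sketch (not compiled): to exchange data with the BIOS, map a
 * scratch page into the vm86 space with vm86_addpage(), convert its
 * kernel address into the seg:off pair the BIOS expects, and call
 * vm86_datacall().  Page number 1 and int 0x15/0xe820 (the SMAP probe)
 * follow the pattern used by the i386 startup code; the helper and the
 * omitted register setup are illustrative only.
 */
static void
example_datacall(void)
{
	struct vm86frame vmf;
	struct vm86context vmc;
	void *buf;

	bzero(&vmf, sizeof(vmf));
	bzero(&vmc, sizeof(vmc));
	buf = (void *)vm86_addpage(&vmc, 1, 0);	/* malloc'ed scratch page */
	vm86_getptr(&vmc, (vm_offset_t)buf, &vmf.vmf_es, &vmf.vmf_di);
	vmf.vmf_eax = 0xe820;		/* e.g. query the system memory map */
	(void)vm86_datacall(0x15, &vmf, &vmc);
}
#endif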

vm_offset_t
vm86_getaddr(struct vm86context *vmc, u_short sel, u_short off)
{
	int i, page;
	vm_offset_t addr;

	addr = (vm_offset_t)MAKE_ADDR(sel, off);
	page = addr >> PAGE_SHIFT;
	for (i = 0; i < vmc->npages; i++)
		if (page == vmc->pmap[i].pte_num)
			return (vmc->pmap[i].kva + (addr & PAGE_MASK));
	return (0);
}

int
vm86_getptr(struct vm86context *vmc, vm_offset_t kva, u_short *sel,
    u_short *off)
{
	int i;

	for (i = 0; i < vmc->npages; i++)
		if (kva >= vmc->pmap[i].kva &&
		    kva < vmc->pmap[i].kva + PAGE_SIZE) {
			*off = kva - vmc->pmap[i].kva;
			/* page n starts at paragraph n * PAGE_SIZE / 16 */
			*sel = vmc->pmap[i].pte_num << 8;
			return (1);
		}
	return (0);
}

int
vm86_sysarch(struct thread *td, char *args)
{
	int error = 0;
	struct i386_vm86_args ua;
	struct vm86_kernel *vm86;

	if ((error = copyin(args, &ua, sizeof(struct i386_vm86_args))) != 0)
		return (error);

	if (td->td_pcb->pcb_ext == 0)
		if ((error = i386_extend_pcb(td)) != 0)
			return (error);
	vm86 = &td->td_pcb->pcb_ext->ext_vm86;

	switch (ua.sub_op) {
	case VM86_INIT: {
		struct vm86_init_args sa;

		if ((error = copyin(ua.sub_args, &sa, sizeof(sa))) != 0)
			return (error);
		if (cpu_feature & CPUID_VME)
			vm86->vm86_has_vme = (rcr4() & CR4_VME ? 1 : 0);
		else
			vm86->vm86_has_vme = 0;
		vm86->vm86_inited = 1;
		vm86->vm86_debug = sa.debug;
		bcopy(&sa.int_map, vm86->vm86_intmap, 32);
		}
		break;

#if 0
	case VM86_SET_VME: {
		struct vm86_vme_args sa;

		if ((cpu_feature & CPUID_VME) == 0)
			return (ENODEV);

		if ((error = copyin(ua.sub_args, &sa, sizeof(sa))) != 0)
			return (error);
		if (sa.state)
			load_cr4(rcr4() | CR4_VME);
		else
			load_cr4(rcr4() & ~CR4_VME);
		}
		break;
#endif

	case VM86_GET_VME: {
		struct vm86_vme_args sa;

		sa.state = (rcr4() & CR4_VME ? 1 : 0);
		error = copyout(&sa, ua.sub_args, sizeof(sa));
		}
		break;

	case VM86_INTCALL: {
		struct vm86_intcall_args sa;

		if ((error = priv_check(td, PRIV_VM86_INTCALL)) != 0)
			return (error);
		if ((error = copyin(ua.sub_args, &sa, sizeof(sa))) != 0)
			return (error);
		if ((error = vm86_intcall(sa.intnum, &sa.vmf)) != 0)
			return (error);
		error = copyout(&sa, ua.sub_args, sizeof(sa));
		}
		break;

	default:
		error = EINVAL;
	}
	return (error);
}