/*-
 * SPDX-License-Identifier: BSD-2-Clause-FreeBSD
 *
 * Copyright (c) 1997 Jonathan Lemon
 * All rights reserved.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 *
 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
 * ARE DISCLAIMED.  IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
 * SUCH DAMAGE.
 */

#include <sys/cdefs.h>
__FBSDID("$FreeBSD$");

#include <sys/param.h>
#include <sys/systm.h>
#include <sys/priv.h>
#include <sys/proc.h>
#include <sys/lock.h>
#include <sys/malloc.h>
#include <sys/mutex.h>

#include <vm/vm.h>
#include <vm/pmap.h>
#include <vm/vm_map.h>
#include <vm/vm_page.h>

#include <machine/md_var.h>
#include <machine/pcb.h>
#include <machine/pcb_ext.h>
#include <machine/psl.h>
#include <machine/specialreg.h>
#include <machine/sysarch.h>

extern int vm86pa;
extern struct pcb *vm86pcb;

static struct mtx vm86_lock;

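/*
 * Entry and return paths for vm86 calls, implemented in assembly
 * (vm86bios.s); they switch onto the dedicated vm86 PCB and stack.
 */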
extern int vm86_bioscall(struct vm86frame *);
extern void vm86_biosret(struct vm86frame *);

void vm86_prepcall(struct vm86frame *);

struct system_map {
        int             type;
        vm_offset_t     start;
        vm_offset_t     end;
};

#define HLT     0xf4
#define CLI     0xfa
#define STI     0xfb
#define PUSHF   0x9c
#define POPF    0x9d
#define INTn    0xcd
#define IRET    0xcf
#define CALLm   0xff
#define OPERAND_SIZE_PREFIX     0x66
#define ADDRESS_SIZE_PREFIX     0x67
#define PUSH_MASK       ~(PSL_VM | PSL_RF | PSL_I)
#define POP_MASK        ~(PSL_VIP | PSL_VIF | PSL_VM | PSL_RF | PSL_IOPL)

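/*
 * fu*()/su*() variants that access the vm86 memory directly when the
 * thread runs inside a critical section (i.e. during an in-kernel vm86
 * call, where the target pages are known to be mapped and the normal
 * fault-recovering accessors are avoided); otherwise they defer to the
 * standard primitives.
 */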
static int
vm86_suword16(volatile void *base, int word)
{

        if (curthread->td_critnest != 0) {
                *(volatile uint16_t *)base = word;
                return (0);
        }
        return (suword16(base, word));
}

static int
vm86_suword(volatile void *base, long word)
{

        if (curthread->td_critnest != 0) {
                *(volatile long *)base = word;
                return (0);
        }
        return (suword(base, word));
}

static int
vm86_fubyte(volatile const void *base)
{

        if (curthread->td_critnest != 0)
                return (*(volatile const u_char *)base);
        return (fubyte(base));
}

static int
vm86_fuword16(volatile const void *base)
{

        if (curthread->td_critnest != 0)
                return (*(volatile const uint16_t *)base);
        return (fuword16(base));
}

static long
vm86_fuword(volatile const void *base)
{

        if (curthread->td_critnest != 0)
                return (*(volatile const long *)base);
        return (fuword(base));
}

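/*
 * Real-mode address arithmetic: a 16-bit segment:offset pair maps to
 * the linear address (segment << 4) + offset, and a 32-bit interrupt
 * vector packs the segment in the high word, the offset in the low.
 */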
static __inline caddr_t
MAKE_ADDR(u_short sel, u_short off)
{
        return ((caddr_t)((sel << 4) + off));
}

static __inline void
GET_VEC(u_int vec, u_short *sel, u_short *off)
{
        *sel = vec >> 16;
        *off = vec & 0xffff;
}

static __inline u_int
MAKE_VEC(u_short sel, u_short off)
{
        return ((sel << 16) | off);
}

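/*
 * Emulated stack operations, performed on the vm86 task's own stack
 * through the ss:sp register pair of the trapped frame.
 */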
static __inline void
PUSH(u_short x, struct vm86frame *vmf)
{
        vmf->vmf_sp -= 2;
        vm86_suword16(MAKE_ADDR(vmf->vmf_ss, vmf->vmf_sp), x);
}

static __inline void
PUSHL(u_int x, struct vm86frame *vmf)
{
        vmf->vmf_sp -= 4;
        vm86_suword(MAKE_ADDR(vmf->vmf_ss, vmf->vmf_sp), x);
}

static __inline u_short
POP(struct vm86frame *vmf)
{
        u_short x = vm86_fuword16(MAKE_ADDR(vmf->vmf_ss, vmf->vmf_sp));

        vmf->vmf_sp += 2;
        return (x);
}

static __inline u_int
POPL(struct vm86frame *vmf)
{
        u_int x = vm86_fuword(MAKE_ADDR(vmf->vmf_ss, vmf->vmf_sp));

        vmf->vmf_sp += 4;
        return (x);
}

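/*
 * Emulate the instruction that faulted while the process was in vm86
 * mode.  Interrupt-flag manipulation (CLI/STI/PUSHF/POPF/INTn/IRET) is
 * handled here, tracking the virtual interrupt flag in software, or
 * with hardware assistance when the CPU's VME extensions are enabled.
 * Returns 0 when the instruction was handled (SIGTRAP if tracing), or
 * a signal number so the event is passed on to user mode.
 */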
int
vm86_emulate(struct vm86frame *vmf)
{
        struct vm86_kernel *vm86;
        caddr_t addr;
        u_char i_byte;
        u_int temp_flags;
        int inc_ip = 1;
        int retcode = 0;

        /*
         * pcb_ext contains the address of the extension area, or zero if
         * the extension is not present.  (This check should not be needed,
         * as we can't enter vm86 mode until we set up an extension area)
         */
        if (curpcb->pcb_ext == 0)
                return (SIGBUS);
        vm86 = &curpcb->pcb_ext->ext_vm86;

        if (vmf->vmf_eflags & PSL_T)
                retcode = SIGTRAP;

        addr = MAKE_ADDR(vmf->vmf_cs, vmf->vmf_ip);
        i_byte = vm86_fubyte(addr);
        if (i_byte == ADDRESS_SIZE_PREFIX) {
                i_byte = vm86_fubyte(++addr);
                inc_ip++;
        }

        if (vm86->vm86_has_vme) {
                switch (i_byte) {
                case OPERAND_SIZE_PREFIX:
                        i_byte = vm86_fubyte(++addr);
                        inc_ip++;
                        switch (i_byte) {
                        case PUSHF:
                                if (vmf->vmf_eflags & PSL_VIF)
                                        PUSHL((vmf->vmf_eflags & PUSH_MASK)
                                            | PSL_IOPL | PSL_I, vmf);
                                else
                                        PUSHL((vmf->vmf_eflags & PUSH_MASK)
                                            | PSL_IOPL, vmf);
                                vmf->vmf_ip += inc_ip;
                                return (retcode);

                        case POPF:
                                temp_flags = POPL(vmf) & POP_MASK;
                                vmf->vmf_eflags = (vmf->vmf_eflags & ~POP_MASK)
                                    | temp_flags | PSL_VM | PSL_I;
                                vmf->vmf_ip += inc_ip;
                                if (temp_flags & PSL_I) {
                                        vmf->vmf_eflags |= PSL_VIF;
                                        if (vmf->vmf_eflags & PSL_VIP)
                                                break;
                                } else {
                                        vmf->vmf_eflags &= ~PSL_VIF;
                                }
                                return (retcode);
                        }
                        break;

                /* VME faults here if VIP is set, but does not set VIF. */
                case STI:
                        vmf->vmf_eflags |= PSL_VIF;
                        vmf->vmf_ip += inc_ip;
                        if ((vmf->vmf_eflags & PSL_VIP) == 0) {
                                uprintf("fatal sti\n");
                                return (SIGKILL);
                        }
                        break;

                /* VME if no redirection support */
                case INTn:
                        break;

                /* VME if trying to set PSL_T, or PSL_I when VIP is set */
                case POPF:
                        temp_flags = POP(vmf) & POP_MASK;
                        vmf->vmf_flags = (vmf->vmf_flags & ~POP_MASK)
                            | temp_flags | PSL_VM | PSL_I;
                        vmf->vmf_ip += inc_ip;
                        if (temp_flags & PSL_I) {
                                vmf->vmf_eflags |= PSL_VIF;
                                if (vmf->vmf_eflags & PSL_VIP)
                                        break;
                        } else {
                                vmf->vmf_eflags &= ~PSL_VIF;
                        }
                        return (retcode);

                /* VME if trying to set PSL_T, or PSL_I when VIP is set */
                case IRET:
                        vmf->vmf_ip = POP(vmf);
                        vmf->vmf_cs = POP(vmf);
                        temp_flags = POP(vmf) & POP_MASK;
                        vmf->vmf_flags = (vmf->vmf_flags & ~POP_MASK)
                            | temp_flags | PSL_VM | PSL_I;
                        if (temp_flags & PSL_I) {
                                vmf->vmf_eflags |= PSL_VIF;
                                if (vmf->vmf_eflags & PSL_VIP)
                                        break;
                        } else {
                                vmf->vmf_eflags &= ~PSL_VIF;
                        }
                        return (retcode);
                }
                return (SIGBUS);
        }

        switch (i_byte) {
        case OPERAND_SIZE_PREFIX:
                i_byte = vm86_fubyte(++addr);
                inc_ip++;
                switch (i_byte) {
                case PUSHF:
                        if (vm86->vm86_eflags & PSL_VIF)
                                PUSHL((vmf->vmf_flags & PUSH_MASK)
                                    | PSL_IOPL | PSL_I, vmf);
                        else
                                PUSHL((vmf->vmf_flags & PUSH_MASK)
                                    | PSL_IOPL, vmf);
                        vmf->vmf_ip += inc_ip;
                        return (retcode);

                case POPF:
                        temp_flags = POPL(vmf) & POP_MASK;
                        vmf->vmf_eflags = (vmf->vmf_eflags & ~POP_MASK)
                            | temp_flags | PSL_VM | PSL_I;
                        vmf->vmf_ip += inc_ip;
                        if (temp_flags & PSL_I) {
                                vm86->vm86_eflags |= PSL_VIF;
                                if (vm86->vm86_eflags & PSL_VIP)
                                        break;
                        } else {
                                vm86->vm86_eflags &= ~PSL_VIF;
                        }
                        return (retcode);
                }
                return (SIGBUS);

        case CLI:
                vm86->vm86_eflags &= ~PSL_VIF;
                vmf->vmf_ip += inc_ip;
                return (retcode);

        case STI:
                /* if there is a pending interrupt, go to the emulator */
                vm86->vm86_eflags |= PSL_VIF;
                vmf->vmf_ip += inc_ip;
                if (vm86->vm86_eflags & PSL_VIP)
                        break;
                return (retcode);

        case PUSHF:
                if (vm86->vm86_eflags & PSL_VIF)
                        PUSH((vmf->vmf_flags & PUSH_MASK)
                            | PSL_IOPL | PSL_I, vmf);
                else
                        PUSH((vmf->vmf_flags & PUSH_MASK) | PSL_IOPL, vmf);
                vmf->vmf_ip += inc_ip;
                return (retcode);

        case INTn:
                i_byte = vm86_fubyte(addr + 1);
                if ((vm86->vm86_intmap[i_byte >> 3] & (1 << (i_byte & 7))) != 0)
                        break;
                if (vm86->vm86_eflags & PSL_VIF)
                        PUSH((vmf->vmf_flags & PUSH_MASK)
                            | PSL_IOPL | PSL_I, vmf);
                else
                        PUSH((vmf->vmf_flags & PUSH_MASK) | PSL_IOPL, vmf);
                PUSH(vmf->vmf_cs, vmf);
                PUSH(vmf->vmf_ip + inc_ip + 1, vmf);    /* increment IP */
                GET_VEC(vm86_fuword((caddr_t)(i_byte * 4)),
                     &vmf->vmf_cs, &vmf->vmf_ip);
                vmf->vmf_flags &= ~PSL_T;
                vm86->vm86_eflags &= ~PSL_VIF;
                return (retcode);

        case IRET:
                vmf->vmf_ip = POP(vmf);
                vmf->vmf_cs = POP(vmf);
                temp_flags = POP(vmf) & POP_MASK;
                vmf->vmf_flags = (vmf->vmf_flags & ~POP_MASK)
                    | temp_flags | PSL_VM | PSL_I;
                if (temp_flags & PSL_I) {
                        vm86->vm86_eflags |= PSL_VIF;
                        if (vm86->vm86_eflags & PSL_VIP)
                                break;
                } else {
                        vm86->vm86_eflags &= ~PSL_VIF;
                }
                return (retcode);

        case POPF:
                temp_flags = POP(vmf) & POP_MASK;
                vmf->vmf_flags = (vmf->vmf_flags & ~POP_MASK)
                    | temp_flags | PSL_VM | PSL_I;
                vmf->vmf_ip += inc_ip;
                if (temp_flags & PSL_I) {
                        vm86->vm86_eflags |= PSL_VIF;
                        if (vm86->vm86_eflags & PSL_VIP)
                                break;
                } else {
                        vm86->vm86_eflags &= ~PSL_VIF;
                }
                return (retcode);
        }
        return (SIGBUS);
}

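/*
 * PGTABLE_SIZE is the number of page table entries needed to map the
 * real-mode megabyte plus the 64K high-memory area (272 PTEs).
 * INTMAP_SIZE is the 256-bit software interrupt redirection bitmap,
 * IOMAP_SIZE the full 65536-bit I/O permission bitmap, and TSS_SIZE
 * the TSS segment limit: the pcb extension (minus the leading TSS
 * descriptor) plus both bitmaps and the terminating 0xff byte required
 * by the hardware.
 */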
#define PGTABLE_SIZE    ((1024 + 64) * 1024 / PAGE_SIZE)
#define INTMAP_SIZE     32
#define IOMAP_SIZE      ctob(IOPAGES)
#define TSS_SIZE \
        (sizeof(struct pcb_ext) - sizeof(struct segment_descriptor) + \
         INTMAP_SIZE + IOMAP_SIZE + 1)

struct vm86_layout_pae {
        uint64_t        vml_pgtbl[PGTABLE_SIZE];
        struct  pcb vml_pcb;
        struct  pcb_ext vml_ext;
        char    vml_intmap[INTMAP_SIZE];
        char    vml_iomap[IOMAP_SIZE];
        char    vml_iomap_trailer;
};

struct vm86_layout_nopae {
        uint32_t        vml_pgtbl[PGTABLE_SIZE];
        struct  pcb vml_pcb;
        struct  pcb_ext vml_ext;
        char    vml_intmap[INTMAP_SIZE];
        char    vml_iomap[IOMAP_SIZE];
        char    vml_iomap_trailer;
};

_Static_assert(sizeof(struct vm86_layout_pae) <= ctob(3),
    "struct vm86_layout_pae exceeds space allocated in locore.s");
_Static_assert(sizeof(struct vm86_layout_nopae) <= ctob(3),
    "struct vm86_layout_nopae exceeds space allocated in locore.s");

static void
vm86_initialize_pae(void)
{
        int i;
        u_int *addr;
        struct vm86_layout_pae *vml;
        struct pcb *pcb;
        struct pcb_ext *ext;
        struct soft_segment_descriptor ssd = {
                0,                      /* segment base address (overwritten) */
                0,                      /* length (overwritten) */
                SDT_SYS386TSS,          /* segment type */
                0,                      /* priority level */
                1,                      /* descriptor present */
                0, 0,
                0,                      /* default 16 size */
                0                       /* granularity */
        };

        /*
         * Below is the memory layout that we use for the vm86 region.
         *
         * +--------+
         * |        |
         * |        |
         * | page 0 |
         * |        | +--------+
         * |        | | stack  |
         * +--------+ +--------+ <--------- vm86paddr
         * |        | |Page Tbl| 1M + 64K = 272 entries = 1088 bytes
         * |        | +--------+
         * |        | |  PCB   | size: ~240 bytes
         * | page 1 | |PCB Ext | size: ~140 bytes (includes TSS)
         * |        | +--------+
         * |        | |int map |
         * |        | +--------+
         * +--------+ |        |
         * | page 2 | |  I/O   |
         * +--------+ | bitmap |
         * | page 3 | |        |
         * |        | +--------+
         * +--------+
         */

        /*
         * A rudimentary PCB must be installed, in order to get to the
         * PCB extension area.  We use the PCB area as a scratchpad for
         * data storage, the layout of which is shown below.
         *
         * pcb_esi      = new PTD entry 0
         * pcb_ebp      = pointer to frame on vm86 stack
         * pcb_esp      =    stack frame pointer at time of switch
         * pcb_ebx      = va of vm86 page table
         * pcb_eip      =    argument pointer to initial call
         * pcb_vm86[0]  =    saved TSS descriptor, word 0
         * pcb_vm86[1]  =    saved TSS descriptor, word 1
         */
#define new_ptd         pcb_esi
#define vm86_frame      pcb_ebp
#define pgtable_va      pcb_ebx

        vml = (struct vm86_layout_pae *)vm86paddr;
        pcb = &vml->vml_pcb;
        ext = &vml->vml_ext;

        mtx_init(&vm86_lock, "vm86 lock", NULL, MTX_DEF);

        bzero(pcb, sizeof(struct pcb));
        pcb->new_ptd = vm86pa | PG_V | PG_RW | PG_U;
        pcb->vm86_frame = vm86paddr - sizeof(struct vm86frame);
        pcb->pgtable_va = vm86paddr;
        pcb->pcb_flags = PCB_VM86CALL;
        pcb->pcb_ext = ext;

        bzero(ext, sizeof(struct pcb_ext));
        ext->ext_tss.tss_esp0 = vm86paddr;
        ext->ext_tss.tss_ss0 = GSEL(GDATA_SEL, SEL_KPL);
        ext->ext_tss.tss_ioopt =
                ((u_int)vml->vml_iomap - (u_int)&ext->ext_tss) << 16;
        ext->ext_iomap = vml->vml_iomap;
        ext->ext_vm86.vm86_intmap = vml->vml_intmap;

        if (cpu_feature & CPUID_VME)
                ext->ext_vm86.vm86_has_vme = (rcr4() & CR4_VME ? 1 : 0);

        addr = (u_int *)ext->ext_vm86.vm86_intmap;
        for (i = 0; i < (INTMAP_SIZE + IOMAP_SIZE) / sizeof(u_int); i++)
                *addr++ = 0;
        vml->vml_iomap_trailer = 0xff;

        ssd.ssd_base = (u_int)&ext->ext_tss;
        ssd.ssd_limit = TSS_SIZE - 1;
        ssdtosd(&ssd, &ext->ext_tssd);

        vm86pcb = pcb;

#if 0
        /*
         * use whatever is leftover of the vm86 page layout as a
         * message buffer so we can capture early output.
         */
        msgbufinit((vm_offset_t)vm86paddr + sizeof(struct vm86_layout),
            ctob(3) - sizeof(struct vm86_layout));
#endif
}

static void
vm86_initialize_nopae(void)
{
        int i;
        u_int *addr;
        struct vm86_layout_nopae *vml;
        struct pcb *pcb;
        struct pcb_ext *ext;
        struct soft_segment_descriptor ssd = {
                0,                      /* segment base address (overwritten) */
                0,                      /* length (overwritten) */
                SDT_SYS386TSS,          /* segment type */
                0,                      /* priority level */
                1,                      /* descriptor present */
                0, 0,
                0,                      /* default 16 size */
                0                       /* granularity */
        };

        vml = (struct vm86_layout_nopae *)vm86paddr;
        pcb = &vml->vml_pcb;
        ext = &vml->vml_ext;

        mtx_init(&vm86_lock, "vm86 lock", NULL, MTX_DEF);

        bzero(pcb, sizeof(struct pcb));
        pcb->new_ptd = vm86pa | PG_V | PG_RW | PG_U;
        pcb->vm86_frame = vm86paddr - sizeof(struct vm86frame);
        pcb->pgtable_va = vm86paddr;
        pcb->pcb_flags = PCB_VM86CALL;
        pcb->pcb_ext = ext;

        bzero(ext, sizeof(struct pcb_ext));
        ext->ext_tss.tss_esp0 = vm86paddr;
        ext->ext_tss.tss_ss0 = GSEL(GDATA_SEL, SEL_KPL);
        ext->ext_tss.tss_ioopt =
                ((u_int)vml->vml_iomap - (u_int)&ext->ext_tss) << 16;
        ext->ext_iomap = vml->vml_iomap;
        ext->ext_vm86.vm86_intmap = vml->vml_intmap;

        if (cpu_feature & CPUID_VME)
                ext->ext_vm86.vm86_has_vme = (rcr4() & CR4_VME ? 1 : 0);

        addr = (u_int *)ext->ext_vm86.vm86_intmap;
        for (i = 0; i < (INTMAP_SIZE + IOMAP_SIZE) / sizeof(u_int); i++)
                *addr++ = 0;
        vml->vml_iomap_trailer = 0xff;

        ssd.ssd_base = (u_int)&ext->ext_tss;
        ssd.ssd_limit = TSS_SIZE - 1;
        ssdtosd(&ssd, &ext->ext_tssd);

        vm86pcb = pcb;

#if 0
        /*
         * use whatever is leftover of the vm86 page layout as a
         * message buffer so we can capture early output.
         */
        msgbufinit((vm_offset_t)vm86paddr + sizeof(struct vm86_layout),
            ctob(3) - sizeof(struct vm86_layout));
#endif
}

void
vm86_initialize(void)
{

        if (pae_mode)
                vm86_initialize_pae();
        else
                vm86_initialize_nopae();
}

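/*
 * A vm86context tracks, in its pmap[] array, which guest page frames
 * (pte_num) are backed by which kernel virtual pages (kva) for the
 * duration of a vm86_datacall().
 */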
vm_offset_t
vm86_getpage(struct vm86context *vmc, int pagenum)
{
        int i;

        for (i = 0; i < vmc->npages; i++)
                if (vmc->pmap[i].pte_num == pagenum)
                        return (vmc->pmap[i].kva);
        return (0);
}

vm_offset_t
vm86_addpage(struct vm86context *vmc, int pagenum, vm_offset_t kva)
{
        int i, flags = 0;

        for (i = 0; i < vmc->npages; i++)
                if (vmc->pmap[i].pte_num == pagenum)
                        goto overlap;

        if (vmc->npages == VM86_PMAPSIZE)
                goto full;                      /* XXX grow map? */

        if (kva == 0) {
                kva = (vm_offset_t)malloc(PAGE_SIZE, M_TEMP, M_WAITOK);
                flags = VMAP_MALLOC;
        }

        i = vmc->npages++;
        vmc->pmap[i].flags = flags;
        vmc->pmap[i].kva = kva;
        vmc->pmap[i].pte_num = pagenum;
        return (kva);
overlap:
        panic("vm86_addpage: overlap");
full:
        panic("vm86_addpage: not enough room");
}

/*
 * called from vm86_bioscall, while in vm86 address space, to finalize setup.
 */
void
vm86_prepcall(struct vm86frame *vmf)
{
        struct vm86_kernel *vm86;
        uint32_t *stack;
        uint8_t *code;

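        /*
         * Build a tiny trampoline in the vm86 area: "int n; hlt" when an
         * interrupt call is requested, or a single "hlt" whose far
         * address is pushed on the stack as the return vector when the
         * caller supplies its own cs:ip routine.
         */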
        code = (void *)0xa00;
        stack = (void *)(0x1000 - 2);   /* keep aligned */
        if ((vmf->vmf_trapno & PAGE_MASK) <= 0xff) {
                /* interrupt call requested */
                code[0] = INTn;
                code[1] = vmf->vmf_trapno & 0xff;
                code[2] = HLT;
                vmf->vmf_ip = (uintptr_t)code;
                vmf->vmf_cs = 0;
        } else {
                code[0] = HLT;
                stack--;
                stack[0] = MAKE_VEC(0, (uintptr_t)code);
        }
        vmf->vmf_sp = (uintptr_t)stack;
        vmf->vmf_ss = 0;
        vmf->kernel_fs = vmf->kernel_es = vmf->kernel_ds = 0;
        vmf->vmf_eflags = PSL_VIF | PSL_VM | PSL_USER;

        vm86 = &curpcb->pcb_ext->ext_vm86;
        if (!vm86->vm86_has_vme)
                vm86->vm86_eflags = vmf->vmf_eflags;  /* save VIF, VIP */
}

/*
 * vm86 trap handler; determines whether routine succeeded or not.
 * Called while in vm86 space, returns to calling process.
 */
void
vm86_trap(struct vm86frame *vmf)
{
        void (*p)(struct vm86frame *);
        caddr_t addr;

        /* "should not happen" */
        if ((vmf->vmf_eflags & PSL_VM) == 0)
                panic("vm86_trap called, but not in vm86 mode");

        addr = MAKE_ADDR(vmf->vmf_cs, vmf->vmf_ip);
        if (*(u_char *)addr == HLT)
                vmf->vmf_trapno = vmf->vmf_eflags & PSL_C;
        else
                vmf->vmf_trapno = vmf->vmf_trapno << 16;

        p = (void (*)(struct vm86frame *))((uintptr_t)vm86_biosret +
            setidt_disp);
        p(vmf);
}

int
vm86_intcall(int intnum, struct vm86frame *vmf)
{
        int (*p)(struct vm86frame *);
        int retval;

        if (intnum < 0 || intnum > 0xff)
                return (EINVAL);

        vmf->vmf_trapno = intnum;
        p = (int (*)(struct vm86frame *))((uintptr_t)vm86_bioscall +
            setidt_disp);
        mtx_lock(&vm86_lock);
        critical_enter();
        retval = p(vmf);
        critical_exit();
        mtx_unlock(&vm86_lock);
        return (retval);
}
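
/*
 * Illustrative sketch of a caller (not part of this file): querying
 * the BIOS video mode via INT 10h, AH=0Fh, which returns the current
 * mode in AL.
 *
 *	struct vm86frame vmf;
 *
 *	bzero(&vmf, sizeof(vmf));
 *	vmf.vmf_ah = 0x0f;
 *	if (vm86_intcall(0x10, &vmf) == 0)
 *		printf("BIOS video mode %#x\n", vmf.vmf_al);
 */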

/*
 * struct vm86context contains the page table to use when making
 * vm86 calls.  If intnum is a valid interrupt number (0-255), then
 * the "interrupt trampoline" will be used, otherwise we use the
 * caller's cs:ip routine.
 */
int
vm86_datacall(int intnum, struct vm86frame *vmf, struct vm86context *vmc)
{
        uint64_t *pte_pae;
        uint32_t *pte_nopae;
        int (*p)(struct vm86frame *);
        vm_paddr_t page;
        int i, entry, retval;

        mtx_lock(&vm86_lock);
        if (pae_mode) {
                pte_pae = (uint64_t *)vm86paddr;
                for (i = 0; i < vmc->npages; i++) {
                        page = vtophys(vmc->pmap[i].kva & PG_FRAME_PAE);
                        entry = vmc->pmap[i].pte_num;
                        vmc->pmap[i].old_pte = pte_pae[entry];
                        pte_pae[entry] = page | PG_V | PG_RW | PG_U;
                        pmap_invalidate_page(kernel_pmap, vmc->pmap[i].kva);
                }
        } else {
                pte_nopae = (uint32_t *)vm86paddr;
                for (i = 0; i < vmc->npages; i++) {
                        page = vtophys(vmc->pmap[i].kva & PG_FRAME_NOPAE);
                        entry = vmc->pmap[i].pte_num;
                        vmc->pmap[i].old_pte = pte_nopae[entry];
                        pte_nopae[entry] = page | PG_V | PG_RW | PG_U;
                        pmap_invalidate_page(kernel_pmap, vmc->pmap[i].kva);
                }
        }

        vmf->vmf_trapno = intnum;
        p = (int (*)(struct vm86frame *))((uintptr_t)vm86_bioscall +
            setidt_disp);
        critical_enter();
        retval = p(vmf);
        critical_exit();

        if (pae_mode) {
                for (i = 0; i < vmc->npages; i++) {
                        entry = vmc->pmap[i].pte_num;
                        pte_pae[entry] = vmc->pmap[i].old_pte;
                        pmap_invalidate_page(kernel_pmap, vmc->pmap[i].kva);
                }
        } else {
                for (i = 0; i < vmc->npages; i++) {
                        entry = vmc->pmap[i].pte_num;
                        pte_nopae[entry] = vmc->pmap[i].old_pte;
                        pmap_invalidate_page(kernel_pmap, vmc->pmap[i].kva);
                }
        }
        mtx_unlock(&vm86_lock);

        return (retval);
}
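
/*
 * Illustrative sketch of a caller (not part of this file): mapping a
 * malloc'ed scratch page at guest page 1 so a BIOS service can fill
 * it, with es:di pointing into the buffer.
 *
 *	struct vm86context vmc;
 *	struct vm86frame vmf;
 *	void *buf;
 *
 *	bzero(&vmc, sizeof(vmc));
 *	bzero(&vmf, sizeof(vmf));
 *	buf = (void *)vm86_addpage(&vmc, 1, 0);
 *	vm86_getptr(&vmc, (vm_offset_t)buf, &vmf.vmf_es, &vmf.vmf_di);
 *	... set the remaining registers for the chosen BIOS function ...
 *	vm86_datacall(intnum, &vmf, &vmc);
 */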

vm_offset_t
vm86_getaddr(struct vm86context *vmc, u_short sel, u_short off)
{
        int i, page;
        vm_offset_t addr;

        addr = (vm_offset_t)MAKE_ADDR(sel, off);
        page = addr >> PAGE_SHIFT;
        for (i = 0; i < vmc->npages; i++)
                if (page == vmc->pmap[i].pte_num)
                        return (vmc->pmap[i].kva + (addr & PAGE_MASK));
        return (0);
}

int
vm86_getptr(struct vm86context *vmc, vm_offset_t kva, u_short *sel,
     u_short *off)
{
        int i;

        for (i = 0; i < vmc->npages; i++)
                if (kva >= vmc->pmap[i].kva &&
                    kva < vmc->pmap[i].kva + PAGE_SIZE) {
                        *off = kva - vmc->pmap[i].kva;
                        *sel = vmc->pmap[i].pte_num << 8;
                        return (1);
                }
        return (0);
}

int
vm86_sysarch(struct thread *td, char *args)
{
        int error = 0;
        struct i386_vm86_args ua;
        struct vm86_kernel *vm86;

        if ((error = copyin(args, &ua, sizeof(struct i386_vm86_args))) != 0)
                return (error);

        if (td->td_pcb->pcb_ext == 0)
                if ((error = i386_extend_pcb(td)) != 0)
                        return (error);
        vm86 = &td->td_pcb->pcb_ext->ext_vm86;

        switch (ua.sub_op) {
        case VM86_INIT: {
                struct vm86_init_args sa;

                if ((error = copyin(ua.sub_args, &sa, sizeof(sa))) != 0)
                        return (error);
                if (cpu_feature & CPUID_VME)
                        vm86->vm86_has_vme = (rcr4() & CR4_VME ? 1 : 0);
                else
                        vm86->vm86_has_vme = 0;
                vm86->vm86_inited = 1;
                vm86->vm86_debug = sa.debug;
                bcopy(&sa.int_map, vm86->vm86_intmap, 32);
                }
                break;

#if 0
        case VM86_SET_VME: {
                struct vm86_vme_args sa;

                if ((cpu_feature & CPUID_VME) == 0)
                        return (ENODEV);

                if (error = copyin(ua.sub_args, &sa, sizeof(sa)))
                        return (error);
                if (sa.state)
                        load_cr4(rcr4() | CR4_VME);
                else
                        load_cr4(rcr4() & ~CR4_VME);
                }
                break;
#endif

        case VM86_GET_VME: {
                struct vm86_vme_args sa;

                sa.state = (rcr4() & CR4_VME ? 1 : 0);
                error = copyout(&sa, ua.sub_args, sizeof(sa));
                }
                break;

        case VM86_INTCALL: {
                struct vm86_intcall_args sa;

                if ((error = priv_check(td, PRIV_VM86_INTCALL)))
                        return (error);
                if ((error = copyin(ua.sub_args, &sa, sizeof(sa))))
                        return (error);
                if ((error = vm86_intcall(sa.intnum, &sa.vmf)))
                        return (error);
                error = copyout(&sa, ua.sub_args, sizeof(sa));
                }
                break;

        default:
                error = EINVAL;
        }
        return (error);
}