/*-
 * SPDX-License-Identifier: BSD-2-Clause
 *
 * Copyright (c) 1997 Jonathan Lemon
 * All rights reserved.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 *
 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
 * ARE DISCLAIMED.  IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
 * SUCH DAMAGE.
 */

#include <sys/param.h>
#include <sys/systm.h>
#include <sys/priv.h>
#include <sys/proc.h>
#include <sys/lock.h>
#include <sys/malloc.h>
#include <sys/mutex.h>

#include <vm/vm.h>
#include <vm/pmap.h>
#include <vm/vm_map.h>
#include <vm/vm_page.h>

#include <machine/md_var.h>
#include <machine/pcb.h>
#include <machine/pcb_ext.h>
#include <machine/psl.h>
#include <machine/specialreg.h>
#include <machine/sysarch.h>

extern int vm86pa;
extern struct pcb *vm86pcb;

static struct mtx vm86_lock;

extern int vm86_bioscall(struct vm86frame *);
extern void vm86_biosret(struct vm86frame *);

void vm86_prepcall(struct vm86frame *);

struct system_map {
        int             type;
        vm_offset_t     start;
        vm_offset_t     end;
};

#define HLT     0xf4
#define CLI     0xfa
#define STI     0xfb
#define PUSHF   0x9c
#define POPF    0x9d
#define INTn    0xcd
#define IRET    0xcf
#define CALLm   0xff
#define OPERAND_SIZE_PREFIX     0x66
#define ADDRESS_SIZE_PREFIX     0x67
#define PUSH_MASK       ~(PSL_VM | PSL_RF | PSL_I)
#define POP_MASK        ~(PSL_VIP | PSL_VIF | PSL_VM | PSL_RF | PSL_IOPL)

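/*
 * Fetch/store helpers for the vm86 (real-mode) address space.  Inside
 * a critical section (td_critnest != 0, as during a vm86 bioscall,
 * when the vm86 page table maps low memory) the address is accessed
 * directly; otherwise the fault-safe suword()/fubyte() primitives are
 * used.
 */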
static int
vm86_suword16(volatile void *base, int word)
{

        if (curthread->td_critnest != 0) {
                *(volatile uint16_t *)base = word;
                return (0);
        }
        return (suword16(base, word));
}

static int
vm86_suword(volatile void *base, long word)
{

        if (curthread->td_critnest != 0) {
                *(volatile long *)base = word;
                return (0);
        }
        return (suword(base, word));
}

static int
vm86_fubyte(volatile const void *base)
{

        if (curthread->td_critnest != 0)
                return (*(volatile const u_char *)base);
        return (fubyte(base));
}

static int
vm86_fuword16(volatile const void *base)
{

        if (curthread->td_critnest != 0)
                return (*(volatile const uint16_t *)base);
        return (fuword16(base));
}

static long
vm86_fuword(volatile const void *base)
{

        if (curthread->td_critnest != 0)
                return (*(volatile const long *)base);
        return (fuword(base));
}

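/*
 * Real-mode address arithmetic: a segment:offset pair denotes the
 * linear address (segment << 4) + offset, and a real-mode interrupt
 * vector packs the segment in the high 16 bits and the offset in the
 * low 16 bits.
 */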
static __inline caddr_t
MAKE_ADDR(u_short sel, u_short off)
{
        return ((caddr_t)((sel << 4) + off));
}

static __inline void
GET_VEC(u_int vec, u_short *sel, u_short *off)
{
        *sel = vec >> 16;
        *off = vec & 0xffff;
}

static __inline u_int
MAKE_VEC(u_short sel, u_short off)
{
        return ((sel << 16) | off);
}

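/*
 * Push and pop 16- and 32-bit values on the vm86 stack addressed by
 * the frame's ss:sp registers.
 */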
static __inline void
PUSH(u_short x, struct vm86frame *vmf)
{
        vmf->vmf_sp -= 2;
        vm86_suword16(MAKE_ADDR(vmf->vmf_ss, vmf->vmf_sp), x);
}

static __inline void
PUSHL(u_int x, struct vm86frame *vmf)
{
        vmf->vmf_sp -= 4;
        vm86_suword(MAKE_ADDR(vmf->vmf_ss, vmf->vmf_sp), x);
}

static __inline u_short
POP(struct vm86frame *vmf)
{
        u_short x = vm86_fuword16(MAKE_ADDR(vmf->vmf_ss, vmf->vmf_sp));

        vmf->vmf_sp += 2;
        return (x);
}

static __inline u_int
POPL(struct vm86frame *vmf)
{
        u_int x = vm86_fuword(MAKE_ADDR(vmf->vmf_ss, vmf->vmf_sp));

        vmf->vmf_sp += 4;
        return (x);
}

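/*
 * Emulate the flag-related instructions (CLI, STI, PUSHF, POPF, INTn,
 * IRET) that fault when executed by a vm86 process, keeping the
 * virtual interrupt flag in vm86_eflags when the CPU's VME assist is
 * not in use.  A return of 0 resumes execution in vm86 mode; a nonzero
 * return is a signal number for the trap code to deliver.  Cases that
 * "break" out of the switch (an unhandled opcode, or a flag change
 * that leaves a virtual interrupt pending) fall through to the SIGBUS
 * return so the event is handled outside this fast path.
 */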
int
vm86_emulate(struct vm86frame *vmf)
{
        struct vm86_kernel *vm86;
        caddr_t addr;
        u_char i_byte;
        u_int temp_flags;
        int inc_ip = 1;
        int retcode = 0;

        /*
         * pcb_ext contains the address of the extension area, or zero if
         * the extension is not present.  (This check should not be needed,
         * as we can't enter vm86 mode until we set up an extension area.)
         */
        if (curpcb->pcb_ext == 0)
                return (SIGBUS);
        vm86 = &curpcb->pcb_ext->ext_vm86;

        if (vmf->vmf_eflags & PSL_T)
                retcode = SIGTRAP;

        addr = MAKE_ADDR(vmf->vmf_cs, vmf->vmf_ip);
        i_byte = vm86_fubyte(addr);
        if (i_byte == ADDRESS_SIZE_PREFIX) {
                i_byte = vm86_fubyte(++addr);
                inc_ip++;
        }

        if (vm86->vm86_has_vme) {
                switch (i_byte) {
                case OPERAND_SIZE_PREFIX:
                        i_byte = vm86_fubyte(++addr);
                        inc_ip++;
                        switch (i_byte) {
                        case PUSHF:
                                if (vmf->vmf_eflags & PSL_VIF)
                                        PUSHL((vmf->vmf_eflags & PUSH_MASK)
                                            | PSL_IOPL | PSL_I, vmf);
                                else
                                        PUSHL((vmf->vmf_eflags & PUSH_MASK)
                                            | PSL_IOPL, vmf);
                                vmf->vmf_ip += inc_ip;
                                return (retcode);

                        case POPF:
                                temp_flags = POPL(vmf) & POP_MASK;
                                vmf->vmf_eflags = (vmf->vmf_eflags & ~POP_MASK)
                                    | temp_flags | PSL_VM | PSL_I;
                                vmf->vmf_ip += inc_ip;
                                if (temp_flags & PSL_I) {
                                        vmf->vmf_eflags |= PSL_VIF;
                                        if (vmf->vmf_eflags & PSL_VIP)
                                                break;
                                } else {
                                        vmf->vmf_eflags &= ~PSL_VIF;
                                }
                                return (retcode);
                        }
                        break;

                /* VME faults here if VIP is set, but does not set VIF. */
                case STI:
                        vmf->vmf_eflags |= PSL_VIF;
                        vmf->vmf_ip += inc_ip;
                        if ((vmf->vmf_eflags & PSL_VIP) == 0) {
                                uprintf("fatal sti\n");
                                return (SIGKILL);
                        }
                        break;

                /* VME if no redirection support */
                case INTn:
                        break;

                /* VME if trying to set PSL_T, or PSL_I when VIP is set */
                case POPF:
                        temp_flags = POP(vmf) & POP_MASK;
                        vmf->vmf_flags = (vmf->vmf_flags & ~POP_MASK)
                            | temp_flags | PSL_VM | PSL_I;
                        vmf->vmf_ip += inc_ip;
                        if (temp_flags & PSL_I) {
                                vmf->vmf_eflags |= PSL_VIF;
                                if (vmf->vmf_eflags & PSL_VIP)
                                        break;
                        } else {
                                vmf->vmf_eflags &= ~PSL_VIF;
                        }
                        return (retcode);

                /* VME if trying to set PSL_T, or PSL_I when VIP is set */
                case IRET:
                        vmf->vmf_ip = POP(vmf);
                        vmf->vmf_cs = POP(vmf);
                        temp_flags = POP(vmf) & POP_MASK;
                        vmf->vmf_flags = (vmf->vmf_flags & ~POP_MASK)
                            | temp_flags | PSL_VM | PSL_I;
                        if (temp_flags & PSL_I) {
                                vmf->vmf_eflags |= PSL_VIF;
                                if (vmf->vmf_eflags & PSL_VIP)
                                        break;
                        } else {
                                vmf->vmf_eflags &= ~PSL_VIF;
                        }
                        return (retcode);
                }
                return (SIGBUS);
        }

        switch (i_byte) {
        case OPERAND_SIZE_PREFIX:
                i_byte = vm86_fubyte(++addr);
                inc_ip++;
                switch (i_byte) {
                case PUSHF:
                        if (vm86->vm86_eflags & PSL_VIF)
                                PUSHL((vmf->vmf_flags & PUSH_MASK)
                                    | PSL_IOPL | PSL_I, vmf);
                        else
                                PUSHL((vmf->vmf_flags & PUSH_MASK)
                                    | PSL_IOPL, vmf);
                        vmf->vmf_ip += inc_ip;
                        return (retcode);

                case POPF:
                        temp_flags = POPL(vmf) & POP_MASK;
                        vmf->vmf_eflags = (vmf->vmf_eflags & ~POP_MASK)
                            | temp_flags | PSL_VM | PSL_I;
                        vmf->vmf_ip += inc_ip;
                        if (temp_flags & PSL_I) {
                                vm86->vm86_eflags |= PSL_VIF;
                                if (vm86->vm86_eflags & PSL_VIP)
                                        break;
                        } else {
                                vm86->vm86_eflags &= ~PSL_VIF;
                        }
                        return (retcode);
                }
                return (SIGBUS);

        case CLI:
                vm86->vm86_eflags &= ~PSL_VIF;
                vmf->vmf_ip += inc_ip;
                return (retcode);

        case STI:
                /* if there is a pending interrupt, go to the emulator */
                vm86->vm86_eflags |= PSL_VIF;
                vmf->vmf_ip += inc_ip;
                if (vm86->vm86_eflags & PSL_VIP)
                        break;
                return (retcode);

        case PUSHF:
                if (vm86->vm86_eflags & PSL_VIF)
                        PUSH((vmf->vmf_flags & PUSH_MASK)
                            | PSL_IOPL | PSL_I, vmf);
                else
                        PUSH((vmf->vmf_flags & PUSH_MASK) | PSL_IOPL, vmf);
                vmf->vmf_ip += inc_ip;
                return (retcode);

        case INTn:
                i_byte = vm86_fubyte(addr + 1);
                if ((vm86->vm86_intmap[i_byte >> 3] & (1 << (i_byte & 7))) != 0)
                        break;
                if (vm86->vm86_eflags & PSL_VIF)
                        PUSH((vmf->vmf_flags & PUSH_MASK)
                            | PSL_IOPL | PSL_I, vmf);
                else
                        PUSH((vmf->vmf_flags & PUSH_MASK) | PSL_IOPL, vmf);
                PUSH(vmf->vmf_cs, vmf);
                PUSH(vmf->vmf_ip + inc_ip + 1, vmf);    /* return IP past the operand byte */
                GET_VEC(vm86_fuword((caddr_t)(i_byte * 4)),
                     &vmf->vmf_cs, &vmf->vmf_ip);
                vmf->vmf_flags &= ~PSL_T;
                vm86->vm86_eflags &= ~PSL_VIF;
                return (retcode);

        case IRET:
                vmf->vmf_ip = POP(vmf);
                vmf->vmf_cs = POP(vmf);
                temp_flags = POP(vmf) & POP_MASK;
                vmf->vmf_flags = (vmf->vmf_flags & ~POP_MASK)
                    | temp_flags | PSL_VM | PSL_I;
                if (temp_flags & PSL_I) {
                        vm86->vm86_eflags |= PSL_VIF;
                        if (vm86->vm86_eflags & PSL_VIP)
                                break;
                } else {
                        vm86->vm86_eflags &= ~PSL_VIF;
                }
                return (retcode);

        case POPF:
                temp_flags = POP(vmf) & POP_MASK;
                vmf->vmf_flags = (vmf->vmf_flags & ~POP_MASK)
                    | temp_flags | PSL_VM | PSL_I;
                vmf->vmf_ip += inc_ip;
                if (temp_flags & PSL_I) {
                        vm86->vm86_eflags |= PSL_VIF;
                        if (vm86->vm86_eflags & PSL_VIP)
                                break;
                } else {
                        vm86->vm86_eflags &= ~PSL_VIF;
                }
                return (retcode);
        }
        return (SIGBUS);
}

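/*
 * The vm86 page table covers the first 1MB plus 64KB of address space:
 * the extra 64KB allows for the wrap past 1MB that real-mode segment
 * arithmetic can produce (0xffff:0xffff = 0x10ffef).
 */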
#define PGTABLE_SIZE    ((1024 + 64) * 1024 / PAGE_SIZE)
#define INTMAP_SIZE     32
#define IOMAP_SIZE      ctob(IOPAGES)
#define TSS_SIZE \
        (sizeof(struct pcb_ext) - sizeof(struct segment_descriptor) + \
         INTMAP_SIZE + IOMAP_SIZE + 1)

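/*
 * Layout of the statically allocated vm86 area pictured in the diagram
 * in vm86_initialize_pae() below.  Two variants are needed because PAE
 * page table entries are 64 bits wide while non-PAE entries are 32.
 */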
struct vm86_layout_pae {
        uint64_t        vml_pgtbl[PGTABLE_SIZE];
        struct  pcb vml_pcb;
        struct  pcb_ext vml_ext;
        char    vml_intmap[INTMAP_SIZE];
        char    vml_iomap[IOMAP_SIZE];
        char    vml_iomap_trailer;
};

struct vm86_layout_nopae {
        uint32_t        vml_pgtbl[PGTABLE_SIZE];
        struct  pcb vml_pcb;
        struct  pcb_ext vml_ext;
        char    vml_intmap[INTMAP_SIZE];
        char    vml_iomap[IOMAP_SIZE];
        char    vml_iomap_trailer;
};

_Static_assert(sizeof(struct vm86_layout_pae) <= ctob(3),
    "struct vm86_layout_pae exceeds space allocated in locore.s");
_Static_assert(sizeof(struct vm86_layout_nopae) <= ctob(3),
    "struct vm86_layout_nopae exceeds space allocated in locore.s");

static void
vm86_initialize_pae(void)
{
        int i;
        u_int *addr;
        struct vm86_layout_pae *vml;
        struct pcb *pcb;
        struct pcb_ext *ext;
        struct soft_segment_descriptor ssd = {
                0,                      /* segment base address (overwritten) */
                0,                      /* length (overwritten) */
                SDT_SYS386TSS,          /* segment type */
                0,                      /* privilege level */
                1,                      /* descriptor present */
                0, 0,
                0,                      /* default 16 size */
                0                       /* granularity */
        };

        /*
         * Below is the memory layout that we use for the vm86 region.
         *
         * +--------+
         * |        |
         * |        |
         * | page 0 |
         * |        | +--------+
         * |        | | stack  |
         * +--------+ +--------+ <--------- vm86paddr
         * |        | |Page Tbl| 1M + 64K = 272 entries = 1088 bytes
         * |        | +--------+
         * |        | |  PCB   | size: ~240 bytes
         * | page 1 | |PCB Ext | size: ~140 bytes (includes TSS)
         * |        | +--------+
         * |        | |int map |
         * |        | +--------+
         * +--------+ |        |
         * | page 2 | |  I/O   |
         * +--------+ | bitmap |
         * | page 3 | |        |
         * |        | +--------+
         * +--------+
         */

        /*
         * A rudimentary PCB must be installed, in order to get to the
         * PCB extension area.  We use the PCB area as a scratchpad for
         * data storage, the layout of which is shown below.
         *
         * pcb_esi      = new PTD entry 0
         * pcb_ebp      = pointer to frame on vm86 stack
         * pcb_esp      = stack frame pointer at time of switch
         * pcb_ebx      = va of vm86 page table
         * pcb_eip      = argument pointer to initial call
         * pcb_vm86[0]  = saved TSS descriptor, word 0
         * pcb_vm86[1]  = saved TSS descriptor, word 1
         */
#define new_ptd         pcb_esi
#define vm86_frame      pcb_ebp
#define pgtable_va      pcb_ebx

        vml = (struct vm86_layout_pae *)vm86paddr;
        pcb = &vml->vml_pcb;
        ext = &vml->vml_ext;

        mtx_init(&vm86_lock, "vm86 lock", NULL, MTX_DEF);

        bzero(pcb, sizeof(struct pcb));
        pcb->new_ptd = vm86pa | PG_V | PG_RW | PG_U;
        pcb->vm86_frame = vm86paddr - sizeof(struct vm86frame);
        pcb->pgtable_va = vm86paddr;
        pcb->pcb_flags = PCB_VM86CALL;
        pcb->pcb_ext = ext;

        bzero(ext, sizeof(struct pcb_ext));
        ext->ext_tss.tss_esp0 = vm86paddr;
        ext->ext_tss.tss_ss0 = GSEL(GDATA_SEL, SEL_KPL);
        ext->ext_tss.tss_ioopt =
                ((u_int)vml->vml_iomap - (u_int)&ext->ext_tss) << 16;
        ext->ext_iomap = vml->vml_iomap;
        ext->ext_vm86.vm86_intmap = vml->vml_intmap;

        if (cpu_feature & CPUID_VME)
                ext->ext_vm86.vm86_has_vme = (rcr4() & CR4_VME ? 1 : 0);

        addr = (u_int *)ext->ext_vm86.vm86_intmap;
        for (i = 0; i < (INTMAP_SIZE + IOMAP_SIZE) / sizeof(u_int); i++)
                *addr++ = 0;
        vml->vml_iomap_trailer = 0xff;

        ssd.ssd_base = (u_int)&ext->ext_tss;
        ssd.ssd_limit = TSS_SIZE - 1;
        ssdtosd(&ssd, &ext->ext_tssd);

        vm86pcb = pcb;

#if 0
        /*
         * use whatever is leftover of the vm86 page layout as a
         * message buffer so we can capture early output.
         */
        msgbufinit((vm_offset_t)vm86paddr + sizeof(struct vm86_layout),
            ctob(3) - sizeof(struct vm86_layout));
#endif
}

static void
vm86_initialize_nopae(void)
{
        int i;
        u_int *addr;
        struct vm86_layout_nopae *vml;
        struct pcb *pcb;
        struct pcb_ext *ext;
        struct soft_segment_descriptor ssd = {
                0,                      /* segment base address (overwritten) */
                0,                      /* length (overwritten) */
                SDT_SYS386TSS,          /* segment type */
                0,                      /* privilege level */
                1,                      /* descriptor present */
                0, 0,
                0,                      /* default 16 size */
                0                       /* granularity */
        };

        vml = (struct vm86_layout_nopae *)vm86paddr;
        pcb = &vml->vml_pcb;
        ext = &vml->vml_ext;

        mtx_init(&vm86_lock, "vm86 lock", NULL, MTX_DEF);

        bzero(pcb, sizeof(struct pcb));
        pcb->new_ptd = vm86pa | PG_V | PG_RW | PG_U;
        pcb->vm86_frame = vm86paddr - sizeof(struct vm86frame);
        pcb->pgtable_va = vm86paddr;
        pcb->pcb_flags = PCB_VM86CALL;
        pcb->pcb_ext = ext;

        bzero(ext, sizeof(struct pcb_ext));
        ext->ext_tss.tss_esp0 = vm86paddr;
        ext->ext_tss.tss_ss0 = GSEL(GDATA_SEL, SEL_KPL);
        ext->ext_tss.tss_ioopt =
                ((u_int)vml->vml_iomap - (u_int)&ext->ext_tss) << 16;
        ext->ext_iomap = vml->vml_iomap;
        ext->ext_vm86.vm86_intmap = vml->vml_intmap;

        if (cpu_feature & CPUID_VME)
                ext->ext_vm86.vm86_has_vme = (rcr4() & CR4_VME ? 1 : 0);

        addr = (u_int *)ext->ext_vm86.vm86_intmap;
        for (i = 0; i < (INTMAP_SIZE + IOMAP_SIZE) / sizeof(u_int); i++)
                *addr++ = 0;
        vml->vml_iomap_trailer = 0xff;

        ssd.ssd_base = (u_int)&ext->ext_tss;
        ssd.ssd_limit = TSS_SIZE - 1;
        ssdtosd(&ssd, &ext->ext_tssd);

        vm86pcb = pcb;

#if 0
        /*
         * use whatever is leftover of the vm86 page layout as a
         * message buffer so we can capture early output.
         */
        msgbufinit((vm_offset_t)vm86paddr + sizeof(struct vm86_layout),
            ctob(3) - sizeof(struct vm86_layout));
#endif
}

void
vm86_initialize(void)
{

        if (pae_mode)
                vm86_initialize_pae();
        else
                vm86_initialize_nopae();
}

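/*
 * A struct vm86context tracks which pages of the vm86 address space
 * are currently backed by kernel memory: each pmap[] entry associates
 * a vm86 page number with the kernel va (and, while a call is in
 * flight, the saved page table entry) used for it.
 */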
vm_offset_t
vm86_getpage(struct vm86context *vmc, int pagenum)
{
        int i;

        for (i = 0; i < vmc->npages; i++)
                if (vmc->pmap[i].pte_num == pagenum)
                        return (vmc->pmap[i].kva);
        return (0);
}

vm_offset_t
vm86_addpage(struct vm86context *vmc, int pagenum, vm_offset_t kva)
{
        int i, flags = 0;

        for (i = 0; i < vmc->npages; i++)
                if (vmc->pmap[i].pte_num == pagenum)
                        goto overlap;

        if (vmc->npages == VM86_PMAPSIZE)
                goto full;                      /* XXX grow map? */

        if (kva == 0) {
                kva = (vm_offset_t)malloc(PAGE_SIZE, M_TEMP, M_WAITOK);
                flags = VMAP_MALLOC;
        }

        i = vmc->npages++;
        vmc->pmap[i].flags = flags;
        vmc->pmap[i].kva = kva;
        vmc->pmap[i].pte_num = pagenum;
        return (kva);
overlap:
        panic("vm86_addpage: overlap");
full:
        panic("vm86_addpage: not enough room");
}

/*
 * called from vm86_bioscall, while in vm86 address space, to finalize setup.
 */
void
vm86_prepcall(struct vm86frame *vmf)
{
        struct vm86_kernel *vm86;
        uint32_t *stack;
        uint8_t *code;

        code = (void *)0xa00;
        stack = (void *)(0x1000 - 2);   /* keep aligned */
        if ((vmf->vmf_trapno & PAGE_MASK) <= 0xff) {
                /* interrupt call requested */
                code[0] = INTn;
                code[1] = vmf->vmf_trapno & 0xff;
                code[2] = HLT;
                vmf->vmf_ip = (uintptr_t)code;
                vmf->vmf_cs = 0;
        } else {
                code[0] = HLT;
                stack--;
                stack[0] = MAKE_VEC(0, (uintptr_t)code);
        }
        vmf->vmf_sp = (uintptr_t)stack;
        vmf->vmf_ss = 0;
        vmf->kernel_fs = vmf->kernel_es = vmf->kernel_ds = 0;
        vmf->vmf_eflags = PSL_VIF | PSL_VM | PSL_USER;

        vm86 = &curpcb->pcb_ext->ext_vm86;
        if (!vm86->vm86_has_vme)
                vm86->vm86_eflags = vmf->vmf_eflags;  /* save VIF, VIP */
}

/*
 * vm86 trap handler; determines whether routine succeeded or not.
 * Called while in vm86 space, returns to calling process.
 */
void
vm86_trap(struct vm86frame *vmf)
{
        void (*p)(struct vm86frame *);
        caddr_t addr;

        /* "should not happen" */
        if ((vmf->vmf_eflags & PSL_VM) == 0)
                panic("vm86_trap called, but not in vm86 mode");

        addr = MAKE_ADDR(vmf->vmf_cs, vmf->vmf_ip);
        if (*(u_char *)addr == HLT)
                vmf->vmf_trapno = vmf->vmf_eflags & PSL_C;
        else
                vmf->vmf_trapno = vmf->vmf_trapno << 16;

        p = (void (*)(struct vm86frame *))((uintptr_t)vm86_biosret +
            setidt_disp);
        p(vmf);
}

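/*
 * Usage sketch (illustrative; mirrors historical callers such as the
 * i386 memory-sizing code): BIOS INT 12h returns the base memory size
 * in AX, so a caller might do
 *
 *	struct vm86frame vmf;
 *
 *	bzero(&vmf, sizeof(vmf));
 *	if (vm86_intcall(0x12, &vmf) == 0)
 *		basemem = vmf.vmf_ax;		(base memory, in KB)
 */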
int
vm86_intcall(int intnum, struct vm86frame *vmf)
{
        int (*p)(struct vm86frame *);
        int retval;

        if (intnum < 0 || intnum > 0xff)
                return (EINVAL);

        vmf->vmf_trapno = intnum;
        p = (int (*)(struct vm86frame *))((uintptr_t)vm86_bioscall +
            setidt_disp);
        mtx_lock(&vm86_lock);
        critical_enter();
        retval = p(vmf);
        critical_exit();
        mtx_unlock(&vm86_lock);
        return (retval);
}

/*
 * struct vm86context contains the page table to use when making
 * vm86 calls.  If intnum is a valid interrupt number (0-255), then
 * the "interrupt trampoline" will be used, otherwise we use the
 * caller's cs:ip routine.
 */
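/*
 * Usage sketch (illustrative): a caller that must pass a data buffer
 * to the BIOS first maps a page-aligned kernel buffer into low memory,
 * along the lines of
 *
 *	struct vm86context vmc;
 *	struct vm86frame vmf;
 *	u_short seg, off;
 *
 *	bzero(&vmc, sizeof(vmc));
 *	vm86_addpage(&vmc, 1, buf_kva);		(buffer appears at 0x1000)
 *	vm86_getptr(&vmc, buf_kva, &seg, &off);
 *	bzero(&vmf, sizeof(vmf));
 *	(load seg:off into the registers the BIOS service expects)
 *	vm86_datacall(intnum, &vmf, &vmc);
 */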
int
vm86_datacall(int intnum, struct vm86frame *vmf, struct vm86context *vmc)
{
        uint64_t *pte_pae;
        uint32_t *pte_nopae;
        int (*p)(struct vm86frame *);
        vm_paddr_t page;
        int i, entry, retval;

        mtx_lock(&vm86_lock);
        if (pae_mode) {
                pte_pae = (uint64_t *)vm86paddr;
                for (i = 0; i < vmc->npages; i++) {
                        page = vtophys(vmc->pmap[i].kva & PG_FRAME_PAE);
                        entry = vmc->pmap[i].pte_num;
                        vmc->pmap[i].old_pte = pte_pae[entry];
                        pte_pae[entry] = page | PG_V | PG_RW | PG_U;
                        pmap_invalidate_page(kernel_pmap, vmc->pmap[i].kva);
                }
        } else {
                pte_nopae = (uint32_t *)vm86paddr;
                for (i = 0; i < vmc->npages; i++) {
                        page = vtophys(vmc->pmap[i].kva & PG_FRAME_NOPAE);
                        entry = vmc->pmap[i].pte_num;
                        vmc->pmap[i].old_pte = pte_nopae[entry];
                        pte_nopae[entry] = page | PG_V | PG_RW | PG_U;
                        pmap_invalidate_page(kernel_pmap, vmc->pmap[i].kva);
                }
        }

        vmf->vmf_trapno = intnum;
        p = (int (*)(struct vm86frame *))((uintptr_t)vm86_bioscall +
            setidt_disp);
        critical_enter();
        retval = p(vmf);
        critical_exit();

        if (pae_mode) {
                for (i = 0; i < vmc->npages; i++) {
                        entry = vmc->pmap[i].pte_num;
                        pte_pae[entry] = vmc->pmap[i].old_pte;
                        pmap_invalidate_page(kernel_pmap, vmc->pmap[i].kva);
                }
        } else {
                for (i = 0; i < vmc->npages; i++) {
                        entry = vmc->pmap[i].pte_num;
                        pte_nopae[entry] = vmc->pmap[i].old_pte;
                        pmap_invalidate_page(kernel_pmap, vmc->pmap[i].kva);
                }
        }
        mtx_unlock(&vm86_lock);

        return (retval);
}

vm_offset_t
vm86_getaddr(struct vm86context *vmc, u_short sel, u_short off)
{
        int i, page;
        vm_offset_t addr;

        addr = (vm_offset_t)MAKE_ADDR(sel, off);
        page = addr >> PAGE_SHIFT;
        for (i = 0; i < vmc->npages; i++)
                if (page == vmc->pmap[i].pte_num)
                        return (vmc->pmap[i].kva + (addr & PAGE_MASK));
        return (0);
}

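/*
 * Translate a kernel va within one of the context's mapped pages back
 * into a vm86 segment:offset pair; pte_num << 8 is the segment whose
 * base is the linear address pte_num * PAGE_SIZE.  Returns 1 on
 * success, 0 if the address is not covered by the context.
 */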
int
vm86_getptr(struct vm86context *vmc, vm_offset_t kva, u_short *sel,
    u_short *off)
{
        int i;

        for (i = 0; i < vmc->npages; i++)
                if (kva >= vmc->pmap[i].kva &&
                    kva < vmc->pmap[i].kva + PAGE_SIZE) {
                        *off = kva - vmc->pmap[i].kva;
                        *sel = vmc->pmap[i].pte_num << 8;
                        return (1);
                }
        return (0);
}

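/*
 * Back end for the i386_vm86() sysarch(2) operations: initialize the
 * per-process vm86 state, report whether VME is enabled, and (given
 * sufficient privilege) issue a BIOS interrupt call on behalf of the
 * caller.
 */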
int
vm86_sysarch(struct thread *td, char *args)
{
        int error = 0;
        struct i386_vm86_args ua;
        struct vm86_kernel *vm86;

        if ((error = copyin(args, &ua, sizeof(struct i386_vm86_args))) != 0)
                return (error);

        if (td->td_pcb->pcb_ext == 0)
                if ((error = i386_extend_pcb(td)) != 0)
                        return (error);
        vm86 = &td->td_pcb->pcb_ext->ext_vm86;

        switch (ua.sub_op) {
        case VM86_INIT: {
                struct vm86_init_args sa;

                if ((error = copyin(ua.sub_args, &sa, sizeof(sa))) != 0)
                        return (error);
                if (cpu_feature & CPUID_VME)
                        vm86->vm86_has_vme = (rcr4() & CR4_VME ? 1 : 0);
                else
                        vm86->vm86_has_vme = 0;
                vm86->vm86_inited = 1;
                vm86->vm86_debug = sa.debug;
                bcopy(&sa.int_map, vm86->vm86_intmap, 32);
                }
                break;

#if 0
        case VM86_SET_VME: {
                struct vm86_vme_args sa;

                if ((cpu_feature & CPUID_VME) == 0)
                        return (ENODEV);

                if ((error = copyin(ua.sub_args, &sa, sizeof(sa))) != 0)
                        return (error);
                if (sa.state)
                        load_cr4(rcr4() | CR4_VME);
                else
                        load_cr4(rcr4() & ~CR4_VME);
                }
                break;
#endif

        case VM86_GET_VME: {
                struct vm86_vme_args sa;

                sa.state = (rcr4() & CR4_VME ? 1 : 0);
                error = copyout(&sa, ua.sub_args, sizeof(sa));
                }
                break;

        case VM86_INTCALL: {
                struct vm86_intcall_args sa;

                if ((error = priv_check(td, PRIV_VM86_INTCALL)) != 0)
                        return (error);
                if ((error = copyin(ua.sub_args, &sa, sizeof(sa))) != 0)
                        return (error);
                if ((error = vm86_intcall(sa.intnum, &sa.vmf)) != 0)
                        return (error);
                error = copyout(&sa, ua.sub_args, sizeof(sa));
                }
                break;

        default:
                error = EINVAL;
        }
        return (error);
}