2 * SPDX-License-Identifier: BSD-2-Clause-FreeBSD
4 * Copyright (c) 1997 Jonathan Lemon
7 * Redistribution and use in source and binary forms, with or without
8 * modification, are permitted provided that the following conditions
10 * 1. Redistributions of source code must retain the above copyright
11 * notice, this list of conditions and the following disclaimer.
12 * 2. Redistributions in binary form must reproduce the above copyright
13 * notice, this list of conditions and the following disclaimer in the
14 * documentation and/or other materials provided with the distribution.
16 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
17 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
18 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
19 * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
20 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
21 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
22 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
23 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
24 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
25 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
29 #include <sys/cdefs.h>
30 __FBSDID("$FreeBSD$");
32 #include <sys/param.h>
33 #include <sys/systm.h>
37 #include <sys/malloc.h>
38 #include <sys/mutex.h>
42 #include <vm/vm_map.h>
43 #include <vm/vm_page.h>
45 #include <machine/md_var.h>
46 #include <machine/pcb.h>
47 #include <machine/pcb_ext.h>
48 #include <machine/psl.h>
49 #include <machine/specialreg.h>
50 #include <machine/sysarch.h>
/*
 * vm86pcb is defined outside this file; nothing visible in this excerpt
 * assigns or reads it.
 */
53 extern struct pcb *vm86pcb;
/*
 * Mutex set up by vm86_initialize() and held by vm86_intcall() and
 * vm86_datacall() around the work they do.
 */
55 static struct mtx vm86_lock;
/*
 * Entry points defined outside this file.  Code here only takes their
 * addresses to build the function pointers used by vm86_trap(),
 * vm86_intcall() and vm86_datacall().
 */
57 extern int vm86_bioscall(struct vm86frame *);
58 extern void vm86_biosret(struct vm86frame *);
/* Defined later in this file; documented there as called from vm86_bioscall. */
60 void vm86_prepcall(struct vm86frame *);
/* Instruction prefix byte values that vm86_emulate() compares against. */
76 #define OPERAND_SIZE_PREFIX 0x66
77 #define ADDRESS_SIZE_PREFIX 0x67
/*
 * ANDed with the frame flags before they are pushed on the vm86 stack:
 * clears PSL_VM, PSL_RF and PSL_I from the pushed image.
 */
78 #define PUSH_MASK ~(PSL_VM | PSL_RF | PSL_I)
/*
 * ANDed with a flags value popped from the vm86 stack: clears PSL_VIP,
 * PSL_VIF, PSL_VM, PSL_RF and PSL_IOPL.  The inverse (~POP_MASK) selects
 * exactly those bits to be kept from the current frame flags.
 */
79 #define POP_MASK ~(PSL_VIP | PSL_VIF | PSL_VM | PSL_RF | PSL_IOPL)
/*
 * Store word as a 16-bit value at base.  While curthread->td_critnest is
 * nonzero the store is made directly through the pointer; otherwise it is
 * delegated to suword16() and that result is returned.  The return
 * statement of the direct path is not visible in this excerpt.
 */
82 vm86_suword16(volatile void *base, int word)
85 if (curthread->td_critnest != 0) {
86 *(volatile uint16_t *)base = word;
89 return (suword16(base, word));
/*
 * Store word as a long at base.  While curthread->td_critnest is nonzero
 * the store is made directly through the pointer; otherwise it is
 * delegated to suword() and that result is returned.  The return
 * statement of the direct path is not visible in this excerpt.
 */
93 vm86_suword(volatile void *base, long word)
96 if (curthread->td_critnest != 0) {
97 *(volatile long *)base = word;
100 return (suword(base, word));
/*
 * Fetch one byte from base.  While curthread->td_critnest is nonzero the
 * byte is read directly through the pointer; otherwise the result of
 * fubyte() is returned.
 */
104 vm86_fubyte(volatile const void *base)
107 if (curthread->td_critnest != 0)
108 return (*(volatile const u_char *)base);
109 return (fubyte(base));
/*
 * Fetch a 16-bit value from base.  While curthread->td_critnest is nonzero
 * the value is read directly through the pointer; otherwise the result of
 * fuword16() is returned.
 */
113 vm86_fuword16(volatile const void *base)
116 if (curthread->td_critnest != 0)
117 return (*(volatile const uint16_t *)base);
118 return (fuword16(base));
/*
 * Fetch a long from base.  While curthread->td_critnest is nonzero the
 * value is read directly through the pointer; otherwise the result of
 * fuword() is returned.
 */
122 vm86_fuword(volatile const void *base)
125 if (curthread->td_critnest != 0)
126 return (*(volatile const long *)base);
127 return (fuword(base));
/*
 * Turn a segment:offset pair into a linear address, computed as
 * (sel << 4) + off and returned as a caddr_t.
 */
130 static __inline caddr_t
131 MAKE_ADDR(u_short sel, u_short off)
133 return ((caddr_t)((sel << 4) + off));
/*
 * Takes a packed 32-bit vector and two u_short output pointers.  The body
 * is not visible in this excerpt.  The one visible caller passes the word
 * read from address i_byte * 4 and receives the results in vmf_cs and
 * vmf_ip, so this is presumably the inverse of MAKE_VEC -- TODO confirm.
 */
137 GET_VEC(u_int vec, u_short *sel, u_short *off)
/*
 * Pack a segment:offset pair into one 32-bit value, with the segment in
 * the upper 16 bits and the offset in the lower 16 bits.
 *
 * sel is widened to u_int before the shift.  Left as a u_short it would be
 * promoted to (signed) int, and shifting any value >= 0x8000 left by 16
 * is not representable in int, which is undefined behavior in C.
 */
static __inline u_int
MAKE_VEC(u_short sel, u_short off)
{

	return (((u_int)sel << 16) | off);
}
/*
 * Write the 16-bit value x to the vm86 stack at the linear address of
 * vmf_ss:vmf_sp.  Any adjustment of vmf_sp is not visible in this excerpt.
 */
150 PUSH(u_short x, struct vm86frame *vmf)
153 vm86_suword16(MAKE_ADDR(vmf->vmf_ss, vmf->vmf_sp), x);
/*
 * Write the value x to the vm86 stack at the linear address of
 * vmf_ss:vmf_sp using vm86_suword().  Any adjustment of vmf_sp is not
 * visible in this excerpt.
 */
157 PUSHL(u_int x, struct vm86frame *vmf)
160 vm86_suword(MAKE_ADDR(vmf->vmf_ss, vmf->vmf_sp), x);
/*
 * Read a 16-bit value from the vm86 stack at the linear address of
 * vmf_ss:vmf_sp.  The stack pointer adjustment and the return statement
 * are not visible in this excerpt.
 */
163 static __inline u_short
164 POP(struct vm86frame *vmf)
166 u_short x = vm86_fuword16(MAKE_ADDR(vmf->vmf_ss, vmf->vmf_sp));
/*
 * Read a value from the vm86 stack at the linear address of vmf_ss:vmf_sp
 * using vm86_fuword().  The stack pointer adjustment and the return
 * statement are not visible in this excerpt.
 */
172 static __inline u_int
173 POPL(struct vm86frame *vmf)
175 u_int x = vm86_fuword(MAKE_ADDR(vmf->vmf_ss, vmf->vmf_sp));
/*
 * Emulate the instruction located at the frame cs:ip.
 *
 * The opcode byte is fetched with vm86_fubyte(); an address-size prefix
 * and, inside each path, an operand-size prefix are stepped over first.
 * Two paths follow.  When vm86_has_vme is set, the virtual interrupt flag
 * is tracked in vmf_eflags itself; otherwise PSL_VIF and PSL_VIP are
 * tracked in the software copy vm86->vm86_eflags.  Both paths handle
 * pushing and popping the flags image and popping ip, cs and flags
 * together.  The case labels, the inc_ip bookkeeping and every return
 * statement are not visible in this excerpt, so the return contract
 * cannot be stated from here.
 */
182 vm86_emulate(struct vm86frame *vmf)
184 struct vm86_kernel *vm86;
192 * pcb_ext contains the address of the extension area, or zero if
193 * the extension is not present. (This check should not be needed,
194 * as we can't enter vm86 mode until we set up an extension area)
196 if (curpcb->pcb_ext == 0)
198 vm86 = &curpcb->pcb_ext->ext_vm86;
/* PSL_T is set in the frame flags; the action taken is not visible here. */
200 if (vmf->vmf_eflags & PSL_T)
/* Fetch the opcode byte, stepping over an address-size prefix if present. */
203 addr = MAKE_ADDR(vmf->vmf_cs, vmf->vmf_ip);
204 i_byte = vm86_fubyte(addr);
205 if (i_byte == ADDRESS_SIZE_PREFIX) {
206 i_byte = vm86_fubyte(++addr);
/* VME path: the virtual interrupt flag is read from and written to vmf_eflags. */
210 if (vm86->vm86_has_vme) {
212 case OPERAND_SIZE_PREFIX:
213 i_byte = vm86_fubyte(++addr);
/*
 * 32-bit flags push.  With PSL_VIF set the pushed image has PSL_I forced
 * on; the tail of the other branch is cut off in this excerpt.
 */
217 if (vmf->vmf_eflags & PSL_VIF)
218 PUSHL((vmf->vmf_eflags & PUSH_MASK)
219 | PSL_IOPL | PSL_I, vmf);
221 PUSHL((vmf->vmf_eflags & PUSH_MASK)
223 vmf->vmf_ip += inc_ip;
/*
 * 32-bit flags pop.  The popped bits are merged with the bits preserved
 * from the frame, PSL_VM and PSL_I are forced on, and the popped PSL_I is
 * mirrored into PSL_VIF.
 */
227 temp_flags = POPL(vmf) & POP_MASK;
228 vmf->vmf_eflags = (vmf->vmf_eflags & ~POP_MASK)
229 | temp_flags | PSL_VM | PSL_I;
230 vmf->vmf_ip += inc_ip;
231 if (temp_flags & PSL_I) {
232 vmf->vmf_eflags |= PSL_VIF;
233 if (vmf->vmf_eflags & PSL_VIP)
236 vmf->vmf_eflags &= ~PSL_VIF;
242 /* VME faults here if VIP is set, but does not set VIF. */
/* Set VIF and step past the instruction; VIP being clear here is reported via uprintf. */
244 vmf->vmf_eflags |= PSL_VIF;
245 vmf->vmf_ip += inc_ip;
246 if ((vmf->vmf_eflags & PSL_VIP) == 0) {
247 uprintf("fatal sti\n");
252 /* VME if no redirection support */
256 /* VME if trying to set PSL_T, or PSL_I when VIP is set */
/* 16-bit flags pop: same merge as above, applied to vmf_flags. */
258 temp_flags = POP(vmf) & POP_MASK;
259 vmf->vmf_flags = (vmf->vmf_flags & ~POP_MASK)
260 | temp_flags | PSL_VM | PSL_I;
261 vmf->vmf_ip += inc_ip;
262 if (temp_flags & PSL_I) {
263 vmf->vmf_eflags |= PSL_VIF;
264 if (vmf->vmf_eflags & PSL_VIP)
267 vmf->vmf_eflags &= ~PSL_VIF;
271 /* VME if trying to set PSL_T, or PSL_I when VIP is set */
/* Pop ip, cs and flags in that order, then mirror the popped PSL_I into VIF. */
273 vmf->vmf_ip = POP(vmf);
274 vmf->vmf_cs = POP(vmf);
275 temp_flags = POP(vmf) & POP_MASK;
276 vmf->vmf_flags = (vmf->vmf_flags & ~POP_MASK)
277 | temp_flags | PSL_VM | PSL_I;
278 if (temp_flags & PSL_I) {
279 vmf->vmf_eflags |= PSL_VIF;
280 if (vmf->vmf_eflags & PSL_VIP)
283 vmf->vmf_eflags &= ~PSL_VIF;
/* Non-VME path: PSL_VIF and PSL_VIP are tracked in vm86->vm86_eflags. */
292 case OPERAND_SIZE_PREFIX:
293 i_byte = vm86_fubyte(++addr);
/* 32-bit flags push; PSL_I is forced on in the image when the software VIF is set. */
297 if (vm86->vm86_eflags & PSL_VIF)
298 PUSHL((vmf->vmf_flags & PUSH_MASK)
299 | PSL_IOPL | PSL_I, vmf);
301 PUSHL((vmf->vmf_flags & PUSH_MASK)
303 vmf->vmf_ip += inc_ip;
/* 32-bit flags pop; the popped PSL_I is mirrored into the software VIF. */
307 temp_flags = POPL(vmf) & POP_MASK;
308 vmf->vmf_eflags = (vmf->vmf_eflags & ~POP_MASK)
309 | temp_flags | PSL_VM | PSL_I;
310 vmf->vmf_ip += inc_ip;
311 if (temp_flags & PSL_I) {
312 vm86->vm86_eflags |= PSL_VIF;
313 if (vm86->vm86_eflags & PSL_VIP)
316 vm86->vm86_eflags &= ~PSL_VIF;
/* Clear the software VIF and step past the instruction. */
323 vm86->vm86_eflags &= ~PSL_VIF;
324 vmf->vmf_ip += inc_ip;
328 /* if there is a pending interrupt, go to the emulator */
329 vm86->vm86_eflags |= PSL_VIF;
330 vmf->vmf_ip += inc_ip;
331 if (vm86->vm86_eflags & PSL_VIP)
/* 16-bit flags push; PSL_I appears in the image only when the software VIF is set. */
336 if (vm86->vm86_eflags & PSL_VIF)
337 PUSH((vmf->vmf_flags & PUSH_MASK)
338 | PSL_IOPL | PSL_I, vmf);
340 PUSH((vmf->vmf_flags & PUSH_MASK) | PSL_IOPL, vmf);
341 vmf->vmf_ip += inc_ip;
/*
 * Interrupt instruction: the vector number is the byte following the
 * opcode.  Its bit in vm86_intmap is tested first (the action taken when
 * the bit is set is not visible here).  Otherwise flags, cs and the return
 * ip are pushed and the new cs:ip is loaded from the table entry at
 * address i_byte * 4.
 */
345 i_byte = vm86_fubyte(addr + 1);
346 if ((vm86->vm86_intmap[i_byte >> 3] & (1 << (i_byte & 7))) != 0)
348 if (vm86->vm86_eflags & PSL_VIF)
349 PUSH((vmf->vmf_flags & PUSH_MASK)
350 | PSL_IOPL | PSL_I, vmf);
352 PUSH((vmf->vmf_flags & PUSH_MASK) | PSL_IOPL, vmf);
353 PUSH(vmf->vmf_cs, vmf);
354 PUSH(vmf->vmf_ip + inc_ip + 1, vmf); /* increment IP */
355 GET_VEC(vm86_fuword((caddr_t)(i_byte * 4)),
356 &vmf->vmf_cs, &vmf->vmf_ip);
/* Clear PSL_T in the frame and the software VIF after loading the new cs:ip. */
357 vmf->vmf_flags &= ~PSL_T;
358 vm86->vm86_eflags &= ~PSL_VIF;
/* Pop ip, cs and flags in that order, then mirror the popped PSL_I into the software VIF. */
362 vmf->vmf_ip = POP(vmf);
363 vmf->vmf_cs = POP(vmf);
364 temp_flags = POP(vmf) & POP_MASK;
365 vmf->vmf_flags = (vmf->vmf_flags & ~POP_MASK)
366 | temp_flags | PSL_VM | PSL_I;
367 if (temp_flags & PSL_I) {
368 vm86->vm86_eflags |= PSL_VIF;
369 if (vm86->vm86_eflags & PSL_VIP)
372 vm86->vm86_eflags &= ~PSL_VIF;
/* 16-bit flags pop; the popped PSL_I is mirrored into the software VIF. */
377 temp_flags = POP(vmf) & POP_MASK;
378 vmf->vmf_flags = (vmf->vmf_flags & ~POP_MASK)
379 | temp_flags | PSL_VM | PSL_I;
380 vmf->vmf_ip += inc_ip;
381 if (temp_flags & PSL_I) {
382 vm86->vm86_eflags |= PSL_VIF;
383 if (vm86->vm86_eflags & PSL_VIP)
386 vm86->vm86_eflags &= ~PSL_VIF;
/*
 * Sizing constants for the vm86 region.
 *   PGTABLE_SIZE  number of PAGE_SIZE pages in (1024 + 64) KB, used as
 *                 the page table entry count
 *   INTMAP_SIZE   bytes in the interrupt bitmap; 32 bytes gives 256 bits
 *                 and vm86_emulate() indexes it with an 8-bit vector
 *   IOMAP_SIZE    bytes in the I/O bitmap, ctob(IOPAGES)
 * The two-line expression after them is the tail of a macro whose #define
 * line is not visible in this excerpt (presumably TSS_SIZE, which
 * vm86_initialize() uses for the TSS descriptor limit -- TODO confirm).
 */
393 #define PGTABLE_SIZE ((1024 + 64) * 1024 / PAGE_SIZE)
394 #define INTMAP_SIZE 32
395 #define IOMAP_SIZE ctob(IOPAGES)
397 (sizeof(struct pcb_ext) - sizeof(struct segment_descriptor) + \
398 INTMAP_SIZE + IOMAP_SIZE + 1)
/*
 * Members of the layout structure that vm86_initialize() overlays on
 * vm86paddr (the struct header and some members are not visible here):
 * the page table, the pcb extension, the interrupt bitmap, the I/O bitmap
 * and one trailer byte that vm86_initialize() sets to 0xff.
 */
401 pt_entry_t vml_pgtbl[PGTABLE_SIZE];
403 struct pcb_ext vml_ext;
404 char vml_intmap[INTMAP_SIZE];
405 char vml_iomap[IOMAP_SIZE];
406 char vml_iomap_trailer;
/*
 * One-time setup of the vm86 region at vm86paddr.
 *
 * Checks that struct vm86_layout fits in ctob(3) bytes, initializes
 * vm86_lock, fills in the scratch pcb and the pcb extension (TSS fields,
 * I/O and interrupt bitmap pointers, TSS descriptor), records whether VME
 * is usable, and hands the unused tail of the region to msgbufinit().
 * The assignments of the local pointers pcb and ext are not visible in
 * this excerpt.
 */
410 vm86_initialize(void)
414 struct vm86_layout *vml = (struct vm86_layout *)vm86paddr;
417 struct soft_segment_descriptor ssd = {
418 0, /* segment base address (overwritten) */
419 0, /* length (overwritten) */
420 SDT_SYS386TSS, /* segment type */
421 0, /* priority level */
422 1, /* descriptor present */
424 0, /* default 16 size */
429 * this should be a compile time error, but cpp doesn't grok sizeof().
431 if (sizeof(struct vm86_layout) > ctob(3))
432 panic("struct vm86_layout exceeds space allocated in locore.s");
435 * Below is the memory layout that we use for the vm86 region.
443 * +--------+ +--------+ <--------- vm86paddr
444 * | | |Page Tbl| 1M + 64K = 272 entries = 1088 bytes
446 * | | | PCB | size: ~240 bytes
447 * | page 1 | |PCB Ext | size: ~140 bytes (includes TSS)
453 * +--------+ | bitmap |
460 * A rudimentary PCB must be installed, in order to get to the
461 * PCB extension area. We use the PCB area as a scratchpad for
462 * data storage, the layout of which is shown below.
464 * pcb_esi = new PTD entry 0
465 * pcb_ebp = pointer to frame on vm86 stack
466 * pcb_esp = stack frame pointer at time of switch
467 * pcb_ebx = va of vm86 page table
468 * pcb_eip = argument pointer to initial call
469 * pcb_vm86[0] = saved TSS descriptor, word 0
470 * pcb_vm86[1] = saved TSS descriptor, word 1
/* Aliases for the pcb registers reused as scratch storage, per the table above. */
472 #define new_ptd pcb_esi
473 #define vm86_frame pcb_ebp
474 #define pgtable_va pcb_ebx
479 mtx_init(&vm86_lock, "vm86 lock", NULL, MTX_DEF);
/* Zero the scratch pcb, then fill in the fields listed in the table above. */
481 bzero(pcb, sizeof(struct pcb));
482 pcb->new_ptd = vm86pa | PG_V | PG_RW | PG_U;
483 pcb->vm86_frame = vm86paddr - sizeof(struct vm86frame);
484 pcb->pgtable_va = vm86paddr;
485 pcb->pcb_flags = PCB_VM86CALL;
/*
 * Zero the pcb extension and set up its TSS.  tss_ioopt receives, in its
 * upper 16 bits, the byte offset of the I/O bitmap from the start of the
 * TSS.
 */
488 bzero(ext, sizeof(struct pcb_ext));
489 ext->ext_tss.tss_esp0 = vm86paddr;
490 ext->ext_tss.tss_ss0 = GSEL(GDATA_SEL, SEL_KPL);
491 ext->ext_tss.tss_ioopt =
492 ((u_int)vml->vml_iomap - (u_int)&ext->ext_tss) << 16;
493 ext->ext_iomap = vml->vml_iomap;
494 ext->ext_vm86.vm86_intmap = vml->vml_intmap;
/* VME is recorded as available only if CPUID reports it and CR4_VME is set. */
496 if (cpu_feature & CPUID_VME)
497 ext->ext_vm86.vm86_has_vme = (rcr4() & CR4_VME ? 1 : 0);
/*
 * Walk the interrupt and I/O bitmaps one u_int at a time (the loop body
 * is not visible in this excerpt), then set the trailer byte to 0xff.
 */
499 addr = (u_int *)ext->ext_vm86.vm86_intmap;
500 for (i = 0; i < (INTMAP_SIZE + IOMAP_SIZE) / sizeof(u_int); i++)
502 vml->vml_iomap_trailer = 0xff;
/* Build the TSS descriptor for the extension TSS from the template above. */
504 ssd.ssd_base = (u_int)&ext->ext_tss;
505 ssd.ssd_limit = TSS_SIZE - 1;
506 ssdtosd(&ssd, &ext->ext_tssd);
512 * use whatever is leftover of the vm86 page layout as a
513 * message buffer so we can capture early output.
515 msgbufinit((vm_offset_t)vm86paddr + sizeof(struct vm86_layout),
516 ctob(3) - sizeof(struct vm86_layout));
/*
 * Look up pagenum in the context page map with a linear search and return
 * the kva recorded for the first matching entry.  The value returned when
 * no entry matches is not visible in this excerpt.
 */
521 vm86_getpage(struct vm86context *vmc, int pagenum)
525 for (i = 0; i < vmc->npages; i++)
526 if (vmc->pmap[i].pte_num == pagenum)
527 return (vmc->pmap[i].kva);
/*
 * Record a mapping from vm86 page number pagenum to kernel address kva in
 * a slot of vmc->pmap.
 *
 * An existing entry with the same page number and a map that already
 * holds VM86_PMAPSIZE entries both end in panic().  On one path a page is
 * allocated with malloc(PAGE_SIZE, M_TEMP, M_WAITOK) and used as kva; the
 * condition guarding that path, the slot selection and the return value
 * are not visible in this excerpt.
 */
532 vm86_addpage(struct vm86context *vmc, int pagenum, vm_offset_t kva)
/* Reject a page number that is already present in the map. */
536 for (i = 0; i < vmc->npages; i++)
537 if (vmc->pmap[i].pte_num == pagenum)
540 if (vmc->npages == VM86_PMAPSIZE)
541 goto full; /* XXX grow map? */
544 kva = (vm_offset_t)malloc(PAGE_SIZE, M_TEMP, M_WAITOK);
549 vmc->pmap[i].flags = flags;
550 vmc->pmap[i].kva = kva;
551 vmc->pmap[i].pte_num = pagenum;
554 panic("vm86_addpage: overlap");
556 panic("vm86_addpage: not enough room");
560 * called from vm86_bioscall, while in vm86 address space, to finalize setup.
/*
 * Finish preparing the frame for a vm86 call.
 *
 * A small code area at address 0xa00 and a stack word just below 0x1000
 * are used.  When the low bits of vmf_trapno select an interrupt number
 * (0 to 0xff) that number is written into the code area and vmf_ip is
 * pointed at it.  A vector built from the code address is stored in the
 * stack word and vmf_sp is pointed at the stack.  The kernel segment
 * slots are zeroed and the initial flags are set.  Several stores into the
 * code area, and the statement that follows the interrupt branch, are not
 * visible in this excerpt.
 */
563 vm86_prepcall(struct vm86frame *vmf)
565 struct vm86_kernel *vm86;
569 code = (void *)0xa00;
570 stack = (void *)(0x1000 - 2); /* keep aligned */
571 if ((vmf->vmf_trapno & PAGE_MASK) <= 0xff) {
572 /* interrupt call requested */
574 code[1] = vmf->vmf_trapno & 0xff;
576 vmf->vmf_ip = (uintptr_t)code;
581 stack[0] = MAKE_VEC(0, (uintptr_t)code);
583 vmf->vmf_sp = (uintptr_t)stack;
585 vmf->kernel_fs = vmf->kernel_es = vmf->kernel_ds = 0;
586 vmf->vmf_eflags = PSL_VIF | PSL_VM | PSL_USER;
/* Without VME the flags are also copied into the software copy used by vm86_emulate(). */
588 vm86 = &curpcb->pcb_ext->ext_vm86;
589 if (!vm86->vm86_has_vme)
590 vm86->vm86_eflags = vmf->vmf_eflags; /* save VIF, VIP */
594 * vm86 trap handler; determines whether routine succeeded or not.
595 * Called while in vm86 space, returns to calling process.
/*
 * Record the outcome of a vm86 call in vmf_trapno and set up a function
 * pointer derived from the address of vm86_biosret.
 *
 * Panics if the frame flags do not have PSL_VM set.  If the byte at the
 * frame cs:ip is HLT, vmf_trapno becomes the carry flag taken from the
 * frame flags.  The shift of vmf_trapno by 16 is presumably the else
 * branch (the keyword is not visible in this excerpt).  The rest of the
 * pointer expression and the call through it are also not visible.
 */
598 vm86_trap(struct vm86frame *vmf)
600 void (*p)(struct vm86frame *);
603 /* "should not happen" */
604 if ((vmf->vmf_eflags & PSL_VM) == 0)
605 panic("vm86_trap called, but not in vm86 mode");
607 addr = MAKE_ADDR(vmf->vmf_cs, vmf->vmf_ip);
608 if (*(u_char *)addr == HLT)
609 vmf->vmf_trapno = vmf->vmf_eflags & PSL_C;
611 vmf->vmf_trapno = vmf->vmf_trapno << 16;
613 p = (void (*)(struct vm86frame *))((uintptr_t)vm86_biosret +
/*
 * Make a vm86 call for interrupt number intnum using the caller frame.
 *
 * intnum outside 0..0xff is rejected (the value returned is not visible in
 * this excerpt).  Otherwise intnum is stored in vmf_trapno, a function
 * pointer is derived from the address of vm86_bioscall, and vm86_lock is
 * held around the statements between lock and unlock, which are not
 * visible here.
 */
619 vm86_intcall(int intnum, struct vm86frame *vmf)
621 int (*p)(struct vm86frame *);
624 if (intnum < 0 || intnum > 0xff)
627 vmf->vmf_trapno = intnum;
628 p = (int (*)(struct vm86frame *))((uintptr_t)vm86_bioscall +
630 mtx_lock(&vm86_lock);
634 mtx_unlock(&vm86_lock);
639 * struct vm86context contains the page table to use when making
640 * vm86 calls. If intnum is a valid interrupt number (0-255), then
641 * the "interrupt trampoline" will be used, otherwise we use the
642 * caller's cs:ip routine.
/*
 * Make a vm86 call with the pages described by vmc mapped into the vm86
 * page table.
 *
 * With vm86_lock held, each entry of vmc->pmap is installed in the page
 * table at vm86paddr (the previous entry is saved in old_pte), intnum is
 * stored in vmf_trapno and a function pointer is derived from the address
 * of vm86_bioscall.  Afterwards the saved entries are restored and the lock
 * is dropped.  The call itself and the return statement are not visible in
 * this excerpt.
 */
645 vm86_datacall(int intnum, struct vm86frame *vmf, struct vm86context *vmc)
648 int (*p)(struct vm86frame *);
650 int i, entry, retval;
652 pte = (pt_entry_t *)vm86paddr;
653 mtx_lock(&vm86_lock);
/* Install each context page, remembering the entry it replaces. */
654 for (i = 0; i < vmc->npages; i++) {
655 page = vtophys(vmc->pmap[i].kva & PG_FRAME);
656 entry = vmc->pmap[i].pte_num;
657 vmc->pmap[i].old_pte = pte[entry];
658 pte[entry] = page | PG_V | PG_RW | PG_U;
659 pmap_invalidate_page(kernel_pmap, vmc->pmap[i].kva);
662 vmf->vmf_trapno = intnum;
663 p = (int (*)(struct vm86frame *))((uintptr_t)vm86_bioscall +
/* Put the original page table entries back. */
669 for (i = 0; i < vmc->npages; i++) {
670 entry = vmc->pmap[i].pte_num;
671 pte[entry] = vmc->pmap[i].old_pte;
672 pmap_invalidate_page(kernel_pmap, vmc->pmap[i].kva);
674 mtx_unlock(&vm86_lock);
/*
 * Translate a vm86 segment:offset pair into a kernel virtual address.
 *
 * The linear address is split into a page number and an offset within the
 * page.  The page number is searched for in vmc->pmap and, on a match, the
 * recorded kva plus the in-page offset is returned.  The value returned
 * when no entry matches is not visible in this excerpt.
 */
680 vm86_getaddr(struct vm86context *vmc, u_short sel, u_short off)
685 addr = (vm_offset_t)MAKE_ADDR(sel, off);
686 page = addr >> PAGE_SHIFT;
687 for (i = 0; i < vmc->npages; i++)
688 if (page == vmc->pmap[i].pte_num)
689 return (vmc->pmap[i].kva + (addr & PAGE_MASK));
/*
 * Reverse of vm86_getaddr(): find the context page whose kva range
 * [kva, kva + PAGE_SIZE) contains the given kernel address and report it
 * as a segment:offset pair.  The offset is the distance from the start of
 * the page and the segment is the page number shifted left by 8.  The
 * remainder of the parameter list and the return values are not visible
 * in this excerpt.
 */
694 vm86_getptr(struct vm86context *vmc, vm_offset_t kva, u_short *sel,
699 for (i = 0; i < vmc->npages; i++)
700 if (kva >= vmc->pmap[i].kva &&
701 kva < vmc->pmap[i].kva + PAGE_SIZE) {
702 *off = kva - vmc->pmap[i].kva;
703 *sel = vmc->pmap[i].pte_num << 8;
/*
 * Handle a vm86 request from user space.
 *
 * The argument block is copied in from args, a pcb extension is created
 * with i386_extend_pcb() if the thread has none, and one of several
 * sub-operations is then run on ua.sub_args.  The switch and its case
 * labels are not visible in this excerpt; the visible fragments
 * initialize the per-thread vm86 state, enable or disable CR4_VME, report
 * the CR4_VME state, and perform a privileged vm86_intcall().  The return
 * statements are not visible either.
 */
710 vm86_sysarch(struct thread *td, char *args)
713 struct i386_vm86_args ua;
714 struct vm86_kernel *vm86;
716 if ((error = copyin(args, &ua, sizeof(struct i386_vm86_args))) != 0)
719 if (td->td_pcb->pcb_ext == 0)
720 if ((error = i386_extend_pcb(td)) != 0)
722 vm86 = &td->td_pcb->pcb_ext->ext_vm86;
/* Fragment: initialize the vm86 state from a vm86_init_args block. */
726 struct vm86_init_args sa;
728 if ((error = copyin(ua.sub_args, &sa, sizeof(sa))) != 0)
730 if (cpu_feature & CPUID_VME)
731 vm86->vm86_has_vme = (rcr4() & CR4_VME ? 1 : 0);
733 vm86->vm86_has_vme = 0;
734 vm86->vm86_inited = 1;
735 vm86->vm86_debug = sa.debug;
/* NOTE(review): the literal 32 equals INTMAP_SIZE as defined in this file; consider using the macro. */
736 bcopy(&sa.int_map, vm86->vm86_intmap, 32);
/* Fragment: turn CR4_VME on or off; requires CPUID_VME. */
742 struct vm86_vme_args sa;
744 if ((cpu_feature & CPUID_VME) == 0)
/*
 * NOTE(review): bare assignment used as the condition, unlike the other
 * copyin checks in this function, which compare against 0 or add an extra
 * pair of parentheses.
 */
747 if (error = copyin(ua.sub_args, &sa, sizeof(sa)))
750 load_cr4(rcr4() | CR4_VME);
752 load_cr4(rcr4() & ~CR4_VME);
/* Fragment: report the current CR4_VME state back to the caller. */
758 struct vm86_vme_args sa;
760 sa.state = (rcr4() & CR4_VME ? 1 : 0);
761 error = copyout(&sa, ua.sub_args, sizeof(sa));
/* Fragment: privileged interrupt call; the frame is copied in, run, and copied back out. */
766 struct vm86_intcall_args sa;
768 if ((error = priv_check(td, PRIV_VM86_INTCALL)))
770 if ((error = copyin(ua.sub_args, &sa, sizeof(sa))))
772 if ((error = vm86_intcall(sa.intnum, &sa.vmf)))
774 error = copyout(&sa, ua.sub_args, sizeof(sa));