2 * SPDX-License-Identifier: BSD-2-Clause
4 * Copyright (c) 2014 Neel Natu <neel@freebsd.org>
7 * Redistribution and use in source and binary forms, with or without
8 * modification, are permitted provided that the following conditions
10 * 1. Redistributions of source code must retain the above copyright
11 * notice, this list of conditions and the following disclaimer.
12 * 2. Redistributions in binary form must reproduce the above copyright
13 * notice, this list of conditions and the following disclaimer in the
14 * documentation and/or other materials provided with the distribution.
16 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND
17 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
18 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
19 * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
20 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
21 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
22 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
23 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
24 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
25 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
29 #include <sys/cdefs.h>
30 __FBSDID("$FreeBSD$");
32 #include <sys/param.h>
33 #include <sys/_iovec.h>
37 #include <x86/specialreg.h>
38 #include <machine/vmm.h>
39 #include <machine/vmm_instruction_emul.h>
53 * Using 'struct i386tss' is tempting but causes myriad sign extension
54 * issues because all of its fields are defined as signed integers.
/* The 32-bit TSS must match its architectural size of 104 bytes. */
static_assert(sizeof(struct tss32) == 104, "compile-time assertion failed");

/* Byte offset of the first byte of the 8-byte descriptor slot named by 'sel'. */
#define SEL_START(sel) (((sel) & ~0x7))
/* Byte offset of the last byte of the descriptor slot named by 'sel'. */
#define SEL_LIMIT(sel) (((sel) | 0x7))
/* Bit 1 of a system-segment type distinguishes a busy TSS from an available one. */
#define TSS_BUSY(type) (((type) & 0x2) != 0)
/*
 * Fetch the current value of guest register 'reg' on 'vcpu'.
 * NOTE(review): the return-type line, local declarations ('val', 'error'),
 * error check and return statement are elided in this chunk.
 */
GETREG(struct vcpu *vcpu, int reg)
	error = vm_get_register(vcpu, reg, &val);
/*
 * Store 'val' into guest register 'reg' on 'vcpu'.
 * NOTE(review): declarations and the error check are elided in this chunk.
 */
SETREG(struct vcpu *vcpu, int reg, uint64_t val)
	error = vm_set_register(vcpu, reg, val);
/*
 * Convert a raw GDT/LDT user_segment_descriptor into the 'struct seg_desc'
 * form (base/limit/access) used by the vmm register interface.
 */
static struct seg_desc
usd_to_seg_desc(struct user_segment_descriptor *usd)
	struct seg_desc seg_desc;

	seg_desc.base = (u_int)USD_GETBASE(usd);
	/* Page-granular limit: scale by 4KB and fill in the low 12 bits. */
	seg_desc.limit = (u_int)(USD_GETLIMIT(usd) << 12) | 0xfff;
	/*
	 * Byte-granular limit.
	 * NOTE(review): the 'sd_gran' test that selects between these two
	 * assignments is elided in this chunk — only one of them executes.
	 */
	seg_desc.limit = (u_int)USD_GETLIMIT(usd);
	/* Pack type, DPL and present bit into the low byte of 'access'. */
	seg_desc.access = usd->sd_type | usd->sd_dpl << 5 | usd->sd_p << 7;
	/* High attribute bits: AVL, default-operand-size and granularity. */
	seg_desc.access |= usd->sd_xx << 12;
	seg_desc.access |= usd->sd_def32 << 14;
	seg_desc.access |= usd->sd_gran << 15;
/*
 * Inject an exception with an error code that is a segment selector.
 * The format of the error code is described in section 6.13, "Error Code",
 * Intel SDM volume 3.
 *
 * Bit 0 (EXT) denotes whether the exception occurred during delivery
 * of an external event like an interrupt.
 *
 * Bit 1 (IDT) indicates whether the selector points to a gate descriptor.
 *
 * Bit 2 (GDT/LDT) has the usual interpretation of Table Indicator (TI).
 */
sel_exception(struct vcpu *vcpu, int vector, uint16_t sel, int ext)
	/*
	 * Bit 2 from the selector is retained as-is in the error code.
	 *
	 * Bit 1 can be safely cleared because none of the selectors
	 * encountered during task switch emulation refer to a task
	 * gate in the IDT.
	 *
	 * Bit 0 is set depending on the value of 'ext'.
	 */
	/*
	 * NOTE(review): the third argument presumably marks the error code
	 * as valid — verify against the vm_inject_fault() prototype.
	 */
	vm_inject_fault(vcpu, vector, 1, sel);
/*
 * Return 0 if the selector 'sel' is within the limits of the GDT/LDT
 * and non-zero otherwise.
 */
desc_table_limit_check(struct vcpu *vcpu, uint16_t sel)
	uint32_t limit, access;

	/* The TI bit of the selector picks the LDT over the GDT. */
	reg = ISLDT(sel) ? VM_REG_GUEST_LDTR : VM_REG_GUEST_GDTR;
	error = vm_get_desc(vcpu, reg, &base, &limit, &access);

	if (reg == VM_REG_GUEST_LDTR) {
		/* The LDT itself must be usable and present to be referenced. */
		if (SEG_DESC_UNUSABLE(access) || !SEG_DESC_PRESENT(access))

	/* The last byte of the 8-byte descriptor slot must lie inside the table. */
	if (limit < SEL_LIMIT(sel))
/*
 * Read/write the segment descriptor 'desc' into the GDT/LDT slot referenced
 * by the selector 'sel'.
 *
 * Returns 0 on success.
 * Returns 1 if an exception was injected into the guest.
 * Returns -1 otherwise.
 */
desc_table_rw(struct vcpu *vcpu, struct vm_guest_paging *paging,
    uint16_t sel, struct user_segment_descriptor *desc, bool doread,
	uint32_t limit, access;

	reg = ISLDT(sel) ? VM_REG_GUEST_LDTR : VM_REG_GUEST_GDTR;
	error = vm_get_desc(vcpu, reg, &base, &limit, &access);
	/* The caller must have performed the table limit check already. */
	assert(limit >= SEL_LIMIT(sel));

	/* Map the guest memory backing the descriptor slot for the copy. */
	error = vm_copy_setup(vcpu, paging, base + SEL_START(sel),
	    sizeof(*desc), doread ? PROT_READ : PROT_WRITE, iov, nitems(iov),
	if (error || *faultptr)

	/*
	 * NOTE(review): the if/else on 'doread' selecting between these two
	 * calls is elided in this chunk — only one executes per invocation.
	 */
	vm_copyin(iov, desc, sizeof(*desc));
	vm_copyout(desc, iov, sizeof(*desc));
/* Convenience wrapper: read the descriptor slot for 'sel' into '*desc'. */
desc_table_read(struct vcpu *vcpu, struct vm_guest_paging *paging,
    uint16_t sel, struct user_segment_descriptor *desc, int *faultptr)
	return (desc_table_rw(vcpu, paging, sel, desc, true, faultptr));
/* Convenience wrapper: write '*desc' back into the descriptor slot for 'sel'. */
desc_table_write(struct vcpu *vcpu, struct vm_guest_paging *paging,
    uint16_t sel, struct user_segment_descriptor *desc, int *faultptr)
	return (desc_table_rw(vcpu, paging, sel, desc, false, faultptr));
/*
 * Read the TSS descriptor referenced by 'sel' into 'desc'.
 *
 * Returns 0 on success.
 * Returns 1 if an exception was injected into the guest.
 * Returns -1 otherwise.
 */
read_tss_descriptor(struct vcpu *vcpu, struct vm_task_switch *ts,
    uint16_t sel, struct user_segment_descriptor *desc, int *faultptr)
	struct vm_guest_paging sup_paging;

	/* A NULL TSS selector is never legal at this point. */
	assert(IDXSEL(sel) != 0);

	/* Fetch the new TSS descriptor */
	if (desc_table_limit_check(vcpu, sel)) {
		/* Out-of-limit selector: #TS on IRET, #GP otherwise. */
		if (ts->reason == TSR_IRET)
			sel_exception(vcpu, IDT_TS, sel, ts->ext);
		/* NOTE(review): the 'else' pairing this with the line above is elided. */
		sel_exception(vcpu, IDT_GP, sel, ts->ext);

	/* Descriptor-table accesses are implicitly supervisor mode. */
	sup_paging = ts->paging;
	sup_paging.cpl = 0;	/* implicit supervisor mode */
	error = desc_table_read(vcpu, &sup_paging, sel, desc, faultptr);
/* True when 'sd_type' encodes a code segment (type bits 4:3 == 11b). */
code_desc(int sd_type)
	/* code descriptor */
	return ((sd_type & 0x18) == 0x18);
/* True when 'sd_type' encodes a writable data segment, as required for SS. */
stack_desc(int sd_type)
	/* writable data descriptor */
	return ((sd_type & 0x1A) == 0x12);
/* True when 'sd_type' is loadable into a data segment register (DS/ES/FS/GS). */
data_desc(int sd_type)
	/* data descriptor or a readable code descriptor */
	return ((sd_type & 0x18) == 0x10 || (sd_type & 0x1A) == 0x1A);
/* True when 'sd_type' is the system LDT descriptor type. */
ldt_desc(int sd_type)
	return (sd_type == SDT_SYSLDT);
/*
 * Validate the descriptor 'seg_desc' associated with 'segment'.
 *
 * Performs the selector, type, presence and privilege checks required
 * when loading a segment register during a task switch; on failure an
 * exception is injected into the guest, on success the converted
 * descriptor is returned via 'seg_desc'.
 */
validate_seg_desc(struct vcpu *vcpu, struct vm_task_switch *ts,
    int segment, struct seg_desc *seg_desc, int *faultptr)
	struct vm_guest_paging sup_paging;
	struct user_segment_descriptor usd;
	bool ldtseg, codeseg, stackseg, dataseg, conforming;

	ldtseg = codeseg = stackseg = dataseg = false;
	/*
	 * Classify 'segment'.
	 * NOTE(review): the switch statement and the flag assignments inside
	 * each case are elided in this chunk.
	 */
	case VM_REG_GUEST_LDTR:
	case VM_REG_GUEST_CS:
	case VM_REG_GUEST_SS:
	case VM_REG_GUEST_DS:
	case VM_REG_GUEST_ES:
	case VM_REG_GUEST_FS:
	case VM_REG_GUEST_GS:

	/* Get the segment selector */
	sel = GETREG(vcpu, segment);

	/* LDT selector must point into the GDT */
	if (ldtseg && ISLDT(sel)) {
		sel_exception(vcpu, IDT_TS, sel, ts->ext);

	/* Descriptor table limit check */
	if (desc_table_limit_check(vcpu, sel)) {
		sel_exception(vcpu, IDT_TS, sel, ts->ext);

	/* NULL selector handling. */
	if (IDXSEL(sel) == 0) {
		/* Code and stack segment selectors cannot be NULL */
		if (codeseg || stackseg) {
			sel_exception(vcpu, IDT_TS, sel, ts->ext);
		/* A NULL data selector leaves the segment unusable. */
		seg_desc->access = 0x10000;	/* unusable */

	/* Read the descriptor from the GDT/LDT */
	sup_paging = ts->paging;
	sup_paging.cpl = 0;	/* implicit supervisor mode */
	error = desc_table_read(vcpu, &sup_paging, sel, &usd, faultptr);
	if (error || *faultptr)

	/* Verify that the descriptor type is compatible with the segment */
	if ((ldtseg && !ldt_desc(usd.sd_type)) ||
	    (codeseg && !code_desc(usd.sd_type)) ||
	    (dataseg && !data_desc(usd.sd_type)) ||
	    (stackseg && !stack_desc(usd.sd_type))) {
		sel_exception(vcpu, IDT_TS, sel, ts->ext);

	/* Segment must be marked present */
	/* NOTE(review): the presence test and 'idtvec' selection are elided. */
	sel_exception(vcpu, idtvec, sel, ts->ext);

	/* Privilege levels: CPL from CS, RPL from the selector being loaded. */
	cs = GETREG(vcpu, VM_REG_GUEST_CS);
	cpl = cs & SEL_RPL_MASK;
	rpl = sel & SEL_RPL_MASK;

	/* Stack segment: RPL and DPL must both equal CPL. */
	if (stackseg && (rpl != cpl || dpl != cpl)) {
		sel_exception(vcpu, IDT_TS, sel, ts->ext);

	/* Code segment: conforming allows cpl >= dpl, non-conforming requires equality. */
	conforming = (usd.sd_type & 0x4) ? true : false;
	if ((conforming && (cpl < dpl)) ||
	    (!conforming && (cpl != dpl))) {
		sel_exception(vcpu, IDT_TS, sel, ts->ext);

	/*
	 * A data segment is always non-conforming except when its
	 * descriptor is a readable, conforming code segment.
	 */
	if (code_desc(usd.sd_type) && (usd.sd_type & 0x4) != 0)

	/* Data segment: non-conforming requires RPL and CPL <= DPL. */
	if (!conforming && (rpl > dpl || cpl > dpl)) {
		sel_exception(vcpu, IDT_TS, sel, ts->ext);

	/* All checks passed: hand back the converted descriptor. */
	*seg_desc = usd_to_seg_desc(&usd);
/*
 * Save the outgoing task's processor state into the old 32-bit TSS
 * image 'tss' and copy it out to guest memory mapped by 'iov'.
 * 'eip' is the instruction pointer to record for the old task.
 */
tss32_save(struct vcpu *vcpu, struct vm_task_switch *task_switch,
    uint32_t eip, struct tss32 *tss, struct iovec *iov)
	/* General purpose registers */
	tss->tss_eax = GETREG(vcpu, VM_REG_GUEST_RAX);
	tss->tss_ecx = GETREG(vcpu, VM_REG_GUEST_RCX);
	tss->tss_edx = GETREG(vcpu, VM_REG_GUEST_RDX);
	tss->tss_ebx = GETREG(vcpu, VM_REG_GUEST_RBX);
	tss->tss_esp = GETREG(vcpu, VM_REG_GUEST_RSP);
	tss->tss_ebp = GETREG(vcpu, VM_REG_GUEST_RBP);
	tss->tss_esi = GETREG(vcpu, VM_REG_GUEST_RSI);
	tss->tss_edi = GETREG(vcpu, VM_REG_GUEST_RDI);

	/* Segment selectors */
	tss->tss_es = GETREG(vcpu, VM_REG_GUEST_ES);
	tss->tss_cs = GETREG(vcpu, VM_REG_GUEST_CS);
	tss->tss_ss = GETREG(vcpu, VM_REG_GUEST_SS);
	tss->tss_ds = GETREG(vcpu, VM_REG_GUEST_DS);
	tss->tss_fs = GETREG(vcpu, VM_REG_GUEST_FS);
	tss->tss_gs = GETREG(vcpu, VM_REG_GUEST_GS);

	/* EFLAGS: an IRET-initiated switch clears the nested-task flag. */
	tss->tss_eflags = GETREG(vcpu, VM_REG_GUEST_RFLAGS);
	if (task_switch->reason == TSR_IRET)
		tss->tss_eflags &= ~PSL_NT;

	/* Copy updated old TSS into guest memory */
	vm_copyout(tss, iov, sizeof(struct tss32));
/* Load the hidden descriptor state (base/limit/access) of 'reg' from 'sd'. */
update_seg_desc(struct vcpu *vcpu, int reg, struct seg_desc *sd)
	error = vm_set_desc(vcpu, reg, sd->base, sd->limit, sd->access);
/*
 * Update the vcpu registers to reflect the state of the new task.
 *
 * Loads CR3 (validating PDPTEs in PAE mode), EFLAGS, EIP, the general
 * purpose registers and all segment registers from the new TSS 'tss',
 * then validates and installs the corresponding segment descriptors.
 * 'ot_sel' is the old task's TSS selector, written to the new TSS's
 * previous-link field for nested (CALL/interrupt) task switches.
 */
tss32_restore(struct vmctx *ctx, struct vcpu *vcpu, struct vm_task_switch *ts,
    uint16_t ot_sel, struct tss32 *tss, struct iovec *iov, int *faultptr)
	struct seg_desc seg_desc, seg_desc2;
	uint64_t *pdpte, maxphyaddr, reserved;

	/* CALL/interrupt/exception switches record the old task in the link field. */
	if (ts->reason != TSR_IRET && ts->reason != TSR_JMP) {
		tss->tss_link = ot_sel;

	eflags = tss->tss_eflags;

	/* LDTR is loaded before CR3 so later descriptor reads use the new LDT. */
	SETREG(vcpu, VM_REG_GUEST_LDTR, tss->tss_ldt);

	/* Control registers */
	if (ts->paging.paging_mode != PAGING_MODE_FLAT) {
		if (ts->paging.paging_mode == PAGING_MODE_PAE) {
			/*
			 * XXX Assuming 36-bit MAXPHYADDR.
			 */
			maxphyaddr = (1UL << 36) - 1;
			/* PDPT is 32-byte aligned; map its four 8-byte entries. */
			pdpte = paddr_guest2host(ctx, tss->tss_cr3 & ~0x1f, 32);
			for (i = 0; i < 4; i++) {
				/* Check reserved bits if the PDPTE is valid */
				if (!(pdpte[i] & 0x1))
				/*
				 * Bits 2:1, 8:5 and bits above the processor's
				 * maximum physical address are reserved.
				 */
				reserved = ~maxphyaddr | 0x1E6;
				if (pdpte[i] & reserved) {
			/* Expose the validated PDPTEs to the hypervisor. */
			SETREG(vcpu, VM_REG_GUEST_PDPTE0, pdpte[0]);
			SETREG(vcpu, VM_REG_GUEST_PDPTE1, pdpte[1]);
			SETREG(vcpu, VM_REG_GUEST_PDPTE2, pdpte[2]);
			SETREG(vcpu, VM_REG_GUEST_PDPTE3, pdpte[3]);
		SETREG(vcpu, VM_REG_GUEST_CR3, tss->tss_cr3);
		/* Subsequent guest-memory accesses use the new page tables. */
		ts->paging.cr3 = tss->tss_cr3;

	/* Flags and instruction pointer */
	SETREG(vcpu, VM_REG_GUEST_RFLAGS, eflags);
	SETREG(vcpu, VM_REG_GUEST_RIP, tss->tss_eip);

	/* General purpose registers */
	SETREG(vcpu, VM_REG_GUEST_RAX, tss->tss_eax);
	SETREG(vcpu, VM_REG_GUEST_RCX, tss->tss_ecx);
	SETREG(vcpu, VM_REG_GUEST_RDX, tss->tss_edx);
	SETREG(vcpu, VM_REG_GUEST_RBX, tss->tss_ebx);
	SETREG(vcpu, VM_REG_GUEST_RSP, tss->tss_esp);
	SETREG(vcpu, VM_REG_GUEST_RBP, tss->tss_ebp);
	SETREG(vcpu, VM_REG_GUEST_RSI, tss->tss_esi);
	SETREG(vcpu, VM_REG_GUEST_RDI, tss->tss_edi);

	/* Segment selectors */
	SETREG(vcpu, VM_REG_GUEST_ES, tss->tss_es);
	SETREG(vcpu, VM_REG_GUEST_CS, tss->tss_cs);
	SETREG(vcpu, VM_REG_GUEST_SS, tss->tss_ss);
	SETREG(vcpu, VM_REG_GUEST_DS, tss->tss_ds);
	SETREG(vcpu, VM_REG_GUEST_FS, tss->tss_fs);
	SETREG(vcpu, VM_REG_GUEST_GS, tss->tss_gs);

	/*
	 * If this is a nested task then write out the new TSS to update
	 * the previous link field.
	 */
	vm_copyout(tss, iov, sizeof(*tss));

	/* Validate segment descriptors */
	error = validate_seg_desc(vcpu, ts, VM_REG_GUEST_LDTR, &seg_desc,
	if (error || *faultptr)
	update_seg_desc(vcpu, VM_REG_GUEST_LDTR, &seg_desc);

	/*
	 * Section "Checks on Guest Segment Registers", Intel SDM, Vol 3.
	 *
	 * The SS and CS attribute checks on VM-entry are inter-dependent so
	 * we need to make sure that both segments are valid before updating
	 * either of them. This ensures that the VMCS state can pass the
	 * VM-entry checks so the guest can handle any exception injected
	 * during task switch emulation.
	 */
	error = validate_seg_desc(vcpu, ts, VM_REG_GUEST_CS, &seg_desc,
	if (error || *faultptr)

	error = validate_seg_desc(vcpu, ts, VM_REG_GUEST_SS, &seg_desc2,
	if (error || *faultptr)
	update_seg_desc(vcpu, VM_REG_GUEST_CS, &seg_desc);
	update_seg_desc(vcpu, VM_REG_GUEST_SS, &seg_desc2);
	/* The new CPL is the RPL of the incoming CS selector. */
	ts->paging.cpl = tss->tss_cs & SEL_RPL_MASK;

	error = validate_seg_desc(vcpu, ts, VM_REG_GUEST_DS, &seg_desc,
	if (error || *faultptr)
	update_seg_desc(vcpu, VM_REG_GUEST_DS, &seg_desc);

	error = validate_seg_desc(vcpu, ts, VM_REG_GUEST_ES, &seg_desc,
	if (error || *faultptr)
	update_seg_desc(vcpu, VM_REG_GUEST_ES, &seg_desc);

	error = validate_seg_desc(vcpu, ts, VM_REG_GUEST_FS, &seg_desc,
	if (error || *faultptr)
	update_seg_desc(vcpu, VM_REG_GUEST_FS, &seg_desc);

	error = validate_seg_desc(vcpu, ts, VM_REG_GUEST_GS, &seg_desc,
	if (error || *faultptr)
	update_seg_desc(vcpu, VM_REG_GUEST_GS, &seg_desc);
/*
 * Push an error code on the stack of the new task. This is needed if the
 * task switch was triggered by a hardware exception that causes an error
 * code to be saved (e.g. #PF).
 */
push_errcode(struct vcpu *vcpu, struct vm_guest_paging *paging,
    int task_type, uint32_t errcode, int *faultptr)
	struct seg_desc seg_desc;
	int stacksize, bytes, error;
	uint64_t gla, cr0, rflags;

	/* State needed for the linear-address and alignment checks below. */
	cr0 = GETREG(vcpu, VM_REG_GUEST_CR0);
	rflags = GETREG(vcpu, VM_REG_GUEST_RFLAGS);
	stacksel = GETREG(vcpu, VM_REG_GUEST_SS);

	error = vm_get_desc(vcpu, VM_REG_GUEST_SS, &seg_desc.base,
	    &seg_desc.limit, &seg_desc.access);

	/*
	 * Section "Error Code" in the Intel SDM vol 3: the error code is
	 * pushed on the stack as a doubleword or word (depending on the
	 * default interrupt, trap or task gate size).
	 */
	/* 32-bit TSS types push 4 bytes; the 16-bit branch is elided here. */
	if (task_type == SDT_SYS386BSY || task_type == SDT_SYS386TSS)

	/*
	 * PUSH instruction from Intel SDM vol 2: the 'B' flag in the
	 * stack-segment descriptor determines the size of the stack
	 * pointer outside of 64-bit mode.
	 */
	if (SEG_DESC_DEF32(seg_desc.access))

	/* Decrement the stack pointer and compute the push target address. */
	esp = GETREG(vcpu, VM_REG_GUEST_RSP);

	if (vie_calculate_gla(paging->cpu_mode, VM_REG_GUEST_SS,
	    &seg_desc, esp, bytes, stacksize, PROT_WRITE, &gla)) {
		/* Bad stack address: inject #SS with the stack selector. */
		sel_exception(vcpu, IDT_SS, stacksel, 1);

	if (vie_alignment_check(paging->cpl, bytes, cr0, rflags, gla)) {
		vm_inject_ac(vcpu, 1);

	error = vm_copy_setup(vcpu, paging, gla, bytes, PROT_WRITE,
	    iov, nitems(iov), faultptr);
	if (error || *faultptr)

	/* Write the error code and commit the new stack pointer. */
	vm_copyout(&errcode, iov, bytes);
	SETREG(vcpu, VM_REG_GUEST_RSP, esp);
/*
 * Evaluate return value from helper functions and potentially return to
 * the caller: aborts the VM on a hard error, resumes the guest when a
 * fault was injected, and otherwise falls through.
 * NOTE(review): the do/while wrapper and the conditionals selecting
 * between these statements are elided in this chunk.
 */
#define CHKERR(error,fault) \
	assert((error == 0) || (error == EFAULT)); \
	return (VMEXIT_ABORT); \
	return (VMEXIT_CONTINUE); \
/*
 * Emulate a hardware task switch on behalf of the guest.
 *
 * Validates the new TSS descriptor, saves the outgoing task's state into
 * the old TSS, manages the busy bits and the task register, and loads the
 * incoming task's state. Returns VMEXIT_CONTINUE on success (including
 * the case where an exception was injected into the guest) or
 * VMEXIT_ABORT on unrecoverable error.
 */
vmexit_task_switch(struct vmctx *ctx, struct vcpu *vcpu, struct vm_run *vmrun)
	struct tss32 oldtss, newtss;
	struct vm_task_switch *task_switch;
	struct vm_guest_paging *paging, sup_paging;
	struct user_segment_descriptor nt_desc, ot_desc;
	struct iovec nt_iov[2], ot_iov[2];
	struct vm_exit *vmexit;
	uint64_t cr0, ot_base;
	uint32_t eip, ot_lim, access;
	int error, ext, fault, minlimit, nt_type, ot_type;
	enum task_switch_reason reason;
	uint16_t nt_sel, ot_sel;

	vmexit = vmrun->vm_exit;
	task_switch = &vmexit->u.task_switch;
	nt_sel = task_switch->tsssel;
	ext = vmexit->u.task_switch.ext;
	reason = vmexit->u.task_switch.reason;
	paging = &vmexit->u.task_switch.paging;

	/* Task switch emulation is only implemented for protected mode. */
	assert(paging->cpu_mode == CPU_MODE_PROTECTED);

	/*
	 * Calculate the instruction pointer to store in the old TSS.
	 */
	eip = vmexit->rip + vmexit->inst_length;

	/*
	 * Section 4.6, "Access Rights" in Intel SDM Vol 3.
	 * The following page table accesses are implicitly supervisor mode:
	 * - accesses to GDT or LDT to load segment descriptors
	 * - accesses to the task state segment during task switch
	 */
	sup_paging = *paging;
	sup_paging.cpl = 0;	/* implicit supervisor mode */

	/* Fetch the new TSS descriptor */
	error = read_tss_descriptor(vcpu, task_switch, nt_sel, &nt_desc,
	CHKERR(error, fault);

	nt = usd_to_seg_desc(&nt_desc);

	/* Verify the type of the new TSS */
	nt_type = SEG_DESC_TYPE(nt.access);
	if (nt_type != SDT_SYS386BSY && nt_type != SDT_SYS386TSS &&
	    nt_type != SDT_SYS286BSY && nt_type != SDT_SYS286TSS) {
		sel_exception(vcpu, IDT_TS, nt_sel, ext);

	/* TSS descriptor must have present bit set */
	if (!SEG_DESC_PRESENT(nt.access)) {
		sel_exception(vcpu, IDT_NP, nt_sel, ext);

	/*
	 * TSS must have a minimum length of 104 bytes for a 32-bit TSS and
	 * 44 bytes for a 16-bit TSS.
	 */
	/* NOTE(review): the minlimit assignments in these branches are elided. */
	if (nt_type == SDT_SYS386BSY || nt_type == SDT_SYS386TSS)
	else if (nt_type == SDT_SYS286BSY || nt_type == SDT_SYS286TSS)

	assert(minlimit > 0);
	if (nt.limit < (unsigned int)minlimit) {
		sel_exception(vcpu, IDT_TS, nt_sel, ext);

	/* TSS must be busy if task switch is due to IRET */
	if (reason == TSR_IRET && !TSS_BUSY(nt_type)) {
		sel_exception(vcpu, IDT_TS, nt_sel, ext);

	/*
	 * TSS must be available (not busy) if task switch reason is
	 * CALL, JMP, exception or interrupt.
	 */
	if (reason != TSR_IRET && TSS_BUSY(nt_type)) {
		sel_exception(vcpu, IDT_GP, nt_sel, ext);

	/* Fetch the new TSS */
	error = vm_copy_setup(vcpu, &sup_paging, nt.base, minlimit + 1,
	    PROT_READ | PROT_WRITE, nt_iov, nitems(nt_iov), &fault);
	CHKERR(error, fault);
	vm_copyin(nt_iov, &newtss, minlimit + 1);

	/* Get the old TSS selector from the guest's task register */
	ot_sel = GETREG(vcpu, VM_REG_GUEST_TR);
	if (ISLDT(ot_sel) || IDXSEL(ot_sel) == 0) {
		/*
		 * This might happen if a task switch was attempted without
		 * ever loading the task register with LTR. In this case the
		 * TR would contain the values from power-on:
		 * (sel = 0, base = 0, limit = 0xffff).
		 */
		sel_exception(vcpu, IDT_TS, ot_sel, task_switch->ext);

	/* Get the old TSS base and limit from the guest's task register */
	error = vm_get_desc(vcpu, VM_REG_GUEST_TR, &ot_base, &ot_lim,
	/* The currently-loaded TR must be a usable, busy 32/16-bit TSS. */
	assert(!SEG_DESC_UNUSABLE(access) && SEG_DESC_PRESENT(access));
	ot_type = SEG_DESC_TYPE(access);
	assert(ot_type == SDT_SYS386BSY || ot_type == SDT_SYS286BSY);

	/* Fetch the old TSS descriptor */
	error = read_tss_descriptor(vcpu, task_switch, ot_sel, &ot_desc,
	CHKERR(error, fault);

	/* Get the old TSS */
	error = vm_copy_setup(vcpu, &sup_paging, ot_base, minlimit + 1,
	    PROT_READ | PROT_WRITE, ot_iov, nitems(ot_iov), &fault);
	CHKERR(error, fault);
	vm_copyin(ot_iov, &oldtss, minlimit + 1);

	/*
	 * Clear the busy bit in the old TSS descriptor if the task switch
	 * due to an IRET or JMP instruction.
	 */
	if (reason == TSR_IRET || reason == TSR_JMP) {
		ot_desc.sd_type &= ~0x2;
		error = desc_table_write(vcpu, &sup_paging, ot_sel,
		CHKERR(error, fault);

	/* Only 32-bit task switches are emulated. */
	if (nt_type == SDT_SYS286BSY || nt_type == SDT_SYS286TSS) {
		EPRINTLN("Task switch to 16-bit TSS not supported");
		return (VMEXIT_ABORT);

	/* Save processor state in old TSS */
	tss32_save(vcpu, task_switch, eip, &oldtss, ot_iov);

	/*
	 * If the task switch was triggered for any reason other than IRET
	 * then set the busy bit in the new TSS descriptor.
	 */
	if (reason != TSR_IRET) {
		nt_desc.sd_type |= 0x2;
		error = desc_table_write(vcpu, &sup_paging, nt_sel,
		CHKERR(error, fault);

	/* Update task register to point at the new TSS */
	SETREG(vcpu, VM_REG_GUEST_TR, nt_sel);

	/* Update the hidden descriptor state of the task register */
	nt = usd_to_seg_desc(&nt_desc);
	update_seg_desc(vcpu, VM_REG_GUEST_TR, &nt);

	/* Set CR0.TS: the task-switched flag is set on every task switch. */
	cr0 = GETREG(vcpu, VM_REG_GUEST_CR0);
	SETREG(vcpu, VM_REG_GUEST_CR0, cr0 | CR0_TS);

	/*
	 * We are now committed to the task switch. Any exceptions encountered
	 * after this point will be handled in the context of the new task and
	 * the saved instruction pointer will belong to the new task.
	 */
	error = vm_set_register(vcpu, VM_REG_GUEST_RIP, newtss.tss_eip);

	/* Load processor state from new TSS */
	error = tss32_restore(ctx, vcpu, task_switch, ot_sel, &newtss, nt_iov,
	CHKERR(error, fault);

	/*
	 * Section "Interrupt Tasks" in Intel SDM, Vol 3: if an exception
	 * caused an error code to be generated, this error code is copied
	 * to the stack of the new task.
	 */
	if (task_switch->errcode_valid) {
		assert(task_switch->ext);
		assert(task_switch->reason == TSR_IDT_GATE);
		error = push_errcode(vcpu, &task_switch->paging, nt_type,
		    task_switch->errcode, &fault);
		CHKERR(error, fault);

	/*
	 * Treatment of virtual-NMI blocking if NMI is delivered through
	 * a task gate.
	 *
	 * Section "Architectural State Before A VM Exit", Intel SDM, Vol3:
	 * If the virtual NMIs VM-execution control is 1, VM entry injects
	 * an NMI, and delivery of the NMI causes a task switch that causes
	 * a VM exit, virtual-NMI blocking is in effect before the VM exit
	 * commences.
	 *
	 * Thus, virtual-NMI blocking is in effect at the time of the task
	 * switch emulation.
	 */

	/*
	 * Treatment of virtual-NMI unblocking on IRET from NMI handler task.
	 *
	 * Section "Changes to Instruction Behavior in VMX Non-Root Operation"
	 * If "virtual NMIs" control is 1 IRET removes any virtual-NMI blocking.
	 * This unblocking of virtual-NMI occurs even if IRET causes a fault.
	 *
	 * Thus, virtual-NMI blocking is cleared at the time of the task switch
	 * emulation.
	 */

	/*
	 * If the task switch was triggered by an event delivered through
	 * the IDT then extinguish the pending event from the vcpu's
	 * exitintinfo.
	 */
	if (task_switch->reason == TSR_IDT_GATE) {
		error = vm_set_intinfo(vcpu, 0);

	/*
	 * XXX should inject debug exception if 'T' bit is 1
	 */
	return (VMEXIT_CONTINUE);