sys/amd64/vmm/vmm.c

   1 /*-
   2  * Copyright (c) 2011 NetApp, Inc.
   3  * All rights reserved.
   4  *
   5  * Redistribution and use in source and binary forms, with or without
   6  * modification, are permitted provided that the following conditions
   7  * are met:
   8  * 1. Redistributions of source code must retain the above copyright
   9  *    notice, this list of conditions and the following disclaimer.
  10  * 2. Redistributions in binary form must reproduce the above copyright
  11  *    notice, this list of conditions and the following disclaimer in the
  12  *    documentation and/or other materials provided with the distribution.
  13  *
  14  * THIS SOFTWARE IS PROVIDED BY NETAPP, INC ``AS IS'' AND
  15  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
  16  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
  17  * ARE DISCLAIMED.  IN NO EVENT SHALL NETAPP, INC OR CONTRIBUTORS BE LIABLE
  18  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
  19  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
  20  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
  21  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
  22  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
  23  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
  24  * SUCH DAMAGE.
  25  *
  26  * $FreeBSD$
  27  */
  28
  29 #include <sys/cdefs.h>
  30 __FBSDID("$FreeBSD$");
  31
  32 #include <sys/param.h>
  33 #include <sys/systm.h>
  34 #include <sys/kernel.h>
  35 #include <sys/module.h>
  36 #include <sys/sysctl.h>
  37 #include <sys/malloc.h>
  38 #include <sys/pcpu.h>
  39 #include <sys/lock.h>
  40 #include <sys/mutex.h>
  41 #include <sys/proc.h>
  42 #include <sys/rwlock.h>
  43 #include <sys/sched.h>
  44 #include <sys/smp.h>
  45 #include <sys/systm.h>
  46
  47 #include <vm/vm.h>
  48 #include <vm/vm_object.h>
  49 #include <vm/vm_page.h>
  50 #include <vm/pmap.h>
  51 #include <vm/vm_map.h>
  52 #include <vm/vm_extern.h>
  53 #include <vm/vm_param.h>
  54
  55 #include <machine/cpu.h>
  56 #include <machine/vm.h>
  57 #include <machine/pcb.h>
  58 #include <machine/smp.h>
  59 #include <x86/psl.h>
  60 #include <x86/apicreg.h>
  61 #include <machine/vmparam.h>
  62
  63 #include <machine/vmm.h>
  64 #include <machine/vmm_dev.h>
  65
  66 #include "vmm_ktr.h"
  67 #include "vmm_host.h"
  68 #include "vmm_mem.h"
  69 #include "vmm_util.h"
  70 #include "vhpet.h"
  71 #include "vioapic.h"
  72 #include "vlapic.h"
  73 #include "vmm_msr.h"
  74 #include "vmm_ipi.h"
  75 #include "vmm_stat.h"
  76 #include "vmm_lapic.h"
  77
  78 #include "io/ppt.h"
  79 #include "io/iommu.h"
  80
  81 struct vlapic;
  82
  83 struct vcpu {
  84         int             flags;
  85         enum vcpu_state state;
  86         struct mtx      mtx;
  87         int             hostcpu;        /* host cpuid this vcpu last ran on */
  88         uint64_t        guest_msrs[VMM_MSR_NUM];
  89         struct vlapic   *vlapic;
  90         int              vcpuid;
  91         struct savefpu  *guestfpu;      /* guest fpu state */
  92         void            *stats;
  93         struct vm_exit  exitinfo;
  94         enum x2apic_state x2apic_state;
  95         int             nmi_pending;
  96 };
  97
  98 #define vcpu_lock_init(v)       mtx_init(&((v)->mtx), "vcpu lock", 0, MTX_SPIN)
  99 #define vcpu_lock(v)            mtx_lock_spin(&((v)->mtx))
 100 #define vcpu_unlock(v)          mtx_unlock_spin(&((v)->mtx))
 101 #define vcpu_assert_locked(v)   mtx_assert(&((v)->mtx), MA_OWNED)
 102
 103 struct mem_seg {
 104         vm_paddr_t      gpa;
 105         size_t          len;
 106         boolean_t       wired;
 107         vm_object_t     object;
 108 };
 109 #define VM_MAX_MEMORY_SEGMENTS  2
 110
 111 struct vm {
 112         void            *cookie;        /* processor-specific data */
 113         void            *iommu;         /* iommu-specific data */
 114         struct vhpet    *vhpet;         /* virtual HPET */
 115         struct vioapic  *vioapic;       /* virtual ioapic */
 116         struct vmspace  *vmspace;       /* guest's address space */
 117         struct vcpu     vcpu[VM_MAXCPU];
 118         int             num_mem_segs;
 119         struct mem_seg  mem_segs[VM_MAX_MEMORY_SEGMENTS];
 120         char            name[VM_MAX_NAMELEN];
 121
 122         /*
 123          * Set of active vcpus.
 124          * An active vcpu is one that has been started implicitly (BSP) or
 125          * explicitly (AP) by sending it a startup ipi.
 126          */
 127         cpuset_t        active_cpus;
 128 };
 129
 130 static int vmm_initialized;
 131
 132 static struct vmm_ops *ops;
 133 #define VMM_INIT()      (ops != NULL ? (*ops->init)() : 0)
 134 #define VMM_CLEANUP()   (ops != NULL ? (*ops->cleanup)() : 0)
 135 #define VMM_RESUME()    (ops != NULL ? (*ops->resume)() : 0)
 136
 137 #define VMINIT(vm, pmap) (ops != NULL ? (*ops->vminit)(vm, pmap): NULL)
 138 #define VMRUN(vmi, vcpu, rip, pmap) \
 139         (ops != NULL ? (*ops->vmrun)(vmi, vcpu, rip, pmap) : ENXIO)
 140 #define VMCLEANUP(vmi)  (ops != NULL ? (*ops->vmcleanup)(vmi) : NULL)
 141 #define VMSPACE_ALLOC(min, max) \
 142         (ops != NULL ? (*ops->vmspace_alloc)(min, max) : NULL)
 143 #define VMSPACE_FREE(vmspace) \
 144         (ops != NULL ? (*ops->vmspace_free)(vmspace) : ENXIO)
 145 #define VMGETREG(vmi, vcpu, num, retval)                \
 146         (ops != NULL ? (*ops->vmgetreg)(vmi, vcpu, num, retval) : ENXIO)
 147 #define VMSETREG(vmi, vcpu, num, val)           \
 148         (ops != NULL ? (*ops->vmsetreg)(vmi, vcpu, num, val) : ENXIO)
 149 #define VMGETDESC(vmi, vcpu, num, desc)         \
 150         (ops != NULL ? (*ops->vmgetdesc)(vmi, vcpu, num, desc) : ENXIO)
 151 #define VMSETDESC(vmi, vcpu, num, desc)         \
 152         (ops != NULL ? (*ops->vmsetdesc)(vmi, vcpu, num, desc) : ENXIO)
 153 #define VMINJECT(vmi, vcpu, type, vec, ec, ecv) \
 154         (ops != NULL ? (*ops->vminject)(vmi, vcpu, type, vec, ec, ecv) : ENXIO)
 155 #define VMGETCAP(vmi, vcpu, num, retval)        \
 156         (ops != NULL ? (*ops->vmgetcap)(vmi, vcpu, num, retval) : ENXIO)
 157 #define VMSETCAP(vmi, vcpu, num, val)           \
 158         (ops != NULL ? (*ops->vmsetcap)(vmi, vcpu, num, val) : ENXIO)
 159 #define VLAPIC_INIT(vmi, vcpu)                  \
 160         (ops != NULL ? (*ops->vlapic_init)(vmi, vcpu) : NULL)
 161 #define VLAPIC_CLEANUP(vmi, vlapic)             \
 162         (ops != NULL ? (*ops->vlapic_cleanup)(vmi, vlapic) : NULL)
 163
 164 #define fpu_start_emulating()   load_cr0(rcr0() | CR0_TS)
 165 #define fpu_stop_emulating()    clts()
 166
 167 static MALLOC_DEFINE(M_VM, "vm", "vm");
 168 CTASSERT(VMM_MSR_NUM <= 64);    /* msr_mask can keep track of up to 64 msrs */
 169
 170 /* statistics */
 171 static VMM_STAT(VCPU_TOTAL_RUNTIME, "vcpu total runtime");
 172
 173 static void
 174 vcpu_cleanup(struct vm *vm, int i)
 175 {
 176         struct vcpu *vcpu = &vm->vcpu[i];
 177
 178         VLAPIC_CLEANUP(vm->cookie, vcpu->vlapic);
 179         vmm_stat_free(vcpu->stats);
 180         fpu_save_area_free(vcpu->guestfpu);
 181 }
 182
 183 static void
 184 vcpu_init(struct vm *vm, uint32_t vcpu_id)
 185 {
 186         struct vcpu *vcpu;
 187
 188         vcpu = &vm->vcpu[vcpu_id];
 189
 190         vcpu_lock_init(vcpu);
 191         vcpu->hostcpu = NOCPU;
 192         vcpu->vcpuid = vcpu_id;
 193         vcpu->vlapic = VLAPIC_INIT(vm->cookie, vcpu_id);
 194         vm_set_x2apic_state(vm, vcpu_id, X2APIC_ENABLED);
 195         vcpu->guestfpu = fpu_save_area_alloc();
 196         fpu_save_area_reset(vcpu->guestfpu);
 197         vcpu->stats = vmm_stat_alloc();
 198 }
 199
 200 struct vm_exit *
 201 vm_exitinfo(struct vm *vm, int cpuid)
 202 {
 203         struct vcpu *vcpu;
 204
 205         if (cpuid < 0 || cpuid >= VM_MAXCPU)
 206                 panic("vm_exitinfo: invalid cpuid %d", cpuid);
 207
 208         vcpu = &vm->vcpu[cpuid];
 209
 210         return (&vcpu->exitinfo);
 211 }
 212
 213 static void
 214 vmm_resume(void)
 215 {
 216         VMM_RESUME();
 217 }
 218
 219 static int
 220 vmm_init(void)
 221 {
 222         int error;
 223
 224         vmm_host_state_init();
 225         vmm_ipi_init();
 226
 227         error = vmm_mem_init();
 228         if (error)
 229                 return (error);
 230
 231         if (vmm_is_intel())
 232                 ops = &vmm_ops_intel;
 233         else if (vmm_is_amd())
 234                 ops = &vmm_ops_amd;
 235         else
 236                 return (ENXIO);
 237
 238         vmm_msr_init();
 239         vmm_resume_p = vmm_resume;
 240
 241         return (VMM_INIT());
 242 }
 243
 244 static int
 245 vmm_handler(module_t mod, int what, void *arg)
 246 {
 247         int error;
 248
 249         switch (what) {
 250         case MOD_LOAD:
 251                 vmmdev_init();
 252                 iommu_init();
 253                 error = vmm_init();
 254                 if (error == 0)
 255                         vmm_initialized = 1;
 256                 break;
 257         case MOD_UNLOAD:
 258                 error = vmmdev_cleanup();
 259                 if (error == 0) {
 260                         vmm_resume_p = NULL;
 261                         iommu_cleanup();
 262                         vmm_ipi_cleanup();
 263                         error = VMM_CLEANUP();
 264                         /*
 265                          * Something bad happened - prevent new
 266                          * VMs from being created
 267                          */
 268                         if (error)
 269                                 vmm_initialized = 0;
 270                 }
 271                 break;
 272         default:
 273                 error = 0;
 274                 break;
 275         }
 276         return (error);
 277 }
 278
 279 static moduledata_t vmm_kmod = {
 280         "vmm",
 281         vmm_handler,
 282         NULL
 283 };
 284
 285 /*
 286  * vmm initialization has the following dependencies:
 287  *
 288  * - iommu initialization must happen after the pci passthru driver has had
 289  *   a chance to attach to any passthru devices (after SI_SUB_CONFIGURE).
 290  *
 291  * - VT-x initialization requires smp_rendezvous() and therefore must happen
 292  *   after SMP is fully functional (after SI_SUB_SMP).
 293  */
 294 DECLARE_MODULE(vmm, vmm_kmod, SI_SUB_SMP + 1, SI_ORDER_ANY);
 295 MODULE_VERSION(vmm, 1);
 296
 297 SYSCTL_NODE(_hw, OID_AUTO, vmm, CTLFLAG_RW, NULL, NULL);
 298
 299 int
 300 vm_create(const char *name, struct vm **retvm)
 301 {
 302         int i;
 303         struct vm *vm;
 304         struct vmspace *vmspace;
 305
 306         const int BSP = 0;
 307
 308         /*
 309          * If vmm.ko could not be successfully initialized then don't attempt
 310          * to create the virtual machine.
 311          */
 312         if (!vmm_initialized)
 313                 return (ENXIO);
 314
 315         if (name == NULL || strlen(name) >= VM_MAX_NAMELEN)
 316                 return (EINVAL);
 317
 318         vmspace = VMSPACE_ALLOC(VM_MIN_ADDRESS, VM_MAXUSER_ADDRESS);
 319         if (vmspace == NULL)
 320                 return (ENOMEM);
 321
 322         vm = malloc(sizeof(struct vm), M_VM, M_WAITOK | M_ZERO);
 323         strcpy(vm->name, name);
 324         vm->cookie = VMINIT(vm, vmspace_pmap(vmspace));
 325         vm->vioapic = vioapic_init(vm);
 326         vm->vhpet = vhpet_init(vm);
 327
 328         for (i = 0; i < VM_MAXCPU; i++) {
 329                 vcpu_init(vm, i);
 330                 guest_msrs_init(vm, i);
 331         }
 332
 333         vm_activate_cpu(vm, BSP);
 334         vm->vmspace = vmspace;
 335
 336         *retvm = vm;
 337         return (0);
 338 }
 339
 340 static void
 341 vm_free_mem_seg(struct vm *vm, struct mem_seg *seg)
 342 {
 343
 344         if (seg->object != NULL)
 345                 vmm_mem_free(vm->vmspace, seg->gpa, seg->len);
 346
 347         bzero(seg, sizeof(*seg));
 348 }
 349
 350 void
 351 vm_destroy(struct vm *vm)
 352 {
 353         int i;
 354
 355         ppt_unassign_all(vm);
 356
 357         if (vm->iommu != NULL)
 358                 iommu_destroy_domain(vm->iommu);
 359
 360         vhpet_cleanup(vm->vhpet);
 361         vioapic_cleanup(vm->vioapic);
 362
 363         for (i = 0; i < vm->num_mem_segs; i++)
 364                 vm_free_mem_seg(vm, &vm->mem_segs[i]);
 365
 366         vm->num_mem_segs = 0;
 367
 368         for (i = 0; i < VM_MAXCPU; i++)
 369                 vcpu_cleanup(vm, i);
 370
 371         VMSPACE_FREE(vm->vmspace);
 372
 373         VMCLEANUP(vm->cookie);
 374
 375         free(vm, M_VM);
 376 }
 377
 378 const char *
 379 vm_name(struct vm *vm)
 380 {
 381         return (vm->name);
 382 }
 383
 384 int
 385 vm_map_mmio(struct vm *vm, vm_paddr_t gpa, size_t len, vm_paddr_t hpa)
 386 {
 387         vm_object_t obj;
 388
 389         if ((obj = vmm_mmio_alloc(vm->vmspace, gpa, len, hpa)) == NULL)
 390                 return (ENOMEM);
 391         else
 392                 return (0);
 393 }
 394
 395 int
 396 vm_unmap_mmio(struct vm *vm, vm_paddr_t gpa, size_t len)
 397 {
 398
 399         vmm_mmio_free(vm->vmspace, gpa, len);
 400         return (0);
 401 }
 402
 403 boolean_t
 404 vm_mem_allocated(struct vm *vm, vm_paddr_t gpa)
 405 {
 406         int i;
 407         vm_paddr_t gpabase, gpalimit;
 408
 409         for (i = 0; i < vm->num_mem_segs; i++) {
 410                 gpabase = vm->mem_segs[i].gpa;
 411                 gpalimit = gpabase + vm->mem_segs[i].len;
 412                 if (gpa >= gpabase && gpa < gpalimit)
 413                         return (TRUE);          /* 'gpa' is regular memory */
 414         }
 415
 416         if (ppt_is_mmio(vm, gpa))
 417                 return (TRUE);                  /* 'gpa' is pci passthru mmio */
 418
 419         return (FALSE);
 420 }
 421
 422 int
 423 vm_malloc(struct vm *vm, vm_paddr_t gpa, size_t len)
 424 {
 425         int available, allocated;
 426         struct mem_seg *seg;
 427         vm_object_t object;
 428         vm_paddr_t g;
 429
 430         if ((gpa & PAGE_MASK) || (len & PAGE_MASK) || len == 0)
 431                 return (EINVAL);
 432
 433         available = allocated = 0;
 434         g = gpa;
 435         while (g < gpa + len) {
 436                 if (vm_mem_allocated(vm, g))
 437                         allocated++;
 438                 else
 439                         available++;
 440
 441                 g += PAGE_SIZE;
 442         }
 443
 444         /*
 445          * If there are some allocated and some available pages in the address
 446          * range then it is an error.
 447          */
 448         if (allocated && available)
 449                 return (EINVAL);
 450
 451         /*
 452          * If the entire address range being requested has already been
 453          * allocated then there isn't anything more to do.
 454          */
 455         if (allocated && available == 0)
 456                 return (0);
 457
 458         if (vm->num_mem_segs >= VM_MAX_MEMORY_SEGMENTS)
 459                 return (E2BIG);
 460
 461         seg = &vm->mem_segs[vm->num_mem_segs];
 462
 463         if ((object = vmm_mem_alloc(vm->vmspace, gpa, len)) == NULL)
 464                 return (ENOMEM);
 465
 466         seg->gpa = gpa;
 467         seg->len = len;
 468         seg->object = object;
 469         seg->wired = FALSE;
 470
 471         vm->num_mem_segs++;
 472
 473         return (0);
 474 }
 475
 476 static void
 477 vm_gpa_unwire(struct vm *vm)
 478 {
 479         int i, rv;
 480         struct mem_seg *seg;
 481
 482         for (i = 0; i < vm->num_mem_segs; i++) {
 483                 seg = &vm->mem_segs[i];
 484                 if (!seg->wired)
 485                         continue;
 486
 487                 rv = vm_map_unwire(&vm->vmspace->vm_map,
 488                                    seg->gpa, seg->gpa + seg->len,
 489                                    VM_MAP_WIRE_USER | VM_MAP_WIRE_NOHOLES);
 490                 KASSERT(rv == KERN_SUCCESS, ("vm(%s) memory segment "
 491                     "%#lx/%ld could not be unwired: %d",
 492                     vm_name(vm), seg->gpa, seg->len, rv));
 493
 494                 seg->wired = FALSE;
 495         }
 496 }
 497
 498 static int
 499 vm_gpa_wire(struct vm *vm)
 500 {
 501         int i, rv;
 502         struct mem_seg *seg;
 503
 504         for (i = 0; i < vm->num_mem_segs; i++) {
 505                 seg = &vm->mem_segs[i];
 506                 if (seg->wired)
 507                         continue;
 508
 509                 /* XXX rlimits? */
 510                 rv = vm_map_wire(&vm->vmspace->vm_map,
 511                                  seg->gpa, seg->gpa + seg->len,
 512                                  VM_MAP_WIRE_USER | VM_MAP_WIRE_NOHOLES);
 513                 if (rv != KERN_SUCCESS)
 514                         break;
 515
 516                 seg->wired = TRUE;
 517         }
 518
 519         if (i < vm->num_mem_segs) {
 520                 /*
 521                  * Undo the wiring before returning an error.
 522                  */
 523                 vm_gpa_unwire(vm);
 524                 return (EAGAIN);
 525         }
 526
 527         return (0);
 528 }
 529
 530 static void
 531 vm_iommu_modify(struct vm *vm, boolean_t map)
 532 {
 533         int i, sz;
 534         vm_paddr_t gpa, hpa;
 535         struct mem_seg *seg;
 536         void *vp, *cookie, *host_domain;
 537
 538         sz = PAGE_SIZE;
 539         host_domain = iommu_host_domain();
 540
 541         for (i = 0; i < vm->num_mem_segs; i++) {
 542                 seg = &vm->mem_segs[i];
 543                 KASSERT(seg->wired, ("vm(%s) memory segment %#lx/%ld not wired",
 544                     vm_name(vm), seg->gpa, seg->len));
 545
 546                 gpa = seg->gpa;
 547                 while (gpa < seg->gpa + seg->len) {
 548                         vp = vm_gpa_hold(vm, gpa, PAGE_SIZE, VM_PROT_WRITE,
 549                                          &cookie);
 550                         KASSERT(vp != NULL, ("vm(%s) could not map gpa %#lx",
 551                             vm_name(vm), gpa));
 552
 553                         vm_gpa_release(cookie);
 554
 555                         hpa = DMAP_TO_PHYS((uintptr_t)vp);
 556                         if (map) {
 557                                 iommu_create_mapping(vm->iommu, gpa, hpa, sz);
 558                                 iommu_remove_mapping(host_domain, hpa, sz);
 559                         } else {
 560                                 iommu_remove_mapping(vm->iommu, gpa, sz);
 561                                 iommu_create_mapping(host_domain, hpa, hpa, sz);
 562                         }
 563
 564                         gpa += PAGE_SIZE;
 565                 }
 566         }
 567
 568         /*
 569          * Invalidate the cached translations associated with the domain
 570          * from which pages were removed.
 571          */
 572         if (map)
 573                 iommu_invalidate_tlb(host_domain);
 574         else
 575                 iommu_invalidate_tlb(vm->iommu);
 576 }
 577
 578 #define vm_iommu_unmap(vm)      vm_iommu_modify((vm), FALSE)
 579 #define vm_iommu_map(vm)        vm_iommu_modify((vm), TRUE)
 580
 581 int
 582 vm_unassign_pptdev(struct vm *vm, int bus, int slot, int func)
 583 {
 584         int error;
 585
 586         error = ppt_unassign_device(vm, bus, slot, func);
 587         if (error)
 588                 return (error);
 589
 590         if (ppt_num_devices(vm) == 0) {
 591                 vm_iommu_unmap(vm);
 592                 vm_gpa_unwire(vm);
 593         }
 594         return (0);
 595 }
 596
 597 int
 598 vm_assign_pptdev(struct vm *vm, int bus, int slot, int func)
 599 {
 600         int error;
 601         vm_paddr_t maxaddr;
 602
 603         /*
 604          * Virtual machines with pci passthru devices get special treatment:
 605          * - the guest physical memory is wired
 606          * - the iommu is programmed to do the 'gpa' to 'hpa' translation
 607          *
 608          * We need to do this before the first pci passthru device is attached.
 609          */
 610         if (ppt_num_devices(vm) == 0) {
 611                 KASSERT(vm->iommu == NULL,
 612                     ("vm_assign_pptdev: iommu must be NULL"));
 613                 maxaddr = vmm_mem_maxaddr();
 614                 vm->iommu = iommu_create_domain(maxaddr);
 615
 616                 error = vm_gpa_wire(vm);
 617                 if (error)
 618                         return (error);
 619
 620                 vm_iommu_map(vm);
 621         }
 622
 623         error = ppt_assign_device(vm, bus, slot, func);
 624         return (error);
 625 }
 626
 627 void *
 628 vm_gpa_hold(struct vm *vm, vm_paddr_t gpa, size_t len, int reqprot,
 629             void **cookie)
 630 {
 631         int count, pageoff;
 632         vm_page_t m;
 633
 634         pageoff = gpa & PAGE_MASK;
 635         if (len > PAGE_SIZE - pageoff)
 636                 panic("vm_gpa_hold: invalid gpa/len: 0x%016lx/%lu", gpa, len);
 637
 638         count = vm_fault_quick_hold_pages(&vm->vmspace->vm_map,
 639             trunc_page(gpa), PAGE_SIZE, reqprot, &m, 1);
 640
 641         if (count == 1) {
 642                 *cookie = m;
 643                 return ((void *)(PHYS_TO_DMAP(VM_PAGE_TO_PHYS(m)) + pageoff));
 644         } else {
 645                 *cookie = NULL;
 646                 return (NULL);
 647         }
 648 }
 649
 650 void
 651 vm_gpa_release(void *cookie)
 652 {
 653         vm_page_t m = cookie;
 654
 655         vm_page_lock(m);
 656         vm_page_unhold(m);
 657         vm_page_unlock(m);
 658 }
 659
 660 int
 661 vm_gpabase2memseg(struct vm *vm, vm_paddr_t gpabase,
 662                   struct vm_memory_segment *seg)
 663 {
 664         int i;
 665
 666         for (i = 0; i < vm->num_mem_segs; i++) {
 667                 if (gpabase == vm->mem_segs[i].gpa) {
 668                         seg->gpa = vm->mem_segs[i].gpa;
 669                         seg->len = vm->mem_segs[i].len;
 670                         seg->wired = vm->mem_segs[i].wired;
 671                         return (0);
 672                 }
 673         }
 674         return (-1);
 675 }
 676
 677 int
 678 vm_get_memobj(struct vm *vm, vm_paddr_t gpa, size_t len,
 679               vm_offset_t *offset, struct vm_object **object)
 680 {
 681         int i;
 682         size_t seg_len;
 683         vm_paddr_t seg_gpa;
 684         vm_object_t seg_obj;
 685
 686         for (i = 0; i < vm->num_mem_segs; i++) {
 687                 if ((seg_obj = vm->mem_segs[i].object) == NULL)
 688                         continue;
 689
 690                 seg_gpa = vm->mem_segs[i].gpa;
 691                 seg_len = vm->mem_segs[i].len;
 692
 693                 if (gpa >= seg_gpa && gpa < seg_gpa + seg_len) {
 694                         *offset = gpa - seg_gpa;
 695                         *object = seg_obj;
 696                         vm_object_reference(seg_obj);
 697                         return (0);
 698                 }
 699         }
 700
 701         return (EINVAL);
 702 }
 703
 704 int
 705 vm_get_register(struct vm *vm, int vcpu, int reg, uint64_t *retval)
 706 {
 707
 708         if (vcpu < 0 || vcpu >= VM_MAXCPU)
 709                 return (EINVAL);
 710
 711         if (reg >= VM_REG_LAST)
 712                 return (EINVAL);
 713
 714         return (VMGETREG(vm->cookie, vcpu, reg, retval));
 715 }
 716
 717 int
 718 vm_set_register(struct vm *vm, int vcpu, int reg, uint64_t val)
 719 {
 720
 721         if (vcpu < 0 || vcpu >= VM_MAXCPU)
 722                 return (EINVAL);
 723
 724         if (reg >= VM_REG_LAST)
 725                 return (EINVAL);
 726
 727         return (VMSETREG(vm->cookie, vcpu, reg, val));
 728 }
 729
 730 static boolean_t
 731 is_descriptor_table(int reg)
 732 {
 733
 734         switch (reg) {
 735         case VM_REG_GUEST_IDTR:
 736         case VM_REG_GUEST_GDTR:
 737                 return (TRUE);
 738         default:
 739                 return (FALSE);
 740         }
 741 }
 742
 743 static boolean_t
 744 is_segment_register(int reg)
 745 {
 746
 747         switch (reg) {
 748         case VM_REG_GUEST_ES:
 749         case VM_REG_GUEST_CS:
 750         case VM_REG_GUEST_SS:
 751         case VM_REG_GUEST_DS:
 752         case VM_REG_GUEST_FS:
 753         case VM_REG_GUEST_GS:
 754         case VM_REG_GUEST_TR:
 755         case VM_REG_GUEST_LDTR:
 756                 return (TRUE);
 757         default:
 758                 return (FALSE);
 759         }
 760 }
 761
 762 int
 763 vm_get_seg_desc(struct vm *vm, int vcpu, int reg,
 764                 struct seg_desc *desc)
 765 {
 766
 767         if (vcpu < 0 || vcpu >= VM_MAXCPU)
 768                 return (EINVAL);
 769
 770         if (!is_segment_register(reg) && !is_descriptor_table(reg))
 771                 return (EINVAL);
 772
 773         return (VMGETDESC(vm->cookie, vcpu, reg, desc));
 774 }
 775
 776 int
 777 vm_set_seg_desc(struct vm *vm, int vcpu, int reg,
 778                 struct seg_desc *desc)
 779 {
 780         if (vcpu < 0 || vcpu >= VM_MAXCPU)
 781                 return (EINVAL);
 782
 783         if (!is_segment_register(reg) && !is_descriptor_table(reg))
 784                 return (EINVAL);
 785
 786         return (VMSETDESC(vm->cookie, vcpu, reg, desc));
 787 }
 788
 789 static void
 790 restore_guest_fpustate(struct vcpu *vcpu)
 791 {
 792
 793         /* flush host state to the pcb */
 794         fpuexit(curthread);
 795
 796         /* restore guest FPU state */
 797         fpu_stop_emulating();
 798         fpurestore(vcpu->guestfpu);
 799
 800         /*
 801          * The FPU is now "dirty" with the guest's state so turn on emulation
 802          * to trap any access to the FPU by the host.
 803          */
 804         fpu_start_emulating();
 805 }
 806
 807 static void
 808 save_guest_fpustate(struct vcpu *vcpu)
 809 {
 810
 811         if ((rcr0() & CR0_TS) == 0)
 812                 panic("fpu emulation not enabled in host!");
 813
 814         /* save guest FPU state */
 815         fpu_stop_emulating();
 816         fpusave(vcpu->guestfpu);
 817         fpu_start_emulating();
 818 }
 819
 820 static VMM_STAT(VCPU_IDLE_TICKS, "number of ticks vcpu was idle");
 821
 822 static int
 823 vcpu_set_state_locked(struct vcpu *vcpu, enum vcpu_state newstate,
 824     bool from_idle)
 825 {
 826         int error;
 827
 828         vcpu_assert_locked(vcpu);
 829
 830         /*
 831          * State transitions from the vmmdev_ioctl() must always begin from
 832          * the VCPU_IDLE state. This guarantees that there is only a single
 833          * ioctl() operating on a vcpu at any point.
 834          */
 835         if (from_idle) {
 836                 while (vcpu->state != VCPU_IDLE)
 837                         msleep_spin(&vcpu->state, &vcpu->mtx, "vmstat", hz);
 838         } else {
 839                 KASSERT(vcpu->state != VCPU_IDLE, ("invalid transition from "
 840                     "vcpu idle state"));
 841         }
 842
 843         /*
 844          * The following state transitions are allowed:
 845          * IDLE -> FROZEN -> IDLE
 846          * FROZEN -> RUNNING -> FROZEN
 847          * FROZEN -> SLEEPING -> FROZEN
 848          */
 849         switch (vcpu->state) {
 850         case VCPU_IDLE:
 851         case VCPU_RUNNING:
 852         case VCPU_SLEEPING:
 853                 error = (newstate != VCPU_FROZEN);
 854                 break;
 855         case VCPU_FROZEN:
 856                 error = (newstate == VCPU_FROZEN);
 857                 break;
 858         default:
 859                 error = 1;
 860                 break;
 861         }
 862
 863         if (error)
 864                 return (EBUSY);
 865
 866         vcpu->state = newstate;
 867         if (newstate == VCPU_IDLE)
 868                 wakeup(&vcpu->state);
 869
 870         return (0);
 871 }
 872
 873 static void
 874 vcpu_require_state(struct vm *vm, int vcpuid, enum vcpu_state newstate)
 875 {
 876         int error;
 877
 878         if ((error = vcpu_set_state(vm, vcpuid, newstate, false)) != 0)
 879                 panic("Error %d setting state to %d\n", error, newstate);
 880 }
 881
 882 static void
 883 vcpu_require_state_locked(struct vcpu *vcpu, enum vcpu_state newstate)
 884 {
 885         int error;
 886
 887         if ((error = vcpu_set_state_locked(vcpu, newstate, false)) != 0)
 888                 panic("Error %d setting state to %d", error, newstate);
 889 }
 890
 891 /*
 892  * Emulate a guest 'hlt' by sleeping until the vcpu is ready to run.
 893  */
 894 static int
 895 vm_handle_hlt(struct vm *vm, int vcpuid, bool intr_disabled, bool *retu)
 896 {
 897         struct vm_exit *vmexit;
 898         struct vcpu *vcpu;
 899         int t, timo;
 900
 901         vcpu = &vm->vcpu[vcpuid];
 902
 903         vcpu_lock(vcpu);
 904
 905         /*
 906          * Do a final check for pending NMI or interrupts before
 907          * really putting this thread to sleep.
 908          *
 909          * These interrupts could have happened any time after we
 910          * returned from VMRUN() and before we grabbed the vcpu lock.
 911          */
 912         if (!vm_nmi_pending(vm, vcpuid) &&
 913             (intr_disabled || vlapic_pending_intr(vcpu->vlapic) < 0)) {
 914                 t = ticks;
 915                 vcpu_require_state_locked(vcpu, VCPU_SLEEPING);
 916                 if (vlapic_enabled(vcpu->vlapic)) {
 917                         /*
 918                          * XXX msleep_spin() is not interruptible so use the
 919                          * 'timo' to put an upper bound on the sleep time.
 920                          */
 921                         timo = hz;
 922                         msleep_spin(vcpu, &vcpu->mtx, "vmidle", timo);
 923                 } else {
 924                         /*
 925                          * Spindown the vcpu if the apic is disabled and it
 926                          * had entered the halted state.
 927                          */
 928                         *retu = true;
 929                         vmexit = vm_exitinfo(vm, vcpuid);
 930                         vmexit->exitcode = VM_EXITCODE_SPINDOWN_CPU;
 931                         VCPU_CTR0(vm, vcpuid, "spinning down cpu");
 932                 }
 933                 vcpu_require_state_locked(vcpu, VCPU_FROZEN);
 934                 vmm_stat_incr(vm, vcpuid, VCPU_IDLE_TICKS, ticks - t);
 935         }
 936         vcpu_unlock(vcpu);
 937
 938         return (0);
 939 }
 940
 941 static int
 942 vm_handle_paging(struct vm *vm, int vcpuid, bool *retu)
 943 {
 944         int rv, ftype;
 945         struct vm_map *map;
 946         struct vcpu *vcpu;
 947         struct vm_exit *vme;
 948
 949         vcpu = &vm->vcpu[vcpuid];
 950         vme = &vcpu->exitinfo;
 951
 952         ftype = vme->u.paging.fault_type;
 953         KASSERT(ftype == VM_PROT_READ ||
 954             ftype == VM_PROT_WRITE || ftype == VM_PROT_EXECUTE,
 955             ("vm_handle_paging: invalid fault_type %d", ftype));
 956
 957         if (ftype == VM_PROT_READ || ftype == VM_PROT_WRITE) {
 958                 rv = pmap_emulate_accessed_dirty(vmspace_pmap(vm->vmspace),
 959                     vme->u.paging.gpa, ftype);
 960                 if (rv == 0)
 961                         goto done;
 962         }
 963
 964         map = &vm->vmspace->vm_map;
 965         rv = vm_fault(map, vme->u.paging.gpa, ftype, VM_FAULT_NORMAL);
 966
 967         VCPU_CTR3(vm, vcpuid, "vm_handle_paging rv = %d, gpa = %#lx, "
 968             "ftype = %d", rv, vme->u.paging.gpa, ftype);
 969
 970         if (rv != KERN_SUCCESS)
 971                 return (EFAULT);
 972 done:
 973         /* restart execution at the faulting instruction */
 974         vme->inst_length = 0;
 975
 976         return (0);
 977 }
 978
 979 static int
 980 vm_handle_inst_emul(struct vm *vm, int vcpuid, bool *retu)
 981 {
 982         struct vie *vie;
 983         struct vcpu *vcpu;
 984         struct vm_exit *vme;
 985         int error, inst_length;
 986         uint64_t rip, gla, gpa, cr3;
 987         mem_region_read_t mread;
 988         mem_region_write_t mwrite;
 989
 990         vcpu = &vm->vcpu[vcpuid];
 991         vme = &vcpu->exitinfo;
 992
 993         rip = vme->rip;
 994         inst_length = vme->inst_length;
 995
 996         gla = vme->u.inst_emul.gla;
 997         gpa = vme->u.inst_emul.gpa;
 998         cr3 = vme->u.inst_emul.cr3;
 999         vie = &vme->u.inst_emul.vie;
1000
1001         vie_init(vie);
1002
1003         /* Fetch, decode and emulate the faulting instruction */
1004         if (vmm_fetch_instruction(vm, vcpuid, rip, inst_length, cr3, vie) != 0)
1005                 return (EFAULT);
1006
1007         if (vmm_decode_instruction(vm, vcpuid, gla, vie) != 0)
1008                 return (EFAULT);
1009
1010         /* return to userland unless this is an in-kernel emulated device */
1011         if (gpa >= DEFAULT_APIC_BASE && gpa < DEFAULT_APIC_BASE + PAGE_SIZE) {
1012                 mread = lapic_mmio_read;
1013                 mwrite = lapic_mmio_write;
1014         } else if (gpa >= VIOAPIC_BASE && gpa < VIOAPIC_BASE + VIOAPIC_SIZE) {
1015                 mread = vioapic_mmio_read;
1016                 mwrite = vioapic_mmio_write;
1017         } else if (gpa >= VHPET_BASE && gpa < VHPET_BASE + VHPET_SIZE) {
1018                 mread = vhpet_mmio_read;
1019                 mwrite = vhpet_mmio_write;
1020         } else {
1021                 *retu = true;
1022                 return (0);
1023         }
1024
1025         error = vmm_emulate_instruction(vm, vcpuid, gpa, vie, mread, mwrite,
1026             retu);
1027
1028         return (error);
1029 }
1030
1031 int
1032 vm_run(struct vm *vm, struct vm_run *vmrun)
1033 {
1034         int error, vcpuid;
1035         struct vcpu *vcpu;
1036         struct pcb *pcb;
1037         uint64_t tscval, rip;
1038         struct vm_exit *vme;
1039         bool retu, intr_disabled;
1040         pmap_t pmap;
1041
1042         vcpuid = vmrun->cpuid;
1043
1044         if (vcpuid < 0 || vcpuid >= VM_MAXCPU)
1045                 return (EINVAL);
1046
1047         pmap = vmspace_pmap(vm->vmspace);
1048         vcpu = &vm->vcpu[vcpuid];
1049         vme = &vcpu->exitinfo;
1050         rip = vmrun->rip;
1051 restart:
1052         critical_enter();
1053
1054         KASSERT(!CPU_ISSET(curcpu, &pmap->pm_active),
1055             ("vm_run: absurd pm_active"));
1056
1057         tscval = rdtsc();
1058
1059         pcb = PCPU_GET(curpcb);
1060         set_pcb_flags(pcb, PCB_FULL_IRET);
1061
1062         restore_guest_msrs(vm, vcpuid);
1063         restore_guest_fpustate(vcpu);
1064
1065         vcpu_require_state(vm, vcpuid, VCPU_RUNNING);
1066         vcpu->hostcpu = curcpu;
1067         error = VMRUN(vm->cookie, vcpuid, rip, pmap);
1068         vcpu->hostcpu = NOCPU;
1069         vcpu_require_state(vm, vcpuid, VCPU_FROZEN);
1070
1071         save_guest_fpustate(vcpu);
1072         restore_host_msrs(vm, vcpuid);
1073
1074         vmm_stat_incr(vm, vcpuid, VCPU_TOTAL_RUNTIME, rdtsc() - tscval);
1075
1076         critical_exit();
1077
1078         if (error == 0) {
1079                 retu = false;
1080                 switch (vme->exitcode) {
1081                 case VM_EXITCODE_HLT:
1082                         intr_disabled = ((vme->u.hlt.rflags & PSL_I) == 0);
1083                         error = vm_handle_hlt(vm, vcpuid, intr_disabled, &retu);
1084                         break;
1085                 case VM_EXITCODE_PAGING:
1086                         error = vm_handle_paging(vm, vcpuid, &retu);
1087                         break;
1088                 case VM_EXITCODE_INST_EMUL:
1089                         error = vm_handle_inst_emul(vm, vcpuid, &retu);
1090                         break;
1091                 default:
1092                         retu = true;    /* handled in userland */
1093                         break;
1094                 }
1095         }
1096
1097         if (error == 0 && retu == false) {
1098                 rip = vme->rip + vme->inst_length;
1099                 goto restart;
1100         }
1101
1102         /* copy the exit information */
1103         bcopy(vme, &vmrun->vm_exit, sizeof(struct vm_exit));
1104         return (error);
1105 }
1106
1107 int
1108 vm_inject_event(struct vm *vm, int vcpuid, int type,
1109                 int vector, uint32_t code, int code_valid)
1110 {
1111         if (vcpuid < 0 || vcpuid >= VM_MAXCPU)
1112                 return (EINVAL);
1113
1114         if ((type > VM_EVENT_NONE && type < VM_EVENT_MAX) == 0)
1115                 return (EINVAL);
1116
1117         if (vector < 0 || vector > 255)
1118                 return (EINVAL);
1119
1120         return (VMINJECT(vm->cookie, vcpuid, type, vector, code, code_valid));
1121 }
1122
/* Statistic incremented by one in vm_nmi_clear() when a pending NMI is cleared. */
static VMM_STAT(VCPU_NMI_COUNT, "number of NMIs delivered to vcpu");
1124
1125 int
1126 vm_inject_nmi(struct vm *vm, int vcpuid)
1127 {
1128         struct vcpu *vcpu;
1129
1130         if (vcpuid < 0 || vcpuid >= VM_MAXCPU)
1131                 return (EINVAL);
1132
1133         vcpu = &vm->vcpu[vcpuid];
1134
1135         vcpu->nmi_pending = 1;
1136         vcpu_notify_event(vm, vcpuid, false);
1137         return (0);
1138 }
1139
1140 int
1141 vm_nmi_pending(struct vm *vm, int vcpuid)
1142 {
1143         struct vcpu *vcpu;
1144
1145         if (vcpuid < 0 || vcpuid >= VM_MAXCPU)
1146                 panic("vm_nmi_pending: invalid vcpuid %d", vcpuid);
1147
1148         vcpu = &vm->vcpu[vcpuid];
1149
1150         return (vcpu->nmi_pending);
1151 }
1152
1153 void
1154 vm_nmi_clear(struct vm *vm, int vcpuid)
1155 {
1156         struct vcpu *vcpu;
1157
1158         if (vcpuid < 0 || vcpuid >= VM_MAXCPU)
1159                 panic("vm_nmi_pending: invalid vcpuid %d", vcpuid);
1160
1161         vcpu = &vm->vcpu[vcpuid];
1162
1163         if (vcpu->nmi_pending == 0)
1164                 panic("vm_nmi_clear: inconsistent nmi_pending state");
1165
1166         vcpu->nmi_pending = 0;
1167         vmm_stat_incr(vm, vcpuid, VCPU_NMI_COUNT, 1);
1168 }
1169
1170 int
1171 vm_get_capability(struct vm *vm, int vcpu, int type, int *retval)
1172 {
1173         if (vcpu < 0 || vcpu >= VM_MAXCPU)
1174                 return (EINVAL);
1175
1176         if (type < 0 || type >= VM_CAP_MAX)
1177                 return (EINVAL);
1178
1179         return (VMGETCAP(vm->cookie, vcpu, type, retval));
1180 }
1181
1182 int
1183 vm_set_capability(struct vm *vm, int vcpu, int type, int val)
1184 {
1185         if (vcpu < 0 || vcpu >= VM_MAXCPU)
1186                 return (EINVAL);
1187
1188         if (type < 0 || type >= VM_CAP_MAX)
1189                 return (EINVAL);
1190
1191         return (VMSETCAP(vm->cookie, vcpu, type, val));
1192 }
1193
1194 uint64_t *
1195 vm_guest_msrs(struct vm *vm, int cpu)
1196 {
1197         return (vm->vcpu[cpu].guest_msrs);
1198 }
1199
1200 struct vlapic *
1201 vm_lapic(struct vm *vm, int cpu)
1202 {
1203         return (vm->vcpu[cpu].vlapic);
1204 }
1205
1206 struct vioapic *
1207 vm_ioapic(struct vm *vm)
1208 {
1209
1210         return (vm->vioapic);
1211 }
1212
1213 struct vhpet *
1214 vm_hpet(struct vm *vm)
1215 {
1216
1217         return (vm->vhpet);
1218 }
1219
1220 boolean_t
1221 vmm_is_pptdev(int bus, int slot, int func)
1222 {
1223         int found, i, n;
1224         int b, s, f;
1225         char *val, *cp, *cp2;
1226
1227         /*
1228          * XXX
1229          * The length of an environment variable is limited to 128 bytes which
1230          * puts an upper limit on the number of passthru devices that may be
1231          * specified using a single environment variable.
1232          *
1233          * Work around this by scanning multiple environment variable
1234          * names instead of a single one - yuck!
1235          */
1236         const char *names[] = { "pptdevs", "pptdevs2", "pptdevs3", NULL };
1237
1238         /* set pptdevs="1/2/3 4/5/6 7/8/9 10/11/12" */
1239         found = 0;
1240         for (i = 0; names[i] != NULL && !found; i++) {
1241                 cp = val = getenv(names[i]);
1242                 while (cp != NULL && *cp != '\0') {
1243                         if ((cp2 = strchr(cp, ' ')) != NULL)
1244                                 *cp2 = '\0';
1245
1246                         n = sscanf(cp, "%d/%d/%d", &b, &s, &f);
1247                         if (n == 3 && bus == b && slot == s && func == f) {
1248                                 found = 1;
1249                                 break;
1250                         }
1251
1252                         if (cp2 != NULL)
1253                                 *cp2++ = ' ';
1254
1255                         cp = cp2;
1256                 }
1257                 freeenv(val);
1258         }
1259         return (found);
1260 }
1261
1262 void *
1263 vm_iommu_domain(struct vm *vm)
1264 {
1265
1266         return (vm->iommu);
1267 }
1268
1269 int
1270 vcpu_set_state(struct vm *vm, int vcpuid, enum vcpu_state newstate,
1271     bool from_idle)
1272 {
1273         int error;
1274         struct vcpu *vcpu;
1275
1276         if (vcpuid < 0 || vcpuid >= VM_MAXCPU)
1277                 panic("vm_set_run_state: invalid vcpuid %d", vcpuid);
1278
1279         vcpu = &vm->vcpu[vcpuid];
1280
1281         vcpu_lock(vcpu);
1282         error = vcpu_set_state_locked(vcpu, newstate, from_idle);
1283         vcpu_unlock(vcpu);
1284
1285         return (error);
1286 }
1287
1288 enum vcpu_state
1289 vcpu_get_state(struct vm *vm, int vcpuid, int *hostcpu)
1290 {
1291         struct vcpu *vcpu;
1292         enum vcpu_state state;
1293
1294         if (vcpuid < 0 || vcpuid >= VM_MAXCPU)
1295                 panic("vm_get_run_state: invalid vcpuid %d", vcpuid);
1296
1297         vcpu = &vm->vcpu[vcpuid];
1298
1299         vcpu_lock(vcpu);
1300         state = vcpu->state;
1301         if (hostcpu != NULL)
1302                 *hostcpu = vcpu->hostcpu;
1303         vcpu_unlock(vcpu);
1304
1305         return (state);
1306 }
1307
1308 void
1309 vm_activate_cpu(struct vm *vm, int vcpuid)
1310 {
1311
1312         if (vcpuid >= 0 && vcpuid < VM_MAXCPU)
1313                 CPU_SET(vcpuid, &vm->active_cpus);
1314 }
1315
1316 cpuset_t
1317 vm_active_cpus(struct vm *vm)
1318 {
1319
1320         return (vm->active_cpus);
1321 }
1322
1323 void *
1324 vcpu_stats(struct vm *vm, int vcpuid)
1325 {
1326
1327         return (vm->vcpu[vcpuid].stats);
1328 }
1329
1330 int
1331 vm_get_x2apic_state(struct vm *vm, int vcpuid, enum x2apic_state *state)
1332 {
1333         if (vcpuid < 0 || vcpuid >= VM_MAXCPU)
1334                 return (EINVAL);
1335
1336         *state = vm->vcpu[vcpuid].x2apic_state;
1337
1338         return (0);
1339 }
1340
1341 int
1342 vm_set_x2apic_state(struct vm *vm, int vcpuid, enum x2apic_state state)
1343 {
1344         if (vcpuid < 0 || vcpuid >= VM_MAXCPU)
1345                 return (EINVAL);
1346
1347         if (state >= X2APIC_STATE_LAST)
1348                 return (EINVAL);
1349
1350         vm->vcpu[vcpuid].x2apic_state = state;
1351
1352         vlapic_set_x2apic_state(vm, vcpuid, state);
1353
1354         return (0);
1355 }
1356
1357 /*
1358  * This function is called to ensure that a vcpu "sees" a pending event
1359  * as soon as possible:
1360  * - If the vcpu thread is sleeping then it is woken up.
1361  * - If the vcpu is running on a different host_cpu then an IPI will be directed
1362  *   to the host_cpu to cause the vcpu to trap into the hypervisor.
1363  */
1364 void
1365 vcpu_notify_event(struct vm *vm, int vcpuid, bool lapic_intr)
1366 {
1367         int hostcpu;
1368         struct vcpu *vcpu;
1369
1370         vcpu = &vm->vcpu[vcpuid];
1371
1372         vcpu_lock(vcpu);
1373         hostcpu = vcpu->hostcpu;
1374         if (hostcpu == NOCPU) {
1375                 if (vcpu->state == VCPU_SLEEPING)
1376                         wakeup_one(vcpu);
1377         } else {
1378                 if (vcpu->state != VCPU_RUNNING)
1379                         panic("invalid vcpu state %d", vcpu->state);
1380                 if (hostcpu != curcpu) {
1381                         if (lapic_intr)
1382                                 vlapic_post_intr(vcpu->vlapic, hostcpu);
1383                         else
1384                                 ipi_cpu(hostcpu, vmm_ipinum);
1385                 }
1386         }
1387         vcpu_unlock(vcpu);
1388 }
1389
1390 struct vmspace *
1391 vm_get_vmspace(struct vm *vm)
1392 {
1393
1394         return (vm->vmspace);
1395 }
1396
/*
 * Translate a local APIC id into a vcpu id.
 *
 * 'vm' is unused and 'apicid' is returned unchanged.
 */
int
vm_apicid2vcpuid(struct vm *vm, int apicid)
{
	int vcpuid;

	/*
	 * XXX apic id is assumed to be numerically identical to vcpu id
	 */
	vcpuid = apicid;

	return (vcpuid);
}