/*-
 * SPDX-License-Identifier: BSD-2-Clause-FreeBSD
 *
 * Copyright (c) 2011 NetApp, Inc.
 * All rights reserved.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 *
 * THIS SOFTWARE IS PROVIDED BY NETAPP, INC ``AS IS'' AND
 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
 * ARE DISCLAIMED. IN NO EVENT SHALL NETAPP, INC OR CONTRIBUTORS BE LIABLE
 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
 * SUCH DAMAGE.
 */
#include <sys/cdefs.h>
__FBSDID("$FreeBSD$");

#include <sys/param.h>
#include <sys/sysctl.h>
#include <sys/ioctl.h>
#include <sys/linker.h>
#include <sys/mman.h>
#include <sys/module.h>
#include <sys/_iovec.h>
#include <sys/cpuset.h>

#include <x86/segments.h>
#include <machine/specialreg.h>

#include <errno.h>
#include <stdio.h>
#include <stdlib.h>
#include <assert.h>
#include <string.h>
#include <fcntl.h>
#include <unistd.h>

#include <libutil.h>

#include <machine/vmm.h>
#include <machine/vmm_dev.h>

#include "vmmapi.h"
#define	MB	(1024 * 1024UL)
#define	GB	(1024 * 1024 * 1024UL)

/*
 * Size of the guard region before and after the virtual address space
 * mapping the guest physical memory. This must be a multiple of the
 * superpage size for performance reasons.
 */
#define	VM_MMAP_GUARD_SIZE	(4 * MB)

#define	PROT_RW		(PROT_READ | PROT_WRITE)
#define	PROT_ALL	(PROT_READ | PROT_WRITE | PROT_EXEC)

struct vmctx {
	int	fd;
	uint32_t lowmem_limit;
	int	memflags;
	size_t	lowmem;
	size_t	highmem;
	char	*baseaddr;
	char	*name;
};

#define	CREATE(x)  sysctlbyname("hw.vmm.create", NULL, NULL, (x), strlen((x)))
#define	DESTROY(x) sysctlbyname("hw.vmm.destroy", NULL, NULL, (x), strlen((x)))
static int
vm_device_open(const char *name)
{
	int fd, len;
	char *vmfile;

	len = strlen("/dev/vmm/") + strlen(name) + 1;
	vmfile = malloc(len);
	assert(vmfile != NULL);
	snprintf(vmfile, len, "/dev/vmm/%s", name);

	/* Open the device file */
	fd = open(vmfile, O_RDWR, 0);

	free(vmfile);
	return (fd);
}
int
vm_create(const char *name)
{
	/* Try to load vmm(4) module before creating a guest. */
	if (modfind("vmm") < 0)
		kldload("vmm");
	return (CREATE((char *)name));
}
struct vmctx *
vm_open(const char *name)
{
	struct vmctx *vm;

	vm = malloc(sizeof(struct vmctx) + strlen(name) + 1);
	assert(vm != NULL);

	vm->fd = -1;
	vm->memflags = 0;
	vm->lowmem_limit = 3 * GB;
	vm->name = (char *)(vm + 1);
	strcpy(vm->name, name);

	if ((vm->fd = vm_device_open(vm->name)) < 0)
		goto err;

	return (vm);
err:
	vm_destroy(vm);
	return (NULL);
}
void
vm_destroy(struct vmctx *vm)
{
	assert(vm != NULL);

	if (vm->fd >= 0)
		close(vm->fd);
	DESTROY(vm->name);

	free(vm);
}
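/*
 * Usage sketch (illustrative, not part of the library): create, open and
 * later tear down a guest.  The guest name "example" and the use of err(3)
 * are assumptions made for this example only.
 *
 *	struct vmctx *ctx;
 *
 *	if (vm_create("example") != 0)
 *		err(1, "vm_create");
 *	if ((ctx = vm_open("example")) == NULL)
 *		err(1, "vm_open");
 *	... configure memory and vcpus, run the guest ...
 *	vm_destroy(ctx);
 */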
int
vm_parse_memsize(const char *optarg, size_t *ret_memsize)
{
	char *endptr;
	size_t optval;
	int error;

	optval = strtoul(optarg, &endptr, 0);
	if (*optarg != '\0' && *endptr == '\0') {
		/*
		 * For the sake of backward compatibility if the memory size
		 * specified on the command line is less than a megabyte then
		 * it is interpreted as being in units of MB.
		 */
		if (optval < MB)
			optval *= MB;
		*ret_memsize = optval;
		error = 0;
	} else
		error = expand_number(optarg, ret_memsize);

	return (error);
}
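/*
 * Examples of the backward-compatible parsing above (illustrative only):
 *
 *	size_t memsize;
 *
 *	vm_parse_memsize("256", &memsize);	   256 < 1MB, so 256 MB
 *	vm_parse_memsize("2g", &memsize);	   via expand_number(3): 2 GB
 *	vm_parse_memsize("1073741824", &memsize);  >= 1MB, taken as bytes: 1 GB
 */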
uint32_t
vm_get_lowmem_limit(struct vmctx *ctx)
{

	return (ctx->lowmem_limit);
}

void
vm_set_lowmem_limit(struct vmctx *ctx, uint32_t limit)
{

	ctx->lowmem_limit = limit;
}

void
vm_set_memflags(struct vmctx *ctx, int flags)
{

	ctx->memflags = flags;
}

int
vm_get_memflags(struct vmctx *ctx)
{

	return (ctx->memflags);
}
/*
 * Map segment 'segid' starting at 'off' into guest address range [gpa,gpa+len).
 */
int
vm_mmap_memseg(struct vmctx *ctx, vm_paddr_t gpa, int segid, vm_ooffset_t off,
    size_t len, int prot)
{
	struct vm_memmap memmap;
	int error, flags;

	memmap.gpa = gpa;
	memmap.segid = segid;
	memmap.segoff = off;
	memmap.len = len;
	memmap.prot = prot;
	memmap.flags = 0;

	if (ctx->memflags & VM_MEM_F_WIRED)
		memmap.flags |= VM_MEMMAP_F_WIRED;

	/*
	 * If this mapping already exists then don't create it again. This
	 * is the common case for SYSMEM mappings created by bhyveload(8).
	 */
	error = vm_mmap_getnext(ctx, &gpa, &segid, &off, &len, &prot, &flags);
	if (error == 0 && gpa == memmap.gpa) {
		if (segid != memmap.segid || off != memmap.segoff ||
		    prot != memmap.prot || flags != memmap.flags) {
			errno = EEXIST;
			return (-1);
		} else {
			return (0);
		}
	}

	error = ioctl(ctx->fd, VM_MMAP_MEMSEG, &memmap);
	return (error);
}
int
vm_mmap_getnext(struct vmctx *ctx, vm_paddr_t *gpa, int *segid,
    vm_ooffset_t *segoff, size_t *len, int *prot, int *flags)
{
	struct vm_memmap memmap;
	int error;

	bzero(&memmap, sizeof(struct vm_memmap));
	memmap.gpa = *gpa;
	error = ioctl(ctx->fd, VM_MMAP_GETNEXT, &memmap);
	if (error == 0) {
		*gpa = memmap.gpa;
		*segid = memmap.segid;
		*segoff = memmap.segoff;
		*len = memmap.len;
		*prot = memmap.prot;
		*flags = memmap.flags;
	}
	return (error);
}
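/*
 * A minimal sketch of walking the guest memory map with vm_mmap_getnext():
 * start at gpa 0 and advance past each mapping until the ioctl reports
 * that no further mapping exists.  Variable names are assumptions for
 * the example.
 *
 *	vm_paddr_t gpa;
 *	vm_ooffset_t segoff;
 *	size_t maplen;
 *	int segid, prot, flags;
 *
 *	gpa = 0;
 *	while (vm_mmap_getnext(ctx, &gpa, &segid, &segoff, &maplen,
 *	    &prot, &flags) == 0) {
 *		printf("gpa %#lx len %#zx segid %d\n", gpa, maplen, segid);
 *		gpa += maplen;
 *	}
 */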
/*
 * Return 0 if the segments are identical and non-zero otherwise.
 *
 * This is slightly complicated by the fact that only device memory segments
 * are named.
 */
static int
cmpseg(size_t len, const char *str, size_t len2, const char *str2)
{

	if (len == len2) {
		if ((!str && !str2) || (str && str2 && !strcmp(str, str2)))
			return (0);
	}
	return (-1);
}
int
vm_alloc_memseg(struct vmctx *ctx, int segid, size_t len, const char *name)
{
	struct vm_memseg memseg;
	size_t n;
	int error;

	/*
	 * If the memory segment has already been created then just return.
	 * This is the usual case for the SYSMEM segment created by userspace
	 * loaders like bhyveload(8).
	 */
	error = vm_get_memseg(ctx, segid, &memseg.len, memseg.name,
	    sizeof(memseg.name));
	if (error)
		return (error);

	if (memseg.len != 0) {
		if (cmpseg(len, name, memseg.len, VM_MEMSEG_NAME(&memseg))) {
			errno = EINVAL;
			return (-1);
		} else {
			return (0);
		}
	}

	bzero(&memseg, sizeof(struct vm_memseg));
	memseg.segid = segid;
	memseg.len = len;
	if (name != NULL) {
		n = strlcpy(memseg.name, name, sizeof(memseg.name));
		if (n >= sizeof(memseg.name)) {
			errno = ENAMETOOLONG;
			return (-1);
		}
	}

	error = ioctl(ctx->fd, VM_ALLOC_MEMSEG, &memseg);
	return (error);
}
int
vm_get_memseg(struct vmctx *ctx, int segid, size_t *lenp, char *namebuf,
    size_t bufsize)
{
	struct vm_memseg memseg;
	size_t n;
	int error;

	memseg.segid = segid;
	error = ioctl(ctx->fd, VM_GET_MEMSEG, &memseg);
	if (error == 0) {
		*lenp = memseg.len;
		n = strlcpy(namebuf, memseg.name, bufsize);
		if (n >= bufsize) {
			errno = ENAMETOOLONG;
			error = -1;
		}
	}
	return (error);
}
static int
setup_memory_segment(struct vmctx *ctx, vm_paddr_t gpa, size_t len, char *base)
{
	char *ptr;
	int error, flags;

	/* Map 'len' bytes starting at 'gpa' in the guest address space */
	error = vm_mmap_memseg(ctx, gpa, VM_SYSMEM, gpa, len, PROT_ALL);
	if (error)
		return (error);

	flags = MAP_SHARED | MAP_FIXED;
	if ((ctx->memflags & VM_MEM_F_INCORE) == 0)
		flags |= MAP_NOCORE;

	/* mmap into the process address space on the host */
	ptr = mmap(base + gpa, len, PROT_RW, flags, ctx->fd, gpa);
	if (ptr == MAP_FAILED)
		return (-1);

	return (0);
}
int
vm_setup_memory(struct vmctx *ctx, size_t memsize, enum vm_mmap_style vms)
{
	size_t objsize, len;
	vm_paddr_t gpa;
	char *baseaddr, *ptr;
	int error;

	assert(vms == VM_MMAP_ALL);

	/*
	 * If 'memsize' cannot fit entirely in the 'lowmem' segment then
	 * create another 'highmem' segment above 4GB for the remainder.
	 */
	if (memsize > ctx->lowmem_limit) {
		ctx->lowmem = ctx->lowmem_limit;
		ctx->highmem = memsize - ctx->lowmem_limit;
		objsize = 4*GB + ctx->highmem;
	} else {
		ctx->lowmem = memsize;
		ctx->highmem = 0;
		objsize = ctx->lowmem;
	}

	error = vm_alloc_memseg(ctx, VM_SYSMEM, objsize, NULL);
	if (error)
		return (error);

	/*
	 * Stake out a contiguous region covering the guest physical memory
	 * and the adjoining guard regions.
	 */
	len = VM_MMAP_GUARD_SIZE + objsize + VM_MMAP_GUARD_SIZE;
	ptr = mmap(NULL, len, PROT_NONE, MAP_GUARD | MAP_ALIGNED_SUPER, -1, 0);
	if (ptr == MAP_FAILED)
		return (-1);

	baseaddr = ptr + VM_MMAP_GUARD_SIZE;
	if (ctx->highmem > 0) {
		gpa = 4*GB;
		len = ctx->highmem;
		error = setup_memory_segment(ctx, gpa, len, baseaddr);
		if (error)
			return (error);
	}

	if (ctx->lowmem > 0) {
		gpa = 0;
		len = ctx->lowmem;
		error = setup_memory_segment(ctx, gpa, len, baseaddr);
		if (error)
			return (error);
	}

	ctx->baseaddr = baseaddr;

	return (0);
}
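/*
 * A minimal sketch of giving a guest its system memory.  With the default
 * 3GB lowmem limit, a 4GB request is split into a lowmem segment at gpa 0
 * and a highmem segment starting at 4GB.
 *
 *	if (vm_setup_memory(ctx, 4 * GB, VM_MMAP_ALL) != 0)
 *		err(1, "vm_setup_memory");
 */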
/*
 * Returns a non-NULL pointer if [gaddr, gaddr+len) is entirely contained in
 * the lowmem or highmem regions.
 *
 * In particular return NULL if [gaddr, gaddr+len) falls in the guest MMIO
 * region. The instruction emulation code depends on this behavior.
 */
void *
vm_map_gpa(struct vmctx *ctx, vm_paddr_t gaddr, size_t len)
{

	if (ctx->lowmem > 0) {
		if (gaddr < ctx->lowmem && len <= ctx->lowmem &&
		    gaddr + len <= ctx->lowmem)
			return (ctx->baseaddr + gaddr);
	}

	if (ctx->highmem > 0) {
		if (gaddr >= 4*GB) {
			if (gaddr < 4*GB + ctx->highmem &&
			    len <= ctx->highmem &&
			    gaddr + len <= 4*GB + ctx->highmem)
				return (ctx->baseaddr + gaddr);
		}
	}

	return (NULL);
}
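/*
 * Example (illustrative): translate a guest-physical address to a host
 * pointer before touching guest memory directly.  A NULL return means the
 * range is not backed by lowmem or highmem (e.g. it falls in MMIO space).
 *
 *	char *p;
 *
 *	if ((p = vm_map_gpa(ctx, 0x100000, 4096)) != NULL)
 *		memset(p, 0, 4096);
 */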
size_t
vm_get_lowmem_size(struct vmctx *ctx)
{

	return (ctx->lowmem);
}

size_t
vm_get_highmem_size(struct vmctx *ctx)
{

	return (ctx->highmem);
}
void *
vm_create_devmem(struct vmctx *ctx, int segid, const char *name, size_t len)
{
	char pathname[MAXPATHLEN];
	size_t len2;
	char *base, *ptr;
	int fd, error, flags;

	fd = -1;
	ptr = MAP_FAILED;
	if (name == NULL || strlen(name) == 0) {
		errno = EINVAL;
		goto done;
	}

	error = vm_alloc_memseg(ctx, segid, len, name);
	if (error)
		goto done;

	strlcpy(pathname, "/dev/vmm.io/", sizeof(pathname));
	strlcat(pathname, ctx->name, sizeof(pathname));
	strlcat(pathname, ".", sizeof(pathname));
	strlcat(pathname, name, sizeof(pathname));

	fd = open(pathname, O_RDWR);
	if (fd < 0)
		goto done;

	/*
	 * Stake out a contiguous region covering the device memory and the
	 * adjoining guard regions.
	 */
	len2 = VM_MMAP_GUARD_SIZE + len + VM_MMAP_GUARD_SIZE;
	base = mmap(NULL, len2, PROT_NONE, MAP_GUARD | MAP_ALIGNED_SUPER, -1,
	    0);
	if (base == MAP_FAILED)
		goto done;

	flags = MAP_SHARED | MAP_FIXED;
	if ((ctx->memflags & VM_MEM_F_INCORE) == 0)
		flags |= MAP_NOCORE;

	/* mmap the devmem region in the host address space */
	ptr = mmap(base + VM_MMAP_GUARD_SIZE, len, PROT_RW, flags, fd, 0);
done:
	if (fd >= 0)
		close(fd);
	return (ptr);
}
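/*
 * A usage sketch, with the segment id, name and size chosen for the
 * example: allocate a 16MB device-memory segment (e.g. a framebuffer)
 * and obtain a host mapping of it.  Mapping it into the guest address
 * space is a separate vm_mmap_memseg() call.
 *
 *	void *fb;
 *
 *	fb = vm_create_devmem(ctx, VM_FRAMEBUFFER, "framebuf", 16 * MB);
 *	if (fb == MAP_FAILED)
 *		err(1, "vm_create_devmem");
 */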
int
vm_set_desc(struct vmctx *ctx, int vcpu, int reg,
    uint64_t base, uint32_t limit, uint32_t access)
{
	int error;
	struct vm_seg_desc vmsegdesc;

	bzero(&vmsegdesc, sizeof(vmsegdesc));
	vmsegdesc.cpuid = vcpu;
	vmsegdesc.regnum = reg;
	vmsegdesc.desc.base = base;
	vmsegdesc.desc.limit = limit;
	vmsegdesc.desc.access = access;

	error = ioctl(ctx->fd, VM_SET_SEGMENT_DESCRIPTOR, &vmsegdesc);
	return (error);
}

int
vm_get_desc(struct vmctx *ctx, int vcpu, int reg,
    uint64_t *base, uint32_t *limit, uint32_t *access)
{
	int error;
	struct vm_seg_desc vmsegdesc;

	bzero(&vmsegdesc, sizeof(vmsegdesc));
	vmsegdesc.cpuid = vcpu;
	vmsegdesc.regnum = reg;

	error = ioctl(ctx->fd, VM_GET_SEGMENT_DESCRIPTOR, &vmsegdesc);
	if (error == 0) {
		*base = vmsegdesc.desc.base;
		*limit = vmsegdesc.desc.limit;
		*access = vmsegdesc.desc.access;
	}
	return (error);
}
int
vm_get_seg_desc(struct vmctx *ctx, int vcpu, int reg, struct seg_desc *seg_desc)
{
	int error;

	error = vm_get_desc(ctx, vcpu, reg, &seg_desc->base, &seg_desc->limit,
	    &seg_desc->access);
	return (error);
}

int
vm_set_register(struct vmctx *ctx, int vcpu, int reg, uint64_t val)
{
	int error;
	struct vm_register vmreg;

	bzero(&vmreg, sizeof(vmreg));
	vmreg.cpuid = vcpu;
	vmreg.regnum = reg;
	vmreg.regval = val;

	error = ioctl(ctx->fd, VM_SET_REGISTER, &vmreg);
	return (error);
}

int
vm_get_register(struct vmctx *ctx, int vcpu, int reg, uint64_t *ret_val)
{
	int error;
	struct vm_register vmreg;

	bzero(&vmreg, sizeof(vmreg));
	vmreg.cpuid = vcpu;
	vmreg.regnum = reg;

	error = ioctl(ctx->fd, VM_GET_REGISTER, &vmreg);
	*ret_val = vmreg.regval;
	return (error);
}
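/*
 * Example (illustrative only): read, modify and write back a guest
 * register on vcpu 0.
 *
 *	uint64_t rax;
 *
 *	if (vm_get_register(ctx, 0, VM_REG_GUEST_RAX, &rax) != 0)
 *		err(1, "vm_get_register");
 *	if (vm_set_register(ctx, 0, VM_REG_GUEST_RAX, rax | 0x1) != 0)
 *		err(1, "vm_set_register");
 */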
int
vm_set_register_set(struct vmctx *ctx, int vcpu, unsigned int count,
    const int *regnums, uint64_t *regvals)
{
	int error;
	struct vm_register_set vmregset;

	bzero(&vmregset, sizeof(vmregset));
	vmregset.cpuid = vcpu;
	vmregset.count = count;
	vmregset.regnums = regnums;
	vmregset.regvals = regvals;

	error = ioctl(ctx->fd, VM_SET_REGISTER_SET, &vmregset);
	return (error);
}

int
vm_get_register_set(struct vmctx *ctx, int vcpu, unsigned int count,
    const int *regnums, uint64_t *regvals)
{
	int error;
	struct vm_register_set vmregset;

	bzero(&vmregset, sizeof(vmregset));
	vmregset.cpuid = vcpu;
	vmregset.count = count;
	vmregset.regnums = regnums;
	vmregset.regvals = regvals;

	error = ioctl(ctx->fd, VM_GET_REGISTER_SET, &vmregset);
	return (error);
}
int
vm_run(struct vmctx *ctx, int vcpu, struct vm_exit *vmexit)
{
	int error;
	struct vm_run vmrun;

	bzero(&vmrun, sizeof(vmrun));
	vmrun.cpuid = vcpu;

	error = ioctl(ctx->fd, VM_RUN, &vmrun);
	bcopy(&vmrun.vm_exit, vmexit, sizeof(struct vm_exit));
	return (error);
}
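/*
 * The canonical caller is a per-vcpu loop in a hypervisor process such as
 * bhyve(8).  A minimal sketch of such a loop (exit handling elided, names
 * assumed):
 *
 *	struct vm_exit vmexit;
 *
 *	for (;;) {
 *		if (vm_run(ctx, vcpu, &vmexit) != 0)
 *			err(1, "vm_run");
 *		switch (vmexit.exitcode) {
 *		case VM_EXITCODE_HLT:
 *			... handle the exit, then re-enter the guest ...
 *			break;
 *		default:
 *			break;
 *		}
 *	}
 */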
int
vm_suspend(struct vmctx *ctx, enum vm_suspend_how how)
{
	struct vm_suspend vmsuspend;

	bzero(&vmsuspend, sizeof(vmsuspend));
	vmsuspend.how = how;
	return (ioctl(ctx->fd, VM_SUSPEND, &vmsuspend));
}

int
vm_reinit(struct vmctx *ctx)
{

	return (ioctl(ctx->fd, VM_REINIT, 0));
}
int
vm_inject_exception(struct vmctx *ctx, int vcpu, int vector, int errcode_valid,
    uint32_t errcode, int restart_instruction)
{
	struct vm_exception exc;

	exc.cpuid = vcpu;
	exc.vector = vector;
	exc.error_code = errcode;
	exc.error_code_valid = errcode_valid;
	exc.restart_instruction = restart_instruction;

	return (ioctl(ctx->fd, VM_INJECT_EXCEPTION, &exc));
}
int
vm_apicid2vcpu(struct vmctx *ctx, int apicid)
{
	/*
	 * The apic id associated with the 'vcpu' has the same numerical value
	 * as the 'vcpu' itself.
	 */
	return (apicid);
}
int
vm_lapic_irq(struct vmctx *ctx, int vcpu, int vector)
{
	struct vm_lapic_irq vmirq;

	bzero(&vmirq, sizeof(vmirq));
	vmirq.cpuid = vcpu;
	vmirq.vector = vector;

	return (ioctl(ctx->fd, VM_LAPIC_IRQ, &vmirq));
}

int
vm_lapic_local_irq(struct vmctx *ctx, int vcpu, int vector)
{
	struct vm_lapic_irq vmirq;

	bzero(&vmirq, sizeof(vmirq));
	vmirq.cpuid = vcpu;
	vmirq.vector = vector;

	return (ioctl(ctx->fd, VM_LAPIC_LOCAL_IRQ, &vmirq));
}

int
vm_lapic_msi(struct vmctx *ctx, uint64_t addr, uint64_t msg)
{
	struct vm_lapic_msi vmmsi;

	bzero(&vmmsi, sizeof(vmmsi));
	vmmsi.addr = addr;
	vmmsi.msg = msg;

	return (ioctl(ctx->fd, VM_LAPIC_MSI, &vmmsi));
}

int
vm_ioapic_assert_irq(struct vmctx *ctx, int irq)
{
	struct vm_ioapic_irq ioapic_irq;

	bzero(&ioapic_irq, sizeof(struct vm_ioapic_irq));
	ioapic_irq.irq = irq;

	return (ioctl(ctx->fd, VM_IOAPIC_ASSERT_IRQ, &ioapic_irq));
}

int
vm_ioapic_deassert_irq(struct vmctx *ctx, int irq)
{
	struct vm_ioapic_irq ioapic_irq;

	bzero(&ioapic_irq, sizeof(struct vm_ioapic_irq));
	ioapic_irq.irq = irq;

	return (ioctl(ctx->fd, VM_IOAPIC_DEASSERT_IRQ, &ioapic_irq));
}

int
vm_ioapic_pulse_irq(struct vmctx *ctx, int irq)
{
	struct vm_ioapic_irq ioapic_irq;

	bzero(&ioapic_irq, sizeof(struct vm_ioapic_irq));
	ioapic_irq.irq = irq;

	return (ioctl(ctx->fd, VM_IOAPIC_PULSE_IRQ, &ioapic_irq));
}
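/*
 * Example (illustrative): deliver an edge-triggered interrupt on I/O APIC
 * pin 4, as a device model for an ISA UART might do.
 *
 *	if (vm_ioapic_pulse_irq(ctx, 4) != 0)
 *		err(1, "vm_ioapic_pulse_irq");
 */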
int
vm_ioapic_pincount(struct vmctx *ctx, int *pincount)
{

	return (ioctl(ctx->fd, VM_IOAPIC_PINCOUNT, pincount));
}

int
vm_isa_assert_irq(struct vmctx *ctx, int atpic_irq, int ioapic_irq)
{
	struct vm_isa_irq isa_irq;

	bzero(&isa_irq, sizeof(struct vm_isa_irq));
	isa_irq.atpic_irq = atpic_irq;
	isa_irq.ioapic_irq = ioapic_irq;

	return (ioctl(ctx->fd, VM_ISA_ASSERT_IRQ, &isa_irq));
}

int
vm_isa_deassert_irq(struct vmctx *ctx, int atpic_irq, int ioapic_irq)
{
	struct vm_isa_irq isa_irq;

	bzero(&isa_irq, sizeof(struct vm_isa_irq));
	isa_irq.atpic_irq = atpic_irq;
	isa_irq.ioapic_irq = ioapic_irq;

	return (ioctl(ctx->fd, VM_ISA_DEASSERT_IRQ, &isa_irq));
}

int
vm_isa_pulse_irq(struct vmctx *ctx, int atpic_irq, int ioapic_irq)
{
	struct vm_isa_irq isa_irq;

	bzero(&isa_irq, sizeof(struct vm_isa_irq));
	isa_irq.atpic_irq = atpic_irq;
	isa_irq.ioapic_irq = ioapic_irq;

	return (ioctl(ctx->fd, VM_ISA_PULSE_IRQ, &isa_irq));
}

int
vm_isa_set_irq_trigger(struct vmctx *ctx, int atpic_irq,
    enum vm_intr_trigger trigger)
{
	struct vm_isa_irq_trigger isa_irq_trigger;

	bzero(&isa_irq_trigger, sizeof(struct vm_isa_irq_trigger));
	isa_irq_trigger.atpic_irq = atpic_irq;
	isa_irq_trigger.trigger = trigger;

	return (ioctl(ctx->fd, VM_ISA_SET_IRQ_TRIGGER, &isa_irq_trigger));
}
int
vm_inject_nmi(struct vmctx *ctx, int vcpu)
{
	struct vm_nmi vmnmi;

	bzero(&vmnmi, sizeof(vmnmi));
	vmnmi.cpuid = vcpu;

	return (ioctl(ctx->fd, VM_INJECT_NMI, &vmnmi));
}
static const char *capstrmap[] = {
	[VM_CAP_HALT_EXIT]  = "hlt_exit",
	[VM_CAP_MTRAP_EXIT] = "mtrap_exit",
	[VM_CAP_PAUSE_EXIT] = "pause_exit",
	[VM_CAP_UNRESTRICTED_GUEST] = "unrestricted_guest",
	[VM_CAP_ENABLE_INVPCID] = "enable_invpcid",
	[VM_CAP_BPT_EXIT] = "bpt_exit",
};

int
vm_capability_name2type(const char *capname)
{
	int i;

	for (i = 0; i < nitems(capstrmap); i++) {
		if (strcmp(capstrmap[i], capname) == 0)
			return (i);
	}

	return (-1);
}

const char *
vm_capability_type2name(int type)
{
	if (type >= 0 && type < nitems(capstrmap))
		return (capstrmap[type]);

	return (NULL);
}

int
vm_get_capability(struct vmctx *ctx, int vcpu, enum vm_cap_type cap,
    int *retval)
{
	int error;
	struct vm_capability vmcap;

	bzero(&vmcap, sizeof(vmcap));
	vmcap.cpuid = vcpu;
	vmcap.captype = cap;

	error = ioctl(ctx->fd, VM_GET_CAPABILITY, &vmcap);
	*retval = vmcap.capval;
	return (error);
}

int
vm_set_capability(struct vmctx *ctx, int vcpu, enum vm_cap_type cap, int val)
{
	struct vm_capability vmcap;

	bzero(&vmcap, sizeof(vmcap));
	vmcap.cpuid = vcpu;
	vmcap.captype = cap;
	vmcap.capval = val;

	return (ioctl(ctx->fd, VM_SET_CAPABILITY, &vmcap));
}
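/*
 * A minimal sketch: make HLT in the guest trigger a VM exit on vcpu 0.
 * Capabilities may be unsupported by the underlying CPU, so the error
 * must be checked.
 *
 *	if (vm_set_capability(ctx, 0, VM_CAP_HALT_EXIT, 1) != 0)
 *		warnx("VM_CAP_HALT_EXIT not supported");
 */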
int
vm_assign_pptdev(struct vmctx *ctx, int bus, int slot, int func)
{
	struct vm_pptdev pptdev;

	bzero(&pptdev, sizeof(pptdev));
	pptdev.bus = bus;
	pptdev.slot = slot;
	pptdev.func = func;

	return (ioctl(ctx->fd, VM_BIND_PPTDEV, &pptdev));
}

int
vm_unassign_pptdev(struct vmctx *ctx, int bus, int slot, int func)
{
	struct vm_pptdev pptdev;

	bzero(&pptdev, sizeof(pptdev));
	pptdev.bus = bus;
	pptdev.slot = slot;
	pptdev.func = func;

	return (ioctl(ctx->fd, VM_UNBIND_PPTDEV, &pptdev));
}

int
vm_map_pptdev_mmio(struct vmctx *ctx, int bus, int slot, int func,
    vm_paddr_t gpa, size_t len, vm_paddr_t hpa)
{
	struct vm_pptdev_mmio pptmmio;

	bzero(&pptmmio, sizeof(pptmmio));
	pptmmio.bus = bus;
	pptmmio.slot = slot;
	pptmmio.func = func;
	pptmmio.gpa = gpa;
	pptmmio.len = len;
	pptmmio.hpa = hpa;

	return (ioctl(ctx->fd, VM_MAP_PPTDEV_MMIO, &pptmmio));
}

int
vm_setup_pptdev_msi(struct vmctx *ctx, int vcpu, int bus, int slot, int func,
    uint64_t addr, uint64_t msg, int numvec)
{
	struct vm_pptdev_msi pptmsi;

	bzero(&pptmsi, sizeof(pptmsi));
	pptmsi.vcpu = vcpu;
	pptmsi.bus = bus;
	pptmsi.slot = slot;
	pptmsi.func = func;
	pptmsi.msg = msg;
	pptmsi.addr = addr;
	pptmsi.numvec = numvec;

	return (ioctl(ctx->fd, VM_PPTDEV_MSI, &pptmsi));
}

int
vm_setup_pptdev_msix(struct vmctx *ctx, int vcpu, int bus, int slot, int func,
    int idx, uint64_t addr, uint64_t msg, uint32_t vector_control)
{
	struct vm_pptdev_msix pptmsix;

	bzero(&pptmsix, sizeof(pptmsix));
	pptmsix.vcpu = vcpu;
	pptmsix.bus = bus;
	pptmsix.slot = slot;
	pptmsix.func = func;
	pptmsix.idx = idx;
	pptmsix.msg = msg;
	pptmsix.addr = addr;
	pptmsix.vector_control = vector_control;

	return (ioctl(ctx->fd, VM_PPTDEV_MSIX, &pptmsix));
}
uint64_t *
vm_get_stats(struct vmctx *ctx, int vcpu, struct timeval *ret_tv,
    int *ret_entries)
{
	int error;
	static struct vm_stats vmstats;

	vmstats.cpuid = vcpu;

	error = ioctl(ctx->fd, VM_STATS, &vmstats);
	if (error == 0) {
		if (ret_entries)
			*ret_entries = vmstats.num_entries;
		if (ret_tv)
			*ret_tv = vmstats.tv;
		return (vmstats.statbuf);
	} else
		return (NULL);
}

const char *
vm_get_stat_desc(struct vmctx *ctx, int index)
{
	static struct vm_stat_desc statdesc;

	statdesc.index = index;
	if (ioctl(ctx->fd, VM_STAT_DESC, &statdesc) == 0)
		return (statdesc.desc);
	else
		return (NULL);
}
int
vm_get_x2apic_state(struct vmctx *ctx, int vcpu, enum x2apic_state *state)
{
	int error;
	struct vm_x2apic x2apic;

	bzero(&x2apic, sizeof(x2apic));
	x2apic.cpuid = vcpu;

	error = ioctl(ctx->fd, VM_GET_X2APIC_STATE, &x2apic);
	*state = x2apic.state;
	return (error);
}

int
vm_set_x2apic_state(struct vmctx *ctx, int vcpu, enum x2apic_state state)
{
	int error;
	struct vm_x2apic x2apic;

	bzero(&x2apic, sizeof(x2apic));
	x2apic.cpuid = vcpu;
	x2apic.state = state;

	error = ioctl(ctx->fd, VM_SET_X2APIC_STATE, &x2apic);

	return (error);
}
/*
 * From Intel Vol 3a:
 * Table 9-1. IA-32 Processor States Following Power-up, Reset or INIT
 */
int
vcpu_reset(struct vmctx *vmctx, int vcpu)
{
	int error;
	uint64_t rflags, rip, cr0, cr4, zero, desc_base, rdx;
	uint32_t desc_access, desc_limit;
	uint16_t sel;

	zero = 0;

	rflags = 0x2;
	error = vm_set_register(vmctx, vcpu, VM_REG_GUEST_RFLAGS, rflags);
	if (error)
		goto done;

	rip = 0xfff0;
	if ((error = vm_set_register(vmctx, vcpu, VM_REG_GUEST_RIP, rip)) != 0)
		goto done;

	cr0 = CR0_NE;
	if ((error = vm_set_register(vmctx, vcpu, VM_REG_GUEST_CR0, cr0)) != 0)
		goto done;

	if ((error = vm_set_register(vmctx, vcpu, VM_REG_GUEST_CR3, zero)) != 0)
		goto done;

	cr4 = 0;
	if ((error = vm_set_register(vmctx, vcpu, VM_REG_GUEST_CR4, cr4)) != 0)
		goto done;

	/*
	 * CS: present, r/w, accessed, 16-bit, byte granularity, usable
	 */
	desc_base = 0xffff0000;
	desc_limit = 0xffff;
	desc_access = 0x0093;
	error = vm_set_desc(vmctx, vcpu, VM_REG_GUEST_CS,
	    desc_base, desc_limit, desc_access);
	if (error)
		goto done;

	sel = 0xf000;
	if ((error = vm_set_register(vmctx, vcpu, VM_REG_GUEST_CS, sel)) != 0)
		goto done;

	/*
	 * SS,DS,ES,FS,GS: present, r/w, accessed, 16-bit, byte granularity
	 */
	desc_base = 0;
	desc_limit = 0xffff;
	desc_access = 0x0093;
	error = vm_set_desc(vmctx, vcpu, VM_REG_GUEST_SS,
	    desc_base, desc_limit, desc_access);
	if (error)
		goto done;

	error = vm_set_desc(vmctx, vcpu, VM_REG_GUEST_DS,
	    desc_base, desc_limit, desc_access);
	if (error)
		goto done;

	error = vm_set_desc(vmctx, vcpu, VM_REG_GUEST_ES,
	    desc_base, desc_limit, desc_access);
	if (error)
		goto done;

	error = vm_set_desc(vmctx, vcpu, VM_REG_GUEST_FS,
	    desc_base, desc_limit, desc_access);
	if (error)
		goto done;

	error = vm_set_desc(vmctx, vcpu, VM_REG_GUEST_GS,
	    desc_base, desc_limit, desc_access);
	if (error)
		goto done;

	sel = 0;
	if ((error = vm_set_register(vmctx, vcpu, VM_REG_GUEST_SS, sel)) != 0)
		goto done;
	if ((error = vm_set_register(vmctx, vcpu, VM_REG_GUEST_DS, sel)) != 0)
		goto done;
	if ((error = vm_set_register(vmctx, vcpu, VM_REG_GUEST_ES, sel)) != 0)
		goto done;
	if ((error = vm_set_register(vmctx, vcpu, VM_REG_GUEST_FS, sel)) != 0)
		goto done;
	if ((error = vm_set_register(vmctx, vcpu, VM_REG_GUEST_GS, sel)) != 0)
		goto done;

	/* General purpose registers */
	rdx = 0xf00;
	if ((error = vm_set_register(vmctx, vcpu, VM_REG_GUEST_RAX, zero)) != 0)
		goto done;
	if ((error = vm_set_register(vmctx, vcpu, VM_REG_GUEST_RBX, zero)) != 0)
		goto done;
	if ((error = vm_set_register(vmctx, vcpu, VM_REG_GUEST_RCX, zero)) != 0)
		goto done;
	if ((error = vm_set_register(vmctx, vcpu, VM_REG_GUEST_RDX, rdx)) != 0)
		goto done;
	if ((error = vm_set_register(vmctx, vcpu, VM_REG_GUEST_RSI, zero)) != 0)
		goto done;
	if ((error = vm_set_register(vmctx, vcpu, VM_REG_GUEST_RDI, zero)) != 0)
		goto done;
	if ((error = vm_set_register(vmctx, vcpu, VM_REG_GUEST_RBP, zero)) != 0)
		goto done;
	if ((error = vm_set_register(vmctx, vcpu, VM_REG_GUEST_RSP, zero)) != 0)
		goto done;

	/* GDTR, IDTR */
	desc_base = 0;
	desc_limit = 0xffff;
	desc_access = 0;
	error = vm_set_desc(vmctx, vcpu, VM_REG_GUEST_GDTR,
	    desc_base, desc_limit, desc_access);
	if (error != 0)
		goto done;

	error = vm_set_desc(vmctx, vcpu, VM_REG_GUEST_IDTR,
	    desc_base, desc_limit, desc_access);
	if (error != 0)
		goto done;

	/* TR */
	desc_base = 0;
	desc_limit = 0xffff;
	desc_access = 0x0000008b;
	error = vm_set_desc(vmctx, vcpu, VM_REG_GUEST_TR, 0, 0, desc_access);
	if (error)
		goto done;

	sel = 0;
	if ((error = vm_set_register(vmctx, vcpu, VM_REG_GUEST_TR, sel)) != 0)
		goto done;

	/* LDTR */
	desc_base = 0;
	desc_limit = 0xffff;
	desc_access = 0x00000082;
	error = vm_set_desc(vmctx, vcpu, VM_REG_GUEST_LDTR, desc_base,
	    desc_limit, desc_access);
	if (error)
		goto done;

	if ((error = vm_set_register(vmctx, vcpu, VM_REG_GUEST_LDTR, 0)) != 0)
		goto done;

	/* XXX cr2, debug registers */

	error = 0;
done:
	return (error);
}
int
vm_get_gpa_pmap(struct vmctx *ctx, uint64_t gpa, uint64_t *pte, int *num)
{
	int error, i;
	struct vm_gpa_pte gpapte;

	bzero(&gpapte, sizeof(gpapte));
	gpapte.gpa = gpa;

	error = ioctl(ctx->fd, VM_GET_GPA_PMAP, &gpapte);

	if (error == 0) {
		*num = gpapte.ptenum;
		for (i = 0; i < gpapte.ptenum; i++)
			pte[i] = gpapte.pte[i];
	}

	return (error);
}
int
vm_get_hpet_capabilities(struct vmctx *ctx, uint32_t *capabilities)
{
	int error;
	struct vm_hpet_cap cap;

	bzero(&cap, sizeof(struct vm_hpet_cap));
	error = ioctl(ctx->fd, VM_GET_HPET_CAPABILITIES, &cap);
	if (capabilities != NULL)
		*capabilities = cap.capabilities;
	return (error);
}
int
vm_gla2gpa(struct vmctx *ctx, int vcpu, struct vm_guest_paging *paging,
    uint64_t gla, int prot, uint64_t *gpa, int *fault)
{
	struct vm_gla2gpa gg;
	int error;

	bzero(&gg, sizeof(struct vm_gla2gpa));
	gg.vcpuid = vcpu;
	gg.prot = prot;
	gg.gla = gla;
	gg.paging = *paging;

	error = ioctl(ctx->fd, VM_GLA2GPA, &gg);
	if (error == 0) {
		*fault = gg.fault;
		*gpa = gg.gpa;
	}
	return (error);
}

int
vm_gla2gpa_nofault(struct vmctx *ctx, int vcpu, struct vm_guest_paging *paging,
    uint64_t gla, int prot, uint64_t *gpa, int *fault)
{
	struct vm_gla2gpa gg;
	int error;

	bzero(&gg, sizeof(struct vm_gla2gpa));
	gg.vcpuid = vcpu;
	gg.prot = prot;
	gg.gla = gla;
	gg.paging = *paging;

	error = ioctl(ctx->fd, VM_GLA2GPA_NOFAULT, &gg);
	if (error == 0) {
		*fault = gg.fault;
		*gpa = gg.gpa;
	}
	return (error);
}
#define	min(a,b)	(((a) < (b)) ? (a) : (b))

int
vm_copy_setup(struct vmctx *ctx, int vcpu, struct vm_guest_paging *paging,
    uint64_t gla, size_t len, int prot, struct iovec *iov, int iovcnt,
    int *fault)
{
	void *va;
	uint64_t gpa;
	int error, i, n, off;

	for (i = 0; i < iovcnt; i++) {
		iov[i].iov_base = 0;
		iov[i].iov_len = 0;
	}

	while (len) {
		assert(iovcnt > 0);
		error = vm_gla2gpa(ctx, vcpu, paging, gla, prot, &gpa, fault);
		if (error || *fault)
			return (error);

		off = gpa & PAGE_MASK;
		n = min(len, PAGE_SIZE - off);

		va = vm_map_gpa(ctx, gpa, n);
		if (va == NULL)
			return (EFAULT);

		iov->iov_base = va;
		iov->iov_len = n;
		iov++;
		iovcnt--;

		gla += n;
		len -= n;
	}
	return (0);
}
void
vm_copy_teardown(struct vmctx *ctx, int vcpu, struct iovec *iov, int iovcnt)
{

	return;
}

void
vm_copyin(struct vmctx *ctx, int vcpu, struct iovec *iov, void *vp, size_t len)
{
	const char *src;
	char *dst;
	size_t n;

	dst = vp;
	while (len) {
		assert(iov->iov_len);
		n = min(len, iov->iov_len);
		src = iov->iov_base;
		bcopy(src, dst, n);

		iov++;
		dst += n;
		len -= n;
	}
}

void
vm_copyout(struct vmctx *ctx, int vcpu, const void *vp, struct iovec *iov,
    size_t len)
{
	const char *src;
	char *dst;
	size_t n;

	src = vp;
	while (len) {
		assert(iov->iov_len);
		n = min(len, iov->iov_len);
		dst = iov->iov_base;
		bcopy(src, dst, n);

		iov++;
		src += n;
		len -= n;
	}
}
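/*
 * A sketch of the intended calling sequence (buffer size and variable
 * names are assumptions): translate a guest-linear range into host iovecs
 * with vm_copy_setup(), then copy through them with vm_copyin().
 *
 *	struct iovec iov[2];
 *	char buf[128];
 *	int error, fault;
 *
 *	error = vm_copy_setup(ctx, vcpu, &paging, gla, sizeof(buf),
 *	    PROT_READ, iov, nitems(iov), &fault);
 *	if (error == 0 && !fault)
 *		vm_copyin(ctx, vcpu, iov, buf, sizeof(buf));
 */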
static int
vm_get_cpus(struct vmctx *ctx, int which, cpuset_t *cpus)
{
	struct vm_cpuset vm_cpuset;
	int error;

	bzero(&vm_cpuset, sizeof(struct vm_cpuset));
	vm_cpuset.which = which;
	vm_cpuset.cpusetsize = sizeof(cpuset_t);
	vm_cpuset.cpus = cpus;

	error = ioctl(ctx->fd, VM_GET_CPUS, &vm_cpuset);
	return (error);
}

int
vm_active_cpus(struct vmctx *ctx, cpuset_t *cpus)
{

	return (vm_get_cpus(ctx, VM_ACTIVE_CPUS, cpus));
}

int
vm_suspended_cpus(struct vmctx *ctx, cpuset_t *cpus)
{

	return (vm_get_cpus(ctx, VM_SUSPENDED_CPUS, cpus));
}

int
vm_debug_cpus(struct vmctx *ctx, cpuset_t *cpus)
{

	return (vm_get_cpus(ctx, VM_DEBUG_CPUS, cpus));
}

int
vm_activate_cpu(struct vmctx *ctx, int vcpu)
{
	struct vm_activate_cpu ac;
	int error;

	bzero(&ac, sizeof(struct vm_activate_cpu));
	ac.vcpuid = vcpu;
	error = ioctl(ctx->fd, VM_ACTIVATE_CPU, &ac);
	return (error);
}

int
vm_suspend_cpu(struct vmctx *ctx, int vcpu)
{
	struct vm_activate_cpu ac;
	int error;

	bzero(&ac, sizeof(struct vm_activate_cpu));
	ac.vcpuid = vcpu;
	error = ioctl(ctx->fd, VM_SUSPEND_CPU, &ac);
	return (error);
}

int
vm_resume_cpu(struct vmctx *ctx, int vcpu)
{
	struct vm_activate_cpu ac;
	int error;

	bzero(&ac, sizeof(struct vm_activate_cpu));
	ac.vcpuid = vcpu;
	error = ioctl(ctx->fd, VM_RESUME_CPU, &ac);
	return (error);
}
int
vm_get_intinfo(struct vmctx *ctx, int vcpu, uint64_t *info1, uint64_t *info2)
{
	struct vm_intinfo vmii;
	int error;

	bzero(&vmii, sizeof(struct vm_intinfo));
	vmii.vcpuid = vcpu;
	error = ioctl(ctx->fd, VM_GET_INTINFO, &vmii);
	if (error == 0) {
		*info1 = vmii.info1;
		*info2 = vmii.info2;
	}
	return (error);
}

int
vm_set_intinfo(struct vmctx *ctx, int vcpu, uint64_t info1)
{
	struct vm_intinfo vmii;
	int error;

	bzero(&vmii, sizeof(struct vm_intinfo));
	vmii.vcpuid = vcpu;
	vmii.info1 = info1;
	error = ioctl(ctx->fd, VM_SET_INTINFO, &vmii);
	return (error);
}
int
vm_rtc_write(struct vmctx *ctx, int offset, uint8_t value)
{
	struct vm_rtc_data rtcdata;
	int error;

	bzero(&rtcdata, sizeof(struct vm_rtc_data));
	rtcdata.offset = offset;
	rtcdata.value = value;
	error = ioctl(ctx->fd, VM_RTC_WRITE, &rtcdata);
	return (error);
}

int
vm_rtc_read(struct vmctx *ctx, int offset, uint8_t *retval)
{
	struct vm_rtc_data rtcdata;
	int error;

	bzero(&rtcdata, sizeof(struct vm_rtc_data));
	rtcdata.offset = offset;
	error = ioctl(ctx->fd, VM_RTC_READ, &rtcdata);
	if (error == 0)
		*retval = rtcdata.value;
	return (error);
}

int
vm_rtc_settime(struct vmctx *ctx, time_t secs)
{
	struct vm_rtc_time rtctime;
	int error;

	bzero(&rtctime, sizeof(struct vm_rtc_time));
	rtctime.secs = secs;
	error = ioctl(ctx->fd, VM_RTC_SETTIME, &rtctime);
	return (error);
}

int
vm_rtc_gettime(struct vmctx *ctx, time_t *secs)
{
	struct vm_rtc_time rtctime;
	int error;

	bzero(&rtctime, sizeof(struct vm_rtc_time));
	error = ioctl(ctx->fd, VM_RTC_GETTIME, &rtctime);
	if (error == 0)
		*secs = rtctime.secs;
	return (error);
}
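/*
 * Example (illustrative): initialize the virtual RTC to the current host
 * time.
 *
 *	if (vm_rtc_settime(ctx, time(NULL)) != 0)
 *		err(1, "vm_rtc_settime");
 */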
int
vm_restart_instruction(void *arg, int vcpu)
{
	struct vmctx *ctx = arg;

	return (ioctl(ctx->fd, VM_RESTART_INSTRUCTION, &vcpu));
}
int
vm_set_topology(struct vmctx *ctx,
    uint16_t sockets, uint16_t cores, uint16_t threads, uint16_t maxcpus)
{
	struct vm_cpu_topology topology;

	bzero(&topology, sizeof (struct vm_cpu_topology));
	topology.sockets = sockets;
	topology.cores = cores;
	topology.threads = threads;
	topology.maxcpus = maxcpus;
	return (ioctl(ctx->fd, VM_SET_TOPOLOGY, &topology));
}

int
vm_get_topology(struct vmctx *ctx,
    uint16_t *sockets, uint16_t *cores, uint16_t *threads, uint16_t *maxcpus)
{
	struct vm_cpu_topology topology;
	int error;

	bzero(&topology, sizeof (struct vm_cpu_topology));
	error = ioctl(ctx->fd, VM_GET_TOPOLOGY, &topology);
	if (error == 0) {
		*sockets = topology.sockets;
		*cores = topology.cores;
		*threads = topology.threads;
		*maxcpus = topology.maxcpus;
	}
	return (error);
}
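/*
 * Example (illustrative): a 2-socket, 2-cores-per-socket, 1-thread guest,
 * capped at 16 vcpus.
 *
 *	if (vm_set_topology(ctx, 2, 2, 1, 16) != 0)
 *		err(1, "vm_set_topology");
 */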
int
vm_get_device_fd(struct vmctx *ctx)
{

	return (ctx->fd);
}

const cap_ioctl_t *
vm_get_ioctls(size_t *len)
{
	cap_ioctl_t *cmds;

	/* keep in sync with machine/vmm_dev.h */
	static const cap_ioctl_t vm_ioctl_cmds[] = { VM_RUN, VM_SUSPEND, VM_REINIT,
	    VM_ALLOC_MEMSEG, VM_GET_MEMSEG, VM_MMAP_MEMSEG,
	    VM_MMAP_GETNEXT, VM_SET_REGISTER, VM_GET_REGISTER,
	    VM_SET_SEGMENT_DESCRIPTOR, VM_GET_SEGMENT_DESCRIPTOR,
	    VM_SET_REGISTER_SET, VM_GET_REGISTER_SET,
	    VM_INJECT_EXCEPTION, VM_LAPIC_IRQ, VM_LAPIC_LOCAL_IRQ,
	    VM_LAPIC_MSI, VM_IOAPIC_ASSERT_IRQ, VM_IOAPIC_DEASSERT_IRQ,
	    VM_IOAPIC_PULSE_IRQ, VM_IOAPIC_PINCOUNT, VM_ISA_ASSERT_IRQ,
	    VM_ISA_DEASSERT_IRQ, VM_ISA_PULSE_IRQ, VM_ISA_SET_IRQ_TRIGGER,
	    VM_SET_CAPABILITY, VM_GET_CAPABILITY, VM_BIND_PPTDEV,
	    VM_UNBIND_PPTDEV, VM_MAP_PPTDEV_MMIO, VM_PPTDEV_MSI,
	    VM_PPTDEV_MSIX, VM_INJECT_NMI, VM_STATS, VM_STAT_DESC,
	    VM_SET_X2APIC_STATE, VM_GET_X2APIC_STATE,
	    VM_GET_HPET_CAPABILITIES, VM_GET_GPA_PMAP, VM_GLA2GPA,
	    VM_GLA2GPA_NOFAULT,
	    VM_ACTIVATE_CPU, VM_GET_CPUS, VM_SUSPEND_CPU, VM_RESUME_CPU,
	    VM_SET_INTINFO, VM_GET_INTINFO,
	    VM_RTC_WRITE, VM_RTC_READ, VM_RTC_SETTIME, VM_RTC_GETTIME,
	    VM_RESTART_INSTRUCTION, VM_SET_TOPOLOGY, VM_GET_TOPOLOGY };

	if (len == NULL) {
		cmds = malloc(sizeof(vm_ioctl_cmds));
		if (cmds == NULL)
			return (NULL);
		bcopy(vm_ioctl_cmds, cmds, sizeof(vm_ioctl_cmds));
		return (cmds);
	}

	*len = nitems(vm_ioctl_cmds);
	return (NULL);
}
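/*
 * A sketch of the intended consumer (assumed usage, per cap_ioctls_limit(2)):
 * a caller entering capability mode restricts the vmm device descriptor to
 * the whitelisted ioctls above.  Passing NULL returns a malloc'd copy of the
 * list; passing a length pointer returns only the count.
 *
 *	const cap_ioctl_t *cmds;
 *	size_t ncmds;
 *
 *	vm_get_ioctls(&ncmds);
 *	if ((cmds = vm_get_ioctls(NULL)) == NULL)
 *		err(1, "vm_get_ioctls");
 *	if (cap_ioctls_limit(vm_get_device_fd(ctx), cmds, ncmds) != 0)
 *		err(1, "cap_ioctls_limit");
 */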