From 191ebce7390b7677accbf180066cfb0ecf6a1cb2 Mon Sep 17 00:00:00 2001 From: rgrimes Date: Thu, 23 May 2019 21:23:18 +0000 Subject: [PATCH] MFC: r332298,333712,334199,334216,334219 bhyve cpu topology Approved by: re (gjb), bde/phk (mentor, implicit) --- lib/libvmmapi/vmmapi.c | 34 ++++++++++- lib/libvmmapi/vmmapi.h | 6 ++ sys/amd64/include/vmm.h | 4 ++ sys/amd64/include/vmm_dev.h | 15 +++++ sys/amd64/vmm/vmm.c | 42 +++++++++++++ sys/amd64/vmm/vmm_dev.c | 12 ++++ sys/amd64/vmm/x86.c | 37 ++++++----- usr.sbin/bhyve/bhyve.8 | 38 ++++++++++-- usr.sbin/bhyve/bhyverun.c | 115 ++++++++++++++++++++++++++++++++--- usr.sbin/bhyvectl/bhyvectl.c | 13 +++- 10 files changed, 285 insertions(+), 31 deletions(-) diff --git a/lib/libvmmapi/vmmapi.c b/lib/libvmmapi/vmmapi.c index 4c26642e0b0..fd1b13ac293 100644 --- a/lib/libvmmapi/vmmapi.c +++ b/lib/libvmmapi/vmmapi.c @@ -1416,6 +1416,38 @@ vm_restart_instruction(void *arg, int vcpu) return (ioctl(ctx->fd, VM_RESTART_INSTRUCTION, &vcpu)); } +int +vm_set_topology(struct vmctx *ctx, + uint16_t sockets, uint16_t cores, uint16_t threads, uint16_t maxcpus) +{ + struct vm_cpu_topology topology; + + bzero(&topology, sizeof (struct vm_cpu_topology)); + topology.sockets = sockets; + topology.cores = cores; + topology.threads = threads; + topology.maxcpus = maxcpus; + return (ioctl(ctx->fd, VM_SET_TOPOLOGY, &topology)); +} + +int +vm_get_topology(struct vmctx *ctx, + uint16_t *sockets, uint16_t *cores, uint16_t *threads, uint16_t *maxcpus) +{ + struct vm_cpu_topology topology; + int error; + + bzero(&topology, sizeof (struct vm_cpu_topology)); + error = ioctl(ctx->fd, VM_GET_TOPOLOGY, &topology); + if (error == 0) { + *sockets = topology.sockets; + *cores = topology.cores; + *threads = topology.threads; + *maxcpus = topology.maxcpus; + } + return (error); +} + int vm_get_device_fd(struct vmctx *ctx) { @@ -1443,7 +1475,7 @@ vm_get_ioctls(size_t *len) VM_GET_HPET_CAPABILITIES, VM_GET_GPA_PMAP, VM_GLA2GPA, VM_ACTIVATE_CPU, VM_GET_CPUS, VM_SET_INTINFO, VM_GET_INTINFO, VM_RTC_WRITE, VM_RTC_READ, VM_RTC_SETTIME, VM_RTC_GETTIME, - VM_RESTART_INSTRUCTION }; + VM_RESTART_INSTRUCTION, VM_SET_TOPOLOGY, VM_GET_TOPOLOGY }; if (len == NULL) { cmds = malloc(sizeof(vm_ioctl_cmds)); diff --git a/lib/libvmmapi/vmmapi.h b/lib/libvmmapi/vmmapi.h index df3a81b5aad..85603be9bc0 100644 --- a/lib/libvmmapi/vmmapi.h +++ b/lib/libvmmapi/vmmapi.h @@ -209,6 +209,12 @@ int vm_active_cpus(struct vmctx *ctx, cpuset_t *cpus); int vm_suspended_cpus(struct vmctx *ctx, cpuset_t *cpus); int vm_activate_cpu(struct vmctx *ctx, int vcpu); +/* CPU topology */ +int vm_set_topology(struct vmctx *ctx, uint16_t sockets, uint16_t cores, + uint16_t threads, uint16_t maxcpus); +int vm_get_topology(struct vmctx *ctx, uint16_t *sockets, uint16_t *cores, + uint16_t *threads, uint16_t *maxcpus); + /* * FreeBSD specific APIs */ diff --git a/sys/amd64/include/vmm.h b/sys/amd64/include/vmm.h index 739a40fab02..97df4d40198 100644 --- a/sys/amd64/include/vmm.h +++ b/sys/amd64/include/vmm.h @@ -184,6 +184,10 @@ int vm_create(const char *name, struct vm **retvm); void vm_destroy(struct vm *vm); int vm_reinit(struct vm *vm); const char *vm_name(struct vm *vm); +void vm_get_topology(struct vm *vm, uint16_t *sockets, uint16_t *cores, + uint16_t *threads, uint16_t *maxcpus); +int vm_set_topology(struct vm *vm, uint16_t sockets, uint16_t cores, + uint16_t threads, uint16_t maxcpus); /* * APIs that modify the guest memory map require all vcpus to be frozen. diff --git a/sys/amd64/include/vmm_dev.h b/sys/amd64/include/vmm_dev.h index 1af75a3c065..30160e5066f 100644 --- a/sys/amd64/include/vmm_dev.h +++ b/sys/amd64/include/vmm_dev.h @@ -216,6 +216,13 @@ struct vm_rtc_data { uint8_t value; }; +struct vm_cpu_topology { + uint16_t sockets; + uint16_t cores; + uint16_t threads; + uint16_t maxcpus; +}; + enum { /* general routines */ IOCNUM_ABIVERS = 0, @@ -271,6 +278,10 @@ enum { IOCNUM_GET_X2APIC_STATE = 61, IOCNUM_GET_HPET_CAPABILITIES = 62, + /* CPU Topology */ + IOCNUM_SET_TOPOLOGY = 63, + IOCNUM_GET_TOPOLOGY = 64, + /* legacy interrupt injection */ IOCNUM_ISA_ASSERT_IRQ = 80, IOCNUM_ISA_DEASSERT_IRQ = 81, @@ -360,6 +371,10 @@ enum { _IOWR('v', IOCNUM_GET_X2APIC_STATE, struct vm_x2apic) #define VM_GET_HPET_CAPABILITIES \ _IOR('v', IOCNUM_GET_HPET_CAPABILITIES, struct vm_hpet_cap) +#define VM_SET_TOPOLOGY \ + _IOW('v', IOCNUM_SET_TOPOLOGY, struct vm_cpu_topology) +#define VM_GET_TOPOLOGY \ + _IOR('v', IOCNUM_GET_TOPOLOGY, struct vm_cpu_topology) #define VM_GET_GPA_PMAP \ _IOWR('v', IOCNUM_GET_GPA_PMAP, struct vm_gpa_pte) #define VM_GLA2GPA \ diff --git a/sys/amd64/vmm/vmm.c b/sys/amd64/vmm/vmm.c index 5eda96ce5fd..0e96824e1bf 100644 --- a/sys/amd64/vmm/vmm.c +++ b/sys/amd64/vmm/vmm.c @@ -163,6 +163,11 @@ struct vm { struct vmspace *vmspace; /* (o) guest's address space */ char name[VM_MAX_NAMELEN]; /* (o) virtual machine name */ struct vcpu vcpu[VM_MAXCPU]; /* (i) guest vcpus */ + /* The following describe the vm cpu topology */ + uint16_t sockets; /* (o) num of sockets */ + uint16_t cores; /* (o) num of cores/socket */ + uint16_t threads; /* (o) num of threads/core */ + uint16_t maxcpus; /* (o) max pluggable cpus */ }; static int vmm_initialized; @@ -423,6 +428,12 @@ vm_init(struct vm *vm, bool create) vcpu_init(vm, i, create); } +/* + * The default CPU topology is a single thread per package. + */ +u_int cores_per_package = 1; +u_int threads_per_core = 1; + int vm_create(const char *name, struct vm **retvm) { @@ -448,12 +459,43 @@ vm_create(const char *name, struct vm **retvm) vm->vmspace = vmspace; mtx_init(&vm->rendezvous_mtx, "vm rendezvous lock", 0, MTX_DEF); + vm->sockets = 1; + vm->cores = cores_per_package; /* XXX backwards compatibility */ + vm->threads = threads_per_core; /* XXX backwards compatibility */ + vm->maxcpus = 0; /* XXX not implemented */ + vm_init(vm, true); *retvm = vm; return (0); } +void +vm_get_topology(struct vm *vm, uint16_t *sockets, uint16_t *cores, + uint16_t *threads, uint16_t *maxcpus) +{ + *sockets = vm->sockets; + *cores = vm->cores; + *threads = vm->threads; + *maxcpus = vm->maxcpus; +} + +int +vm_set_topology(struct vm *vm, uint16_t sockets, uint16_t cores, + uint16_t threads, uint16_t maxcpus) +{ + if (maxcpus != 0) + return (EINVAL); /* XXX remove when supported */ + if ((sockets * cores * threads) > VM_MAXCPU) + return (EINVAL); + /* XXX need to check sockets * cores * threads == vCPU, how? */ + vm->sockets = sockets; + vm->cores = cores; + vm->threads = threads; + vm->maxcpus = maxcpus; + return(0); +} + static void vm_cleanup(struct vm *vm, bool destroy) { diff --git a/sys/amd64/vmm/vmm_dev.c b/sys/amd64/vmm/vmm_dev.c index ff1963a162c..9b263cd5cfa 100644 --- a/sys/amd64/vmm/vmm_dev.c +++ b/sys/amd64/vmm/vmm_dev.c @@ -314,6 +314,7 @@ vmmdev_ioctl(struct cdev *cdev, u_long cmd, caddr_t data, int fflag, struct vm_rtc_time *rtctime; struct vm_rtc_data *rtcdata; struct vm_memmap *mm; + struct vm_cpu_topology *topology; sc = vmmdev_lookup2(cdev); if (sc == NULL) @@ -641,6 +642,17 @@ vmmdev_ioctl(struct cdev *cdev, u_long cmd, caddr_t data, int fflag, case VM_RESTART_INSTRUCTION: error = vm_restart_instruction(sc->vm, vcpu); break; + case VM_SET_TOPOLOGY: + topology = (struct vm_cpu_topology *)data; + error = vm_set_topology(sc->vm, topology->sockets, + topology->cores, topology->threads, topology->maxcpus); + break; + case VM_GET_TOPOLOGY: + topology = (struct vm_cpu_topology *)data; + vm_get_topology(sc->vm, &topology->sockets, &topology->cores, + &topology->threads, &topology->maxcpus); + error = 0; + break; default: error = ENOTTY; break; diff --git a/sys/amd64/vmm/x86.c b/sys/amd64/vmm/x86.c index 61d99d502ac..ba1c56eb3b3 100644 --- a/sys/amd64/vmm/x86.c +++ b/sys/amd64/vmm/x86.c @@ -58,16 +58,15 @@ static uint64_t bhyve_xcpuids; SYSCTL_ULONG(_hw_vmm, OID_AUTO, bhyve_xcpuids, CTLFLAG_RW, &bhyve_xcpuids, 0, "Number of times an unknown cpuid leaf was accessed"); -/* - * The default CPU topology is a single thread per package. - */ -static u_int threads_per_core = 1; +#if __FreeBSD_version < 1200060 /* Remove after 11 EOL helps MFCing */ +extern u_int threads_per_core; SYSCTL_UINT(_hw_vmm_topology, OID_AUTO, threads_per_core, CTLFLAG_RDTUN, &threads_per_core, 0, NULL); -static u_int cores_per_package = 1; +extern u_int cores_per_package; SYSCTL_UINT(_hw_vmm_topology, OID_AUTO, cores_per_package, CTLFLAG_RDTUN, &cores_per_package, 0, NULL); +#endif static int cpuid_leaf_b = 1; SYSCTL_INT(_hw_vmm_topology, OID_AUTO, cpuid_leaf_b, CTLFLAG_RDTUN, @@ -93,6 +92,7 @@ x86_emulate_cpuid(struct vm *vm, int vcpu_id, int error, enable_invpcid, level, width, x2apic_id; unsigned int func, regs[4], logical_cpus; enum x2apic_state x2apic_state; + uint16_t cores, maxcpus, sockets, threads; VCPU_CTR2(vm, vcpu_id, "cpuid %#x,%#x", *eax, *ecx); @@ -140,11 +140,11 @@ x86_emulate_cpuid(struct vm *vm, int vcpu_id, * * However this matches the logical cpus as * advertised by leaf 0x1 and will work even - * if the 'threads_per_core' tunable is set - * incorrectly on an AMD host. + * if threads is set incorrectly on an AMD host. */ - logical_cpus = threads_per_core * - cores_per_package; + vm_get_topology(vm, &sockets, &cores, &threads, + &maxcpus); + logical_cpus = threads * cores; regs[2] = logical_cpus - 1; } break; @@ -303,7 +303,9 @@ x86_emulate_cpuid(struct vm *vm, int vcpu_id, */ regs[3] |= (CPUID_MCA | CPUID_MCE | CPUID_MTRR); - logical_cpus = threads_per_core * cores_per_package; + vm_get_topology(vm, &sockets, &cores, &threads, + &maxcpus); + logical_cpus = threads * cores; regs[1] &= ~CPUID_HTT_CORES; regs[1] |= (logical_cpus & 0xff) << 16; regs[3] |= CPUID_HTT; @@ -313,8 +315,10 @@ x86_emulate_cpuid(struct vm *vm, int vcpu_id, cpuid_count(*eax, *ecx, regs); if (regs[0] || regs[1] || regs[2] || regs[3]) { + vm_get_topology(vm, &sockets, &cores, &threads, + &maxcpus); regs[0] &= 0x3ff; - regs[0] |= (cores_per_package - 1) << 26; + regs[0] |= (cores - 1) << 26; /* * Cache topology: * - L1 and L2 are shared only by the logical @@ -322,10 +326,10 @@ x86_emulate_cpuid(struct vm *vm, int vcpu_id, * - L3 and above are shared by all logical * processors in the package. */ - logical_cpus = threads_per_core; + logical_cpus = threads; level = (regs[0] >> 5) & 0x7; if (level >= 3) - logical_cpus *= cores_per_package; + logical_cpus *= cores; regs[0] |= (logical_cpus - 1) << 14; } break; @@ -387,16 +391,17 @@ x86_emulate_cpuid(struct vm *vm, int vcpu_id, /* * Processor topology enumeration */ + vm_get_topology(vm, &sockets, &cores, &threads, + &maxcpus); if (*ecx == 0) { - logical_cpus = threads_per_core; + logical_cpus = threads; width = log2(logical_cpus); level = CPUID_TYPE_SMT; x2apic_id = vcpu_id; } if (*ecx == 1) { - logical_cpus = threads_per_core * - cores_per_package; + logical_cpus = threads * cores; width = log2(logical_cpus); level = CPUID_TYPE_CORE; x2apic_id = vcpu_id; diff --git a/usr.sbin/bhyve/bhyve.8 b/usr.sbin/bhyve/bhyve.8 index 192cee52302..567f852cb83 100644 --- a/usr.sbin/bhyve/bhyve.8 +++ b/usr.sbin/bhyve/bhyve.8 @@ -33,7 +33,16 @@ .Sh SYNOPSIS .Nm .Op Fl abehuwxACHPSWY -.Op Fl c Ar numcpus +.Oo +.Fl c\~ Ns +.Oo +.Op Ar cpus= Ns +.Ar numcpus Ns +.Oc Ns +.Op Ar ,sockets=n Ns +.Op Ar ,cores=n Ns +.Op Ar ,threads=n +.Oc .Op Fl g Ar gdbport .Op Fl l Ar lpcdev Ns Op , Ns Ar conf .Op Fl m Ar memsize Ns Op Ar K|k|M|m|G|g|T|t @@ -77,9 +86,30 @@ Enable a low-level console device supported by kernels compiled with .Cd "device bvmconsole" . This option will be deprecated in a future version. -.It Fl c Ar numcpus -Number of guest virtual CPUs. -The default is 1 and the maximum is 16. +.It Fl c Op Ar setting ... +Number of guest virtual CPUs +and/or the CPU topology. +The default value for each of +.Ar numcpus , +.Ar sockets , +.Ar cores , +and +.Ar threads +is 1. +The current maximum number of guest virtual CPUs is 16. +If +.Ar numcpus +is not specified then it will be calculated from the other arguments. +The topology must be consistent in that the +.Ar numcpus +must equal the product of +.Ar sockets , +.Ar cores , +and +.Ar threads . +If a +.Ar setting +is specified more than once the last one has precedence. .It Fl C Include guest memory in core file. .It Fl e diff --git a/usr.sbin/bhyve/bhyverun.c b/usr.sbin/bhyve/bhyverun.c index 5851c5758f1..59dc2df9574 100644 --- a/usr.sbin/bhyve/bhyverun.c +++ b/usr.sbin/bhyve/bhyverun.c @@ -56,6 +56,7 @@ __FBSDID("$FreeBSD$"); #include #include #include +#include #ifndef WITHOUT_CAPSICUM #include #include @@ -163,6 +164,8 @@ extern int vmexit_task_switch(struct vmctx *, struct vm_exit *, int *vcpu); char *vmname; int guest_ncpus; +uint16_t cores, maxcpus, sockets, threads; + char *guest_uuid_str; static int guest_vmexit_on_hlt, guest_vmexit_on_pause; @@ -207,11 +210,13 @@ usage(int code) { fprintf(stderr, - "Usage: %s [-abehuwxACHPSWY] [-c vcpus] [-g ] [-l ]\n" + "Usage: %s [-abehuwxACHPSWY]\n" + " %*s [-c [[cpus=]numcpus][,sockets=n][,cores=n][,threads=n]]\n" + " %*s [-g ] [-l ]\n" " %*s [-m mem] [-p vcpu:hostcpu] [-s ] [-U uuid] \n" " -a: local apic is in xAPIC mode (deprecated)\n" " -A: create ACPI tables\n" - " -c: # cpus (default 1)\n" + " -c: number of cpus and/or topology specification\n" " -C: include guest memory in core file\n" " -e: exit on unhandled I/O access\n" " -g: gdb port\n" @@ -229,7 +234,8 @@ usage(int code) " -W: force virtio to use single-vector MSI\n" " -x: local apic is in x2APIC mode\n" " -Y: disable MPtable generation\n", - progname, (int)strlen(progname), ""); + progname, (int)strlen(progname), "", (int)strlen(progname), "", + (int)strlen(progname), ""); exit(code); } @@ -271,6 +277,94 @@ bhyve_caph_limit_stdoe(void) #endif +/* + * XXX This parser is known to have the following issues: + * 1. It accepts null key=value tokens ",,". + * 2. It accepts whitespace after = and before value. + * 3. Values out of range of INT are silently wrapped. + * 4. It doesn't check non-final values. + * 5. The apparently bogus limits of UINT16_MAX are for future expansion. + * + * The acceptance of a null specification ('-c ""') is by design to match the + * manual page syntax specification, this results in a topology of 1 vCPU. + */ +static int +topology_parse(const char *opt) +{ + uint64_t ncpus; + int c, chk, n, s, t, tmp; + char *cp, *str; + bool ns, scts; + + c = 1, n = 1, s = 1, t = 1; + ns = false, scts = false; + str = strdup(opt); + if (str == NULL) + goto out; + + while ((cp = strsep(&str, ",")) != NULL) { + if (sscanf(cp, "%i%n", &tmp, &chk) == 1) { + n = tmp; + ns = true; + } else if (sscanf(cp, "cpus=%i%n", &tmp, &chk) == 1) { + n = tmp; + ns = true; + } else if (sscanf(cp, "sockets=%i%n", &tmp, &chk) == 1) { + s = tmp; + scts = true; + } else if (sscanf(cp, "cores=%i%n", &tmp, &chk) == 1) { + c = tmp; + scts = true; + } else if (sscanf(cp, "threads=%i%n", &tmp, &chk) == 1) { + t = tmp; + scts = true; +#ifdef notyet /* Do not expose this until vmm.ko implements it */ + } else if (sscanf(cp, "maxcpus=%i%n", &tmp, &chk) == 1) { + m = tmp; +#endif + /* Skip the empty argument case from -c "" */ + } else if (cp[0] == '\0') + continue; + else + goto out; + /* Any trailing garbage causes an error */ + if (cp[chk] != '\0') + goto out; + } + free(str); + str = NULL; + + /* + * Range check 1 <= n <= UINT16_MAX all values + */ + if (n < 1 || s < 1 || c < 1 || t < 1 || + n > UINT16_MAX || s > UINT16_MAX || c > UINT16_MAX || + t > UINT16_MAX) + return (-1); + + /* If only the cpus was specified, use that as sockets */ + if (!scts) + s = n; + /* + * Compute sockets * cores * threads avoiding overflow + * The range check above insures these are 16 bit values + * If n was specified check it against computed ncpus + */ + ncpus = (uint64_t)s * c * t; + if (ncpus > UINT16_MAX || (ns && n != ncpus)) + return (-1); + + guest_ncpus = ncpus; + sockets = s; + cores = c; + threads = t; + return(0); + +out: + free(str); + return (-1); +} + static int pincpu_parse(const char *opt) { @@ -899,6 +993,9 @@ do_open(const char *vmname) exit(4); } } + error = vm_set_topology(ctx, sockets, cores, threads, maxcpus); + if (error) + errx(EX_OSERR, "vm_set_topology"); return (ctx); } @@ -917,6 +1014,8 @@ main(int argc, char *argv[]) progname = basename(argv[0]); gdb_port = 0; guest_ncpus = 1; + sockets = cores = threads = 1; + maxcpus = 0; memsize = 256 * MB; mptgen = 1; rtc_localtime = 1; @@ -941,7 +1040,10 @@ main(int argc, char *argv[]) } break; case 'c': - guest_ncpus = atoi(optarg); + if (topology_parse(optarg) != 0) { + errx(EX_USAGE, "invalid cpu topology " + "'%s'", optarg); + } break; case 'C': memflags |= VM_MEM_F_INCORE; @@ -1019,11 +1121,6 @@ main(int argc, char *argv[]) vmname = argv[0]; ctx = do_open(vmname); - if (guest_ncpus < 1) { - fprintf(stderr, "Invalid guest vCPUs (%d)\n", guest_ncpus); - exit(1); - } - max_vcpus = num_vcpus_allowed(ctx); if (guest_ncpus > max_vcpus) { fprintf(stderr, "%d vCPUs requested but only %d available\n", diff --git a/usr.sbin/bhyvectl/bhyvectl.c b/usr.sbin/bhyvectl/bhyvectl.c index f3e622499e2..dfb23c436dd 100644 --- a/usr.sbin/bhyvectl/bhyvectl.c +++ b/usr.sbin/bhyvectl/bhyvectl.c @@ -191,7 +191,8 @@ usage(bool cpu_intel) " [--get-msr-bitmap]\n" " [--get-msr-bitmap-address]\n" " [--get-guest-sysenter]\n" - " [--get-exit-reason]\n", + " [--get-exit-reason]\n" + " [--get-cpu-topology]\n", progname); if (cpu_intel) { @@ -285,6 +286,7 @@ static int set_x2apic_state, get_x2apic_state; enum x2apic_state x2apic_state; static int unassign_pptdev, bus, slot, func; static int run; +static int get_cpu_topology; /* * VMCB specific. @@ -1456,6 +1458,7 @@ setup_options(bool cpu_intel) { "get-active-cpus", NO_ARG, &get_active_cpus, 1 }, { "get-suspended-cpus", NO_ARG, &get_suspended_cpus, 1 }, { "get-intinfo", NO_ARG, &get_intinfo, 1 }, + { "get-cpu-topology", NO_ARG, &get_cpu_topology, 1 }, }; const struct option intel_opts[] = { @@ -2312,6 +2315,14 @@ main(int argc, char *argv[]) } } + if (!error && (get_cpu_topology || get_all)) { + uint16_t sockets, cores, threads, maxcpus; + + vm_get_topology(ctx, &sockets, &cores, &threads, &maxcpus); + printf("cpu_topology:\tsockets=%hu, cores=%hu, threads=%hu, " + "maxcpus=%hu\n", sockets, cores, threads, maxcpus); + } + if (!error && run) { error = vm_run(ctx, vcpu, &vmexit); if (error == 0) -- 2.45.0