From 6fa88a627d5e9d290022b6f463effc99f3df8ee2 Mon Sep 17 00:00:00 2001 From: Adam Fenn Date: Wed, 4 Aug 2021 08:42:48 -0700 Subject: [PATCH] kvm_clock: KVM paravirtual clock support Add support for the KVM paravirtual clock device. Sponsored by: Juniper Networks, Inc. Sponsored by: Klara, Inc. Reviewed by: kib Differential Revision: https://reviews.freebsd.org/D29733 (cherry picked from commit 6c69c6bb4c7ffde48b130df707799d4eca21ca9f) --- sys/amd64/conf/GENERIC | 3 + sys/amd64/conf/MINIMAL | 3 + sys/amd64/conf/NOTES | 3 + sys/conf/files.amd64 | 3 +- sys/conf/files.i386 | 3 +- sys/dev/kvm_clock/kvm_clock.c | 240 ++++++++++++++++++++++++++++++++++ sys/i386/conf/GENERIC | 3 + sys/i386/conf/NOTES | 3 + sys/x86/include/kvm.h | 79 +++++++++++ 9 files changed, 338 insertions(+), 2 deletions(-) create mode 100644 sys/dev/kvm_clock/kvm_clock.c create mode 100644 sys/x86/include/kvm.h diff --git a/sys/amd64/conf/GENERIC b/sys/amd64/conf/GENERIC index 0784544c9c7..84912e69526 100644 --- a/sys/amd64/conf/GENERIC +++ b/sys/amd64/conf/GENERIC @@ -362,6 +362,9 @@ device virtio_blk # VirtIO Block device device virtio_scsi # VirtIO SCSI device device virtio_balloon # VirtIO Memory Balloon device +# Linux KVM paravirtualization support +device kvm_clock # KVM paravirtual clock driver + # HyperV drivers and enhancement support device hyperv # HyperV drivers diff --git a/sys/amd64/conf/MINIMAL b/sys/amd64/conf/MINIMAL index 7c035e871e7..97a97aacff7 100644 --- a/sys/amd64/conf/MINIMAL +++ b/sys/amd64/conf/MINIMAL @@ -133,6 +133,9 @@ device gif # IPv6 and IPv4 tunneling # Note that 'bpf' is required for DHCP. device bpf # Berkeley packet filter +# Linux KVM paravirtualization support +device kvm_clock # KVM paravirtual clock driver + # Xen HVM Guest Optimizations # NOTE: XENHVM depends on xenpci. They must be added or removed together. options XENHVM # Xen HVM kernel infrastructure diff --git a/sys/amd64/conf/NOTES b/sys/amd64/conf/NOTES index 0b949acdc4a..abf4397787c 100644 --- a/sys/amd64/conf/NOTES +++ b/sys/amd64/conf/NOTES @@ -527,6 +527,9 @@ device virtio_balloon # VirtIO Memory Balloon device device virtio_random # VirtIO Entropy device device virtio_console # VirtIO Console device +# Linux KVM paravirtualization support +device kvm_clock # KVM paravirtual clock driver + # Microsoft Hyper-V enhancement support device hyperv # HyperV drivers diff --git a/sys/conf/files.amd64 b/sys/conf/files.amd64 index ad698de09fe..425abcd8b4c 100644 --- a/sys/conf/files.amd64 +++ b/sys/conf/files.amd64 @@ -398,6 +398,7 @@ dev/hyperv/vmbus/vmbus_res.c optional hyperv dev/hyperv/vmbus/vmbus_xact.c optional hyperv dev/hyperv/vmbus/amd64/hyperv_machdep.c optional hyperv dev/hyperv/vmbus/amd64/vmbus_vector.S optional hyperv +dev/kvm_clock/kvm_clock.c optional kvm_clock dev/nctgpio/nctgpio.c optional nctgpio dev/nfe/if_nfe.c optional nfe pci dev/ntb/if_ntb/if_ntb.c optional if_ntb @@ -819,7 +820,7 @@ x86/x86/mp_x86.c optional smp x86/x86/mp_watchdog.c optional mp_watchdog smp x86/x86/msi.c optional pci x86/x86/nexus.c standard -x86/x86/pvclock.c optional xenhvm +x86/x86/pvclock.c optional kvm_clock | xenhvm x86/x86/stack_machdep.c optional ddb | stack x86/x86/tsc.c standard x86/x86/ucode.c standard diff --git a/sys/conf/files.i386 b/sys/conf/files.i386 index 934c72c8704..4bb09402791 100644 --- a/sys/conf/files.i386 +++ b/sys/conf/files.i386 @@ -285,6 +285,7 @@ dev/ipmi/ipmi_ssif.c optional ipmi smbus dev/ipmi/ipmi_pci.c optional ipmi pci dev/ipmi/ipmi_linux.c optional ipmi compat_linux dev/itwd/itwd.c optional itwd +dev/kvm_clock/kvm_clock.c optional kvm_clock dev/le/if_le_isa.c optional le isa dev/mse/mse.c optional mse dev/mse/mse_isa.c optional mse isa @@ -635,7 +636,7 @@ x86/x86/nexus.c standard x86/x86/stack_machdep.c optional ddb | stack x86/x86/tsc.c standard x86/x86/ucode.c standard -x86/x86/pvclock.c optional xenhvm +x86/x86/pvclock.c optional kvm_clock | xenhvm x86/x86/delay.c standard x86/xen/hvm.c optional xenhvm x86/xen/xen_intr.c optional xenhvm diff --git a/sys/dev/kvm_clock/kvm_clock.c b/sys/dev/kvm_clock/kvm_clock.c new file mode 100644 index 00000000000..1a76432e417 --- /dev/null +++ b/sys/dev/kvm_clock/kvm_clock.c @@ -0,0 +1,240 @@ +/*- + * SPDX-License-Identifier: BSD-2-Clause-FreeBSD + * + * Copyright (c) 2014 Bryan Venteicher + * Copyright (c) 2021 Mathieu Chouquet-Stringer + * Copyright (c) 2021 Juniper Networks, Inc. + * Copyright (c) 2021 Klara, Inc. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * + * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND + * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE + * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL + * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS + * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) + * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT + * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY + * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF + * SUCH DAMAGE. + */ + +/* + * Linux KVM paravirtual clock support + * + * References: + * - [1] https://www.kernel.org/doc/html/latest/virt/kvm/cpuid.html + * - [2] https://www.kernel.org/doc/html/latest/virt/kvm/msr.html + */ + +#include +__FBSDID("$FreeBSD$"); + +#include +#include +#include +#include +#include +#include +#include + +#include +#include +#include + +#include +#include + +#include "clock_if.h" + +#define KVM_CLOCK_DEVNAME "kvmclock" +/* + * Note: Chosen to be (1) above HPET's value (always 950), (2) above the TSC's + * default value of 800, and (3) below the TSC's value when it supports the + * "Invariant TSC" feature and is believed to be synchronized across all CPUs. + */ +#define KVM_CLOCK_TC_QUALITY 975 + +struct kvm_clock_softc { + struct pvclock pvc; + struct pvclock_wall_clock wc; + struct pvclock_vcpu_time_info *timeinfos; + u_int msr_tc; + u_int msr_wc; +}; + +static devclass_t kvm_clock_devclass; + +static struct pvclock_wall_clock *kvm_clock_get_wallclock(void *arg); +static void kvm_clock_system_time_enable(struct kvm_clock_softc *sc); +static void kvm_clock_system_time_enable_pcpu(void *arg); + +static struct pvclock_wall_clock * +kvm_clock_get_wallclock(void *arg) +{ + struct kvm_clock_softc *sc = arg; + + wrmsr(sc->msr_wc, vtophys(&sc->wc)); + return (&sc->wc); +} + +static void +kvm_clock_system_time_enable(struct kvm_clock_softc *sc) +{ + smp_rendezvous(NULL, kvm_clock_system_time_enable_pcpu, NULL, sc); +} + +static void +kvm_clock_system_time_enable_pcpu(void *arg) +{ + struct kvm_clock_softc *sc = arg; + + /* + * See [2]; the lsb of this MSR is the system time enable bit. + */ + wrmsr(sc->msr_tc, vtophys(&(sc->timeinfos)[curcpu]) | 1); +} + +static void +kvm_clock_identify(driver_t *driver, device_t parent) +{ + u_int regs[4]; + + kvm_cpuid_get_features(regs); + if ((regs[0] & + (KVM_FEATURE_CLOCKSOURCE2 | KVM_FEATURE_CLOCKSOURCE)) == 0) + return; + if (device_find_child(parent, KVM_CLOCK_DEVNAME, -1)) + return; + BUS_ADD_CHILD(parent, 0, KVM_CLOCK_DEVNAME, 0); +} + +static int +kvm_clock_probe(device_t dev) +{ + device_set_desc(dev, "KVM paravirtual clock"); + return (BUS_PROBE_DEFAULT); +} + +static int +kvm_clock_attach(device_t dev) +{ + u_int regs[4]; + struct kvm_clock_softc *sc = device_get_softc(dev); + bool stable_flag_supported; + + /* Process KVM "features" CPUID leaf content: */ + kvm_cpuid_get_features(regs); + if ((regs[0] & KVM_FEATURE_CLOCKSOURCE2) != 0) { + sc->msr_tc = KVM_MSR_SYSTEM_TIME_NEW; + sc->msr_wc = KVM_MSR_WALL_CLOCK_NEW; + } else { + KASSERT((regs[0] & KVM_FEATURE_CLOCKSOURCE) != 0, + ("Clocksource feature flags disappeared since " + "kvm_clock_identify: regs[0] %#0x.", regs[0])); + sc->msr_tc = KVM_MSR_SYSTEM_TIME; + sc->msr_wc = KVM_MSR_WALL_CLOCK; + } + stable_flag_supported = + (regs[0] & KVM_FEATURE_CLOCKSOURCE_STABLE_BIT) != 0; + + /* Set up 'struct pvclock_vcpu_time_info' page(s): */ + sc->timeinfos = (struct pvclock_vcpu_time_info *)kmem_malloc(mp_ncpus * + sizeof(struct pvclock_vcpu_time_info), M_WAITOK | M_ZERO); + kvm_clock_system_time_enable(sc); + + /* + * Init pvclock; register KVM clock wall clock, register KVM clock + * timecounter, and set up the requisite infrastructure for vDSO access + * to this timecounter. + * Regarding 'tc_flags': Since the KVM MSR documentation does not + * specifically discuss suspend/resume scenarios, conservatively + * leave 'TC_FLAGS_SUSPEND_SAFE' cleared and assume that the system + * time must be re-inited in such cases. + */ + sc->pvc.get_wallclock = kvm_clock_get_wallclock; + sc->pvc.get_wallclock_arg = sc; + sc->pvc.timeinfos = sc->timeinfos; + sc->pvc.stable_flag_supported = stable_flag_supported; + pvclock_init(&sc->pvc, dev, KVM_CLOCK_DEVNAME, KVM_CLOCK_TC_QUALITY, 0); + return (0); +} + +static int +kvm_clock_detach(device_t dev) +{ + struct kvm_clock_softc *sc = device_get_softc(dev); + + return (pvclock_destroy(&sc->pvc)); +} + +static int +kvm_clock_suspend(device_t dev) +{ + return (0); +} + +static int +kvm_clock_resume(device_t dev) +{ + /* + * See note in 'kvm_clock_attach()' regarding 'TC_FLAGS_SUSPEND_SAFE'; + * conservatively assume that the system time must be re-inited in + * suspend/resume scenarios. + */ + kvm_clock_system_time_enable(device_get_softc(dev)); + pvclock_resume(); + inittodr(time_second); + return (0); +} + +static int +kvm_clock_gettime(device_t dev, struct timespec *ts) +{ + struct kvm_clock_softc *sc = device_get_softc(dev); + + pvclock_gettime(&sc->pvc, ts); + return (0); +} + +static int +kvm_clock_settime(device_t dev, struct timespec *ts) +{ + /* + * Even though it is not possible to set the KVM clock's wall clock, to + * avoid the possibility of periodic benign error messages from + * 'settime_task_func()', report success rather than, e.g., 'ENODEV'. + */ + return (0); +} + +static device_method_t kvm_clock_methods[] = { + DEVMETHOD(device_identify, kvm_clock_identify), + DEVMETHOD(device_probe, kvm_clock_probe), + DEVMETHOD(device_attach, kvm_clock_attach), + DEVMETHOD(device_detach, kvm_clock_detach), + DEVMETHOD(device_suspend, kvm_clock_suspend), + DEVMETHOD(device_resume, kvm_clock_resume), + /* clock interface */ + DEVMETHOD(clock_gettime, kvm_clock_gettime), + DEVMETHOD(clock_settime, kvm_clock_settime), + + DEVMETHOD_END +}; + +static driver_t kvm_clock_driver = { + KVM_CLOCK_DEVNAME, + kvm_clock_methods, + sizeof(struct kvm_clock_softc), +}; + +DRIVER_MODULE(kvm_clock, nexus, kvm_clock_driver, kvm_clock_devclass, 0, 0); diff --git a/sys/i386/conf/GENERIC b/sys/i386/conf/GENERIC index ecb7529df25..d9cb9ed0776 100644 --- a/sys/i386/conf/GENERIC +++ b/sys/i386/conf/GENERIC @@ -353,6 +353,9 @@ device virtio_blk # VirtIO Block device device virtio_scsi # VirtIO SCSI device device virtio_balloon # VirtIO Memory Balloon device +# Linux KVM paravirtualization support +device kvm_clock # KVM paravirtual clock driver + # HyperV drivers and enhancement support device hyperv # HyperV drivers diff --git a/sys/i386/conf/NOTES b/sys/i386/conf/NOTES index 211b3a6fa7e..34561c54862 100644 --- a/sys/i386/conf/NOTES +++ b/sys/i386/conf/NOTES @@ -771,6 +771,9 @@ device virtio_balloon # VirtIO Memory Balloon device device virtio_random # VirtIO Entropy device device virtio_console # VirtIO Console device +# Linux KVM paravirtualization support +device kvm_clock # KVM paravirtual clock driver + device hyperv # HyperV drivers ##################################################################### diff --git a/sys/x86/include/kvm.h b/sys/x86/include/kvm.h new file mode 100644 index 00000000000..b74d45d0798 --- /dev/null +++ b/sys/x86/include/kvm.h @@ -0,0 +1,79 @@ +/*- + * SPDX-License-Identifier: BSD-2-Clause-FreeBSD + * + * Copyright (c) 2014 Bryan Venteicher + * Copyright (c) 2021 Mathieu Chouquet-Stringer + * Copyright (c) 2021 Juniper Networks, Inc. + * Copyright (c) 2021 Klara, Inc. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * + * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND + * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE + * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL + * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS + * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) + * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT + * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY + * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF + * SUCH DAMAGE. + * + * $FreeBSD$ + */ + +/* + * Linux KVM paravirtualization: common definitions + * + * References: + * - [1] https://www.kernel.org/doc/html/latest/virt/kvm/cpuid.html + * - [2] https://www.kernel.org/doc/html/latest/virt/kvm/msr.html + */ + +#ifndef _X86_KVM_H_ +#define _X86_KVM_H_ + +#include +#include + +#include + +#define KVM_CPUID_SIGNATURE 0x40000000 +#define KVM_CPUID_FEATURES_LEAF 0x40000001 + +#define KVM_FEATURE_CLOCKSOURCE 0x00000001 +#define KVM_FEATURE_CLOCKSOURCE2 0x00000008 +#define KVM_FEATURE_CLOCKSOURCE_STABLE_BIT 0x01000000 + +/* Deprecated: for the CLOCKSOURCE feature. */ +#define KVM_MSR_WALL_CLOCK 0x11 +#define KVM_MSR_SYSTEM_TIME 0x12 + +#define KVM_MSR_WALL_CLOCK_NEW 0x4b564d00 +#define KVM_MSR_SYSTEM_TIME_NEW 0x4b564d01 + +static inline bool +kvm_cpuid_features_leaf_supported(void) +{ + return (vm_guest == VM_GUEST_KVM && + KVM_CPUID_FEATURES_LEAF <= hv_high); +} + +static inline void +kvm_cpuid_get_features(u_int *regs) +{ + if (!kvm_cpuid_features_leaf_supported()) + regs[0] = regs[1] = regs[2] = regs[3] = 0; + else + do_cpuid(KVM_CPUID_FEATURES_LEAF, regs); +} + +#endif /* !_X86_KVM_H_ */ -- 2.45.0