2 * SPDX-License-Identifier: BSD-2-Clause
4 * Copyright (c) 2014 Bryan Venteicher <bryanv@FreeBSD.org>
5 * Copyright (c) 2021 Mathieu Chouquet-Stringer
6 * Copyright (c) 2021 Juniper Networks, Inc.
7 * Copyright (c) 2021 Klara, Inc.
9 * Redistribution and use in source and binary forms, with or without
10 * modification, are permitted provided that the following conditions
12 * 1. Redistributions of source code must retain the above copyright
13 * notice, this list of conditions and the following disclaimer.
14 * 2. Redistributions in binary form must reproduce the above copyright
15 * notice, this list of conditions and the following disclaimer in the
16 * documentation and/or other materials provided with the distribution.
18 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
19 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
20 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
21 * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
22 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
23 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
24 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
25 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
26 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
27 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
32 * Linux KVM paravirtual clock support
35 * - [1] https://www.kernel.org/doc/html/latest/virt/kvm/cpuid.html
36 * - [2] https://www.kernel.org/doc/html/latest/virt/kvm/msr.html
39 #include <sys/cdefs.h>
40 #include <sys/param.h>
42 #include <sys/domainset.h>
43 #include <sys/kernel.h>
44 #include <sys/malloc.h>
45 #include <sys/module.h>
47 #include <sys/sysctl.h>
51 #include <vm/vm_extern.h>
53 #include <machine/pvclock.h>
/*
 * Driver-wide constants.
 *
 * KVM_CLOCK_DEVNAME is the device name used when the child device is added
 * in kvm_clock_identify(), when the devclass is looked up in
 * kvm_clock_init_smp(), and as the name handed to pvclock_init().
 * KVM_CLOCK_TC_QUALITY is the quality value handed to pvclock_init(); the
 * rationale for the chosen number follows the first definition.
 */
58 #define KVM_CLOCK_DEVNAME "kvmclock"
60 * Note: Chosen to be (1) above HPET's value (always 950), (2) above the TSC's
61 * default value of 800, and (3) below the TSC's value when it supports the
62 * "Invariant TSC" feature and is believed to be synchronized across all CPUs.
64 #define KVM_CLOCK_TC_QUALITY 975
/*
 * Per-device driver state (softc); its size is supplied to the driver
 * framework through kvm_clock_driver.
 *
 * NOTE(review): only part of this structure is visible in this excerpt.
 * Code in this file also uses members named pvc, msr_tc, msr_wc and
 * firstcpu, whose declarations are not shown here -- confirm against the
 * complete file.
 */
66 struct kvm_clock_softc {
/* Wall-clock structure; its physical address is written to the msr_wc MSR. */
68 struct pvclock_wall_clock wc;
/*
 * Array of mp_ncpus entries allocated in kvm_clock_attach(); indexed by
 * curcpu when the system time is enabled on a CPU.
 */
69 struct pvclock_vcpu_time_info *timeinfos;
72 #ifndef EARLY_AP_STARTUP
/* Forward declarations of the file-local helpers defined further below. */
77 static struct pvclock_wall_clock *kvm_clock_get_wallclock(void *arg);
78 static void kvm_clock_system_time_enable(struct kvm_clock_softc *sc,
79 const cpuset_t *cpus);
80 static void kvm_clock_system_time_enable_pcpu(void *arg);
81 static void kvm_clock_setup_sysctl(device_t);
/*
 * Wall-clock callback; kvm_clock_attach() installs it as
 * sc->pvc.get_wallclock with the softc as its argument.
 *
 * arg: the kvm_clock_softc of the device.
 *
 * Writes the physical address of sc->wc to the wall-clock MSR (sc->msr_wc).
 *
 * NOTE(review): the statement that produces the return value is not visible
 * in this excerpt; presumably a pointer to sc->wc is returned -- confirm.
 */
83 static struct pvclock_wall_clock *
84 kvm_clock_get_wallclock(void *arg)
86 struct kvm_clock_softc *sc = arg;
88 wrmsr(sc->msr_wc, vtophys(&sc->wc));
/*
 * Enable the KVM system time on a set of CPUs.
 *
 * sc:   driver softc.
 * cpus: set of CPUs to act on; passed by value to smp_rendezvous_cpus().
 *
 * kvm_clock_system_time_enable_pcpu() is handed to smp_rendezvous_cpus() as
 * a callback for the selected CPUs.
 *
 * NOTE(review): the trailing arguments of the smp_rendezvous_cpus() call are
 * not visible in this excerpt; the per-CPU callback expects the softc as its
 * argument -- confirm that sc is what gets passed.
 */
93 kvm_clock_system_time_enable(struct kvm_clock_softc *sc, const cpuset_t *cpus)
95 smp_rendezvous_cpus(*cpus, NULL, kvm_clock_system_time_enable_pcpu,
/*
 * Per-CPU worker that registers the time-info slot of the current CPU.
 *
 * arg: the kvm_clock_softc of the device.
 *
 * The physical address of entry curcpu of sc->timeinfos, with bit 0 set, is
 * written to the system-time MSR (sc->msr_tc).
 */
100 kvm_clock_system_time_enable_pcpu(void *arg)
102 struct kvm_clock_softc *sc = arg;
105 * See [2]; the lsb of this MSR is the system time enable bit.
107 wrmsr(sc->msr_tc, vtophys(&(sc->timeinfos)[curcpu]) | 1);
/*
 * Late SMP initialisation, only built when EARLY_AP_STARTUP is not defined.
 *
 * Registered through SYSINIT at SI_SUB_SMP.  It enables the system time on
 * every CPU in the local set cpus except sc->firstcpu, which
 * kvm_clock_attach() already handled.
 *
 * NOTE(review): the declaration and initialisation of the local cpus set,
 * the statement guarded by the early-exit test and the closing of the
 * conditional block are not visible in this excerpt.
 */
110 #ifndef EARLY_AP_STARTUP
112 kvm_clock_init_smp(void *arg __unused)
114 devclass_t kvm_clock_devclass;
116 struct kvm_clock_softc *sc;
/* Fetch the softc of unit 0 of the kvmclock devclass. */
118 kvm_clock_devclass = devclass_find(KVM_CLOCK_DEVNAME);
119 sc = devclass_get_softc(kvm_clock_devclass, 0);
/*
 * No softc or only one CPU: presumably nothing is left to do (the guarded
 * statement is not visible here).
 */
120 if (sc == NULL || mp_ncpus == 1)
124 * Register with the hypervisor on all CPUs except the one that
125 * registered in kvm_clock_attach().
/* The CPU recorded at attach time must be in the set before it is cleared. */
128 KASSERT(CPU_ISSET(sc->firstcpu, &cpus),
129 ("%s: invalid first CPU %d", __func__, sc->firstcpu));
130 CPU_CLR(sc->firstcpu, &cpus);
131 kvm_clock_system_time_enable(sc, &cpus);
/* Schedule kvm_clock_init_smp() for the SI_SUB_SMP stage. */
133 SYSINIT(kvm_clock, SI_SUB_SMP, SI_ORDER_ANY, kvm_clock_init_smp, NULL);
/*
 * device_identify method (see kvm_clock_methods).
 *
 * Reads the KVM CPUID feature bits, tests them against
 * KVM_FEATURE_CLOCKSOURCE2 and KVM_FEATURE_CLOCKSOURCE, checks whether a
 * kvmclock child already exists under parent, and adds one with
 * BUS_ADD_CHILD().
 *
 * NOTE(review): the declaration of regs, the first line of the feature test
 * and the statements guarded by the two conditions are not visible in this
 * excerpt; presumably both conditions lead to an early return -- confirm.
 */
137 kvm_clock_identify(driver_t *driver, device_t parent)
141 kvm_cpuid_get_features(regs);
143 (KVM_FEATURE_CLOCKSOURCE2 | KVM_FEATURE_CLOCKSOURCE)) == 0)
/* Look for an existing kvmclock child of any unit number under parent. */
145 if (device_find_child(parent, KVM_CLOCK_DEVNAME, -1))
147 BUS_ADD_CHILD(parent, 0, KVM_CLOCK_DEVNAME, 0);
/*
 * device_probe method (see kvm_clock_methods).
 *
 * Sets the device description string and returns BUS_PROBE_DEFAULT.
 */
151 kvm_clock_probe(device_t dev)
153 device_set_desc(dev, "KVM paravirtual clock");
154 return (BUS_PROBE_DEFAULT);
/*
 * device_attach method (see kvm_clock_methods).
 *
 * Steps visible in this excerpt:
 *  - choose the MSR pair from the KVM CPUID feature bits: the _NEW MSRs when
 *    KVM_FEATURE_CLOCKSOURCE2 is set, otherwise KVM_MSR_SYSTEM_TIME and
 *    KVM_MSR_WALL_CLOCK after asserting KVM_FEATURE_CLOCKSOURCE;
 *  - record whether KVM_FEATURE_CLOCKSOURCE_STABLE_BIT is advertised;
 *  - allocate a zeroed array of mp_ncpus pvclock_vcpu_time_info entries;
 *  - enable the system time, either on all CPUs or on the current CPU only;
 *  - fill in sc->pvc, call pvclock_init() and add the sysctl node.
 *
 * NOTE(review): the declaration of regs, some brace and preprocessor lines
 * and the return statement are not visible in this excerpt.
 */
158 kvm_clock_attach(device_t dev)
161 struct kvm_clock_softc *sc = device_get_softc(dev);
162 bool stable_flag_supported;
164 /* Process KVM "features" CPUID leaf content: */
165 kvm_cpuid_get_features(regs);
/* CLOCKSOURCE2 is checked first and selects the _NEW MSR pair. */
166 if ((regs[0] & KVM_FEATURE_CLOCKSOURCE2) != 0) {
167 sc->msr_tc = KVM_MSR_SYSTEM_TIME_NEW;
168 sc->msr_wc = KVM_MSR_WALL_CLOCK_NEW;
170 KASSERT((regs[0] & KVM_FEATURE_CLOCKSOURCE) != 0,
171 ("Clocksource feature flags disappeared since "
172 "kvm_clock_identify: regs[0] %#0x.", regs[0]));
173 sc->msr_tc = KVM_MSR_SYSTEM_TIME;
174 sc->msr_wc = KVM_MSR_WALL_CLOCK;
/* Kept in a local and copied into sc->pvc.stable_flag_supported below. */
176 stable_flag_supported =
177 (regs[0] & KVM_FEATURE_CLOCKSOURCE_STABLE_BIT) != 0;
179 /* Set up 'struct pvclock_vcpu_time_info' page(s): */
180 sc->timeinfos = kmem_malloc(mp_ncpus *
181 sizeof(struct pvclock_vcpu_time_info), M_WAITOK | M_ZERO);
/*
 * With EARLY_AP_STARTUP every CPU in all_cpus is registered here.  The
 * two statements after that call record the current CPU in sc->firstcpu
 * and register only it; presumably they form the alternative branch of
 * the conditional (its separator line is not visible) -- confirm.
 */
182 #ifdef EARLY_AP_STARTUP
183 kvm_clock_system_time_enable(sc, &all_cpus);
185 sc->firstcpu = curcpu;
186 kvm_clock_system_time_enable_pcpu(sc);
190 * Init pvclock; register KVM clock wall clock, register KVM clock
191 * timecounter, and set up the requisite infrastructure for vDSO access
192 * to this timecounter.
193 * Regarding 'tc_flags': Since the KVM MSR documentation does not
194 * specifically discuss suspend/resume scenarios, conservatively
195 * leave 'TC_FLAGS_SUSPEND_SAFE' cleared and assume that the system
196 * time must be re-inited in such cases.
198 sc->pvc.get_wallclock = kvm_clock_get_wallclock;
199 sc->pvc.get_wallclock_arg = sc;
200 sc->pvc.timeinfos = sc->timeinfos;
201 sc->pvc.stable_flag_supported = stable_flag_supported;
202 pvclock_init(&sc->pvc, dev, KVM_CLOCK_DEVNAME, KVM_CLOCK_TC_QUALITY, 0);
203 kvm_clock_setup_sysctl(dev);
/*
 * device_detach method (see kvm_clock_methods).
 *
 * Returns whatever pvclock_destroy() reports for sc->pvc.
 *
 * NOTE(review): the sc->timeinfos array allocated in kvm_clock_attach() is
 * not released here; whether that matters depends on whether
 * pvclock_destroy() can ever succeed -- confirm.
 */
208 kvm_clock_detach(device_t dev)
210 struct kvm_clock_softc *sc = device_get_softc(dev);
212 return (pvclock_destroy(&sc->pvc));
/*
 * device_suspend method (see kvm_clock_methods).
 *
 * NOTE(review): the body of this function is not visible in this excerpt.
 */
216 kvm_clock_suspend(device_t dev)
/*
 * device_resume method (see kvm_clock_methods).
 *
 * Re-enables the system time on every CPU in all_cpus and then calls
 * inittodr() with time_second.
 *
 * NOTE(review): at least one line between the two calls, and the return
 * statement, are not visible in this excerpt.
 */
222 kvm_clock_resume(device_t dev)
225 * See note in 'kvm_clock_attach()' regarding 'TC_FLAGS_SUSPEND_SAFE';
226 * conservatively assume that the system time must be re-inited in
227 * suspend/resume scenarios.
229 kvm_clock_system_time_enable(device_get_softc(dev), &all_cpus);
231 inittodr(time_second);
/*
 * clock_gettime method (see kvm_clock_methods).
 *
 * dev: the kvmclock device.
 * ts:  output; filled in by pvclock_gettime() from sc->pvc.
 *
 * NOTE(review): the return statement is not visible in this excerpt.
 */
236 kvm_clock_gettime(device_t dev, struct timespec *ts)
238 struct kvm_clock_softc *sc = device_get_softc(dev);
240 pvclock_gettime(&sc->pvc, ts);
/*
 * clock_settime method (see kvm_clock_methods).
 *
 * According to the note in the body, the wall clock cannot be set and the
 * method reports success anyway.
 *
 * NOTE(review): the return statement is not visible in this excerpt;
 * presumably it returns 0 -- confirm.
 */
245 kvm_clock_settime(device_t dev, struct timespec *ts)
248 * Even though it is not possible to set the KVM clock's wall clock, to
249 * avoid the possibility of periodic benign error messages from
250 * 'settime_task_func()', report success rather than, e.g., 'ENODEV'.
/*
 * Sysctl handler for the tsc_freq node.
 *
 * oid_arg1 is the softc registered by kvm_clock_setup_sysctl().  The
 * frequency is obtained from pvclock_tsc_freq() on sc->timeinfos, stored in
 * a local 64-bit variable and passed to sysctl_handle_64(), whose result is
 * returned.
 */
256 kvm_clock_tsc_freq_sysctl(SYSCTL_HANDLER_ARGS)
258 struct kvm_clock_softc *sc = oidp->oid_arg1;
259 uint64_t freq = pvclock_tsc_freq(sc->timeinfos);
261 return (sysctl_handle_64(oidp, &freq, 0, req));
/*
 * Add the tsc_freq sysctl node for the device.
 *
 * The node is created under the sysctl tree of dev using the sysctl context
 * of dev.  It is flagged CTLTYPE_U64, CTLFLAG_RD and CTLFLAG_MPSAFE, and is
 * served by kvm_clock_tsc_freq_sysctl() with the softc as arg1.
 */
265 kvm_clock_setup_sysctl(device_t dev)
267 struct kvm_clock_softc *sc = device_get_softc(dev);
268 struct sysctl_ctx_list *ctx = device_get_sysctl_ctx(dev);
269 struct sysctl_oid *tree = device_get_sysctl_tree(dev);
270 struct sysctl_oid_list *child = SYSCTL_CHILDREN(tree);
272 SYSCTL_ADD_PROC(ctx, child, OID_AUTO, "tsc_freq",
273 CTLTYPE_U64 | CTLFLAG_RD | CTLFLAG_MPSAFE, sc, 0,
274 kvm_clock_tsc_freq_sysctl, "QU",
275 "Time Stamp Counter frequency");
/*
 * Method table of the driver: the device interface (identify, probe,
 * attach, detach, suspend, resume) followed by the clock interface
 * (gettime, settime).
 *
 * NOTE(review): the end of the table is not visible in this excerpt.
 */
278 static device_method_t kvm_clock_methods[] = {
279 DEVMETHOD(device_identify, kvm_clock_identify),
280 DEVMETHOD(device_probe, kvm_clock_probe),
281 DEVMETHOD(device_attach, kvm_clock_attach),
282 DEVMETHOD(device_detach, kvm_clock_detach),
283 DEVMETHOD(device_suspend, kvm_clock_suspend),
284 DEVMETHOD(device_resume, kvm_clock_resume),
285 /* clock interface */
286 DEVMETHOD(clock_gettime, kvm_clock_gettime),
287 DEVMETHOD(clock_settime, kvm_clock_settime),
/*
 * Driver description referenced by the DRIVER_MODULE() registration.
 *
 * NOTE(review): only the softc size initialiser is visible in this excerpt;
 * the other initialisers and the closing brace are not shown.
 */
292 static driver_t kvm_clock_driver = {
295 sizeof(struct kvm_clock_softc),
/*
 * Register kvm_clock_driver as module kvm_clock on the nexus bus; the last
 * two arguments (both 0) presumably mean no module event handler and no
 * handler argument -- confirm against DRIVER_MODULE(9).
 */
298 DRIVER_MODULE(kvm_clock, nexus, kvm_clock_driver, 0, 0);