]> CyberLeo.Net >> Repos - FreeBSD/FreeBSD.git/blob - sys/dev/kvm_clock/kvm_clock.c
zfs: merge openzfs/zfs@4647353c8
[FreeBSD/FreeBSD.git] / sys / dev / kvm_clock / kvm_clock.c
1 /*-
2  * SPDX-License-Identifier: BSD-2-Clause
3  *
4  * Copyright (c) 2014 Bryan Venteicher <bryanv@FreeBSD.org>
5  * Copyright (c) 2021 Mathieu Chouquet-Stringer
6  * Copyright (c) 2021 Juniper Networks, Inc.
7  * Copyright (c) 2021 Klara, Inc.
8  *
9  * Redistribution and use in source and binary forms, with or without
10  * modification, are permitted provided that the following conditions
11  * are met:
12  * 1. Redistributions of source code must retain the above copyright
13  *    notice, this list of conditions and the following disclaimer.
14  * 2. Redistributions in binary form must reproduce the above copyright
15  *    notice, this list of conditions and the following disclaimer in the
16  *    documentation and/or other materials provided with the distribution.
17  *
18  * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
19  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
20  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
21  * ARE DISCLAIMED.  IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
22  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
23  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
24  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
25  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
26  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
27  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
28  * SUCH DAMAGE.
29  */
30
31 /*
32  * Linux KVM paravirtual clock support
33  *
34  * References:
35  *     - [1] https://www.kernel.org/doc/html/latest/virt/kvm/cpuid.html
36  *     - [2] https://www.kernel.org/doc/html/latest/virt/kvm/msr.html
37  */
38
39 #include <sys/cdefs.h>
40 #include <sys/param.h>
41 #include <sys/bus.h>
42 #include <sys/domainset.h>
43 #include <sys/kernel.h>
44 #include <sys/malloc.h>
45 #include <sys/module.h>
46 #include <sys/smp.h>
47 #include <sys/sysctl.h>
48
49 #include <vm/vm.h>
50 #include <vm/pmap.h>
51 #include <vm/vm_extern.h>
52
53 #include <machine/pvclock.h>
54 #include <x86/kvm.h>
55
56 #include "clock_if.h"
57
/* Name used for the device, the driver, and the pvclock registration. */
#define	KVM_CLOCK_DEVNAME		"kvmclock"

/*
 * Timecounter quality passed to pvclock_init().
 *
 * Note: The value is chosen to be
 *     (1) above HPET's value (always 950),
 *     (2) above the TSC's default value of 800, and
 *     (3) below the TSC's value when it supports the "Invariant TSC" feature
 *         and is believed to be synchronized across all CPUs.
 */
#define	KVM_CLOCK_TC_QUALITY		975
65
66 struct kvm_clock_softc {
67         struct pvclock                   pvc;
68         struct pvclock_wall_clock        wc;
69         struct pvclock_vcpu_time_info   *timeinfos;
70         u_int                            msr_tc;
71         u_int                            msr_wc;
72 #ifndef EARLY_AP_STARTUP
73         int                              firstcpu;
74 #endif
75 };
76
77 static struct pvclock_wall_clock *kvm_clock_get_wallclock(void *arg);
78 static void     kvm_clock_system_time_enable(struct kvm_clock_softc *sc,
79                     const cpuset_t *cpus);
80 static void     kvm_clock_system_time_enable_pcpu(void *arg);
81 static void     kvm_clock_setup_sysctl(device_t);
82
83 static struct pvclock_wall_clock *
84 kvm_clock_get_wallclock(void *arg)
85 {
86         struct kvm_clock_softc *sc = arg;
87
88         wrmsr(sc->msr_wc, vtophys(&sc->wc));
89         return (&sc->wc);
90 }
91
92 static void
93 kvm_clock_system_time_enable(struct kvm_clock_softc *sc, const cpuset_t *cpus)
94 {
95         smp_rendezvous_cpus(*cpus, NULL, kvm_clock_system_time_enable_pcpu,
96             NULL, sc);
97 }
98
99 static void
100 kvm_clock_system_time_enable_pcpu(void *arg)
101 {
102         struct kvm_clock_softc *sc = arg;
103
104         /*
105          * See [2]; the lsb of this MSR is the system time enable bit.
106          */
107         wrmsr(sc->msr_tc, vtophys(&(sc->timeinfos)[curcpu]) | 1);
108 }
109
110 #ifndef EARLY_AP_STARTUP
111 static void
112 kvm_clock_init_smp(void *arg __unused)
113 {
114         devclass_t kvm_clock_devclass;
115         cpuset_t cpus;
116         struct kvm_clock_softc *sc;
117
118         kvm_clock_devclass = devclass_find(KVM_CLOCK_DEVNAME);
119         sc = devclass_get_softc(kvm_clock_devclass, 0);
120         if (sc == NULL || mp_ncpus == 1)
121                 return;
122
123         /*
124          * Register with the hypervisor on all CPUs except the one that
125          * registered in kvm_clock_attach().
126          */
127         cpus = all_cpus;
128         KASSERT(CPU_ISSET(sc->firstcpu, &cpus),
129             ("%s: invalid first CPU %d", __func__, sc->firstcpu));
130         CPU_CLR(sc->firstcpu, &cpus);
131         kvm_clock_system_time_enable(sc, &cpus);
132 }
133 SYSINIT(kvm_clock, SI_SUB_SMP, SI_ORDER_ANY, kvm_clock_init_smp, NULL);
134 #endif
135
136 static void
137 kvm_clock_identify(driver_t *driver, device_t parent)
138 {
139         u_int regs[4];
140
141         kvm_cpuid_get_features(regs);
142         if ((regs[0] &
143             (KVM_FEATURE_CLOCKSOURCE2 | KVM_FEATURE_CLOCKSOURCE)) == 0)
144                 return;
145         if (device_find_child(parent, KVM_CLOCK_DEVNAME, -1))
146                 return;
147         BUS_ADD_CHILD(parent, 0, KVM_CLOCK_DEVNAME, 0);
148 }
149
150 static int
151 kvm_clock_probe(device_t dev)
152 {
153         device_set_desc(dev, "KVM paravirtual clock");
154         return (BUS_PROBE_DEFAULT);
155 }
156
157 static int
158 kvm_clock_attach(device_t dev)
159 {
160         u_int regs[4];
161         struct kvm_clock_softc *sc = device_get_softc(dev);
162         bool stable_flag_supported;
163
164         /* Process KVM "features" CPUID leaf content: */
165         kvm_cpuid_get_features(regs);
166         if ((regs[0] & KVM_FEATURE_CLOCKSOURCE2) != 0) {
167                 sc->msr_tc = KVM_MSR_SYSTEM_TIME_NEW;
168                 sc->msr_wc = KVM_MSR_WALL_CLOCK_NEW;
169         } else {
170                 KASSERT((regs[0] & KVM_FEATURE_CLOCKSOURCE) != 0,
171                     ("Clocksource feature flags disappeared since "
172                     "kvm_clock_identify: regs[0] %#0x.", regs[0]));
173                 sc->msr_tc = KVM_MSR_SYSTEM_TIME;
174                 sc->msr_wc = KVM_MSR_WALL_CLOCK;
175         }
176         stable_flag_supported =
177             (regs[0] & KVM_FEATURE_CLOCKSOURCE_STABLE_BIT) != 0;
178
179         /* Set up 'struct pvclock_vcpu_time_info' page(s): */
180         sc->timeinfos = kmem_malloc(mp_ncpus *
181             sizeof(struct pvclock_vcpu_time_info), M_WAITOK | M_ZERO);
182 #ifdef EARLY_AP_STARTUP
183         kvm_clock_system_time_enable(sc, &all_cpus);
184 #else
185         sc->firstcpu = curcpu;
186         kvm_clock_system_time_enable_pcpu(sc);
187 #endif
188
189         /*
190          * Init pvclock; register KVM clock wall clock, register KVM clock
191          * timecounter, and set up the requisite infrastructure for vDSO access
192          * to this timecounter.
193          *     Regarding 'tc_flags': Since the KVM MSR documentation does not
194          *     specifically discuss suspend/resume scenarios, conservatively
195          *     leave 'TC_FLAGS_SUSPEND_SAFE' cleared and assume that the system
196          *     time must be re-inited in such cases.
197          */
198         sc->pvc.get_wallclock = kvm_clock_get_wallclock;
199         sc->pvc.get_wallclock_arg = sc;
200         sc->pvc.timeinfos = sc->timeinfos;
201         sc->pvc.stable_flag_supported = stable_flag_supported;
202         pvclock_init(&sc->pvc, dev, KVM_CLOCK_DEVNAME, KVM_CLOCK_TC_QUALITY, 0);
203         kvm_clock_setup_sysctl(dev);
204         return (0);
205 }
206
207 static int
208 kvm_clock_detach(device_t dev)
209 {
210         struct kvm_clock_softc *sc = device_get_softc(dev);
211
212         return (pvclock_destroy(&sc->pvc));
213 }
214
215 static int
216 kvm_clock_suspend(device_t dev)
217 {
218         return (0);
219 }
220
221 static int
222 kvm_clock_resume(device_t dev)
223 {
224         /*
225          * See note in 'kvm_clock_attach()' regarding 'TC_FLAGS_SUSPEND_SAFE';
226          * conservatively assume that the system time must be re-inited in
227          * suspend/resume scenarios.
228          */
229         kvm_clock_system_time_enable(device_get_softc(dev), &all_cpus);
230         pvclock_resume();
231         inittodr(time_second);
232         return (0);
233 }
234
235 static int
236 kvm_clock_gettime(device_t dev, struct timespec *ts)
237 {
238         struct kvm_clock_softc *sc = device_get_softc(dev);
239
240         pvclock_gettime(&sc->pvc, ts);
241         return (0);
242 }
243
244 static int
245 kvm_clock_settime(device_t dev, struct timespec *ts)
246 {
247         /*
248          * Even though it is not possible to set the KVM clock's wall clock, to
249          * avoid the possibility of periodic benign error messages from
250          * 'settime_task_func()', report success rather than, e.g., 'ENODEV'.
251          */
252         return (0);
253 }
254
255 static int
256 kvm_clock_tsc_freq_sysctl(SYSCTL_HANDLER_ARGS)
257 {
258         struct kvm_clock_softc *sc = oidp->oid_arg1;
259         uint64_t freq = pvclock_tsc_freq(sc->timeinfos);
260
261         return (sysctl_handle_64(oidp, &freq, 0, req));
262 }
263
264 static void
265 kvm_clock_setup_sysctl(device_t dev)
266 {
267         struct kvm_clock_softc *sc = device_get_softc(dev);
268         struct sysctl_ctx_list *ctx = device_get_sysctl_ctx(dev);
269         struct sysctl_oid *tree = device_get_sysctl_tree(dev);
270         struct sysctl_oid_list *child = SYSCTL_CHILDREN(tree);
271
272         SYSCTL_ADD_PROC(ctx, child, OID_AUTO, "tsc_freq",
273             CTLTYPE_U64 | CTLFLAG_RD | CTLFLAG_MPSAFE, sc, 0,
274             kvm_clock_tsc_freq_sysctl, "QU",
275             "Time Stamp Counter frequency");
276 }
277
278 static device_method_t kvm_clock_methods[] = {
279         DEVMETHOD(device_identify,      kvm_clock_identify),
280         DEVMETHOD(device_probe,         kvm_clock_probe),
281         DEVMETHOD(device_attach,        kvm_clock_attach),
282         DEVMETHOD(device_detach,        kvm_clock_detach),
283         DEVMETHOD(device_suspend,       kvm_clock_suspend),
284         DEVMETHOD(device_resume,        kvm_clock_resume),
285         /* clock interface */
286         DEVMETHOD(clock_gettime,        kvm_clock_gettime),
287         DEVMETHOD(clock_settime,        kvm_clock_settime),
288
289         DEVMETHOD_END
290 };
291
292 static driver_t kvm_clock_driver = {
293         KVM_CLOCK_DEVNAME,
294         kvm_clock_methods,
295         sizeof(struct kvm_clock_softc),
296 };
297
298 DRIVER_MODULE(kvm_clock, nexus, kvm_clock_driver, 0, 0);