/*-
 * Copyright (c) 2009 Adrian Chadd
 * Copyright (c) 2012 Spectra Logic Corporation
 * Copyright (c) 2014 Bryan Venteicher
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 *
 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
 * ARE DISCLAIMED.  IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
 * SUCH DAMAGE.
 */

#include <sys/cdefs.h>
__FBSDID("$FreeBSD$");

#include <sys/param.h>
#include <sys/systm.h>
#include <sys/bus.h>
#include <sys/clock.h>
#include <sys/conf.h>
#include <sys/fcntl.h>
#include <sys/limits.h>
#include <sys/mman.h>
#include <sys/proc.h>
#include <sys/smp.h>
#include <sys/sysctl.h>
#include <sys/vdso.h>

#include <vm/vm.h>
#include <vm/pmap.h>

#include <machine/atomic.h>
#include <machine/cpufunc.h>
#include <machine/md_var.h>
#include <machine/pvclock.h>

54 * Last system time. This is used to guarantee a monotonically non-decreasing
55 * clock for the kernel codepath and approximate the same for the vDSO codepath.
56 * In theory, this should be unnecessary absent hypervisor bug(s) and/or what
57 * should be rare cases where TSC jitter may still be visible despite the
58 * hypervisor's best efforts.
60 static volatile uint64_t pvclock_last_systime;
62 static uint64_t pvclock_getsystime(struct pvclock *pvc);
63 static void pvclock_read_time_info(
64 struct pvclock_vcpu_time_info *ti, uint64_t *ns, uint8_t *flags);
65 static void pvclock_read_wall_clock(struct pvclock_wall_clock *wc,
67 static u_int pvclock_tc_get_timecount(struct timecounter *tc);
68 static uint32_t pvclock_tc_vdso_timehands(
69 struct vdso_timehands *vdso_th, struct timecounter *tc);
70 #ifdef COMPAT_FREEBSD32
71 static uint32_t pvclock_tc_vdso_timehands32(
72 struct vdso_timehands32 *vdso_th, struct timecounter *tc);
75 static d_open_t pvclock_cdev_open;
76 static d_mmap_t pvclock_cdev_mmap;
78 static struct cdevsw pvclock_cdev_cdevsw = {
79 .d_version = D_VERSION,
80 .d_name = PVCLOCK_CDEVNAME,
81 .d_open = pvclock_cdev_open,
82 .d_mmap = pvclock_cdev_mmap,
88 atomic_store_rel_64(&pvclock_last_systime, 0);
92 pvclock_tsc_freq(struct pvclock_vcpu_time_info *ti)
96 freq = (1000000000ULL << 32) / ti->tsc_to_system_mul;
97 if (ti->tsc_shift < 0)
98 freq <<= -ti->tsc_shift;
100 freq >>= ti->tsc_shift;
105 pvclock_read_time_info(struct pvclock_vcpu_time_info *ti,
106 uint64_t *ns, uint8_t *flags)
112 version = atomic_load_acq_32(&ti->version);
113 delta = rdtsc_ordered() - ti->tsc_timestamp;
114 *ns = ti->system_time + pvclock_scale_delta(delta,
115 ti->tsc_to_system_mul, ti->tsc_shift);
117 atomic_thread_fence_acq();
118 } while ((ti->version & 1) != 0 || ti->version != version);
122 pvclock_read_wall_clock(struct pvclock_wall_clock *wc, struct timespec *ts)
127 version = atomic_load_acq_32(&wc->version);
128 ts->tv_sec = wc->sec;
129 ts->tv_nsec = wc->nsec;
130 atomic_thread_fence_acq();
131 } while ((wc->version & 1) != 0 || wc->version != version);
135 pvclock_getsystime(struct pvclock *pvc)
137 uint64_t now, last, ret;
141 pvclock_read_time_info(&pvc->timeinfos[curcpu], &now, &flags);
143 if ((flags & PVCLOCK_FLAG_TSC_STABLE) == 0) {
144 last = atomic_load_acq_64(&pvclock_last_systime);
150 } while (!atomic_fcmpset_rel_64(&pvclock_last_systime, &last,
158 * NOTE: Transitional-only; this should be removed after 'dev/xen/timer/timer.c'
159 * has been migrated to the 'struct pvclock' API.
162 pvclock_get_timecount(struct pvclock_vcpu_time_info *ti)
164 uint64_t now, last, ret;
167 pvclock_read_time_info(ti, &now, &flags);
169 if ((flags & PVCLOCK_FLAG_TSC_STABLE) == 0) {
170 last = atomic_load_acq_64(&pvclock_last_systime);
176 } while (!atomic_fcmpset_rel_64(&pvclock_last_systime, &last,
/*
 * Public wrapper around pvclock_read_wall_clock(): fill '*ts' from '*wc'.
 *
 * NOTE: Transitional-only; this should be removed after 'dev/xen/timer/timer.c'
 * has been migrated to the 'struct pvclock' API.
 */
void
pvclock_get_wallclock(struct pvclock_wall_clock *wc, struct timespec *ts)
{

	pvclock_read_wall_clock(wc, ts);
}

193 pvclock_cdev_open(struct cdev *dev, int oflags, int devtype, struct thread *td)
201 pvclock_cdev_mmap(struct cdev *dev, vm_ooffset_t offset, vm_paddr_t *paddr,
202 int nprot, vm_memattr_t *memattr)
204 if (offset >= mp_ncpus * sizeof(struct pvclock_vcpu_time_info))
206 if (PROT_EXTRACT(nprot) != PROT_READ)
208 *paddr = vtophys((uintptr_t)dev->si_drv1 + offset);
209 *memattr = VM_MEMATTR_DEFAULT;
214 pvclock_tc_get_timecount(struct timecounter *tc)
216 struct pvclock *pvc = tc->tc_priv;
218 return (pvclock_getsystime(pvc) & UINT_MAX);
222 pvclock_tc_vdso_timehands(struct vdso_timehands *vdso_th,
223 struct timecounter *tc)
225 struct pvclock *pvc = tc->tc_priv;
227 vdso_th->th_algo = VDSO_TH_ALGO_X86_PVCLK;
228 vdso_th->th_x86_shift = 0;
229 vdso_th->th_x86_hpet_idx = 0;
230 vdso_th->th_x86_pvc_last_systime =
231 atomic_load_acq_64(&pvclock_last_systime);
232 vdso_th->th_x86_pvc_stable_mask = !pvc->vdso_force_unstable &&
233 pvc->stable_flag_supported ? PVCLOCK_FLAG_TSC_STABLE : 0;
234 bzero(vdso_th->th_res, sizeof(vdso_th->th_res));
235 return (pvc->cdev != NULL && amd_feature & AMDID_RDTSCP);
#ifdef COMPAT_FREEBSD32
/*
 * 32-bit compat variant of the vDSO timehands fill routine; it performs the
 * same field assignments on a 'struct vdso_timehands32'.
 *
 * Returns nonzero only when the cdev exists and the CPU advertises
 * AMDID_RDTSCP.
 */
static uint32_t
pvclock_tc_vdso_timehands32(struct vdso_timehands32 *vdso_th,
    struct timecounter *tc)
{
	struct pvclock *pvc = tc->tc_priv;

	vdso_th->th_algo = VDSO_TH_ALGO_X86_PVCLK;
	vdso_th->th_x86_shift = 0;
	vdso_th->th_x86_hpet_idx = 0;
	vdso_th->th_x86_pvc_last_systime =
	    atomic_load_acq_64(&pvclock_last_systime);
	vdso_th->th_x86_pvc_stable_mask =
	    (!pvc->vdso_force_unstable && pvc->stable_flag_supported) ?
	    PVCLOCK_FLAG_TSC_STABLE : 0;
	bzero(vdso_th->th_res, sizeof(vdso_th->th_res));
	return (pvc->cdev != NULL && (amd_feature & AMDID_RDTSCP) != 0);
}
#endif

258 pvclock_gettime(struct pvclock *pvc, struct timespec *ts)
260 struct timespec system_ts;
263 pvclock_read_wall_clock(pvc->get_wallclock(pvc->get_wallclock_arg), ts);
264 system_ns = pvclock_getsystime(pvc);
265 system_ts.tv_sec = system_ns / 1000000000ULL;
266 system_ts.tv_nsec = system_ns % 1000000000ULL;
267 timespecadd(ts, &system_ts, ts);
271 pvclock_init(struct pvclock *pvc, device_t dev, const char *tc_name,
272 int tc_quality, u_int tc_flags)
274 struct make_dev_args mda;
277 KASSERT(((uintptr_t)pvc->timeinfos & PAGE_MASK) == 0,
278 ("Specified time info page(s) address is not page-aligned."));
280 /* Set up vDSO stable-flag suppression test facility: */
281 pvc->vdso_force_unstable = false;
282 SYSCTL_ADD_BOOL(device_get_sysctl_ctx(dev),
283 SYSCTL_CHILDREN(device_get_sysctl_tree(dev)), OID_AUTO,
284 "vdso_force_unstable", CTLFLAG_RW, &pvc->vdso_force_unstable, 0,
285 "Forcibly deassert stable flag in vDSO codepath");
287 /* Set up timecounter and timecounter-supporting members: */
288 pvc->tc.tc_get_timecount = pvclock_tc_get_timecount;
289 pvc->tc.tc_poll_pps = NULL;
290 pvc->tc.tc_counter_mask = ~0U;
291 pvc->tc.tc_frequency = 1000000000ULL;
292 pvc->tc.tc_name = tc_name;
293 pvc->tc.tc_quality = tc_quality;
294 pvc->tc.tc_flags = tc_flags;
295 pvc->tc.tc_priv = pvc;
296 pvc->tc.tc_fill_vdso_timehands = pvclock_tc_vdso_timehands;
297 #ifdef COMPAT_FREEBSD32
298 pvc->tc.tc_fill_vdso_timehands32 = pvclock_tc_vdso_timehands32;
301 /* Set up cdev for userspace mmapping of vCPU 0 time info page: */
302 make_dev_args_init(&mda);
303 mda.mda_devsw = &pvclock_cdev_cdevsw;
304 mda.mda_uid = UID_ROOT;
305 mda.mda_gid = GID_WHEEL;
307 mda.mda_si_drv1 = pvc->timeinfos;
308 err = make_dev_s(&mda, &pvc->cdev, PVCLOCK_CDEVNAME);
310 device_printf(dev, "Could not create /dev/%s, error %d. Fast "
311 "time of day will be unavailable for this timecounter.\n",
312 PVCLOCK_CDEVNAME, err);
313 KASSERT(pvc->cdev == NULL,
314 ("Failed make_dev_s() unexpectedly inited cdev."));
317 /* Register timecounter: */
321 * Register wallclock:
322 * The RTC registration API expects a resolution in microseconds;
323 * pvclock's 1ns resolution is rounded up to 1us.
325 clock_register(dev, 1);
329 pvclock_destroy(struct pvclock *pvc)
332 * Not currently possible since there is no teardown counterpart of