2 * Copyright (c) 2012 Konstantin Belousov <kib@FreeBSD.org>
3 * Copyright (c) 2016, 2017, 2019 The FreeBSD Foundation
6 * Portions of this software were developed by Konstantin Belousov
7 * under sponsorship from the FreeBSD Foundation.
9 * Redistribution and use in source and binary forms, with or without
10 * modification, are permitted provided that the following conditions
12 * 1. Redistributions of source code must retain the above copyright
13 * notice, this list of conditions and the following disclaimer.
14 * 2. Redistributions in binary form must reproduce the above copyright
15 * notice, this list of conditions and the following disclaimer in the
16 * documentation and/or other materials provided with the distribution.
18 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
19 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
20 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
21 * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
22 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
23 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
24 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
25 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
26 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
27 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
31 #include <sys/cdefs.h>
32 __FBSDID("$FreeBSD$");
34 #include <sys/param.h>
35 #include "namespace.h"
36 #include <sys/capsicum.h>
38 #include <sys/fcntl.h>
45 #include "un-namespace.h"
46 #include <machine/atomic.h>
47 #include <machine/cpufunc.h>
48 #include <machine/pvclock.h>
49 #include <machine/specialreg.h>
50 #include <dev/acpica/acpi_hpet.h>
52 #include <dev/hyperv/hyperv.h>
54 #include <x86/ifunc.h>
55 #include "libc_private.h"
/*
 * Read the TSC and scale it down by th->th_x86_shift.  RDTSC leaves the
 * counter in %edx:%eax; SHRD then shifts %eax right by %cl bits, filling
 * the vacated high bits from %edx.  The shift count is pinned to %ecx by
 * the "c" constraint, the result rv to %eax by "=a", and %edx is declared
 * clobbered.
 */
58 rdtsc_low(const struct vdso_timehands *th)
62 __asm __volatile("rdtsc; shrd %%cl, %%edx, %0"
63 : "=a" (rv) : "c" (th->th_x86_shift) : "edx");
/*
 * RDTSCP counterpart of rdtsc_low().  RDTSCP itself writes %ecx, so the
 * shift count travels in %edi ("D" constraint) and is copied into %ecx
 * only after the counter has been read; SHRD then shifts %eax right by
 * %cl, filling from %edx.  %ecx and %edx are declared clobbered.
 */
68 rdtscp_low(const struct vdso_timehands *th)
72 __asm __volatile("rdtscp; movl %%edi,%%ecx; shrd %%cl, %%edx, %0"
73 : "=a" (rv) : "D" (th->th_x86_shift) : "ecx", "edx");
/*
 * TSC reader stored in tsc_selector[0].ts_rdtsc_low; its visible tail
 * returns rdtsc_low(th).
 * NOTE(review): the name implies an LFENCE is issued before the read, but
 * that statement is not visible in this excerpt -- confirm.
 */
78 rdtsc_low_mb_lfence(const struct vdso_timehands *th)
81 return (rdtsc_low(th));
/*
 * TSC reader stored in tsc_selector[1].ts_rdtsc_low; its visible tail
 * returns rdtsc_low(th).
 * NOTE(review): the name implies an MFENCE is issued before the read, but
 * that statement is not visible in this excerpt -- confirm.
 */
85 rdtsc_low_mb_mfence(const struct vdso_timehands *th)
88 return (rdtsc_low(th));
/*
 * TSC reader stored in tsc_selector[2].ts_rdtsc_low (the "No SSE2"
 * entry); it returns rdtsc_low(th).
 */
92 rdtsc_low_mb_none(const struct vdso_timehands *th)
94 return (rdtsc_low(th));
/*
 * Argument-less TSC readers; only their signature lines are visible in
 * this excerpt.  They fill the ts_rdtsc32 slots of tsc_selector[0], [1]
 * and [2] respectively.
 */
98 rdtsc32_mb_lfence(void)
105 rdtsc32_mb_mfence(void)
112 rdtsc32_mb_none(void)
/*
 * A pair of TSC reader entry points that are selected together.  Both
 * return u_int; ts_rdtsc32 takes no arguments, while ts_rdtsc_low takes
 * the timehands (the rdtsc_low()/rdtscp_low() readers use its
 * th_x86_shift field).
 */
123 struct tsc_selector_tag {
124 u_int (*ts_rdtsc32)(void);
125 u_int (*ts_rdtsc_low)(const struct vdso_timehands *);
/*
 * TSC reader pairs, indexed by the return value of tsc_selector_idx().
 * The __vdso_gettc_rdtsc_low and __vdso_gettc_rdtsc32 resolvers each pick
 * one member of the selected pair.
 */
128 static const struct tsc_selector_tag tsc_selector[] = {
129 [0] = { /* Intel, LFENCE */
130 .ts_rdtsc32 = rdtsc32_mb_lfence,
131 .ts_rdtsc_low = rdtsc_low_mb_lfence,
133 [1] = { /* AMD, MFENCE */
134 .ts_rdtsc32 = rdtsc32_mb_mfence,
135 .ts_rdtsc_low = rdtsc_low_mb_mfence,
137 [2] = { /* No SSE2 */
138 .ts_rdtsc32 = rdtsc32_mb_none,
139 .ts_rdtsc_low = rdtsc_low_mb_none,
/* RDTSCP-based pair; its designator line is not visible in this excerpt. */
142 .ts_rdtsc32 = rdtscp32_,
143 .ts_rdtsc_low = rdtscp_low,
/*
 * Choose the tsc_selector[] index for this CPU, using the CPUID feature
 * word passed in, the CPU vendor string, and the extended CPUID leaves
 * queried here.
 */
148 tsc_selector_idx(u_int cpu_feature)
150 u_int amd_feature, cpu_exthigh, cpu_id, p[4], v[3];
/* 12-character vendor strings that make amd_cpu true. */
151 static const char amd_id[] = "AuthenticAMD";
152 static const char hygon_id[] = "HygonGenuine";
155 if (cpu_feature == 0)
156 return (2); /* should not happen due to RDTSC */
/* sizeof() - 1 leaves the terminating NUL out of the comparison. */
162 amd_cpu = memcmp(v, amd_id, sizeof(amd_id) - 1) == 0 ||
163 memcmp(v, hygon_id, sizeof(hygon_id) - 1) == 0;
168 if (cpu_feature != 0) {
/* Leaf 0x80000001 is queried only when cpu_exthigh says it exists. */
169 do_cpuid(0x80000000, p);
174 if (cpu_exthigh >= 0x80000001) {
175 do_cpuid(0x80000001, p);
/*
 * NOTE(review): the statements selected by the next two tests are not
 * visible in this excerpt; confirm which indices they return.
 */
181 if ((amd_feature & AMDID_RDTSCP) != 0)
183 if ((cpu_feature & CPUID_SSE2) == 0)
/* Fall-through result: index 1 for AMD/Hygon vendors, index 0 otherwise. */
185 return (amd_cpu ? 1 : 0);
/*
 * Resolver body for __vdso_gettc_rdtsc_low: returns the ts_rdtsc_low
 * pointer of the tsc_selector[] entry chosen by tsc_selector_idx().
 * NOTE(review): cpu_feature is not declared in the visible lines --
 * presumably it is supplied by the DEFINE_UIFUNC macro; confirm.
 */
188 DEFINE_UIFUNC(static, u_int, __vdso_gettc_rdtsc_low,
189 (const struct vdso_timehands *th))
191 return (tsc_selector[tsc_selector_idx(cpu_feature)].ts_rdtsc_low);
/*
 * Resolver body for __vdso_gettc_rdtsc32: returns the ts_rdtsc32 pointer
 * of the tsc_selector[] entry chosen by tsc_selector_idx().
 * NOTE(review): cpu_feature is not declared in the visible lines --
 * presumably it is supplied by the DEFINE_UIFUNC macro; confirm.
 */
194 DEFINE_UIFUNC(static, u_int, __vdso_gettc_rdtsc32, (void))
196 return (tsc_selector[tsc_selector_idx(cpu_feature)].ts_rdtsc32);
/* Number of slots in hpet_dev_map[]; __vdso_gettc() range-checks against it. */
199 #define HPET_DEV_MAP_MAX 10
/*
 * One mapping slot per HPET unit.  __vdso_init_hpet() stores either the
 * mmap() result or MAP_FAILED here, and __vdso_gettc() refuses to read the
 * counter through a slot holding MAP_FAILED.
 */
200 static volatile char *hpet_dev_map[HPET_DEV_MAP_MAX];
/*
 * Open the HPET device node, map PAGE_SIZE bytes of it read-only and
 * publish the mapping in hpet_dev_map[u].
 *
 * The slot is only ever updated by compare-and-set against the value
 * sampled into old_map: if the process is in capability mode or the open
 * fails, MAP_FAILED is stored; otherwise the mmap() result is stored.  If
 * that compare-and-set fails and the mapping is valid, the mapping is
 * unmapped again.
 */
203 __vdso_init_hpet(uint32_t u)
205 static const char devprefix[] = "/dev/hpet";
206 char devname[64], *c, *c1, t;
207 volatile char *new_map, *old_map;
/* c and c1 both start just past the copied prefix. */
212 c1 = c = stpcpy(devname, devprefix);
/*
 * Decimal digits are appended least-significant first.
 * NOTE(review): u1's initialisation is not visible here, presumably from u.
 */
215 *c++ = u1 % 10 + '0';
/*
 * Walk inward from both ends of the digit run.
 * NOTE(review): the loop body is not visible; presumably it swaps via t to
 * reverse the digits -- confirm.
 */
219 for (c--; c1 != c; c1++, c--) {
/* Sample the slot; this is the expected value for the later compare-and-set. */
225 old_map = hpet_dev_map[u];
230 * Explicitly check for the capability mode to avoid
231 * triggering trap_enocap on the device open by absolute path.
233 if ((cap_getmode(&mode) == 0 && mode != 0) ||
234 (fd = _open(devname, O_RDONLY | O_CLOEXEC)) == -1) {
235 /* Prevent the caller from re-entering. */
236 atomic_cmpset_rel_ptr((volatile uintptr_t *)&hpet_dev_map[u],
237 (uintptr_t)old_map, (uintptr_t)MAP_FAILED);
241 new_map = mmap(NULL, PAGE_SIZE, PROT_READ, MAP_SHARED, fd, 0);
243 if (atomic_cmpset_rel_ptr((volatile uintptr_t *)&hpet_dev_map[u],
244 (uintptr_t)old_map, (uintptr_t)new_map) == 0 &&
245 new_map != MAP_FAILED)
246 munmap((void *)new_map, PAGE_SIZE);
251 #define HYPERV_REFTSC_DEVPATH "/dev/" HYPERV_REFTSC_DEVNAME
255 * We use 'NULL' for this variable to indicate that initialization
256 * is required. And if this variable is 'MAP_FAILED', then Hyper-V
257 * reference TSC cannot be used, e.g. in a misconfigured jail.
259 static struct hyperv_reftsc *hyperv_ref_tsc;
/*
 * Open HYPERV_REFTSC_DEVPATH and map sizeof(*hyperv_ref_tsc) bytes of it
 * with PROT_READ, storing the mmap() result in hyperv_ref_tsc.  The
 * visible failure handling stores MAP_FAILED in hyperv_ref_tsc instead.
 */
262 __vdso_init_hyperv_tsc(void)
/*
 * Capability-mode check made before the open by absolute path.
 * NOTE(review): the statement it selects is not visible in this excerpt.
 */
267 if (cap_getmode(&mode) == 0 && mode != 0)
270 fd = _open(HYPERV_REFTSC_DEVPATH, O_RDONLY | O_CLOEXEC);
273 hyperv_ref_tsc = mmap(NULL, sizeof(*hyperv_ref_tsc), PROT_READ,
279 /* Prevent the caller from re-entering. */
280 hyperv_ref_tsc = MAP_FAILED;
/*
 * Compute a timecounter value from the Hyper-V reference TSC page
 * tsc_ref.  The page is read under its sequence number: tsc_seq is sampled
 * with acquire semantics, the fields are read, and tsc_seq is compared
 * against the sample afterwards.  The loop ends once tsc_seq reads as 0.
 * NOTE(review): the TSC read, the store to *tc and the return statements
 * are not visible in this excerpt.
 */
284 __vdso_hyperv_tsc(struct hyperv_reftsc *tsc_ref, u_int *tc)
286 uint64_t disc, ret, tsc, scale;
290 while ((seq = atomic_load_acq_int(&tsc_ref->tsc_seq)) != 0) {
291 scale = tsc_ref->tsc_scale;
292 ofs = tsc_ref->tsc_ofs;
294 mfence(); /* XXXKIB */
/*
 * MULQ leaves the 128-bit product of tsc and scale in %rdx:%rax; the
 * high half is kept in ret, the low half lands in disc and is unused
 * in the visible lines.
 */
297 /* ret = ((tsc * scale) >> 64) + ofs */
298 __asm__ __volatile__ ("mulq %3" :
299 "=d" (ret), "=a" (disc) :
300 "a" (tsc), "r" (scale));
/* Order the field reads above before the sequence re-check below. */
303 atomic_thread_fence_acq();
304 if (tsc_ref->tsc_seq == seq) {
309 /* Sequence changed; re-sync. */
314 #endif /* WANT_HYPERV */
/*
 * Array of pvclock time-info records, mapped by
 * __vdso_init_pvclock_timeinfos().  __vdso_gettc() treats NULL as "not yet
 * initialised" and MAP_FAILED as "unusable".
 */
316 static struct pvclock_vcpu_time_info *pvclock_timeinfos;
/*
 * Compute a timecounter value from the pvclock time-info page(s) and
 * store it in *tc.  Each attempt samples a record's version with acquire
 * semantics, derives a nanosecond value from the TSC delta, and is
 * retried while the version is odd or has changed.
 */
319 __vdso_pvclock_gettc(const struct vdso_timehands *th, u_int *tc)
321 uint64_t delta, ns, tsc;
322 struct pvclock_vcpu_time_info *ti;
323 uint32_t cpuid_ti, cpuid_tsc, version;
/* Record 0 supplies the flags tested against th_x86_pvc_stable_mask. */
327 ti = &pvclock_timeinfos[0];
328 version = atomic_load_acq_32(&ti->version);
329 stable = (ti->flags & th->th_x86_pvc_stable_mask) != 0;
/*
 * The aux value returned by rdtscp_aux() indexes pvclock_timeinfos[]; a
 * second rdtscp_aux() supplies the TSC together with a second aux value
 * that the loop condition compares against the first.
 * NOTE(review): the branch structure around these lines is not visible.
 */
333 (void)rdtscp_aux(&cpuid_ti);
334 ti = &pvclock_timeinfos[cpuid_ti];
335 version = atomic_load_acq_32(&ti->version);
336 tsc = rdtscp_aux(&cpuid_tsc);
/* ns = system_time + TSC delta scaled by the record's multiplier and shift. */
338 delta = tsc - ti->tsc_timestamp;
339 ns = ti->system_time + pvclock_scale_delta(delta,
340 ti->tsc_to_system_mul, ti->tsc_shift);
/* Order the record reads above before the version re-check below. */
341 atomic_thread_fence_acq();
/*
 * Retry on an odd version, a changed version, or -- when !stable -- a
 * mismatch between the two aux values.
 */
342 } while ((ti->version & 1) != 0 || ti->version != version ||
343 (!stable && cpuid_ti != cpuid_tsc));
/* Store the larger of ns and th_x86_pvc_last_systime. */
344 *tc = MAX(ns, th->th_x86_pvc_last_systime);
/*
 * Map the pvclock device ("/dev/" PVCLOCK_CDEVNAME) read-only, sized for
 * AT_NCPUS records, and publish the mapping in pvclock_timeinfos.
 * Publication is a compare-and-set from NULL; if it fails and the local
 * mapping is valid, the local mapping is unmapped again.
 */
349 __vdso_init_pvclock_timeinfos(void)
351 struct pvclock_vcpu_time_info *timeinfos;
/* Default result, overwritten only by the mmap() call below. */
356 timeinfos = MAP_FAILED;
/*
 * Failure conditions: no AT_NCPUS value, capability mode, or open failure.
 * NOTE(review): the statement taken on failure is not visible; presumably
 * it skips to the publish step so that MAP_FAILED is stored -- confirm.
 */
357 if (_elf_aux_info(AT_NCPUS, &ncpus, sizeof(ncpus)) != 0 ||
358 (cap_getmode(&mode) == 0 && mode != 0) ||
359 (fd = _open("/dev/" PVCLOCK_CDEVNAME, O_RDONLY | O_CLOEXEC)) < 0)
/* One record per CPU reported by AT_NCPUS. */
361 len = ncpus * sizeof(*pvclock_timeinfos);
362 timeinfos = mmap(NULL, len, PROT_READ, MAP_SHARED, fd, 0);
/* Publish only if the global is still NULL; otherwise drop our mapping. */
365 if (atomic_cmpset_rel_ptr(
366 (volatile uintptr_t *)&pvclock_timeinfos, (uintptr_t)NULL,
367 (uintptr_t)timeinfos) == 0 && timeinfos != MAP_FAILED)
368 (void)munmap((void *)timeinfos, len);
/*
 * Read the hardware timecounter selected by th->th_algo and store the
 * value in *tc.  Declared weak.
 * NOTE(review): several return statements and the conditions guarding
 * some calls are not visible in this excerpt.
 */
371 #pragma weak __vdso_gettc
373 __vdso_gettc(const struct vdso_timehands *th, u_int *tc)
378 switch (th->th_algo) {
/* TSC: the shifted reader when th_x86_shift > 0, else the 32-bit reader. */
379 case VDSO_TH_ALGO_X86_TSC:
380 *tc = th->th_x86_shift > 0 ? __vdso_gettc_rdtsc_low(th) :
381 __vdso_gettc_rdtsc32();
/*
 * HPET: range-check the unit index, acquire-load its mapping slot, run
 * __vdso_init_hpet() and reload, then read the 32-bit counter at offset
 * HPET_MAIN_COUNTER unless the slot holds MAP_FAILED.
 */
383 case VDSO_TH_ALGO_X86_HPET:
384 idx = th->th_x86_hpet_idx;
385 if (idx >= HPET_DEV_MAP_MAX)
387 map = (volatile char *)atomic_load_acq_ptr(
388 (volatile uintptr_t *)&hpet_dev_map[idx]);
390 __vdso_init_hpet(idx);
391 map = (volatile char *)atomic_load_acq_ptr(
392 (volatile uintptr_t *)&hpet_dev_map[idx]);
394 if (map == MAP_FAILED)
396 *tc = *(volatile uint32_t *)(map + HPET_MAIN_COUNTER);
/* Hyper-V reference TSC: initialise on NULL, give up on MAP_FAILED. */
399 case VDSO_TH_ALGO_X86_HVTSC:
400 if (hyperv_ref_tsc == NULL)
401 __vdso_init_hyperv_tsc();
402 if (hyperv_ref_tsc == MAP_FAILED)
404 return (__vdso_hyperv_tsc(hyperv_ref_tsc, tc));
/* pvclock: initialise on NULL, give up on MAP_FAILED. */
406 case VDSO_TH_ALGO_X86_PVCLK:
407 if (pvclock_timeinfos == NULL)
408 __vdso_init_pvclock_timeinfos();
409 if (pvclock_timeinfos == MAP_FAILED)
411 return (__vdso_pvclock_gettc(th, tc));
/*
 * Fetch the AT_TIMEKEEP auxiliary-vector entry into *tk and return the
 * result of _elf_aux_info().  Declared weak.
 */
417 #pragma weak __vdso_gettimekeep
419 __vdso_gettimekeep(struct vdso_timekeep **tk)
422 return (_elf_aux_info(AT_TIMEKEEP, tk, sizeof(*tk)));