/* sys/x86/x86/tsc.c — x86 Time Stamp Counter timecounter driver. */
1 /*-
2  * SPDX-License-Identifier: BSD-2-Clause-FreeBSD
3  *
4  * Copyright (c) 1998-2003 Poul-Henning Kamp
5  * All rights reserved.
6  *
7  * Redistribution and use in source and binary forms, with or without
8  * modification, are permitted provided that the following conditions
9  * are met:
10  * 1. Redistributions of source code must retain the above copyright
11  *    notice, this list of conditions and the following disclaimer.
12  * 2. Redistributions in binary form must reproduce the above copyright
13  *    notice, this list of conditions and the following disclaimer in the
14  *    documentation and/or other materials provided with the distribution.
15  *
16  * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
17  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
18  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
19  * ARE DISCLAIMED.  IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
20  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
21  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
22  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
23  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
24  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
25  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
26  * SUCH DAMAGE.
27  */
28
29 #include <sys/cdefs.h>
30 __FBSDID("$FreeBSD$");
31
32 #include "opt_clock.h"
33
34 #include <sys/param.h>
35 #include <sys/bus.h>
36 #include <sys/cpu.h>
37 #include <sys/limits.h>
38 #include <sys/malloc.h>
39 #include <sys/systm.h>
40 #include <sys/sysctl.h>
41 #include <sys/time.h>
42 #include <sys/timetc.h>
43 #include <sys/kernel.h>
44 #include <sys/power.h>
45 #include <sys/smp.h>
46 #include <sys/vdso.h>
47 #include <machine/clock.h>
48 #include <machine/cputypes.h>
49 #include <machine/md_var.h>
50 #include <machine/specialreg.h>
51 #include <x86/vmware.h>
52 #include <dev/acpica/acpi_hpet.h>
53 #include <contrib/dev/acpica/include/acpi.h>
54
55 #include "cpufreq_if.h"
56
/* TSC frequency in Hz; 0 until probed (see probe_tsc_freq()). */
uint64_t        tsc_freq;
/* Non-zero if the TSC rate is believed not to vary with P-states. */
int             tsc_is_invariant;
/* Non-zero if the MPERF/APERF MSRs were verified to actually count. */
int             tsc_perf_stat;

/* Tags returned by EVENTHANDLER_REGISTER() in init_TSC(). */
static eventhandler_tag tsc_levels_tag, tsc_pre_tag, tsc_post_tag;

SYSCTL_INT(_kern_timecounter, OID_AUTO, invariant_tsc, CTLFLAG_RDTUN,
    &tsc_is_invariant, 0, "Indicates whether the TSC is P-state invariant");

#ifdef SMP
/* Non-zero if the TSCs passed (or are assumed to pass) the SMP sync test. */
int     smp_tsc;
SYSCTL_INT(_kern_timecounter, OID_AUTO, smp_tsc, CTLFLAG_RDTUN, &smp_tsc, 0,
    "Indicates whether the TSC is safe to use in SMP mode");

/* Number of TSC write-back adjustment rounds allowed in test_tsc(). */
int     smp_tsc_adjust = 0;
SYSCTL_INT(_kern_timecounter, OID_AUTO, smp_tsc_adjust, CTLFLAG_RDTUN,
    &smp_tsc_adjust, 0, "Try to adjust TSC on APs to match BSP");
#endif

/* Applied as "max_freq >>= tsc_shift" in init_TSC_tc() to lower the cap. */
static int      tsc_shift = 1;
SYSCTL_INT(_kern_timecounter, OID_AUTO, tsc_shift, CTLFLAG_RDTUN,
    &tsc_shift, 0, "Shift to pre-apply for the maximum TSC frequency");

/* Tunable: completely disable use of the TSC. */
static int      tsc_disabled;
SYSCTL_INT(_machdep, OID_AUTO, disable_tsc, CTLFLAG_RDTUN, &tsc_disabled, 0,
    "Disable x86 Time Stamp Counter");

/* Tunable: skip the DELAY()-based calibration loop. */
static int      tsc_skip_calibration;
SYSCTL_INT(_machdep, OID_AUTO, disable_tsc_calibration, CTLFLAG_RDTUN |
    CTLFLAG_NOFETCH, &tsc_skip_calibration, 0,
    "Disable TSC frequency calibration");

/* Forward declarations for the cpufreq event handlers and readers below. */
static void tsc_freq_changed(void *arg, const struct cf_level *level,
    int status);
static void tsc_freq_changing(void *arg, const struct cf_level *level,
    int *status);
static unsigned tsc_get_timecount(struct timecounter *tc);
static inline unsigned tsc_get_timecount_low(struct timecounter *tc);
static unsigned tsc_get_timecount_lfence(struct timecounter *tc);
static unsigned tsc_get_timecount_low_lfence(struct timecounter *tc);
static unsigned tsc_get_timecount_mfence(struct timecounter *tc);
static unsigned tsc_get_timecount_low_mfence(struct timecounter *tc);
static void tsc_levels_changed(void *arg, int unit);
static uint32_t x86_tsc_vdso_timehands(struct vdso_timehands *vdso_th,
    struct timecounter *tc);
#ifdef COMPAT_FREEBSD32
static uint32_t x86_tsc_vdso_timehands32(struct vdso_timehands32 *vdso_th32,
    struct timecounter *tc);
#endif

/*
 * The TSC timecounter.  tc_get_timecount, tc_quality, tc_frequency and
 * tc_priv (the drop-low-bits shift) are adjusted at runtime by
 * probe_tsc_freq() and init_TSC_tc().
 */
static struct timecounter tsc_timecounter = {
        .tc_get_timecount =             tsc_get_timecount,
        .tc_counter_mask =              ~0u,
        .tc_name =                      "TSC",
        .tc_quality =                   800,    /* adjusted in code */
        .tc_fill_vdso_timehands =       x86_tsc_vdso_timehands,
#ifdef COMPAT_FREEBSD32
        .tc_fill_vdso_timehands32 =     x86_tsc_vdso_timehands32,
#endif
};
117
118 static void
119 tsc_freq_vmware(void)
120 {
121         u_int regs[4];
122
123         if (hv_high >= 0x40000010) {
124                 do_cpuid(0x40000010, regs);
125                 tsc_freq = regs[0] * 1000;
126         } else {
127                 vmware_hvcall(VMW_HVCMD_GETHZ, regs);
128                 if (regs[1] != UINT_MAX)
129                         tsc_freq = regs[0] | ((uint64_t)regs[1] << 32);
130         }
131         tsc_is_invariant = 1;
132 }
133
134 /*
135  * Calculate TSC frequency using information from the CPUID leaf 0x15
136  * 'Time Stamp Counter and Nominal Core Crystal Clock'.  It should be
137  * an improvement over the parsing of the CPU model name in
138  * tsc_freq_intel(), when available.
139  */
140 static bool
141 tsc_freq_cpuid(void)
142 {
143         u_int regs[4];
144
145         if (cpu_high < 0x15)
146                 return (false);
147         do_cpuid(0x15, regs);
148         if (regs[0] == 0 || regs[1] == 0 || regs[2] == 0)
149                 return (false);
150         tsc_freq = (uint64_t)regs[2] * regs[1] / regs[0];
151         return (true);
152 }
153
154 static void
155 tsc_freq_intel(void)
156 {
157         char brand[48];
158         u_int regs[4];
159         uint64_t freq;
160         char *p;
161         u_int i;
162
163         /*
164          * Intel Processor Identification and the CPUID Instruction
165          * Application Note 485.
166          * http://www.intel.com/assets/pdf/appnote/241618.pdf
167          */
168         if (cpu_exthigh >= 0x80000004) {
169                 p = brand;
170                 for (i = 0x80000002; i < 0x80000005; i++) {
171                         do_cpuid(i, regs);
172                         memcpy(p, regs, sizeof(regs));
173                         p += sizeof(regs);
174                 }
175                 p = NULL;
176                 for (i = 0; i < sizeof(brand) - 1; i++)
177                         if (brand[i] == 'H' && brand[i + 1] == 'z')
178                                 p = brand + i;
179                 if (p != NULL) {
180                         p -= 5;
181                         switch (p[4]) {
182                         case 'M':
183                                 i = 1;
184                                 break;
185                         case 'G':
186                                 i = 1000;
187                                 break;
188                         case 'T':
189                                 i = 1000000;
190                                 break;
191                         default:
192                                 return;
193                         }
194 #define C2D(c)  ((c) - '0')
195                         if (p[1] == '.') {
196                                 freq = C2D(p[0]) * 1000;
197                                 freq += C2D(p[2]) * 100;
198                                 freq += C2D(p[3]) * 10;
199                                 freq *= i * 1000;
200                         } else {
201                                 freq = C2D(p[0]) * 1000;
202                                 freq += C2D(p[1]) * 100;
203                                 freq += C2D(p[2]) * 10;
204                                 freq += C2D(p[3]);
205                                 freq *= i * 1000000;
206                         }
207 #undef C2D
208                         tsc_freq = freq;
209                 }
210         }
211 }
212
/*
 * Probe the TSC frequency and related properties at boot: verify the
 * MPERF/APERF MSRs, defer to the hypervisor on VMware, decide per
 * vendor whether the TSC is invariant and which fence the timecounter
 * readers need, and finally obtain the frequency from CPUID leaf 0x15,
 * the brand string, or a one-second DELAY() calibration.
 */
static void
probe_tsc_freq(void)
{
        u_int regs[4];
        uint64_t tsc1, tsc2;
        uint16_t bootflags;

        if (cpu_high >= 6) {
                do_cpuid(6, regs);
                if ((regs[2] & CPUID_PERF_STAT) != 0) {
                        /*
                         * XXX Some emulators expose host CPUID without actual
                         * support for these MSRs.  We must test whether they
                         * really work.
                         */
                        wrmsr(MSR_MPERF, 0);
                        wrmsr(MSR_APERF, 0);
                        DELAY(10);
                        if (rdmsr(MSR_MPERF) > 0 && rdmsr(MSR_APERF) > 0)
                                tsc_perf_stat = 1;
                }
        }

        /* On VMware the hypervisor is authoritative for the frequency. */
        if (vm_guest == VM_GUEST_VMWARE) {
                tsc_freq_vmware();
                return;
        }

        /*
         * Per-vendor invariance detection; also select the serializing
         * fence used before RDTSC (MFENCE on AMD, LFENCE on Intel and
         * Centaur) when SSE2 is present.
         */
        switch (cpu_vendor_id) {
        case CPU_VENDOR_AMD:
                if ((amd_pminfo & AMDPM_TSC_INVARIANT) != 0 ||
                    (vm_guest == VM_GUEST_NO &&
                    CPUID_TO_FAMILY(cpu_id) >= 0x10))
                        tsc_is_invariant = 1;
                if (cpu_feature & CPUID_SSE2) {
                        tsc_timecounter.tc_get_timecount =
                            tsc_get_timecount_mfence;
                }
                break;
        case CPU_VENDOR_INTEL:
                if ((amd_pminfo & AMDPM_TSC_INVARIANT) != 0 ||
                    (vm_guest == VM_GUEST_NO &&
                    ((CPUID_TO_FAMILY(cpu_id) == 0x6 &&
                    CPUID_TO_MODEL(cpu_id) >= 0xe) ||
                    (CPUID_TO_FAMILY(cpu_id) == 0xf &&
                    CPUID_TO_MODEL(cpu_id) >= 0x3))))
                        tsc_is_invariant = 1;
                if (cpu_feature & CPUID_SSE2) {
                        tsc_timecounter.tc_get_timecount =
                            tsc_get_timecount_lfence;
                }
                break;
        case CPU_VENDOR_CENTAUR:
                if (vm_guest == VM_GUEST_NO &&
                    CPUID_TO_FAMILY(cpu_id) == 0x6 &&
                    CPUID_TO_MODEL(cpu_id) >= 0xf &&
                    (rdmsr(0x1203) & 0x100000000ULL) == 0)
                        tsc_is_invariant = 1;
                if (cpu_feature & CPUID_SSE2) {
                        tsc_timecounter.tc_get_timecount =
                            tsc_get_timecount_lfence;
                }
                break;
        }

        if (!TUNABLE_INT_FETCH("machdep.disable_tsc_calibration",
            &tsc_skip_calibration)) {
                /*
                 * User did not give the order about calibration.
                 * If he did, we do not try to guess.
                 *
                 * Otherwise, if ACPI FADT reports that the platform
                 * is legacy-free and CPUID provides TSC frequency,
                 * use it.  The calibration could fail anyway since
                 * ISA timer can be absent or power gated.
                 */
                if (acpi_get_fadt_bootflags(&bootflags) &&
                    (bootflags & ACPI_FADT_LEGACY_DEVICES) == 0 &&
                    tsc_freq_cpuid()) {
                        printf("Skipping TSC calibration since no legacy "
                            "devices reported by FADT and CPUID works\n");
                        tsc_skip_calibration = 1;
                }
        }
        if (tsc_skip_calibration) {
                /* Prefer CPUID leaf 0x15; else fall back to brand string. */
                if (tsc_freq_cpuid())
                        ;
                else if (cpu_vendor_id == CPU_VENDOR_INTEL)
                        tsc_freq_intel();
        } else {
                if (bootverbose)
                        printf("Calibrating TSC clock ... ");
                /* Count TSC ticks across a one-second DELAY(). */
                tsc1 = rdtsc();
                DELAY(1000000);
                tsc2 = rdtsc();
                tsc_freq = tsc2 - tsc1;
        }
        if (bootverbose)
                printf("TSC clock: %ju Hz\n", (intmax_t)tsc_freq);
}
313
/*
 * Early TSC initialization: bail on CPUs without a (working) TSC, probe
 * the frequency, register the boot-time cputicker, and hook the cpufreq
 * event handlers when the TSC is not P-state invariant.
 */
void
init_TSC(void)
{

        if ((cpu_feature & CPUID_TSC) == 0 || tsc_disabled)
                return;

#ifdef __i386__
        /* The TSC is known to be broken on certain CPUs. */
        switch (cpu_vendor_id) {
        case CPU_VENDOR_AMD:
                switch (cpu_id & 0xFF0) {
                case 0x500:
                        /* K5 Model 0 */
                        return;
                }
                break;
        case CPU_VENDOR_CENTAUR:
                switch (cpu_id & 0xff0) {
                case 0x540:
                        /*
                         * http://www.centtech.com/c6_data_sheet.pdf
                         *
                         * I-12 RDTSC may return incoherent values in EDX:EAX
                         * I-13 RDTSC hangs when certain event counters are used
                         */
                        return;
                }
                break;
        case CPU_VENDOR_NSC:
                switch (cpu_id & 0xff0) {
                case 0x540:
                        if ((cpu_id & CPUID_STEPPING) == 0)
                                return;
                        break;
                }
                break;
        }
#endif

        probe_tsc_freq();

        /*
         * Inform CPU accounting about our boot-time clock rate.  This will
         * be updated if someone loads a cpufreq driver after boot that
         * discovers a new max frequency.
         */
        if (tsc_freq != 0)
                set_cputicker(rdtsc, tsc_freq, !tsc_is_invariant);

        /* An invariant TSC needs no frequency-change tracking. */
        if (tsc_is_invariant)
                return;

        /* Register to find out about changes in CPU frequency. */
        tsc_pre_tag = EVENTHANDLER_REGISTER(cpufreq_pre_change,
            tsc_freq_changing, NULL, EVENTHANDLER_PRI_FIRST);
        tsc_post_tag = EVENTHANDLER_REGISTER(cpufreq_post_change,
            tsc_freq_changed, NULL, EVENTHANDLER_PRI_FIRST);
        tsc_levels_tag = EVENTHANDLER_REGISTER(cpufreq_levels_changed,
            tsc_levels_changed, NULL, EVENTHANDLER_PRI_ANY);
}
375
376 #ifdef SMP
377
/*
 * RDTSC is not a serializing instruction, and does not drain
 * instruction stream, so we need to drain the stream before executing
 * it.  It could be fixed by use of RDTSCP, except the instruction is
 * not available everywhere.
 *
 * Use CPUID for draining in the boot-time SMP constistency test.  The
 * timecounters use MFENCE for AMD CPUs, and LFENCE for others (Intel
 * and VIA) when SSE2 is present, and nothing on older machines which
 * also do not issue RDTSC prematurely.  There, testing for SSE2 and
 * vendor is too cumbersome, and we learn about TSC presence from CPUID.
 *
 * Do not use do_cpuid(), since we do not need CPUID results, which
 * have to be written into memory with do_cpuid().
 *
 * Each generated tsc_read_<x>() stores its sample into slot
 * tsc[cpu * 3 + x], i.e. three consecutive samples per CPU per round.
 */
#define TSC_READ(x)                                                     \
static void                                                             \
tsc_read_##x(void *arg)                                                 \
{                                                                       \
        uint64_t *tsc = arg;                                            \
        u_int cpu = PCPU_GET(cpuid);                                    \
                                                                        \
        __asm __volatile("cpuid" : : : "eax", "ebx", "ecx", "edx");     \
        tsc[cpu * 3 + x] = rdtsc();                                     \
}
TSC_READ(0)
TSC_READ(1)
TSC_READ(2)
#undef TSC_READ
407
408 #define N       1000
409
410 static void
411 comp_smp_tsc(void *arg)
412 {
413         uint64_t *tsc;
414         int64_t d1, d2;
415         u_int cpu = PCPU_GET(cpuid);
416         u_int i, j, size;
417
418         size = (mp_maxid + 1) * 3;
419         for (i = 0, tsc = arg; i < N; i++, tsc += size)
420                 CPU_FOREACH(j) {
421                         if (j == cpu)
422                                 continue;
423                         d1 = tsc[cpu * 3 + 1] - tsc[j * 3];
424                         d2 = tsc[cpu * 3 + 2] - tsc[j * 3 + 1];
425                         if (d1 <= 0 || d2 <= 0) {
426                                 smp_tsc = 0;
427                                 return;
428                         }
429                 }
430 }
431
/*
 * Executed on every non-boot CPU: estimate this CPU's TSC offset
 * relative to the first CPU from the gathered samples, and apply it.
 * "min" accumulates the largest lower bound and "max" the smallest
 * upper bound on the offset; if the bounds do not cross, the midpoint
 * is added to this CPU's TSC by a read-modify-write of MSR 0x10 (the
 * time-stamp counter MSR) in the inline assembly below.
 */
static void
adj_smp_tsc(void *arg)
{
        uint64_t *tsc;
        int64_t d, min, max;
        u_int cpu = PCPU_GET(cpuid);
        u_int first, i, size;

        first = CPU_FIRST();
        if (cpu == first)
                return;
        min = INT64_MIN;
        max = INT64_MAX;
        size = (mp_maxid + 1) * 3;
        for (i = 0, tsc = arg; i < N; i++, tsc += size) {
                /* Lower bounds: first CPU's sample minus our later sample. */
                d = tsc[first * 3] - tsc[cpu * 3 + 1];
                if (d > min)
                        min = d;
                d = tsc[first * 3 + 1] - tsc[cpu * 3 + 2];
                if (d > min)
                        min = d;
                /* Upper bounds: first CPU's later sample minus our sample. */
                d = tsc[first * 3 + 1] - tsc[cpu * 3];
                if (d < max)
                        max = d;
                d = tsc[first * 3 + 2] - tsc[cpu * 3 + 1];
                if (d < max)
                        max = d;
        }
        /* Inconsistent bounds: no usable offset estimate, leave TSC alone. */
        if (min > max)
                return;
        d = min / 2 + max / 2;
        /* tsc_msr += d, as a 64-bit add of EDI:ESI into EDX:EAX. */
        __asm __volatile (
                "movl $0x10, %%ecx\n\t"
                "rdmsr\n\t"
                "addl %%edi, %%eax\n\t"
                "adcl %%esi, %%edx\n\t"
                "wrmsr\n"
                : /* No output */
                : "D" ((uint32_t)d), "S" ((uint32_t)(d >> 32))
                : "ax", "cx", "dx", "cc"
        );
}
474
/*
 * Boot/resume SMP TSC synchronization test.  Gathers N rounds of
 * per-CPU TSC samples via rendezvous, checks them for mutual
 * monotonicity, and optionally retries up to adj_max_count times after
 * adjusting the APs' TSCs toward the BSP.  Returns a timecounter
 * quality: -100 when the TSC is unusable (failed test, VM guest, or
 * neither smp_tsc nor invariance set), 800 for a passing invariant TSC,
 * or 1000 for known-good single-source platforms.
 */
static int
test_tsc(int adj_max_count)
{
        uint64_t *data, *tsc;
        u_int i, size, adj;

        if ((!smp_tsc && !tsc_is_invariant) || vm_guest)
                return (-100);
        size = (mp_maxid + 1) * 3;
        data = malloc(sizeof(*data) * size * N, M_TEMP, M_WAITOK);
        adj = 0;
retry:
        /* Round i: all CPUs take samples 0, 1 and 2 in lockstep. */
        for (i = 0, tsc = data; i < N; i++, tsc += size)
                smp_rendezvous(tsc_read_0, tsc_read_1, tsc_read_2, tsc);
        smp_tsc = 1;    /* XXX */
        smp_rendezvous(smp_no_rendezvous_barrier, comp_smp_tsc,
            smp_no_rendezvous_barrier, data);
        if (!smp_tsc && adj < adj_max_count) {
                adj++;
                smp_rendezvous(smp_no_rendezvous_barrier, adj_smp_tsc,
                    smp_no_rendezvous_barrier, data);
                goto retry;
        }
        free(data, M_TEMP);
        if (bootverbose)
                printf("SMP: %sed TSC synchronization test%s\n",
                    smp_tsc ? "pass" : "fail", 
                    adj > 0 ? " after adjustment" : "");
        if (smp_tsc && tsc_is_invariant) {
                switch (cpu_vendor_id) {
                case CPU_VENDOR_AMD:
                        /*
                         * Starting with Family 15h processors, TSC clock
                         * source is in the north bridge.  Check whether
                         * we have a single-socket/multi-core platform.
                         * XXX Need more work for complex cases.
                         */
                        if (CPUID_TO_FAMILY(cpu_id) < 0x15 ||
                            (amd_feature2 & AMDID2_CMP) == 0 ||
                            smp_cpus > (cpu_procinfo2 & AMDID_CMP_CORES) + 1)
                                break;
                        return (1000);
                case CPU_VENDOR_INTEL:
                        /*
                         * XXX Assume Intel platforms have synchronized TSCs.
                         */
                        return (1000);
                }
                return (800);
        }
        return (-100);
}
527
528 #undef N
529
530 #endif /* SMP */
531
/*
 * Late (SI_SUB_SMP) initialization of the TSC timecounter: decide the
 * final quality, pick the reader variant (fence / low-bits-dropping),
 * and register the timecounter when a frequency is known.
 */
static void
init_TSC_tc(void)
{
        uint64_t max_freq;
        int shift;

        if ((cpu_feature & CPUID_TSC) == 0 || tsc_disabled)
                return;

        /*
         * Limit timecounter frequency to fit in an int and prevent it from
         * overflowing too fast.
         */
        max_freq = UINT_MAX;

        /*
         * We can not use the TSC if we support APM.  Precise timekeeping
         * on an APM'ed machine is at best a fools pursuit, since 
         * any and all of the time spent in various SMM code can't 
         * be reliably accounted for.  Reading the RTC is your only
         * source of reliable time info.  The i8254 loses too, of course,
         * but we need to have some kind of time...
         * We don't know at this point whether APM is going to be used
         * or not, nor when it might be activated.  Play it safe.
         */
        if (power_pm_get_type() == POWER_PM_TYPE_APM) {
                tsc_timecounter.tc_quality = -1000;
                if (bootverbose)
                        printf("TSC timecounter disabled: APM enabled.\n");
                /* Note: jumping to init skips the tsc_shift cap below. */
                goto init;
        }

        /*
         * Intel CPUs without a C-state invariant TSC can stop the TSC
         * in either C2 or C3.  Disable use of C2 and C3 while using
         * the TSC as the timecounter.  The timecounter can be changed
         * to enable C2 and C3.
         *
         * Note that the TSC is used as the cputicker for computing
         * thread runtime regardless of the timecounter setting, so
         * using an alternate timecounter and enabling C2 or C3 can
         * result incorrect runtimes for kernel idle threads (but not
         * for any non-idle threads).
         */
        if (cpu_vendor_id == CPU_VENDOR_INTEL &&
            (amd_pminfo & AMDPM_TSC_INVARIANT) == 0) {
                tsc_timecounter.tc_flags |= TC_FLAGS_C2STOP;
                if (bootverbose)
                        printf("TSC timecounter disables C2 and C3.\n");
        }

        /*
         * We can not use the TSC in SMP mode unless the TSCs on all CPUs
         * are synchronized.  If the user is sure that the system has
         * synchronized TSCs, set kern.timecounter.smp_tsc tunable to a
         * non-zero value.  The TSC seems unreliable in virtualized SMP
         * environments, so it is set to a negative quality in those cases.
         */
#ifdef SMP
        if (mp_ncpus > 1)
                tsc_timecounter.tc_quality = test_tsc(smp_tsc_adjust);
        else
#endif /* SMP */
        if (tsc_is_invariant)
                tsc_timecounter.tc_quality = 1000;
        /* Lower the frequency cap by the tunable pre-shift. */
        max_freq >>= tsc_shift;

init:
        /* Smallest shift (at most 31) that fits tsc_freq under max_freq. */
        for (shift = 0; shift <= 31 && (tsc_freq >> shift) > max_freq; shift++)
                ;
        /* Select the matching reader: fenced on SMP+SSE2, shifted if needed. */
        if ((cpu_feature & CPUID_SSE2) != 0 && mp_ncpus > 1) {
                if (cpu_vendor_id == CPU_VENDOR_AMD) {
                        tsc_timecounter.tc_get_timecount = shift > 0 ?
                            tsc_get_timecount_low_mfence :
                            tsc_get_timecount_mfence;
                } else {
                        tsc_timecounter.tc_get_timecount = shift > 0 ?
                            tsc_get_timecount_low_lfence :
                            tsc_get_timecount_lfence;
                }
        } else {
                tsc_timecounter.tc_get_timecount = shift > 0 ?
                    tsc_get_timecount_low : tsc_get_timecount;
        }
        if (shift > 0) {
                tsc_timecounter.tc_name = "TSC-low";
                if (bootverbose)
                        printf("TSC timecounter discards lower %d bit(s)\n",
                            shift);
        }
        if (tsc_freq != 0) {
                tsc_timecounter.tc_frequency = tsc_freq >> shift;
                /* The shift rides along in tc_priv for the readers/vdso. */
                tsc_timecounter.tc_priv = (void *)(intptr_t)shift;
                tc_init(&tsc_timecounter);
        }
}
SYSINIT(tsc_tc, SI_SUB_SMP, SI_ORDER_ANY, init_TSC_tc, NULL);
629
/*
 * Re-validate TSC synchronization after a resume.  A TSC that was
 * unusable at boot is not retested, and uniprocessor systems need no
 * test at all.  If the resulting quality differs from the boot-time
 * value, record and report the change.
 */
void
resume_TSC(void)
{
#ifdef SMP
        int quality;

        /* If TSC was not good on boot, it is unlikely to become good now. */
        /* Nothing to do with UP either. */
        if (tsc_timecounter.tc_quality < 0 || mp_ncpus < 2)
                return;

        /*
         * If TSC was good, a single synchronization should be enough,
         * but honour smp_tsc_adjust if it's set.
         */
        quality = test_tsc(MAX(smp_tsc_adjust, 1));
        if (quality == tsc_timecounter.tc_quality)
                return;
        printf("TSC timecounter quality changed: %d -> %d\n",
            tsc_timecounter.tc_quality, quality);
        tsc_timecounter.tc_quality = quality;
#endif /* SMP */
}
655
656 /*
657  * When cpufreq levels change, find out about the (new) max frequency.  We
658  * use this to update CPU accounting in case it got a lower estimate at boot.
659  */
/*
 * When cpufreq levels change, find out about the (new) max frequency.  We
 * use this to update CPU accounting in case it got a lower estimate at boot.
 */
static void
tsc_levels_changed(void *arg, int unit)
{
        device_t cf_dev;
        struct cf_level *levels;
        int count, error;
        uint64_t max_freq;

        /* Only use values from the first CPU, assuming all are equal. */
        if (unit != 0)
                return;

        /* Find the appropriate cpufreq device instance. */
        cf_dev = devclass_get_device(devclass_find("cpufreq"), unit);
        if (cf_dev == NULL) {
                printf("tsc_levels_changed() called but no cpufreq device?\n");
                return;
        }

        /*
         * Get settings from the device and find the max frequency.
         * NOTE(review): the table is capped at 64 levels; levels beyond
         * that are presumably truncated by CPUFREQ_LEVELS — confirm.
         * A failed M_NOWAIT allocation silently skips the update.
         */
        count = 64;
        levels = malloc(count * sizeof(*levels), M_TEMP, M_NOWAIT);
        if (levels == NULL)
                return;
        error = CPUFREQ_LEVELS(cf_dev, levels, &count);
        if (error == 0 && count != 0) {
                /* levels[0] holds the highest setting, in MHz. */
                max_freq = (uint64_t)levels[0].total_set.freq * 1000000;
                set_cputicker(rdtsc, max_freq, 1);
        } else
                printf("tsc_levels_changed: no max freq found\n");
        free(levels, M_TEMP);
}
692
693 /*
694  * If the TSC timecounter is in use, veto the pending change.  It may be
695  * possible in the future to handle a dynamically-changing timecounter rate.
696  */
697 static void
698 tsc_freq_changing(void *arg, const struct cf_level *level, int *status)
699 {
700
701         if (*status != 0 || timecounter != &tsc_timecounter)
702                 return;
703
704         printf("timecounter TSC must not be in use when "
705             "changing frequencies; change denied\n");
706         *status = EBUSY;
707 }
708
/* Update TSC freq with the value indicated by the caller. */
static void
tsc_freq_changed(void *arg, const struct cf_level *level, int status)
{
        uint64_t freq;

        /* If there was an error during the transition, don't do anything. */
        if (tsc_disabled || status != 0)
                return;

        /* Total setting for this level gives the new frequency in MHz. */
        freq = (uint64_t)level->total_set.freq * 1000000;
        atomic_store_rel_64(&tsc_freq, freq);
        /* Keep the timecounter frequency consistent with the shift. */
        tsc_timecounter.tc_frequency =
            freq >> (int)(intptr_t)tsc_timecounter.tc_priv;
}
725
726 static int
727 sysctl_machdep_tsc_freq(SYSCTL_HANDLER_ARGS)
728 {
729         int error;
730         uint64_t freq;
731
732         freq = atomic_load_acq_64(&tsc_freq);
733         if (freq == 0)
734                 return (EOPNOTSUPP);
735         error = sysctl_handle_64(oidp, &freq, 0, req);
736         if (error == 0 && req->newptr != NULL) {
737                 atomic_store_rel_64(&tsc_freq, freq);
738                 atomic_store_rel_64(&tsc_timecounter.tc_frequency,
739                     freq >> (int)(intptr_t)tsc_timecounter.tc_priv);
740         }
741         return (error);
742 }
743
744 SYSCTL_PROC(_machdep, OID_AUTO, tsc_freq, CTLTYPE_U64 | CTLFLAG_RW,
745     0, 0, sysctl_machdep_tsc_freq, "QU", "Time Stamp Counter frequency");
746
/* Plain timecounter reader: low 32 bits of the TSC, no fence. */
static u_int
tsc_get_timecount(struct timecounter *tc __unused)
{

        return (rdtsc32());
}
753
/*
 * Timecounter reader that drops the low tc_priv bits: the SHRD shifts
 * EDX:EAX right by the shift stashed in tc_priv, returning bits
 * [shift, shift+31] of the 64-bit TSC in EAX.
 */
static inline u_int
tsc_get_timecount_low(struct timecounter *tc)
{
        uint32_t rv;

        __asm __volatile("rdtsc; shrd %%cl, %%edx, %0"
            : "=a" (rv) : "c" ((int)(intptr_t)tc->tc_priv) : "edx");
        return (rv);
}
763
/* Reader with an LFENCE before RDTSC (Intel/Centaur, SSE2 present). */
static u_int
tsc_get_timecount_lfence(struct timecounter *tc __unused)
{

        lfence();
        return (rdtsc32());
}
771
/* LFENCE variant of the low-bits-dropping reader. */
static u_int
tsc_get_timecount_low_lfence(struct timecounter *tc)
{

        lfence();
        return (tsc_get_timecount_low(tc));
}
779
/* Reader with an MFENCE before RDTSC (AMD, SSE2 present). */
static u_int
tsc_get_timecount_mfence(struct timecounter *tc __unused)
{

        mfence();
        return (rdtsc32());
}
787
/* MFENCE variant of the low-bits-dropping reader. */
static u_int
tsc_get_timecount_low_mfence(struct timecounter *tc)
{

        mfence();
        return (tsc_get_timecount_low(tc));
}
795
/*
 * Export the TSC parameters (algorithm tag and low-bits shift) to the
 * vdso timehands so userland can read the clock without a syscall.
 * Returns 1 to indicate the timehands are usable.
 */
static uint32_t
x86_tsc_vdso_timehands(struct vdso_timehands *vdso_th, struct timecounter *tc)
{

        vdso_th->th_algo = VDSO_TH_ALGO_X86_TSC;
        vdso_th->th_x86_shift = (int)(intptr_t)tc->tc_priv;
        vdso_th->th_x86_hpet_idx = 0xffffffff;  /* HPET not used. */
        bzero(vdso_th->th_res, sizeof(vdso_th->th_res));
        return (1);
}
806
#ifdef COMPAT_FREEBSD32
/* 32-bit-compat counterpart of x86_tsc_vdso_timehands(). */
static uint32_t
x86_tsc_vdso_timehands32(struct vdso_timehands32 *vdso_th32,
    struct timecounter *tc)
{

        vdso_th32->th_algo = VDSO_TH_ALGO_X86_TSC;
        vdso_th32->th_x86_shift = (int)(intptr_t)tc->tc_priv;
        vdso_th32->th_x86_hpet_idx = 0xffffffff;        /* HPET not used. */
        bzero(vdso_th32->th_res, sizeof(vdso_th32->th_res));
        return (1);
}
#endif