/* sys/x86/x86/tsc.c */
/*-
 * Copyright (c) 1998-2003 Poul-Henning Kamp
 * All rights reserved.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 *
 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
 * ARE DISCLAIMED.  IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
 * SUCH DAMAGE.
 */

#include <sys/cdefs.h>
__FBSDID("$FreeBSD$");

#include "opt_compat.h"
#include "opt_clock.h"

#include <sys/param.h>
#include <sys/bus.h>
#include <sys/cpu.h>
#include <sys/limits.h>
#include <sys/malloc.h>
#include <sys/systm.h>
#include <sys/sysctl.h>
#include <sys/time.h>
#include <sys/timetc.h>
#include <sys/kernel.h>
#include <sys/power.h>
#include <sys/smp.h>
#include <sys/vdso.h>
#include <machine/clock.h>
#include <machine/cputypes.h>
#include <machine/md_var.h>
#include <machine/specialreg.h>
#include <x86/vmware.h>
#include <dev/acpica/acpi_hpet.h>

#include "cpufreq_if.h"

uint64_t	tsc_freq;
int		tsc_is_invariant;
int		tsc_perf_stat;

static eventhandler_tag tsc_levels_tag, tsc_pre_tag, tsc_post_tag;

SYSCTL_INT(_kern_timecounter, OID_AUTO, invariant_tsc, CTLFLAG_RDTUN,
    &tsc_is_invariant, 0, "Indicates whether the TSC is P-state invariant");

#ifdef SMP
int	smp_tsc;
SYSCTL_INT(_kern_timecounter, OID_AUTO, smp_tsc, CTLFLAG_RDTUN, &smp_tsc, 0,
    "Indicates whether the TSC is safe to use in SMP mode");

int	smp_tsc_adjust = 0;
SYSCTL_INT(_kern_timecounter, OID_AUTO, smp_tsc_adjust, CTLFLAG_RDTUN,
    &smp_tsc_adjust, 0, "Try to adjust TSC on APs to match BSP");
#endif

static int	tsc_shift = 1;
SYSCTL_INT(_kern_timecounter, OID_AUTO, tsc_shift, CTLFLAG_RDTUN,
    &tsc_shift, 0, "Shift to pre-apply for the maximum TSC frequency");

static int	tsc_disabled;
SYSCTL_INT(_machdep, OID_AUTO, disable_tsc, CTLFLAG_RDTUN, &tsc_disabled, 0,
    "Disable x86 Time Stamp Counter");

static int	tsc_skip_calibration;
SYSCTL_INT(_machdep, OID_AUTO, disable_tsc_calibration, CTLFLAG_RDTUN,
    &tsc_skip_calibration, 0, "Disable TSC frequency calibration");

static void tsc_freq_changed(void *arg, const struct cf_level *level,
    int status);
static void tsc_freq_changing(void *arg, const struct cf_level *level,
    int *status);
static unsigned tsc_get_timecount(struct timecounter *tc);
static inline unsigned tsc_get_timecount_low(struct timecounter *tc);
static unsigned tsc_get_timecount_lfence(struct timecounter *tc);
static unsigned tsc_get_timecount_low_lfence(struct timecounter *tc);
static unsigned tsc_get_timecount_mfence(struct timecounter *tc);
static unsigned tsc_get_timecount_low_mfence(struct timecounter *tc);
static void tsc_levels_changed(void *arg, int unit);
static uint32_t x86_tsc_vdso_timehands(struct vdso_timehands *vdso_th,
    struct timecounter *tc);
#ifdef COMPAT_FREEBSD32
static uint32_t x86_tsc_vdso_timehands32(struct vdso_timehands32 *vdso_th32,
    struct timecounter *tc);
#endif

static struct timecounter tsc_timecounter = {
	.tc_get_timecount =		tsc_get_timecount,
	.tc_counter_mask =		~0u,
	.tc_name =			"TSC",
	.tc_quality =			800,	/* adjusted in code */
	.tc_fill_vdso_timehands =	x86_tsc_vdso_timehands,
#ifdef COMPAT_FREEBSD32
	.tc_fill_vdso_timehands32 =	x86_tsc_vdso_timehands32,
#endif
};
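/*
 * Ask the hypervisor for the TSC frequency.  Leaf 0x40000010 of the
 * hypervisor CPUID range reports the frequency in kHz in %eax; older
 * VMware versions are queried through the VMW_HVCMD_GETHZ backdoor
 * call, which returns a 64-bit value in Hz split across two registers,
 * with ~0 in the high word indicating failure.
 */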
static void
tsc_freq_vmware(void)
{
	u_int regs[4];

	if (hv_high >= 0x40000010) {
		do_cpuid(0x40000010, regs);
		tsc_freq = regs[0] * 1000;
	} else {
		vmware_hvcall(VMW_HVCMD_GETHZ, regs);
		if (regs[1] != UINT_MAX)
			tsc_freq = regs[0] | ((uint64_t)regs[1] << 32);
	}
	tsc_is_invariant = 1;
}

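/*
 * Derive the TSC frequency from the frequency printed in the CPU brand
 * string, e.g. "... 2.66GHz" or "... 2660MHz".  The digits are located
 * by scanning for the "Hz" suffix; the 'M', 'G' or 'T' prefix selects
 * the multiplier.  This is only a fallback for when calibration is
 * disabled.
 */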
static void
tsc_freq_intel(void)
{
	char brand[48];
	u_int regs[4];
	uint64_t freq;
	char *p;
	u_int i;

	/*
	 * Intel Processor Identification and the CPUID Instruction
	 * Application Note 485.
	 * http://www.intel.com/assets/pdf/appnote/241618.pdf
	 */
	if (cpu_exthigh >= 0x80000004) {
		p = brand;
		for (i = 0x80000002; i < 0x80000005; i++) {
			do_cpuid(i, regs);
			memcpy(p, regs, sizeof(regs));
			p += sizeof(regs);
		}
		p = NULL;
		for (i = 0; i < sizeof(brand) - 1; i++)
			if (brand[i] == 'H' && brand[i + 1] == 'z')
				p = brand + i;
		if (p != NULL) {
			p -= 5;
			switch (p[4]) {
			case 'M':
				i = 1;
				break;
			case 'G':
				i = 1000;
				break;
			case 'T':
				i = 1000000;
				break;
			default:
				return;
			}
#define	C2D(c)	((c) - '0')
			if (p[1] == '.') {
				freq = C2D(p[0]) * 1000;
				freq += C2D(p[2]) * 100;
				freq += C2D(p[3]) * 10;
				freq *= i * 1000;
			} else {
				freq = C2D(p[0]) * 1000;
				freq += C2D(p[1]) * 100;
				freq += C2D(p[2]) * 10;
				freq += C2D(p[3]);
				freq *= i * 1000000;
			}
#undef C2D
			tsc_freq = freq;
		}
	}
}

static void
probe_tsc_freq(void)
{
	u_int regs[4];
	uint64_t tsc1, tsc2;

	if (cpu_high >= 6) {
		do_cpuid(6, regs);
		if ((regs[2] & CPUID_PERF_STAT) != 0) {
			/*
			 * XXX Some emulators expose host CPUID without actual
			 * support for these MSRs.  We must test whether they
			 * really work.
			 */
			wrmsr(MSR_MPERF, 0);
			wrmsr(MSR_APERF, 0);
			DELAY(10);
			if (rdmsr(MSR_MPERF) > 0 && rdmsr(MSR_APERF) > 0)
				tsc_perf_stat = 1;
		}
	}

	if (vm_guest == VM_GUEST_VMWARE) {
		tsc_freq_vmware();
		return;
	}

	switch (cpu_vendor_id) {
	case CPU_VENDOR_AMD:
		if ((amd_pminfo & AMDPM_TSC_INVARIANT) != 0 ||
		    (vm_guest == VM_GUEST_NO &&
		    CPUID_TO_FAMILY(cpu_id) >= 0x10))
			tsc_is_invariant = 1;
		if (cpu_feature & CPUID_SSE2) {
			tsc_timecounter.tc_get_timecount =
			    tsc_get_timecount_mfence;
		}
		break;
	case CPU_VENDOR_INTEL:
		if ((amd_pminfo & AMDPM_TSC_INVARIANT) != 0 ||
		    (vm_guest == VM_GUEST_NO &&
		    ((CPUID_TO_FAMILY(cpu_id) == 0x6 &&
		    CPUID_TO_MODEL(cpu_id) >= 0xe) ||
		    (CPUID_TO_FAMILY(cpu_id) == 0xf &&
		    CPUID_TO_MODEL(cpu_id) >= 0x3))))
			tsc_is_invariant = 1;
		if (cpu_feature & CPUID_SSE2) {
			tsc_timecounter.tc_get_timecount =
			    tsc_get_timecount_lfence;
		}
		break;
	case CPU_VENDOR_CENTAUR:
		if (vm_guest == VM_GUEST_NO &&
		    CPUID_TO_FAMILY(cpu_id) == 0x6 &&
		    CPUID_TO_MODEL(cpu_id) >= 0xf &&
		    (rdmsr(0x1203) & 0x100000000ULL) == 0)
			tsc_is_invariant = 1;
		if (cpu_feature & CPUID_SSE2) {
			tsc_timecounter.tc_get_timecount =
			    tsc_get_timecount_lfence;
		}
		break;
	}

	if (tsc_skip_calibration) {
		if (cpu_vendor_id == CPU_VENDOR_INTEL)
			tsc_freq_intel();
		return;
	}

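	/*
	 * Count TSC ticks across a one-second DELAY(); the delta is the
	 * frequency in Hz.  The result is only as accurate as the timer
	 * backing DELAY() at this point in boot.
	 */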
	if (bootverbose)
		printf("Calibrating TSC clock ... ");
	tsc1 = rdtsc();
	DELAY(1000000);
	tsc2 = rdtsc();
	tsc_freq = tsc2 - tsc1;
	if (bootverbose)
		printf("TSC clock: %ju Hz\n", (uintmax_t)tsc_freq);
}

void
init_TSC(void)
{

	if ((cpu_feature & CPUID_TSC) == 0 || tsc_disabled)
		return;

#ifdef __i386__
	/* The TSC is known to be broken on certain CPUs. */
	switch (cpu_vendor_id) {
	case CPU_VENDOR_AMD:
		switch (cpu_id & 0xFF0) {
		case 0x500:
			/* K5 Model 0 */
			return;
		}
		break;
	case CPU_VENDOR_CENTAUR:
		switch (cpu_id & 0xff0) {
		case 0x540:
			/*
			 * http://www.centtech.com/c6_data_sheet.pdf
			 *
			 * I-12 RDTSC may return incoherent values in EDX:EAX
			 * I-13 RDTSC hangs when certain event counters are used
			 */
			return;
		}
		break;
	case CPU_VENDOR_NSC:
		switch (cpu_id & 0xff0) {
		case 0x540:
			if ((cpu_id & CPUID_STEPPING) == 0)
				return;
			break;
		}
		break;
	}
#endif

	probe_tsc_freq();

	/*
	 * Inform CPU accounting about our boot-time clock rate.  This will
	 * be updated if someone loads a cpufreq driver after boot that
	 * discovers a new max frequency.
	 */
	if (tsc_freq != 0)
		set_cputicker(rdtsc, tsc_freq, !tsc_is_invariant);

	if (tsc_is_invariant)
		return;

	/* Register to find out about changes in CPU frequency. */
	tsc_pre_tag = EVENTHANDLER_REGISTER(cpufreq_pre_change,
	    tsc_freq_changing, NULL, EVENTHANDLER_PRI_FIRST);
	tsc_post_tag = EVENTHANDLER_REGISTER(cpufreq_post_change,
	    tsc_freq_changed, NULL, EVENTHANDLER_PRI_FIRST);
	tsc_levels_tag = EVENTHANDLER_REGISTER(cpufreq_levels_changed,
	    tsc_levels_changed, NULL, EVENTHANDLER_PRI_ANY);
}

#ifdef SMP

/*
 * RDTSC is not a serializing instruction, and does not drain the
 * instruction stream, so we need to drain the stream before executing
 * it.  This could be fixed by using RDTSCP, except that instruction is
 * not available everywhere.
 *
 * Use CPUID for draining in the boot-time SMP consistency test.  The
 * timecounters use MFENCE for AMD CPUs, and LFENCE for others (Intel
 * and VIA) when SSE2 is present, and nothing on older machines which
 * also do not issue RDTSC prematurely.  There, testing for SSE2 and
 * the vendor is too cumbersome, and we learn about TSC presence from
 * CPUID.
 *
 * Do not use do_cpuid(), since we do not need the CPUID results, which
 * would have to be written into memory by do_cpuid().
 */
#define	TSC_READ(x)						\
static void							\
tsc_read_##x(void *arg)						\
{								\
	uint64_t *tsc = arg;					\
	u_int cpu = PCPU_GET(cpuid);				\
								\
	__asm __volatile("cpuid" : : : "eax", "ebx", "ecx", "edx"); \
	tsc[cpu * 3 + x] = rdtsc();				\
}
TSC_READ(0)
TSC_READ(1)
TSC_READ(2)
#undef TSC_READ

#define	N	1000

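/*
 * Check pairwise TSC ordering across CPUs.  Each rendezvous records
 * three reads per CPU, separated by barriers, so for every remote CPU
 * our second read must post-date its first, and our third must
 * post-date its second.  Any non-positive difference means the TSCs
 * are not synchronized and the test fails.
 */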
static void
comp_smp_tsc(void *arg)
{
	uint64_t *tsc;
	int64_t d1, d2;
	u_int cpu = PCPU_GET(cpuid);
	u_int i, j, size;

	size = (mp_maxid + 1) * 3;
	for (i = 0, tsc = arg; i < N; i++, tsc += size)
		CPU_FOREACH(j) {
			if (j == cpu)
				continue;
			d1 = tsc[cpu * 3 + 1] - tsc[j * 3];
			d2 = tsc[cpu * 3 + 2] - tsc[j * 3 + 1];
			if (d1 <= 0 || d2 <= 0) {
				smp_tsc = 0;
				return;
			}
		}
}

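/*
 * Estimate the offset between this AP's TSC and the BSP's, then apply
 * the midpoint of the feasible window.  Reads that bracket a remote
 * read yield a greatest lower bound (min) and a least upper bound
 * (max) on the skew; if min <= max, a consistent correction exists.
 * The inline assembly performs a 64-bit read-modify-write of MSR 0x10
 * (the TSC itself) to add the computed delta on this CPU.
 */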
static void
adj_smp_tsc(void *arg)
{
	uint64_t *tsc;
	int64_t d, min, max;
	u_int cpu = PCPU_GET(cpuid);
	u_int first, i, size;

	first = CPU_FIRST();
	if (cpu == first)
		return;
	min = INT64_MIN;
	max = INT64_MAX;
	size = (mp_maxid + 1) * 3;
	for (i = 0, tsc = arg; i < N; i++, tsc += size) {
		d = tsc[first * 3] - tsc[cpu * 3 + 1];
		if (d > min)
			min = d;
		d = tsc[first * 3 + 1] - tsc[cpu * 3 + 2];
		if (d > min)
			min = d;
		d = tsc[first * 3 + 1] - tsc[cpu * 3];
		if (d < max)
			max = d;
		d = tsc[first * 3 + 2] - tsc[cpu * 3 + 1];
		if (d < max)
			max = d;
	}
	if (min > max)
		return;
	d = min / 2 + max / 2;
	__asm __volatile (
		"movl $0x10, %%ecx\n\t"
		"rdmsr\n\t"
		"addl %%edi, %%eax\n\t"
		"adcl %%esi, %%edx\n\t"
		"wrmsr\n"
		: /* No output */
		: "D" ((uint32_t)d), "S" ((uint32_t)(d >> 32))
		: "ax", "cx", "dx", "cc"
	);
}

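/*
 * Run the boot-time synchronization test and return a timecounter
 * quality for the TSC: 1000 where the platform is believed coherent,
 * 800 for other invariant-TSC machines that pass, and a negative
 * quality (so the TSC is not selected automatically) when the test
 * fails or cannot be trusted, e.g. under a hypervisor.  If the test
 * fails and smp_tsc_adjust is set, try to correct the APs' TSCs and
 * re-test.
 */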
static int
test_tsc(void)
{
	uint64_t *data, *tsc;
	u_int i, size, adj;

	if ((!smp_tsc && !tsc_is_invariant) || vm_guest)
		return (-100);
	size = (mp_maxid + 1) * 3;
	data = malloc(sizeof(*data) * size * N, M_TEMP, M_WAITOK);
	adj = 0;
retry:
	for (i = 0, tsc = data; i < N; i++, tsc += size)
		smp_rendezvous(tsc_read_0, tsc_read_1, tsc_read_2, tsc);
	smp_tsc = 1;	/* XXX */
	smp_rendezvous(smp_no_rendezvous_barrier, comp_smp_tsc,
	    smp_no_rendezvous_barrier, data);
	if (!smp_tsc && adj < smp_tsc_adjust) {
		adj++;
		smp_rendezvous(smp_no_rendezvous_barrier, adj_smp_tsc,
		    smp_no_rendezvous_barrier, data);
		goto retry;
	}
	free(data, M_TEMP);
	if (bootverbose)
		printf("SMP: %sed TSC synchronization test%s\n",
		    smp_tsc ? "pass" : "fail",
		    adj > 0 ? " after adjustment" : "");
	if (smp_tsc && tsc_is_invariant) {
		switch (cpu_vendor_id) {
		case CPU_VENDOR_AMD:
			/*
			 * Starting with Family 15h processors, TSC clock
			 * source is in the north bridge.  Check whether
			 * we have a single-socket/multi-core platform.
			 * XXX Need more work for complex cases.
			 */
			if (CPUID_TO_FAMILY(cpu_id) < 0x15 ||
			    (amd_feature2 & AMDID2_CMP) == 0 ||
			    smp_cpus > (cpu_procinfo2 & AMDID_CMP_CORES) + 1)
				break;
			return (1000);
		case CPU_VENDOR_INTEL:
			/*
			 * XXX Assume Intel platforms have synchronized TSCs.
			 */
			return (1000);
		}
		return (800);
	}
	return (-100);
}

#undef N

#else

/*
 * The function is not called; it is provided only to avoid a linking
 * failure on uniprocessor kernels.
 */
static int
test_tsc(void)
{

	return (0);
}

#endif /* SMP */

static void
init_TSC_tc(void)
{
	uint64_t max_freq;
	int shift;

	if ((cpu_feature & CPUID_TSC) == 0 || tsc_disabled)
		return;

	/*
	 * Limit timecounter frequency to fit in an int and prevent it from
	 * overflowing too fast.
	 */
	max_freq = UINT_MAX;

	/*
	 * We cannot use the TSC if we support APM.  Precise timekeeping
	 * on an APM'ed machine is at best a fool's pursuit, since any
	 * and all of the time spent in various SMM code can't be
	 * reliably accounted for.  Reading the RTC is your only source
	 * of reliable time info.  The i8254 loses too, of course, but
	 * we need to have some kind of time...  We don't know at this
	 * point whether APM is going to be used or not, nor when it
	 * might be activated.  Play it safe.
	 */
	if (power_pm_get_type() == POWER_PM_TYPE_APM) {
		tsc_timecounter.tc_quality = -1000;
		if (bootverbose)
			printf("TSC timecounter disabled: APM enabled.\n");
		goto init;
	}

	/*
	 * Intel CPUs without a C-state invariant TSC can stop the TSC
	 * in either C2 or C3.  Disable use of C2 and C3 while using
	 * the TSC as the timecounter.  The timecounter can be changed
	 * to enable C2 and C3.
	 *
	 * Note that the TSC is used as the cputicker for computing
	 * thread runtime regardless of the timecounter setting, so
	 * using an alternate timecounter and enabling C2 or C3 can
	 * result in incorrect runtimes for kernel idle threads (but
	 * not for any non-idle threads).
	 */
	if (cpu_vendor_id == CPU_VENDOR_INTEL &&
	    (amd_pminfo & AMDPM_TSC_INVARIANT) == 0) {
		tsc_timecounter.tc_flags |= TC_FLAGS_C2STOP;
		if (bootverbose)
			printf("TSC timecounter disables C2 and C3.\n");
	}

	/*
	 * We cannot use the TSC in SMP mode unless the TSCs on all CPUs
	 * are synchronized.  If the user is sure that the system has
	 * synchronized TSCs, set the kern.timecounter.smp_tsc tunable to
	 * a non-zero value.  The TSC seems unreliable in virtualized SMP
	 * environments, so it is set to a negative quality in those cases.
	 */
	if (mp_ncpus > 1)
		tsc_timecounter.tc_quality = test_tsc();
	else if (tsc_is_invariant)
		tsc_timecounter.tc_quality = 1000;
	max_freq >>= tsc_shift;

init:
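	/*
	 * Find the smallest shift for which tsc_freq >> shift no longer
	 * exceeds max_freq, i.e. discard just enough low-order bits to
	 * keep the timecounter frequency within the limit.
	 */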
	for (shift = 0; shift <= 31 && (tsc_freq >> shift) > max_freq; shift++)
		;
	if ((cpu_feature & CPUID_SSE2) != 0 && mp_ncpus > 1) {
		if (cpu_vendor_id == CPU_VENDOR_AMD) {
			tsc_timecounter.tc_get_timecount = shift > 0 ?
			    tsc_get_timecount_low_mfence :
			    tsc_get_timecount_mfence;
		} else {
			tsc_timecounter.tc_get_timecount = shift > 0 ?
			    tsc_get_timecount_low_lfence :
			    tsc_get_timecount_lfence;
		}
	} else {
		tsc_timecounter.tc_get_timecount = shift > 0 ?
		    tsc_get_timecount_low : tsc_get_timecount;
	}
	if (shift > 0) {
		tsc_timecounter.tc_name = "TSC-low";
		if (bootverbose)
			printf("TSC timecounter discards lower %d bit(s)\n",
			    shift);
	}
	if (tsc_freq != 0) {
		tsc_timecounter.tc_frequency = tsc_freq >> shift;
		tsc_timecounter.tc_priv = (void *)(intptr_t)shift;
		tc_init(&tsc_timecounter);
	}
}
SYSINIT(tsc_tc, SI_SUB_SMP, SI_ORDER_ANY, init_TSC_tc, NULL);

/*
 * When cpufreq levels change, find out about the (new) max frequency.  We
 * use this to update CPU accounting in case it got a lower estimate at boot.
 */
static void
tsc_levels_changed(void *arg, int unit)
{
	device_t cf_dev;
	struct cf_level *levels;
	int count, error;
	uint64_t max_freq;

	/* Only use values from the first CPU, assuming all are equal. */
	if (unit != 0)
		return;

	/* Find the appropriate cpufreq device instance. */
	cf_dev = devclass_get_device(devclass_find("cpufreq"), unit);
	if (cf_dev == NULL) {
		printf("tsc_levels_changed() called but no cpufreq device?\n");
		return;
	}

	/* Get settings from the device and find the max frequency. */
	count = 64;
	levels = malloc(count * sizeof(*levels), M_TEMP, M_NOWAIT);
	if (levels == NULL)
		return;
	error = CPUFREQ_LEVELS(cf_dev, levels, &count);
	if (error == 0 && count != 0) {
		max_freq = (uint64_t)levels[0].total_set.freq * 1000000;
		set_cputicker(rdtsc, max_freq, 1);
	} else
		printf("tsc_levels_changed: no max freq found\n");
	free(levels, M_TEMP);
}

/*
 * If the TSC timecounter is in use, veto the pending change.  It may be
 * possible in the future to handle a dynamically-changing timecounter rate.
 */
static void
tsc_freq_changing(void *arg, const struct cf_level *level, int *status)
{

	if (*status != 0 || timecounter != &tsc_timecounter)
		return;

	printf("timecounter TSC must not be in use when "
	    "changing frequencies; change denied\n");
	*status = EBUSY;
}

/* Update TSC freq with the value indicated by the caller. */
static void
tsc_freq_changed(void *arg, const struct cf_level *level, int status)
{
	uint64_t freq;

	/* If there was an error during the transition, don't do anything. */
	if (tsc_disabled || status != 0)
		return;

	/* Total setting for this level gives the new frequency in MHz. */
	freq = (uint64_t)level->total_set.freq * 1000000;
	atomic_store_rel_64(&tsc_freq, freq);
	tsc_timecounter.tc_frequency =
	    freq >> (int)(intptr_t)tsc_timecounter.tc_priv;
}

static int
sysctl_machdep_tsc_freq(SYSCTL_HANDLER_ARGS)
{
	int error;
	uint64_t freq;

	freq = atomic_load_acq_64(&tsc_freq);
	if (freq == 0)
		return (EOPNOTSUPP);
	error = sysctl_handle_64(oidp, &freq, 0, req);
	if (error == 0 && req->newptr != NULL) {
		atomic_store_rel_64(&tsc_freq, freq);
		atomic_store_rel_64(&tsc_timecounter.tc_frequency,
		    freq >> (int)(intptr_t)tsc_timecounter.tc_priv);
	}
	return (error);
}

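/*
 * Expose the TSC frequency as machdep.tsc_freq.  Writing a new value
 * updates both tsc_freq and the timecounter frequency, applying the
 * same low-bit shift that was chosen at initialization.
 */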
SYSCTL_PROC(_machdep, OID_AUTO, tsc_freq, CTLTYPE_U64 | CTLFLAG_RW,
    0, 0, sysctl_machdep_tsc_freq, "QU", "Time Stamp Counter frequency");

static u_int
tsc_get_timecount(struct timecounter *tc __unused)
{

	return (rdtsc32());
}

static inline u_int
tsc_get_timecount_low(struct timecounter *tc)
{
	uint32_t rv;

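	/*
	 * SHRD shifts the 64-bit %edx:%eax result of RDTSC right by
	 * tc_priv bits and leaves the low 32 bits of the shifted value
	 * in %eax, so the timecounter sees a slower, truncated TSC.
	 */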
	__asm __volatile("rdtsc; shrd %%cl, %%edx, %0"
	    : "=a" (rv) : "c" ((int)(intptr_t)tc->tc_priv) : "edx");
	return (rv);
}

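/*
 * The fenced variants pair RDTSC with LFENCE (Intel and VIA) or MFENCE
 * (AMD) so the read cannot be issued speculatively ahead of earlier
 * instructions; see the ordering discussion above the SMP consistency
 * test.
 */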
static u_int
tsc_get_timecount_lfence(struct timecounter *tc __unused)
{

	lfence();
	return (rdtsc32());
}

static u_int
tsc_get_timecount_low_lfence(struct timecounter *tc)
{

	lfence();
	return (tsc_get_timecount_low(tc));
}

static u_int
tsc_get_timecount_mfence(struct timecounter *tc __unused)
{

	mfence();
	return (rdtsc32());
}

static u_int
tsc_get_timecount_low_mfence(struct timecounter *tc)
{

	mfence();
	return (tsc_get_timecount_low(tc));
}

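/*
 * Publish the TSC algorithm and shift to the shared timehands so that
 * userland's VDSO time functions can read the counter directly,
 * without entering the kernel.
 */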
static uint32_t
x86_tsc_vdso_timehands(struct vdso_timehands *vdso_th, struct timecounter *tc)
{

	vdso_th->th_algo = VDSO_TH_ALGO_X86_TSC;
	vdso_th->th_x86_shift = (int)(intptr_t)tc->tc_priv;
	vdso_th->th_x86_hpet_idx = 0xffffffff;
	bzero(vdso_th->th_res, sizeof(vdso_th->th_res));
	return (1);
}

#ifdef COMPAT_FREEBSD32
static uint32_t
x86_tsc_vdso_timehands32(struct vdso_timehands32 *vdso_th32,
    struct timecounter *tc)
{

	vdso_th32->th_algo = VDSO_TH_ALGO_X86_TSC;
	vdso_th32->th_x86_shift = (int)(intptr_t)tc->tc_priv;
	vdso_th32->th_x86_hpet_idx = 0xffffffff;
	bzero(vdso_th32->th_res, sizeof(vdso_th32->th_res));
	return (1);
}
#endif