/*-
 * SPDX-License-Identifier: BSD-2-Clause-FreeBSD
 *
 * Copyright (c) 1998-2003 Poul-Henning Kamp
 * All rights reserved.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 *
 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
 * ARE DISCLAIMED.  IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
 * SUCH DAMAGE.
 */

#include <sys/cdefs.h>
__FBSDID("$FreeBSD$");

#include "opt_clock.h"

#include <sys/param.h>
#include <sys/bus.h>
#include <sys/cpu.h>
#include <sys/limits.h>
#include <sys/malloc.h>
#include <sys/systm.h>
#include <sys/sysctl.h>
#include <sys/time.h>
#include <sys/timetc.h>
#include <sys/kernel.h>
#include <sys/power.h>
#include <sys/smp.h>
#include <sys/vdso.h>
#include <machine/clock.h>
#include <machine/cputypes.h>
#include <machine/md_var.h>
#include <machine/specialreg.h>
#include <x86/vmware.h>
#include <dev/acpica/acpi_hpet.h>

#include "cpufreq_if.h"

uint64_t        tsc_freq;
int             tsc_is_invariant;
int             tsc_perf_stat;

static eventhandler_tag tsc_levels_tag, tsc_pre_tag, tsc_post_tag;

SYSCTL_INT(_kern_timecounter, OID_AUTO, invariant_tsc, CTLFLAG_RDTUN,
    &tsc_is_invariant, 0, "Indicates whether the TSC is P-state invariant");

#ifdef SMP
int     smp_tsc;
SYSCTL_INT(_kern_timecounter, OID_AUTO, smp_tsc, CTLFLAG_RDTUN, &smp_tsc, 0,
    "Indicates whether the TSC is safe to use in SMP mode");

int     smp_tsc_adjust = 0;
SYSCTL_INT(_kern_timecounter, OID_AUTO, smp_tsc_adjust, CTLFLAG_RDTUN,
    &smp_tsc_adjust, 0, "Try to adjust TSC on APs to match BSP");
#endif

static int      tsc_shift = 1;
SYSCTL_INT(_kern_timecounter, OID_AUTO, tsc_shift, CTLFLAG_RDTUN,
    &tsc_shift, 0, "Shift to pre-apply for the maximum TSC frequency");

static int      tsc_disabled;
SYSCTL_INT(_machdep, OID_AUTO, disable_tsc, CTLFLAG_RDTUN, &tsc_disabled, 0,
    "Disable x86 Time Stamp Counter");

static int      tsc_skip_calibration;
SYSCTL_INT(_machdep, OID_AUTO, disable_tsc_calibration, CTLFLAG_RDTUN,
    &tsc_skip_calibration, 0, "Disable TSC frequency calibration");

static void tsc_freq_changed(void *arg, const struct cf_level *level,
    int status);
static void tsc_freq_changing(void *arg, const struct cf_level *level,
    int *status);
static unsigned tsc_get_timecount(struct timecounter *tc);
static inline unsigned tsc_get_timecount_low(struct timecounter *tc);
static unsigned tsc_get_timecount_lfence(struct timecounter *tc);
static unsigned tsc_get_timecount_low_lfence(struct timecounter *tc);
static unsigned tsc_get_timecount_mfence(struct timecounter *tc);
static unsigned tsc_get_timecount_low_mfence(struct timecounter *tc);
static void tsc_levels_changed(void *arg, int unit);
static uint32_t x86_tsc_vdso_timehands(struct vdso_timehands *vdso_th,
    struct timecounter *tc);
#ifdef COMPAT_FREEBSD32
static uint32_t x86_tsc_vdso_timehands32(struct vdso_timehands32 *vdso_th32,
    struct timecounter *tc);
#endif

static struct timecounter tsc_timecounter = {
        .tc_get_timecount =             tsc_get_timecount,
        .tc_counter_mask =              ~0u,
        .tc_name =                      "TSC",
        .tc_quality =                   800,    /* adjusted in code */
        .tc_fill_vdso_timehands =       x86_tsc_vdso_timehands,
#ifdef COMPAT_FREEBSD32
        .tc_fill_vdso_timehands32 =     x86_tsc_vdso_timehands32,
#endif
};

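/*
 * Ask the hypervisor for the TSC frequency.  Newer VMware products
 * publish it via CPUID leaf 0x40000010, which returns the frequency
 * in kHz in %eax; older ones answer the VMW_HVCMD_GETHZ backdoor call
 * with the frequency in Hz split across %eax (low) and %ebx (high),
 * where %ebx == UINT_MAX means no frequency is available.
 */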
static void
tsc_freq_vmware(void)
{
        u_int regs[4];

        if (hv_high >= 0x40000010) {
                do_cpuid(0x40000010, regs);
                tsc_freq = regs[0] * 1000;
        } else {
                vmware_hvcall(VMW_HVCMD_GETHZ, regs);
                if (regs[1] != UINT_MAX)
                        tsc_freq = regs[0] | ((uint64_t)regs[1] << 32);
        }
        tsc_is_invariant = 1;
}

/*
 * Calculate the TSC frequency using information from CPUID leaf 0x15,
 * 'Time Stamp Counter and Nominal Core Crystal Clock'.  When available,
 * this is an improvement over parsing the CPU brand string in
 * tsc_freq_intel().
 */
static bool
tsc_freq_cpuid(void)
{
        u_int regs[4];

        if (cpu_high < 0x15)
                return (false);
        do_cpuid(0x15, regs);
        if (regs[0] == 0 || regs[1] == 0 || regs[2] == 0)
                return (false);
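        /*
         * Leaf 0x15 returns the TSC/crystal-clock ratio as %ebx/%eax
         * and the nominal crystal frequency in Hz in %ecx, so the TSC
         * frequency is crystal Hz * %ebx / %eax.
         */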
        tsc_freq = (uint64_t)regs[2] * regs[1] / regs[0];
        return (true);
}

static void
tsc_freq_intel(void)
{
        char brand[48];
        u_int regs[4];
        uint64_t freq;
        char *p;
        u_int i;

        /*
         * Intel Processor Identification and the CPUID Instruction
         * Application Note 485.
         * http://www.intel.com/assets/pdf/appnote/241618.pdf
         */
        if (cpu_exthigh >= 0x80000004) {
                p = brand;
                for (i = 0x80000002; i < 0x80000005; i++) {
                        do_cpuid(i, regs);
                        memcpy(p, regs, sizeof(regs));
                        p += sizeof(regs);
                }
                p = NULL;
                for (i = 0; i < sizeof(brand) - 1; i++)
                        if (brand[i] == 'H' && brand[i + 1] == 'z')
                                p = brand + i;
                if (p != NULL) {
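                        /*
                         * The brand string ends with the rated
                         * frequency as either "x.xxyHz" or "xxxxyHz",
                         * where y is the M/G/T multiplier (e.g.
                         * "2.40GHz").  Back up to the start of that
                         * field.
                         */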
                        p -= 5;
                        switch (p[4]) {
                        case 'M':
                                i = 1;
                                break;
                        case 'G':
                                i = 1000;
                                break;
                        case 'T':
                                i = 1000000;
                                break;
                        default:
                                return;
                        }
#define C2D(c)  ((c) - '0')
                        if (p[1] == '.') {
                                freq = C2D(p[0]) * 1000;
                                freq += C2D(p[2]) * 100;
                                freq += C2D(p[3]) * 10;
                                freq *= i * 1000;
                        } else {
                                freq = C2D(p[0]) * 1000;
                                freq += C2D(p[1]) * 100;
                                freq += C2D(p[2]) * 10;
                                freq += C2D(p[3]);
                                freq *= i * 1000000;
                        }
#undef C2D
                        tsc_freq = freq;
                }
        }
}

static void
probe_tsc_freq(void)
{
        u_int regs[4];
        uint64_t tsc1, tsc2;

        if (cpu_high >= 6) {
                do_cpuid(6, regs);
                if ((regs[2] & CPUID_PERF_STAT) != 0) {
                        /*
                         * XXX Some emulators expose host CPUID without actual
                         * support for these MSRs.  We must test whether they
                         * really work.
                         */
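                        /*
                         * MPERF counts at a fixed TSC-related rate and
                         * APERF at the actual core clock; both
                         * advancing across a short delay shows that
                         * the MSRs are implemented.
                         */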
                        wrmsr(MSR_MPERF, 0);
                        wrmsr(MSR_APERF, 0);
                        DELAY(10);
                        if (rdmsr(MSR_MPERF) > 0 && rdmsr(MSR_APERF) > 0)
                                tsc_perf_stat = 1;
                }
        }

        if (vm_guest == VM_GUEST_VMWARE) {
                tsc_freq_vmware();
                return;
        }

        switch (cpu_vendor_id) {
        case CPU_VENDOR_AMD:
                if ((amd_pminfo & AMDPM_TSC_INVARIANT) != 0 ||
                    (vm_guest == VM_GUEST_NO &&
                    CPUID_TO_FAMILY(cpu_id) >= 0x10))
                        tsc_is_invariant = 1;
                if (cpu_feature & CPUID_SSE2) {
                        tsc_timecounter.tc_get_timecount =
                            tsc_get_timecount_mfence;
                }
                break;
        case CPU_VENDOR_INTEL:
                if ((amd_pminfo & AMDPM_TSC_INVARIANT) != 0 ||
                    (vm_guest == VM_GUEST_NO &&
                    ((CPUID_TO_FAMILY(cpu_id) == 0x6 &&
                    CPUID_TO_MODEL(cpu_id) >= 0xe) ||
                    (CPUID_TO_FAMILY(cpu_id) == 0xf &&
                    CPUID_TO_MODEL(cpu_id) >= 0x3))))
                        tsc_is_invariant = 1;
                if (cpu_feature & CPUID_SSE2) {
                        tsc_timecounter.tc_get_timecount =
                            tsc_get_timecount_lfence;
                }
                break;
        case CPU_VENDOR_CENTAUR:
                if (vm_guest == VM_GUEST_NO &&
                    CPUID_TO_FAMILY(cpu_id) == 0x6 &&
                    CPUID_TO_MODEL(cpu_id) >= 0xf &&
                    (rdmsr(0x1203) & 0x100000000ULL) == 0)
                        tsc_is_invariant = 1;
                if (cpu_feature & CPUID_SSE2) {
                        tsc_timecounter.tc_get_timecount =
                            tsc_get_timecount_lfence;
                }
                break;
        }

        if (tsc_skip_calibration) {
                if (!tsc_freq_cpuid() && cpu_vendor_id == CPU_VENDOR_INTEL)
                        tsc_freq_intel();
        } else {
                if (bootverbose)
                        printf("Calibrating TSC clock ... ");
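                /*
                 * DELAY() spins against an independent reference clock
                 * (typically the i8254 this early in boot), so the TSC
                 * delta across a one-second wait is the frequency in
                 * Hz.
                 */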
                tsc1 = rdtsc();
                DELAY(1000000);
                tsc2 = rdtsc();
                tsc_freq = tsc2 - tsc1;
        }
        if (bootverbose)
                printf("TSC clock: %ju Hz\n", (uintmax_t)tsc_freq);
}

void
init_TSC(void)
{

        if ((cpu_feature & CPUID_TSC) == 0 || tsc_disabled)
                return;

#ifdef __i386__
        /* The TSC is known to be broken on certain CPUs. */
        switch (cpu_vendor_id) {
        case CPU_VENDOR_AMD:
                switch (cpu_id & 0xFF0) {
                case 0x500:
                        /* K5 Model 0 */
                        return;
                }
                break;
        case CPU_VENDOR_CENTAUR:
                switch (cpu_id & 0xff0) {
                case 0x540:
                        /*
                         * http://www.centtech.com/c6_data_sheet.pdf
                         *
                         * I-12 RDTSC may return incoherent values in EDX:EAX
                         * I-13 RDTSC hangs when certain event counters are used
                         */
                        return;
                }
                break;
        case CPU_VENDOR_NSC:
                switch (cpu_id & 0xff0) {
                case 0x540:
                        if ((cpu_id & CPUID_STEPPING) == 0)
                                return;
                        break;
                }
                break;
        }
#endif

        probe_tsc_freq();

        /*
         * Inform CPU accounting about our boot-time clock rate.  This will
         * be updated if someone loads a cpufreq driver after boot that
         * discovers a new max frequency.
         */
        if (tsc_freq != 0)
                set_cputicker(rdtsc, tsc_freq, !tsc_is_invariant);

        if (tsc_is_invariant)
                return;

        /* Register to find out about changes in CPU frequency. */
        tsc_pre_tag = EVENTHANDLER_REGISTER(cpufreq_pre_change,
            tsc_freq_changing, NULL, EVENTHANDLER_PRI_FIRST);
        tsc_post_tag = EVENTHANDLER_REGISTER(cpufreq_post_change,
            tsc_freq_changed, NULL, EVENTHANDLER_PRI_FIRST);
        tsc_levels_tag = EVENTHANDLER_REGISTER(cpufreq_levels_changed,
            tsc_levels_changed, NULL, EVENTHANDLER_PRI_ANY);
}

#ifdef SMP

/*
 * RDTSC is not a serializing instruction, and does not drain the
 * instruction stream, so we need to drain the stream before executing
 * it.  This could be fixed by using RDTSCP, except that instruction is
 * not available everywhere.
 *
 * Use CPUID for draining in the boot-time SMP consistency test.  The
 * timecounters use MFENCE for AMD CPUs and LFENCE for others (Intel
 * and VIA) when SSE2 is present, and nothing on older machines, which
 * also do not issue RDTSC prematurely.  In the test, checking for SSE2
 * and the vendor is too cumbersome, and we learn about TSC presence
 * from CPUID.
 *
 * Do not use do_cpuid(), since we do not need the CPUID results, which
 * do_cpuid() would have to write into memory.
 */
#define TSC_READ(x)                                                     \
static void                                                             \
tsc_read_##x(void *arg)                                                 \
{                                                                       \
        uint64_t *tsc = arg;                                            \
        u_int cpu = PCPU_GET(cpuid);                                    \
                                                                        \
        __asm __volatile("cpuid" : : : "eax", "ebx", "ecx", "edx");     \
        tsc[cpu * 3 + x] = rdtsc();                                     \
}
TSC_READ(0)
TSC_READ(1)
TSC_READ(2)
#undef TSC_READ

#define N       1000

static void
comp_smp_tsc(void *arg)
{
        uint64_t *tsc;
        int64_t d1, d2;
        u_int cpu = PCPU_GET(cpuid);
        u_int i, j, size;

        size = (mp_maxid + 1) * 3;
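        /*
         * The rendezvous barriers order read 0 on every CPU before
         * read 1 anywhere, and read 1 everywhere before read 2.  With
         * synchronized TSCs, our later read must exceed any other
         * CPU's earlier read; a non-positive difference means the
         * counters are out of step.
         */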
        for (i = 0, tsc = arg; i < N; i++, tsc += size)
                CPU_FOREACH(j) {
                        if (j == cpu)
                                continue;
                        d1 = tsc[cpu * 3 + 1] - tsc[j * 3];
                        d2 = tsc[cpu * 3 + 2] - tsc[j * 3 + 1];
                        if (d1 <= 0 || d2 <= 0) {
                                smp_tsc = 0;
                                return;
                        }
                }
}

static void
adj_smp_tsc(void *arg)
{
        uint64_t *tsc;
        int64_t d, min, max;
        u_int cpu = PCPU_GET(cpuid);
        u_int first, i, size;

        first = CPU_FIRST();
        if (cpu == first)
                return;
        min = INT64_MIN;
        max = INT64_MAX;
        size = (mp_maxid + 1) * 3;
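        /*
         * Narrow a [min, max] window for the BSP-relative TSC offset:
         * each interleaved sample where the BSP read is known to
         * precede ours bounds the offset from below, and each where
         * it is known to follow ours bounds it from above.
         */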
        for (i = 0, tsc = arg; i < N; i++, tsc += size) {
                d = tsc[first * 3] - tsc[cpu * 3 + 1];
                if (d > min)
                        min = d;
                d = tsc[first * 3 + 1] - tsc[cpu * 3 + 2];
                if (d > min)
                        min = d;
                d = tsc[first * 3 + 1] - tsc[cpu * 3];
                if (d < max)
                        max = d;
                d = tsc[first * 3 + 2] - tsc[cpu * 3 + 1];
                if (d < max)
                        max = d;
        }
        if (min > max)
                return;
        d = min / 2 + max / 2;
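        /*
         * Slew our TSC by the midpoint of the window: read MSR 0x10
         * (the TSC itself) and write it back with the 64-bit offset
         * added across %eax/%edx.
         */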
        __asm __volatile (
                "movl $0x10, %%ecx\n\t"
                "rdmsr\n\t"
                "addl %%edi, %%eax\n\t"
                "adcl %%esi, %%edx\n\t"
                "wrmsr\n"
                : /* No output */
                : "D" ((uint32_t)d), "S" ((uint32_t)(d >> 32))
                : "ax", "cx", "dx", "cc"
        );
}

static int
test_tsc(void)
{
        uint64_t *data, *tsc;
        u_int i, size, adj;

        if ((!smp_tsc && !tsc_is_invariant) || vm_guest)
                return (-100);
        size = (mp_maxid + 1) * 3;
        data = malloc(sizeof(*data) * size * N, M_TEMP, M_WAITOK);
        adj = 0;
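        /*
         * Collect N rounds of interleaved readings and check the
         * cross-CPU ordering.  If the check fails and the operator
         * allowed it via the kern.timecounter.smp_tsc_adjust tunable,
         * try slewing the AP TSCs toward the BSP and run it again.
         */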
retry:
        for (i = 0, tsc = data; i < N; i++, tsc += size)
                smp_rendezvous(tsc_read_0, tsc_read_1, tsc_read_2, tsc);
        smp_tsc = 1;    /* XXX */
        smp_rendezvous(smp_no_rendezvous_barrier, comp_smp_tsc,
            smp_no_rendezvous_barrier, data);
        if (!smp_tsc && adj < smp_tsc_adjust) {
                adj++;
                smp_rendezvous(smp_no_rendezvous_barrier, adj_smp_tsc,
                    smp_no_rendezvous_barrier, data);
                goto retry;
        }
        free(data, M_TEMP);
        if (bootverbose)
                printf("SMP: %sed TSC synchronization test%s\n",
                    smp_tsc ? "pass" : "fail",
                    adj > 0 ? " after adjustment" : "");
        if (smp_tsc && tsc_is_invariant) {
                switch (cpu_vendor_id) {
                case CPU_VENDOR_AMD:
                        /*
                         * Starting with Family 15h processors, TSC clock
                         * source is in the north bridge.  Check whether
                         * we have a single-socket/multi-core platform.
                         * XXX Need more work for complex cases.
                         */
                        if (CPUID_TO_FAMILY(cpu_id) < 0x15 ||
                            (amd_feature2 & AMDID2_CMP) == 0 ||
                            smp_cpus > (cpu_procinfo2 & AMDID_CMP_CORES) + 1)
                                break;
                        return (1000);
                case CPU_VENDOR_INTEL:
                        /*
                         * XXX Assume Intel platforms have synchronized TSCs.
                         */
                        return (1000);
                }
                return (800);
        }
        return (-100);
}

#undef N

#else

/*
 * This function is never called; it exists only to avoid a link
 * failure on uniprocessor kernels.
 */
static int
test_tsc(void)
{

        return (0);
}

#endif /* SMP */

static void
init_TSC_tc(void)
{
        uint64_t max_freq;
        int shift;

        if ((cpu_feature & CPUID_TSC) == 0 || tsc_disabled)
                return;

        /*
         * Limit the timecounter frequency to fit in a u_int and to keep
         * it from overflowing too quickly.
         */
        max_freq = UINT_MAX;

        /*
         * We cannot use the TSC if we support APM.  Precise timekeeping
         * on an APM'ed machine is at best a fool's pursuit, since
         * any and all of the time spent in various SMM code can't
         * be reliably accounted for.  Reading the RTC is your only
         * source of reliable time info.  The i8254 loses too, of course,
         * but we need to have some kind of time...
         * We don't know at this point whether APM is going to be used
         * or not, nor when it might be activated.  Play it safe.
         */
        if (power_pm_get_type() == POWER_PM_TYPE_APM) {
                tsc_timecounter.tc_quality = -1000;
                if (bootverbose)
                        printf("TSC timecounter disabled: APM enabled.\n");
                goto init;
        }

        /*
         * Intel CPUs without a C-state invariant TSC can stop the TSC
         * in either C2 or C3.  Disable use of C2 and C3 while using
         * the TSC as the timecounter.  The timecounter can be changed
         * to enable C2 and C3.
         *
         * Note that the TSC is used as the cputicker for computing
         * thread runtime regardless of the timecounter setting, so
         * using an alternate timecounter and enabling C2 or C3 can
         * result in incorrect runtimes for kernel idle threads (but not
         * for any non-idle threads).
         */
        if (cpu_vendor_id == CPU_VENDOR_INTEL &&
            (amd_pminfo & AMDPM_TSC_INVARIANT) == 0) {
                tsc_timecounter.tc_flags |= TC_FLAGS_C2STOP;
                if (bootverbose)
                        printf("TSC timecounter disables C2 and C3.\n");
        }

        /*
         * We cannot use the TSC in SMP mode unless the TSCs on all CPUs
         * are synchronized.  If the user is sure that the system has
         * synchronized TSCs, set the kern.timecounter.smp_tsc tunable to
         * a non-zero value.  The TSC seems unreliable in virtualized SMP
         * environments, so it is set to a negative quality in those cases.
         */
        if (mp_ncpus > 1)
                tsc_timecounter.tc_quality = test_tsc();
        else if (tsc_is_invariant)
                tsc_timecounter.tc_quality = 1000;
        max_freq >>= tsc_shift;

init:
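        /*
         * Find the smallest shift that brings the counter frequency at
         * or below max_freq; the shift is recorded in tc_priv so that
         * the low-bit-discarding readers and the vDSO can apply it.
         */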
        for (shift = 0; shift <= 31 && (tsc_freq >> shift) > max_freq; shift++)
                ;
        if ((cpu_feature & CPUID_SSE2) != 0 && mp_ncpus > 1) {
                if (cpu_vendor_id == CPU_VENDOR_AMD) {
                        tsc_timecounter.tc_get_timecount = shift > 0 ?
                            tsc_get_timecount_low_mfence :
                            tsc_get_timecount_mfence;
                } else {
                        tsc_timecounter.tc_get_timecount = shift > 0 ?
                            tsc_get_timecount_low_lfence :
                            tsc_get_timecount_lfence;
                }
        } else {
                tsc_timecounter.tc_get_timecount = shift > 0 ?
                    tsc_get_timecount_low : tsc_get_timecount;
        }
        if (shift > 0) {
                tsc_timecounter.tc_name = "TSC-low";
                if (bootverbose)
                        printf("TSC timecounter discards lower %d bit(s)\n",
                            shift);
        }
        if (tsc_freq != 0) {
                tsc_timecounter.tc_frequency = tsc_freq >> shift;
                tsc_timecounter.tc_priv = (void *)(intptr_t)shift;
                tc_init(&tsc_timecounter);
        }
}
SYSINIT(tsc_tc, SI_SUB_SMP, SI_ORDER_ANY, init_TSC_tc, NULL);

/*
 * When cpufreq levels change, find out about the (new) max frequency.  We
 * use this to update CPU accounting in case it got a lower estimate at boot.
 */
static void
tsc_levels_changed(void *arg, int unit)
{
        device_t cf_dev;
        struct cf_level *levels;
        int count, error;
        uint64_t max_freq;

        /* Only use values from the first CPU, assuming all are equal. */
        if (unit != 0)
                return;

        /* Find the appropriate cpufreq device instance. */
        cf_dev = devclass_get_device(devclass_find("cpufreq"), unit);
        if (cf_dev == NULL) {
                printf("tsc_levels_changed() called but no cpufreq device?\n");
                return;
        }

        /* Get settings from the device and find the max frequency. */
        count = 64;
        levels = malloc(count * sizeof(*levels), M_TEMP, M_NOWAIT);
        if (levels == NULL)
                return;
        error = CPUFREQ_LEVELS(cf_dev, levels, &count);
        if (error == 0 && count != 0) {
                max_freq = (uint64_t)levels[0].total_set.freq * 1000000;
                set_cputicker(rdtsc, max_freq, 1);
        } else
                printf("tsc_levels_changed: no max freq found\n");
        free(levels, M_TEMP);
}

/*
 * If the TSC timecounter is in use, veto the pending change.  It may be
 * possible in the future to handle a dynamically-changing timecounter rate.
 */
static void
tsc_freq_changing(void *arg, const struct cf_level *level, int *status)
{

        if (*status != 0 || timecounter != &tsc_timecounter)
                return;

        printf("timecounter TSC must not be in use when "
            "changing frequencies; change denied\n");
        *status = EBUSY;
}

/* Update TSC freq with the value indicated by the caller. */
static void
tsc_freq_changed(void *arg, const struct cf_level *level, int status)
{
        uint64_t freq;

        /* If there was an error during the transition, don't do anything. */
        if (tsc_disabled || status != 0)
                return;

        /* Total setting for this level gives the new frequency in MHz. */
        freq = (uint64_t)level->total_set.freq * 1000000;
        atomic_store_rel_64(&tsc_freq, freq);
        tsc_timecounter.tc_frequency =
            freq >> (int)(intptr_t)tsc_timecounter.tc_priv;
}

static int
sysctl_machdep_tsc_freq(SYSCTL_HANDLER_ARGS)
{
        int error;
        uint64_t freq;

        freq = atomic_load_acq_64(&tsc_freq);
        if (freq == 0)
                return (EOPNOTSUPP);
        error = sysctl_handle_64(oidp, &freq, 0, req);
        if (error == 0 && req->newptr != NULL) {
                atomic_store_rel_64(&tsc_freq, freq);
                atomic_store_rel_64(&tsc_timecounter.tc_frequency,
                    freq >> (int)(intptr_t)tsc_timecounter.tc_priv);
        }
        return (error);
}

SYSCTL_PROC(_machdep, OID_AUTO, tsc_freq, CTLTYPE_U64 | CTLFLAG_RW,
    0, 0, sysctl_machdep_tsc_freq, "QU", "Time Stamp Counter frequency");

static u_int
tsc_get_timecount(struct timecounter *tc __unused)
{

        return (rdtsc32());
}

static inline u_int
tsc_get_timecount_low(struct timecounter *tc)
{
        uint32_t rv;
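        /*
         * RDTSC leaves the 64-bit count in %edx:%eax; SHRD shifts it
         * right by tc_priv bits, so %eax returns TSC bits
         * [shift .. shift + 31].
         */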
        __asm __volatile("rdtsc; shrd %%cl, %%edx, %0"
            : "=a" (rv) : "c" ((int)(intptr_t)tc->tc_priv) : "edx");
        return (rv);
}

static u_int
tsc_get_timecount_lfence(struct timecounter *tc __unused)
{

        lfence();
        return (rdtsc32());
}

static u_int
tsc_get_timecount_low_lfence(struct timecounter *tc)
{

        lfence();
        return (tsc_get_timecount_low(tc));
}

static u_int
tsc_get_timecount_mfence(struct timecounter *tc __unused)
{

        mfence();
        return (rdtsc32());
}

static u_int
tsc_get_timecount_low_mfence(struct timecounter *tc)
{

        mfence();
        return (tsc_get_timecount_low(tc));
}

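/*
 * Export the TSC parameters so the vDSO implementations of
 * gettimeofday(2) and friends can read the counter directly from user
 * mode; returning 1 marks the timecounter as usable from that fast
 * path.  The HPET index is set to 0xffffffff since it is unused here.
 */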
static uint32_t
x86_tsc_vdso_timehands(struct vdso_timehands *vdso_th, struct timecounter *tc)
{

        vdso_th->th_algo = VDSO_TH_ALGO_X86_TSC;
        vdso_th->th_x86_shift = (int)(intptr_t)tc->tc_priv;
        vdso_th->th_x86_hpet_idx = 0xffffffff;
        bzero(vdso_th->th_res, sizeof(vdso_th->th_res));
        return (1);
}

#ifdef COMPAT_FREEBSD32
static uint32_t
x86_tsc_vdso_timehands32(struct vdso_timehands32 *vdso_th32,
    struct timecounter *tc)
{

        vdso_th32->th_algo = VDSO_TH_ALGO_X86_TSC;
        vdso_th32->th_x86_shift = (int)(intptr_t)tc->tc_priv;
        vdso_th32->th_x86_hpet_idx = 0xffffffff;
        bzero(vdso_th32->th_res, sizeof(vdso_th32->th_res));
        return (1);
}
#endif