2 * SPDX-License-Identifier: BSD-2-Clause-FreeBSD
4 * Copyright (c) 2018 Intel Corporation
6 * Redistribution and use in source and binary forms, with or without
7 * modification, are permitted provided that the following conditions
9 * 1. Redistributions of source code must retain the above copyright
10 * notice, this list of conditions and the following disclaimer.
11 * 2. Redistributions in binary form must reproduce the above copyright
12 * notice, this list of conditions and the following disclaimer in the
13 * documentation and/or other materials provided with the distribution.
15 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
16 * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
17 * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
18 * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY
19 * DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
20 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
21 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
22 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT,
23 * STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING
24 * IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
25 * POSSIBILITY OF SUCH DAMAGE.
28 #include <sys/cdefs.h>
29 __FBSDID("$FreeBSD$");
31 #include <sys/types.h>
33 #include <sys/module.h>
34 #include <sys/systm.h>
35 #include <sys/errno.h>
36 #include <sys/param.h>
37 #include <sys/kernel.h>
42 #include <sys/sched.h>
44 #include <machine/cpu.h>
45 #include <machine/md_var.h>
46 #include <machine/cputypes.h>
47 #include <machine/specialreg.h>
49 #include <contrib/dev/acpica/include/acpi.h>
51 #include <dev/acpica/acpivar.h>
53 #include <x86/cpufreq/hwpstate_intel_internal.h>
56 #include "cpufreq_if.h"
/* TSC frequency in Hz; presumably exported via machine/md_var.h -- confirm. */
58 extern uint64_t tsc_freq;

/* device interface methods, wired into intel_hwpstate_methods below. */
60 static int intel_hwpstate_probe(device_t dev);
61 static int intel_hwpstate_attach(device_t dev);
62 static int intel_hwpstate_detach(device_t dev);
63 static int intel_hwpstate_suspend(device_t dev);
64 static int intel_hwpstate_resume(device_t dev);

/* cpufreq interface methods (see "cpufreq_if.h"). */
66 static int intel_hwpstate_get(device_t dev, struct cf_setting *cf);
67 static int intel_hwpstate_type(device_t dev, int *type);
/*
 * Driver method table.  intel_hwpstate_identify has no prototype above;
 * presumably it is declared in hwpstate_intel_internal.h -- confirm.
 * NOTE(review): the DEVMETHOD_END terminator and closing brace are not
 * visible in this chunk.
 */
69 static device_method_t intel_hwpstate_methods[] = {
70 /* Device interface */
71 DEVMETHOD(device_identify, intel_hwpstate_identify),
72 DEVMETHOD(device_probe, intel_hwpstate_probe),
73 DEVMETHOD(device_attach, intel_hwpstate_attach),
74 DEVMETHOD(device_detach, intel_hwpstate_detach),
75 DEVMETHOD(device_suspend, intel_hwpstate_suspend),
76 DEVMETHOD(device_resume, intel_hwpstate_resume),
78 /* cpufreq interface */
79 DEVMETHOD(cpufreq_drv_get, intel_hwpstate_get),
80 DEVMETHOD(cpufreq_drv_type, intel_hwpstate_type),
/*
 * Per-device softc fields (the enclosing struct hwp_softc declaration
 * opens outside this view).  The capability booleans are populated from
 * CPUID bits in intel_hwpstate_attach().
 */
87 bool hwp_notifications; /* CPUTPM1_HWP_NOTIFICATION present */
88 bool hwp_activity_window; /* CPUTPM1_HWP_ACTIVITY_WINDOW present */
93 bool hwp_perf_bias_cached; /* hwp_energy_perf_bias holds the MSR value */
95 uint64_t req; /* Cached copy of HWP_REQUEST */
96 uint64_t hwp_energy_perf_bias; /* Cache PERF_BIAS */
104 static devclass_t hwpstate_intel_devclass;

/*
 * NOTE(review): the driver_t name field is on a line not visible in this
 * chunk; confirm it is "hwpstate_intel".
 */
105 static driver_t hwpstate_intel_driver = {
107 intel_hwpstate_methods,
108 sizeof(struct hwp_softc),

/* Attach one driver instance under each cpu(4) device. */
111 DRIVER_MODULE(hwpstate_intel, cpu, hwpstate_intel_driver,
112 hwpstate_intel_devclass, NULL, NULL);
113 MODULE_VERSION(hwpstate_intel, 1);
/*
 * Boot-time tunable (read-only sysctl): machdep.hwpstate_pkg_ctrl.
 * Gates use of package-level HWP control; consulted in attach to set
 * sc->hwp_pkg_ctrl_en.
 */
115 static bool hwpstate_pkg_ctrl_enable = true;
116 SYSCTL_BOOL(_machdep, OID_AUTO, hwpstate_pkg_ctrl, CTLFLAG_RDTUN,
117 &hwpstate_pkg_ctrl_enable, 0,
118 "Set 1 (default) to enable package-level control, 0 to disable");
/*
 * Debug sysctl handler: dump this CPU's HWP state (IA32_PM_ENABLE,
 * IA32_HWP_CAPABILITIES, IA32_HWP_REQUEST and, when package control is in
 * effect, IA32_HWP_REQUEST_PKG) into an sbuf returned to userland.
 */
121 intel_hwp_dump_sysctl_handler(SYSCTL_HANDLER_ARGS)
126 struct hwp_softc *sc;
127 uint64_t data, data2;
130 sc = (struct hwp_softc *)arg1;
133 pc = cpu_get_pcpu(dev);
137 sb = sbuf_new(NULL, NULL, 1024, SBUF_FIXEDLEN | SBUF_INCLUDENUL);
/* Bind to the target CPU so the MSR reads below hit the right core. */
139 thread_lock(curthread);
140 sched_bind(curthread, pc->pc_cpuid);
141 thread_unlock(curthread);
143 rdmsr_safe(MSR_IA32_PM_ENABLE, &data);
144 sbuf_printf(sb, "CPU%d: HWP %sabled\n", pc->pc_cpuid,
145 ((data & 1) ? "En" : "Dis"));
/* Capability fields are packed one byte each, lowest byte = highest perf. */
152 rdmsr_safe(MSR_IA32_HWP_CAPABILITIES, &data);
153 sbuf_printf(sb, "\tHighest Performance: %03ju\n", data & 0xff);
154 sbuf_printf(sb, "\tGuaranteed Performance: %03ju\n", (data >> 8) & 0xff);
155 sbuf_printf(sb, "\tEfficient Performance: %03ju\n", (data >> 16) & 0xff);
156 sbuf_printf(sb, "\tLowest Performance: %03ju\n", (data >> 24) & 0xff);
158 rdmsr_safe(MSR_IA32_HWP_REQUEST, &data);
/* Only fetch the package-level request when it actually governs this CPU. */
160 if (sc->hwp_pkg_ctrl && (data & IA32_HWP_REQUEST_PACKAGE_CONTROL))
161 rdmsr_safe(MSR_IA32_HWP_REQUEST_PKG, &data2);
/*
 * Print the per-CPU field when its valid bit is set (or when package
 * control is unsupported); otherwise print the package-level value from
 * data2.  NOTE(review): the macro's else branch line is not visible here.
 */
165 #define pkg_print(x, name, offset) do { \
166 if (!sc->hwp_pkg_ctrl || (data & x) != 0) \
167 sbuf_printf(sb, "\t%s: %03u\n", name, \
168 (unsigned)(data >> offset) & 0xff); \
170 sbuf_printf(sb, "\t%s: %03u\n", name, \
171 (unsigned)(data2 >> offset) & 0xff); \
174 pkg_print(IA32_HWP_REQUEST_EPP_VALID,
175 "Requested Efficiency Performance Preference", 24);
176 pkg_print(IA32_HWP_REQUEST_DESIRED_VALID,
177 "Requested Desired Performance", 16);
178 pkg_print(IA32_HWP_REQUEST_MAXIMUM_VALID,
179 "Requested Maximum Performance", 8);
180 pkg_print(IA32_HWP_REQUEST_MINIMUM_VALID,
181 "Requested Minimum Performance", 0);
187 thread_lock(curthread);
188 sched_unbind(curthread);
189 thread_unlock(curthread);
191 ret = sbuf_finish(sb);
193 ret = SYSCTL_OUT(req, sbuf_data(sb), sbuf_len(sb));
/*
 * Map a percentage in [0, 100] to a raw HWP register value in [0, 0xff]
 * (truncating integer division).
 */
200 percent_to_raw(int x)
203 MPASS(x <= 100 && x >= 0);
204 return (0xff * x / 100);
208 * Given x * 10 in [0, 1000], round to the integer nearest x.
210 * This allows round-tripping nice human readable numbers through this
211 * interface. Otherwise, user-provided percentages such as 25, 50, 75 get
212 * rounded down to 24, 49, and 74, which is a bit ugly.
/* Round x*10 to the nearest integer x (see block comment above). */
215 round10(int xtimes10)
217 return ((xtimes10 + 5) / 10);
/* Map a raw HWP value in [0, 0xff] back to the nearest percentage. */
221 raw_to_percent(int x)
223 MPASS(x <= 0xff && x >= 0);
224 return (round10(x * 1000 / 0xff));
227 /* Range of MSR_IA32_ENERGY_PERF_BIAS is more limited: 0-0xf. */
/* Map a percentage in [0, 100] to an energy/perf bias value in [0, 0xf]. */
229 percent_to_raw_perf_bias(int x)
232 * Round up so that raw values present as nice round human numbers and
233 * also round-trip to the same raw value.
235 MPASS(x <= 100 && x >= 0);
236 return (((0xf * x) + 50) / 100);
/* Map an energy/perf bias value [0, 0xf] back to a multiple-of-5 percent. */
240 raw_to_percent_perf_bias(int x)
242 /* Rounding to nice human numbers despite a step interval of 6.67%. */
243 MPASS(x <= 0xf && x >= 0);
244 return (((x * 20) / 0xf) * 5);
/*
 * Sysctl handler for the per-device "epp" knob: read/write the
 * energy/performance preference as a percentage (0 = most performant,
 * 100 = most efficient).  Uses the HWP EPP field when supported,
 * otherwise falls back to MSR_IA32_ENERGY_PERF_BIAS.  Binds curthread to
 * the target CPU around the MSR accesses.
 */
248 sysctl_epp_select(SYSCTL_HANDLER_ARGS)
250 struct hwp_softc *sc;
257 dev = oidp->oid_arg1;
258 sc = device_get_softc(dev);
/* Neither control mechanism available: nothing to expose. */
259 if (!sc->hwp_pref_ctrl && !sc->hwp_perf_bias)
262 pc = cpu_get_pcpu(dev);
266 thread_lock(curthread);
267 sched_bind(curthread, pc->pc_cpuid);
268 thread_unlock(curthread);
/* Read current setting: EPP byte of the cached request, as a percent. */
270 if (sc->hwp_pref_ctrl) {
271 val = (sc->req & IA32_HWP_REQUEST_ENERGY_PERFORMANCE_PREFERENCE) >> 24;
272 val = raw_to_percent(val);
275 * If cpuid indicates EPP is not supported, the HWP controller
276 * uses MSR_IA32_ENERGY_PERF_BIAS instead (Intel SDM §14.4.4).
277 * This register is per-core (but not HT).
/* Lazily read and cache the bias MSR on first access. */
279 if (!sc->hwp_perf_bias_cached) {
280 ret = rdmsr_safe(MSR_IA32_ENERGY_PERF_BIAS, &epb);
283 sc->hwp_energy_perf_bias = epb;
284 sc->hwp_perf_bias_cached = true;
286 val = sc->hwp_energy_perf_bias &
287 IA32_ENERGY_PERF_BIAS_POLICY_HINT_MASK;
288 val = raw_to_percent_perf_bias(val);
291 MPASS(val >= 0 && val <= 100);
/* Hand the value to sysctl; bail out if this was only a read. */
293 ret = sysctl_handle_int(oidp, &val, 0, req);
294 if (ret || req->newptr == NULL)
/* Write path: fold the new percentage back into the cached request/bias. */
302 if (sc->hwp_pref_ctrl) {
303 val = percent_to_raw(val);
306 ((sc->req & ~IA32_HWP_REQUEST_ENERGY_PERFORMANCE_PREFERENCE)
/* Package-level control writes the PKG request MSR instead. */
309 if (sc->hwp_pkg_ctrl_en)
310 ret = wrmsr_safe(MSR_IA32_HWP_REQUEST_PKG, sc->req);
312 ret = wrmsr_safe(MSR_IA32_HWP_REQUEST, sc->req);
314 val = percent_to_raw_perf_bias(val);
315 MPASS((val & ~IA32_ENERGY_PERF_BIAS_POLICY_HINT_MASK) == 0);
317 sc->hwp_energy_perf_bias =
318 ((sc->hwp_energy_perf_bias &
319 ~IA32_ENERGY_PERF_BIAS_POLICY_HINT_MASK) | val);
320 ret = wrmsr_safe(MSR_IA32_ENERGY_PERF_BIAS,
321 sc->hwp_energy_perf_bias);
325 thread_lock(curthread);
326 sched_unbind(curthread);
327 thread_unlock(curthread);
/*
 * Bus identify method: add a "hwpstate_intel" child under each Intel
 * cpu(4) device that advertises the HWP CPUID feature, unless one is
 * already present or the device is disabled via hints.
 */
333 intel_hwpstate_identify(driver_t *driver, device_t parent)
335 if (device_find_child(parent, "hwpstate_intel", -1) != NULL)
338 if (cpu_vendor_id != CPU_VENDOR_INTEL)
341 if (resource_disabled("hwpstate_intel", 0))
345 * Intel SDM 14.4.1 (HWP Programming Interfaces):
346 * Availability of HWP baseline resource and capability,
347 * CPUID.06H:EAX[bit 7]: If this bit is set, HWP provides several new
348 * architectural MSRs: IA32_PM_ENABLE, IA32_HWP_CAPABILITIES,
349 * IA32_HWP_REQUEST, IA32_HWP_STATUS.
351 if ((cpu_power_eax & CPUTPM1_HWP) == 0)
354 if (BUS_ADD_CHILD(parent, 10, "hwpstate_intel", -1) == NULL)
358 device_printf(parent, "hwpstate registered\n");
/*
 * Probe: the child was created by our own identify routine, so simply
 * set the description and claim it (NOWILDCARD: only attach to devices
 * we added ourselves).
 */
362 intel_hwpstate_probe(device_t dev)
365 device_set_desc(dev, "Intel Speed Shift");
366 return (BUS_PROBE_NOWILDCARD);
/*
 * Enable HWP on this softc's CPU and program IA32_HWP_REQUEST for fully
 * autonomous operation: clear the desired-performance and activity-window
 * fields, and program the min/max bounds from IA32_HWP_CAPABILITIES.
 * When package-level control is enabled, mirror the request into
 * IA32_HWP_REQUEST_PKG.  Runs bound to the target CPU.
 */
370 set_autonomous_hwp(struct hwp_softc *sc)
379 pc = cpu_get_pcpu(dev);
383 thread_lock(curthread);
384 sched_bind(curthread, pc->pc_cpuid);
385 thread_unlock(curthread);
387 /* XXX: Many MSRs aren't readable until feature is enabled */
388 ret = wrmsr_safe(MSR_IA32_PM_ENABLE, 1);
391 * This is actually a package-level MSR, and only the first
392 * write is not ignored. So it is harmless to enable it across
393 * all devices, and this allows us not to care especially in
394 * which order cores (and packages) are probed. This error
395 * condition should not happen given we gate on the HWP CPUID
396 * feature flag, if the Intel SDM is correct.
398 device_printf(dev, "Failed to enable HWP for cpu%d (%d)\n",
/* Cache the hardware's current request so later edits are incremental. */
403 ret = rdmsr_safe(MSR_IA32_HWP_REQUEST, &sc->req);
406 "Failed to read HWP request MSR for cpu%d (%d)\n",
411 ret = rdmsr_safe(MSR_IA32_HWP_CAPABILITIES, &caps);
414 "Failed to read HWP capabilities MSR for cpu%d (%d)\n",
420 * High and low are static; "guaranteed" is dynamic; and efficient is
423 sc->high = IA32_HWP_CAPABILITIES_HIGHEST_PERFORMANCE(caps);
424 sc->guaranteed = IA32_HWP_CAPABILITIES_GUARANTEED_PERFORMANCE(caps);
425 sc->efficient = IA32_HWP_CAPABILITIES_EFFICIENT_PERFORMANCE(caps);
426 sc->low = IA32_HWP_CAPABILITIES_LOWEST_PERFORMANCE(caps);
428 /* hardware autonomous selection determines the performance target */
429 sc->req &= ~IA32_HWP_DESIRED_PERFORMANCE;
431 /* enable HW dynamic selection of window size */
432 sc->req &= ~IA32_HWP_ACTIVITY_WINDOW;
434 /* IA32_HWP_REQUEST.Minimum_Performance = IA32_HWP_CAPABILITIES.Lowest_Performance */
435 sc->req &= ~IA32_HWP_MINIMUM_PERFORMANCE;
438 /* IA32_HWP_REQUEST.Maximum_Performance = IA32_HWP_CAPABILITIES.Highest_Performance. */
439 sc->req &= ~IA32_HWP_REQUEST_MAXIMUM_PERFORMANCE;
440 sc->req |= sc->high << 8;
442 /* If supported, request package-level control for this CPU. */
443 if (sc->hwp_pkg_ctrl_en)
444 ret = wrmsr_safe(MSR_IA32_HWP_REQUEST, sc->req |
445 IA32_HWP_REQUEST_PACKAGE_CONTROL);
447 ret = wrmsr_safe(MSR_IA32_HWP_REQUEST, sc->req);
450 "Failed to setup%s autonomous HWP for cpu%d\n",
451 sc->hwp_pkg_ctrl_en ? " PKG" : "", pc->pc_cpuid);
455 /* If supported, write the PKG-wide control MSR. */
456 if (sc->hwp_pkg_ctrl_en) {
458 * "The structure of the IA32_HWP_REQUEST_PKG MSR
459 * (package-level) is identical to the IA32_HWP_REQUEST MSR
460 * with the exception of the Package Control field, which does
461 * not exist." (Intel SDM §14.4.4)
463 ret = wrmsr_safe(MSR_IA32_HWP_REQUEST_PKG, sc->req);
465 "Failed to set autonomous HWP for package\n");
469 thread_lock(curthread);
470 sched_unbind(curthread);
471 thread_unlock(curthread);
/*
 * Attach: record the CPU's HWP-related CPUID capability bits in the
 * softc, enable autonomous HWP operation, create the debug dump and
 * "epp" sysctls, and register the device with cpufreq(4).
 */
477 intel_hwpstate_attach(device_t dev)
479 struct hwp_softc *sc;
482 sc = device_get_softc(dev);
/* Capability discovery from CPUID.06H:EAX (cpu_power_eax). */
486 if (cpu_power_eax & CPUTPM1_HWP_NOTIFICATION)
487 sc->hwp_notifications = true;
488 if (cpu_power_eax & CPUTPM1_HWP_ACTIVITY_WINDOW)
489 sc->hwp_activity_window = true;
490 if (cpu_power_eax & CPUTPM1_HWP_PREF)
491 sc->hwp_pref_ctrl = true;
492 if (cpu_power_eax & CPUTPM1_HWP_PKG)
493 sc->hwp_pkg_ctrl = true;
495 /* Allow administrators to disable pkg-level control. */
496 sc->hwp_pkg_ctrl_en = (sc->hwp_pkg_ctrl && hwpstate_pkg_ctrl_enable);
/* Energy/perf bias support is reported in CPUID.06H:ECX. */
499 if (cpu_power_ecx & CPUID_PERF_BIAS)
500 sc->hwp_perf_bias = true;
502 ret = set_autonomous_hwp(sc);
/* Hidden debug dump sysctl: debug.<nameunit>. */
506 SYSCTL_ADD_PROC(device_get_sysctl_ctx(dev),
507 SYSCTL_STATIC_CHILDREN(_debug), OID_AUTO, device_get_nameunit(dev),
508 CTLTYPE_STRING | CTLFLAG_RD | CTLFLAG_SKIP | CTLFLAG_NEEDGIANT,
509 sc, 0, intel_hwp_dump_sysctl_handler, "A", "");
511 SYSCTL_ADD_PROC(device_get_sysctl_ctx(dev),
512 SYSCTL_CHILDREN(device_get_sysctl_tree(dev)), OID_AUTO,
513 "epp", CTLTYPE_INT | CTLFLAG_RWTUN | CTLFLAG_NEEDGIANT, dev, 0,
514 sysctl_epp_select, "I",
515 "Efficiency/Performance Preference "
516 "(range from 0, most performant, through 100, most efficient)");
518 return (cpufreq_register(dev));
/* Detach: deregister this device from the cpufreq(4) framework. */
522 intel_hwpstate_detach(device_t dev)
525 return (cpufreq_unregister(dev));
/*
 * cpufreq get method: report the estimated current clock rate for this
 * CPU (MHz); voltage, power, and latency are not known and are reported
 * as CPUFREQ_VAL_UNKNOWN.
 */
529 intel_hwpstate_get(device_t dev, struct cf_setting *set)
538 pc = cpu_get_pcpu(dev);
542 memset(set, CPUFREQ_VAL_UNKNOWN, sizeof(*set));
/* Estimate the frequency rather than reading a fixed P-state table. */
545 ret = cpu_est_clockrate(pc->pc_cpuid, &rate);
547 set->freq = rate / 1000000;
549 set->volts = CPUFREQ_VAL_UNKNOWN;
550 set->power = CPUFREQ_VAL_UNKNOWN;
551 set->lat = CPUFREQ_VAL_UNKNOWN;
/*
 * cpufreq type method: absolute frequencies, informational only (the
 * hardware picks the operating point), and values must not be cached.
 */
557 intel_hwpstate_type(device_t dev, int *type)
561 *type = CPUFREQ_TYPE_ABSOLUTE | CPUFREQ_FLAG_INFO_ONLY | CPUFREQ_FLAG_UNCACHED;
/*
 * Device suspend hook (registered in intel_hwpstate_methods).
 * NOTE(review): the function body is not visible in this chunk.
 */
567 intel_hwpstate_suspend(device_t dev)
573 * Redo a subset of set_autonomous_hwp on resume; untested. Without this,
574 * testers observed that on resume MSR_IA32_HWP_REQUEST was bogus.
577 intel_hwpstate_resume(device_t dev)
579 struct hwp_softc *sc;
583 sc = device_get_softc(dev);
585 pc = cpu_get_pcpu(dev);
/* Bind to the target CPU for the MSR writes below. */
589 thread_lock(curthread);
590 sched_bind(curthread, pc->pc_cpuid);
591 thread_unlock(curthread);
593 ret = wrmsr_safe(MSR_IA32_PM_ENABLE, 1);
596 "Failed to enable HWP for cpu%d after suspend (%d)\n",
/* Restore the cached request, plus the package-control bit if enabled. */
601 if (sc->hwp_pkg_ctrl_en)
602 ret = wrmsr_safe(MSR_IA32_HWP_REQUEST, sc->req |
603 IA32_HWP_REQUEST_PACKAGE_CONTROL);
605 ret = wrmsr_safe(MSR_IA32_HWP_REQUEST, sc->req);
608 "Failed to set%s autonomous HWP for cpu%d after suspend\n",
609 sc->hwp_pkg_ctrl_en ? " PKG" : "", pc->pc_cpuid);
/* Also rewrite the package-level request MSR. */
612 if (sc->hwp_pkg_ctrl_en) {
613 ret = wrmsr_safe(MSR_IA32_HWP_REQUEST_PKG, sc->req);
616 "Failed to set autonomous HWP for package after "
/* Restore the cached energy/perf bias, if it was ever read. */
621 if (!sc->hwp_pref_ctrl && sc->hwp_perf_bias_cached) {
622 ret = wrmsr_safe(MSR_IA32_ENERGY_PERF_BIAS,
623 sc->hwp_energy_perf_bias);
626 "Failed to set energy perf bias for cpu%d after "
627 "suspend\n", pc->pc_cpuid);
632 thread_lock(curthread);
633 sched_unbind(curthread);
634 thread_unlock(curthread);