2 * SPDX-License-Identifier: BSD-2-Clause-FreeBSD
4 * Copyright (c) 2013 Tycho Nightingale <tycho.nightingale@pluribusnetworks.com>
5 * Copyright (c) 2013 Neel Natu <neel@freebsd.org>
8 * Redistribution and use in source and binary forms, with or without
9 * modification, are permitted provided that the following conditions
11 * 1. Redistributions of source code must retain the above copyright
12 * notice, this list of conditions and the following disclaimer.
13 * 2. Redistributions in binary form must reproduce the above copyright
14 * notice, this list of conditions and the following disclaimer in the
15 * documentation and/or other materials provided with the distribution.
17 * THIS SOFTWARE IS PROVIDED BY NETAPP, INC ``AS IS'' AND
18 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
19 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
20 * ARE DISCLAIMED. IN NO EVENT SHALL NETAPP, INC OR CONTRIBUTORS BE LIABLE
21 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
22 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
23 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
24 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
25 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
26 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
32 #include <sys/cdefs.h>
33 __FBSDID("$FreeBSD$");
35 #include "opt_bhyve_snapshot.h"
37 #include <sys/param.h>
39 #include <sys/mutex.h>
40 #include <sys/kernel.h>
41 #include <sys/malloc.h>
42 #include <sys/systm.h>
44 #include <dev/acpica/acpi_hpet.h>
46 #include <machine/vmm.h>
47 #include <machine/vmm_dev.h>
48 #include <machine/vmm_snapshot.h>
50 #include "vmm_lapic.h"
/*
 * File-scope definitions for the virtual HPET:
 *  - M_VHPET: malloc type used for vhpet allocations.
 *  - HPET_FREQ / FS_PER_S: emulated counter frequency and the number of
 *    femtoseconds per second; their quotient is reported as the tick period.
 *  - HPET_TCAP_RO_MASK: bits of the per-timer register that are carried over
 *    from the old value whenever the register is rewritten.
 *  - VHPET_NUM_TIMERS: number of emulated timers, checked at compile time
 *    to lie in the range 3..32.
 */
57 static MALLOC_DEFINE(M_VHPET, "vhpet", "bhyve virtual hpet");
59 #define HPET_FREQ 16777216 /* 16.7 (2^24) MHz */
60 #define FS_PER_S 1000000000000000ul
62 /* Timer N Configuration and Capabilities Register */
63 #define HPET_TCAP_RO_MASK (HPET_TCAP_INT_ROUTE | \
64 HPET_TCAP_FSB_INT_DEL | \
68 * HPET requires at least 3 timers and up to 32 timers per block.
70 #define VHPET_NUM_TIMERS 8
71 CTASSERT(VHPET_NUM_TIMERS >= 3 && VHPET_NUM_TIMERS <= 32);
/*
 * Callout argument and device state of the virtual HPET.
 *
 * The device keeps the general configuration and interrupt status
 * registers, plus a counter base value paired with the uptime at which that
 * base was valid.  Each of the VHPET_NUM_TIMERS timers has its own
 * configuration/capability register, FSB (MSI) routing register,
 * comparator, callout, the absolute time at which that callout is due and
 * the argument handed to the callout handler.
 */
73 struct vhpet_callout_arg {
83 uint64_t config; /* General configuration */
84 uint64_t isr; /* Interrupt Status */
85 uint32_t countbase; /* HPET counter base value */
86 sbintime_t countbase_sbt; /* uptime corresponding to base value */
89 uint64_t cap_config; /* Timer N configuration and capabilities */
90 uint64_t msireg; /* FSB interrupt routing */
91 uint32_t compval; /* Comparator */
93 struct callout callout;
94 sbintime_t callout_sbt; /* time when counter==compval */
95 struct vhpet_callout_arg arg;
96 } timer[VHPET_NUM_TIMERS];
/*
 * VHPET_LOCK/VHPET_UNLOCK acquire and release the mutex embedded in a vhpet
 * instance.  vhpet_start_timer() is declared ahead of its definition because
 * vhpet_handler() calls it before the definition appears in the file.
 */
99 #define VHPET_LOCK(vhp) mtx_lock(&((vhp)->mtx))
100 #define VHPET_UNLOCK(vhp) mtx_unlock(&((vhp)->mtx))
102 static void vhpet_start_timer(struct vhpet *vhpet, int n, uint32_t counter,
/*
 * Build the value reported for the HPET capabilities register: vendor id
 * 0x8086 starting at bit 16, (VHPET_NUM_TIMERS - 1) starting at bit 8,
 * revision 1 in the low bits, the HPET_CAP_COUNT_SIZE bit cleared (32-bit
 * counter) and the tick period in femtoseconds (FS_PER_S / HPET_FREQ) in
 * the upper 32 bits.
 *
 * NOTE(review): '0x8086 << 16' is evaluated as an int and does not fit in a
 * 32-bit signed int, so the shift overflows before it is OR'ed into 'cap'.
 * An unsigned or 64-bit constant would avoid that -- confirm how the upper
 * bits are masked before the tick period is merged in.
 */
106 vhpet_capabilities(void)
110 cap |= 0x8086 << 16; /* vendor id */
111 cap |= (VHPET_NUM_TIMERS - 1) << 8; /* number of timers */
112 cap |= 1; /* revision */
113 cap &= ~HPET_CAP_COUNT_SIZE; /* 32-bit timer */
116 cap |= (FS_PER_S / HPET_FREQ) << 32; /* tick period in fs */
/*
 * Report whether the main counter is running, i.e. whether HPET_CNF_ENABLE
 * is set in the general configuration register.
 */
122 vhpet_counter_enabled(struct vhpet *vhpet)
125 return ((vhpet->config & HPET_CNF_ENABLE) ? true : false);
/*
 * Check whether timer 'n' delivers its interrupt via FSB (MSI): both the
 * HPET_TCAP_FSB_INT_DEL capability bit and the HPET_TCNF_FSB_EN enable bit
 * must be set in the timer's cap_config register.
 */
129 vhpet_timer_msi_enabled(struct vhpet *vhpet, int n)
131 const uint64_t msi_enable = HPET_TCAP_FSB_INT_DEL | HPET_TCNF_FSB_EN;
133 if ((vhpet->timer[n].cap_config & msi_enable) == msi_enable)
/*
 * Return the ioapic pin timer 'n' is routed to, taken from the
 * HPET_TCNF_INT_ROUTE field of cap_config (shifted down by 9 bits).
 * A timer that uses MSI delivery is handled first and treated as not being
 * connected to the ioapic.  Callers in this file treat a pin value of 0 as
 * "not routed".
 */
140 vhpet_timer_ioapic_pin(struct vhpet *vhpet, int n)
143 * If the timer is configured to use MSI then treat it as if the
144 * timer is not connected to the ioapic.
146 if (vhpet_timer_msi_enabled(vhpet, n))
149 return ((vhpet->timer[n].cap_config & HPET_TCNF_INT_ROUTE) >> 9);
/*
 * Compute the current value of the main counter.
 *
 * The value starts from 'countbase'.  While the counter is enabled the
 * uptime elapsed since 'countbase_sbt', divided by 'freq_sbt', is added to
 * it; the elapsed time is asserted to be non-negative.  While the counter
 * is disabled the uptime paired with 'countbase' has no meaning, so callers
 * must pass nowptr == NULL in that state (enforced with KASSERT).
 */
153 vhpet_counter(struct vhpet *vhpet, sbintime_t *nowptr)
156 sbintime_t now, delta;
158 val = vhpet->countbase;
159 if (vhpet_counter_enabled(vhpet)) {
161 delta = now - vhpet->countbase_sbt;
162 KASSERT(delta >= 0, ("vhpet_counter: uptime went backwards: "
163 "%#lx to %#lx", vhpet->countbase_sbt, now));
164 val += delta / vhpet->freq_sbt;
169 * The sbinuptime corresponding to the 'countbase' is
170 * meaningless when the counter is disabled. Make sure
171 * that the caller doesn't want to use it.
173 KASSERT(nowptr == NULL, ("vhpet_counter: nowptr must be NULL"));
/*
 * Clear the interrupt status bit of timer 'n' if it is set: the ioapic pin
 * the timer is routed to is deasserted and bit 'n' is removed from the ISR.
 * A set ISR bit implies a non-zero pin, which is asserted.
 */
179 vhpet_timer_clear_isr(struct vhpet *vhpet, int n)
183 if (vhpet->isr & (1 << n)) {
184 pin = vhpet_timer_ioapic_pin(vhpet, n);
185 KASSERT(pin != 0, ("vhpet timer %d irq incorrectly routed", n));
186 vioapic_deassert_irq(vhpet->vm, pin);
187 vhpet->isr &= ~(1 << n);
/*
 * Report whether timer 'n' is in periodic mode, i.e. whether HPET_TCNF_TYPE
 * is set in its cap_config register.
 */
192 vhpet_periodic_timer(struct vhpet *vhpet, int n)
195 return ((vhpet->timer[n].cap_config & HPET_TCNF_TYPE) != 0);
/*
 * Report whether interrupt generation is enabled for timer 'n', i.e.
 * whether HPET_TCNF_INT_ENB is set in its cap_config register.
 */
199 vhpet_timer_interrupt_enabled(struct vhpet *vhpet, int n)
202 return ((vhpet->timer[n].cap_config & HPET_TCNF_INT_ENB) != 0);
/*
 * Classify the ioapic interrupt of timer 'n' by testing HPET_TCNF_INT_TYPE
 * in cap_config; the bit being clear is the case tested for.  Must not be
 * called for a timer that uses MSI delivery (asserted).
 * NOTE(review): presumably a clear bit means edge triggered -- confirm
 * against the HPET register definition.
 */
206 vhpet_timer_edge_trig(struct vhpet *vhpet, int n)
209 KASSERT(!vhpet_timer_msi_enabled(vhpet, n), ("vhpet_timer_edge_trig: "
210 "timer %d is using MSI", n));
212 if ((vhpet->timer[n].cap_config & HPET_TCNF_INT_TYPE) == 0)
/*
 * Deliver the interrupt of timer 'n' to the guest.
 *
 * Nothing is delivered when interrupts are disabled for the timer or when
 * its ISR bit is already set (a level triggered interrupt that is still
 * asserted).  With MSI delivery the upper and lower 32 bits of 'msireg' are
 * passed to lapic_intr_msi().  Otherwise the interrupt goes to the timer's
 * ioapic pin: an edge triggered interrupt is pulsed, a level triggered one
 * sets the ISR bit and asserts the pin.  A timer without a routed pin is
 * only traced.
 */
219 vhpet_timer_interrupt(struct vhpet *vhpet, int n)
223 /* If interrupts are not enabled for this timer then just return. */
224 if (!vhpet_timer_interrupt_enabled(vhpet, n))
228 * If a level triggered interrupt is already asserted then just return.
230 if ((vhpet->isr & (1 << n)) != 0) {
231 VM_CTR1(vhpet->vm, "hpet t%d intr is already asserted", n);
235 if (vhpet_timer_msi_enabled(vhpet, n)) {
236 lapic_intr_msi(vhpet->vm, vhpet->timer[n].msireg >> 32,
237 vhpet->timer[n].msireg & 0xffffffff);
241 pin = vhpet_timer_ioapic_pin(vhpet, n);
243 VM_CTR1(vhpet->vm, "hpet t%d intr is not routed to ioapic", n);
247 if (vhpet_timer_edge_trig(vhpet, n)) {
248 vioapic_pulse_irq(vhpet->vm, pin);
250 vhpet->isr |= 1 << n;
251 vioapic_assert_irq(vhpet->vm, pin);
/*
 * Advance the comparator of periodic timer 'n' so that it lies ahead of
 * 'counter'.
 *
 * The distance between the current comparator and 'counter' is divided into
 * 'comprate' sized steps and the comparator is moved forward by one step
 * more than fit into that distance:
 *     compnext = compval + ((counter - compval) / comprate + 1) * comprate
 * All operands are uint32_t.  The timer must have a non-zero 'comprate'
 * (asserted), which also keeps the division well defined.
 */
256 vhpet_adjust_compval(struct vhpet *vhpet, int n, uint32_t counter)
258 uint32_t compval, comprate, compnext;
260 KASSERT(vhpet->timer[n].comprate != 0, ("hpet t%d is not periodic", n));
262 compval = vhpet->timer[n].compval;
263 comprate = vhpet->timer[n].comprate;
266 * Calculate the comparator value to be used for the next periodic
269 * This function is commonly called from the callout handler.
270 * In this scenario the 'counter' is ahead of 'compval'. To find
271 * the next value to program into the accumulator we divide the
272 * number space between 'compval' and 'counter' into 'comprate'
273 * sized units. The 'compval' is rounded up such that is "ahead"
276 compnext = compval + ((counter - compval) / comprate + 1) * comprate;
278 vhpet->timer[n].compval = compnext;
/*
 * Callout handler that runs when a timer expires.
 *
 * The expiry is ignored when the callout has been rescheduled in the
 * meantime (still pending) or has been stopped (no longer active).
 * Otherwise the callout is deactivated, the timer is re-armed from the
 * current counter value via vhpet_start_timer() and the timer's interrupt
 * is delivered.  Running with the main counter disabled is treated as a
 * fatal inconsistency (panic).
 */
282 vhpet_handler(void *a)
288 struct callout *callout;
289 struct vhpet_callout_arg *arg;
294 callout = &vhpet->timer[n].callout;
296 VM_CTR1(vhpet->vm, "hpet t%d fired", n);
300 if (callout_pending(callout)) /* callout was reset */
303 if (!callout_active(callout)) /* callout was stopped */
306 callout_deactivate(callout);
308 if (!vhpet_counter_enabled(vhpet))
309 panic("vhpet(%p) callout with counter disabled", vhpet);
311 counter = vhpet_counter(vhpet, &now);
312 vhpet_start_timer(vhpet, n, counter, now);
313 vhpet_timer_interrupt(vhpet, n);
/*
 * Stop the callout of timer 'n'.
 *
 * If the callout was due before 'now' but has not run yet, the timer
 * interrupt is delivered here so that the guest does not miss it.
 */
320 vhpet_stop_timer(struct vhpet *vhpet, int n, sbintime_t now)
323 VM_CTR1(vhpet->vm, "hpet t%d stopped", n);
324 callout_stop(&vhpet->timer[n].callout);
327 * If the callout was scheduled to expire in the past but hasn't
328 * had a chance to execute yet then trigger the timer interrupt
329 * here. Failing to do so will result in a missed timer interrupt
330 * in the guest. This is especially bad in one-shot mode because
331 * the next interrupt has to wait for the counter to wrap around.
333 if (vhpet->timer[n].callout_sbt < now) {
334 VM_CTR1(vhpet->vm, "hpet t%d interrupt triggered after "
335 "stopping timer", n);
336 vhpet_timer_interrupt(vhpet, n);
/*
 * Arm the callout of timer 'n' so that it fires when the main counter
 * reaches the timer's comparator.
 *
 * 'counter' is the current counter value and 'now' the uptime it
 * corresponds to.  A timer with a non-zero 'comprate' first has its
 * comparator advanced past 'counter'.  The distance (compval - counter),
 * with both operands uint32_t, is converted to a time delta with
 * 'freq_sbt'; the callout is scheduled at the absolute time now + delta
 * (also recorded in 'callout_sbt') with a precision of
 * delta >> tc_precexp, and runs vhpet_handler() with the timer's argument.
 */
341 vhpet_start_timer(struct vhpet *vhpet, int n, uint32_t counter, sbintime_t now)
343 sbintime_t delta, precision;
345 if (vhpet->timer[n].comprate != 0)
346 vhpet_adjust_compval(vhpet, n, counter);
349 * In one-shot mode it is the guest's responsibility to make
350 * sure that the comparator value is not in the "past". The
351 * hardware doesn't have any belt-and-suspenders to deal with
352 * this so we don't either.
356 delta = (vhpet->timer[n].compval - counter) * vhpet->freq_sbt;
357 precision = delta >> tc_precexp;
358 vhpet->timer[n].callout_sbt = now + delta;
359 callout_reset_sbt(&vhpet->timer[n].callout, vhpet->timer[n].callout_sbt,
360 precision, vhpet_handler, &vhpet->timer[n].arg, C_ABSOLUTE);
/*
 * Start (or restart) the main counter: the current uptime is recorded as
 * the moment that corresponds to 'countbase' and every timer is re-armed
 * relative to that counter value and time.
 */
364 vhpet_start_counting(struct vhpet *vhpet)
368 vhpet->countbase_sbt = sbinuptime();
369 for (i = 0; i < VHPET_NUM_TIMERS; i++) {
371 * Restart the timers based on the value of the main counter
372 * when it stopped counting.
374 vhpet_start_timer(vhpet, i, vhpet->countbase,
375 vhpet->countbase_sbt);
/*
 * Stop the main counter: 'counter' is stored as the new counter base and
 * every timer is stopped, with 'now' used to detect expirations that were
 * due but have not been delivered yet.
 */
380 vhpet_stop_counting(struct vhpet *vhpet, uint32_t counter, sbintime_t now)
384 vhpet->countbase = counter;
385 for (i = 0; i < VHPET_NUM_TIMERS; i++)
386 vhpet_stop_timer(vhpet, i, now);
/*
 * Merge the bits of 'data' that are selected by 'mask' into the register
 * pointed to by 'regptr'.
 */
390 update_register(uint64_t *regptr, uint64_t data, uint64_t mask)
394 *regptr |= (data & mask);
/*
 * Apply a guest write ('data' under 'mask') to the configuration and
 * capabilities register of timer 'n'.
 *
 * - A timer that uses MSI or edge triggered delivery must not have its ISR
 *   bit set on entry; that state is treated as fatal (panic).
 * - The bits covered by HPET_TCAP_RO_MASK keep their old value and
 *   HPET_TCNF_32MODE is always cleared in the new value.
 * - The upper 32 bits of the register are the mask of permitted ioapic
 *   pins.  If the newly selected pin is non-zero and not permitted, the
 *   HPET_TCNF_INT_ROUTE field is cleared.
 * - 'comprate' is reset to 0 when the timer is not in periodic mode.
 * - If the ISR bit is set, the interrupt is deasserted on the old pin and
 *   the bit is cleared when interrupts were disabled, delivery changed to
 *   MSI or edge triggered, or the routing changed.
 *
 * NOTE(review): 'newval' is a uint64_t but is traced with "%016x"; confirm
 * how the trace macro formats its arguments, a long-sized conversion may be
 * needed to show the upper 32 bits.
 * NOTE(review): '1 << new_pin' is a signed int shift; if the route field
 * can reach 31 an unsigned constant would be safer -- confirm the field
 * width.
 */
398 vhpet_timer_update_config(struct vhpet *vhpet, int n, uint64_t data,
402 int old_pin, new_pin;
403 uint32_t allowed_irqs;
404 uint64_t oldval, newval;
406 if (vhpet_timer_msi_enabled(vhpet, n) ||
407 vhpet_timer_edge_trig(vhpet, n)) {
408 if (vhpet->isr & (1 << n))
409 panic("vhpet timer %d isr should not be asserted", n);
411 old_pin = vhpet_timer_ioapic_pin(vhpet, n);
412 oldval = vhpet->timer[n].cap_config;
415 update_register(&newval, data, mask);
416 newval &= ~(HPET_TCAP_RO_MASK | HPET_TCNF_32MODE);
417 newval |= oldval & HPET_TCAP_RO_MASK;
419 if (newval == oldval)
422 vhpet->timer[n].cap_config = newval;
423 VM_CTR2(vhpet->vm, "hpet t%d cap_config set to 0x%016x", n, newval);
426 * Validate the interrupt routing in the HPET_TCNF_INT_ROUTE field.
427 * If it does not match the bits set in HPET_TCAP_INT_ROUTE then set
428 * it to the default value of 0.
430 allowed_irqs = vhpet->timer[n].cap_config >> 32;
431 new_pin = vhpet_timer_ioapic_pin(vhpet, n);
432 if (new_pin != 0 && (allowed_irqs & (1 << new_pin)) == 0) {
433 VM_CTR3(vhpet->vm, "hpet t%d configured invalid irq %d, "
434 "allowed_irqs 0x%08x", n, new_pin, allowed_irqs);
436 vhpet->timer[n].cap_config &= ~HPET_TCNF_INT_ROUTE;
439 if (!vhpet_periodic_timer(vhpet, n))
440 vhpet->timer[n].comprate = 0;
443 * If the timer's ISR bit is set then clear it in the following cases:
444 * - interrupt is disabled
445 * - interrupt type is changed from level to edge or fsb.
446 * - interrupt routing is changed
448 * This is to ensure that this timer's level triggered interrupt does
449 * not remain asserted forever.
451 if (vhpet->isr & (1 << n)) {
452 KASSERT(old_pin != 0, ("timer %d isr asserted to ioapic pin %d",
454 if (!vhpet_timer_interrupt_enabled(vhpet, n))
456 else if (vhpet_timer_msi_enabled(vhpet, n))
458 else if (vhpet_timer_edge_trig(vhpet, n))
460 else if (vhpet_timer_ioapic_pin(vhpet, n) != old_pin)
466 VM_CTR1(vhpet->vm, "hpet t%d isr cleared due to "
467 "configuration change", n);
468 vioapic_deassert_irq(vhpet->vm, old_pin);
469 vhpet->isr &= ~(1 << n);
/*
 * Handle a guest write of 'size' bytes to the HPET register window.
 *
 * The register offset is gpa - VHPET_BASE.  Accesses are expected to be 4
 * or 8 bytes wide and naturally aligned; an invalid access is traced.  The
 * write is then dispatched on the offset (each register may also be
 * addressed through its upper half at offset + 4):
 *
 * - HPET_CONFIG: the counter is sampled first, the register is updated and
 *   HPET_CNF_LEG_RT is cleared because LegacyReplacement routing is not
 *   supported.  If HPET_CNF_ENABLE changed, counting is started, or stopped
 *   using the counter value sampled before the update.
 * - HPET_ISR: every bit that is both written and currently set has the
 *   corresponding timer's ISR bit cleared.
 * - HPET_MAIN_COUNTER: the written bits are merged into the current counter
 *   value, which becomes the new 'countbase'; counting is restarted if the
 *   counter is enabled.
 * - HPET_TIMER_CAP_CNF(i): forwarded to vhpet_timer_update_config().
 * - HPET_TIMER_COMPARATOR(i): in periodic mode the write updates 'comprate'
 *   and, when HPET_TCNF_VAL_SET is set, 'compval' as well; otherwise it
 *   updates 'compval'.  HPET_TCNF_VAL_SET is cleared, and if either value
 *   changed while the counter is enabled the timer is re-armed.
 * - HPET_TIMER_FSB_VAL(i) / HPET_TIMER_FSB_ADDR(i): merged into 'msireg'.
 */
475 vhpet_mmio_write(void *vm, int vcpuid, uint64_t gpa, uint64_t val, int size,
479 uint64_t data, mask, oldval, val64;
480 uint32_t isr_clear_mask, old_compval, old_comprate, counter;
481 sbintime_t now, *nowptr;
485 offset = gpa - VHPET_BASE;
489 /* Accesses to the HPET should be 4 or 8 bytes wide */
492 mask = 0xffffffffffffffff;
498 if ((offset & 0x4) != 0) {
504 VM_CTR2(vhpet->vm, "hpet invalid mmio write: "
505 "offset 0x%08x, size %d", offset, size);
509 /* Access to the HPET should be naturally aligned to its width */
510 if (offset & (size - 1)) {
511 VM_CTR2(vhpet->vm, "hpet invalid mmio write: "
512 "offset 0x%08x, size %d", offset, size);
516 if (offset == HPET_CONFIG || offset == HPET_CONFIG + 4) {
518 * Get the most recent value of the counter before updating
519 * the 'config' register. If the HPET is going to be disabled
520 * then we need to update 'countbase' with the value right
521 * before it is disabled.
523 nowptr = vhpet_counter_enabled(vhpet) ? &now : NULL;
524 counter = vhpet_counter(vhpet, nowptr);
525 oldval = vhpet->config;
526 update_register(&vhpet->config, data, mask);
529 * LegacyReplacement Routing is not supported so clear the
532 vhpet->config &= ~HPET_CNF_LEG_RT;
534 if ((oldval ^ vhpet->config) & HPET_CNF_ENABLE) {
535 if (vhpet_counter_enabled(vhpet)) {
536 vhpet_start_counting(vhpet);
537 VM_CTR0(vhpet->vm, "hpet enabled");
539 vhpet_stop_counting(vhpet, counter, now);
540 VM_CTR0(vhpet->vm, "hpet disabled");
546 if (offset == HPET_ISR || offset == HPET_ISR + 4) {
547 isr_clear_mask = vhpet->isr & data;
548 for (i = 0; i < VHPET_NUM_TIMERS; i++) {
549 if ((isr_clear_mask & (1 << i)) != 0) {
550 VM_CTR1(vhpet->vm, "hpet t%d isr cleared", i);
551 vhpet_timer_clear_isr(vhpet, i);
557 if (offset == HPET_MAIN_COUNTER || offset == HPET_MAIN_COUNTER + 4) {
558 /* Zero-extend the counter to 64-bits before updating it */
559 val64 = vhpet_counter(vhpet, NULL);
560 update_register(&val64, data, mask);
561 vhpet->countbase = val64;
562 if (vhpet_counter_enabled(vhpet))
563 vhpet_start_counting(vhpet);
567 for (i = 0; i < VHPET_NUM_TIMERS; i++) {
568 if (offset == HPET_TIMER_CAP_CNF(i) ||
569 offset == HPET_TIMER_CAP_CNF(i) + 4) {
570 vhpet_timer_update_config(vhpet, i, data, mask);
574 if (offset == HPET_TIMER_COMPARATOR(i) ||
575 offset == HPET_TIMER_COMPARATOR(i) + 4) {
576 old_compval = vhpet->timer[i].compval;
577 old_comprate = vhpet->timer[i].comprate;
578 if (vhpet_periodic_timer(vhpet, i)) {
580 * In periodic mode writes to the comparator
581 * change the 'compval' register only if the
582 * HPET_TCNF_VAL_SET bit is set in the config
585 val64 = vhpet->timer[i].comprate;
586 update_register(&val64, data, mask);
587 vhpet->timer[i].comprate = val64;
588 if ((vhpet->timer[i].cap_config &
589 HPET_TCNF_VAL_SET) != 0) {
590 vhpet->timer[i].compval = val64;
593 KASSERT(vhpet->timer[i].comprate == 0,
594 ("vhpet one-shot timer %d has invalid "
595 "rate %u", i, vhpet->timer[i].comprate));
596 val64 = vhpet->timer[i].compval;
597 update_register(&val64, data, mask);
598 vhpet->timer[i].compval = val64;
600 vhpet->timer[i].cap_config &= ~HPET_TCNF_VAL_SET;
602 if (vhpet->timer[i].compval != old_compval ||
603 vhpet->timer[i].comprate != old_comprate) {
604 if (vhpet_counter_enabled(vhpet)) {
605 counter = vhpet_counter(vhpet, &now);
606 vhpet_start_timer(vhpet, i, counter,
613 if (offset == HPET_TIMER_FSB_VAL(i) ||
614 offset == HPET_TIMER_FSB_ADDR(i)) {
615 update_register(&vhpet->timer[i].msireg, data, mask);
/*
 * Handle a guest read of 'size' bytes from the HPET register window.
 *
 * The register offset is gpa - VHPET_BASE.  Only 4 and 8 byte accesses that
 * are naturally aligned are valid; anything else is traced as an invalid
 * read.  The value is selected by offset (each register may also be
 * addressed through its upper half at offset + 4):
 *
 * - HPET_CAPABILITIES: the value built by vhpet_capabilities().
 * - HPET_CONFIG: the general configuration register.
 * - HPET_ISR: the interrupt status register.
 * - HPET_MAIN_COUNTER: the current counter value.
 * - HPET_TIMER_CAP_CNF(i): timer i's configuration/capabilities.
 * - HPET_TIMER_COMPARATOR(i): timer i's comparator value.
 * - HPET_TIMER_FSB_VAL(i) / HPET_TIMER_FSB_ADDR(i): timer i's 'msireg'.
 *
 * An offset that matches none of the timers is detected after the loop by
 * the index having reached VHPET_NUM_TIMERS.
 */
625 vhpet_mmio_read(void *vm, int vcpuid, uint64_t gpa, uint64_t *rval, int size,
633 offset = gpa - VHPET_BASE;
637 /* Accesses to the HPET should be 4 or 8 bytes wide */
638 if (size != 4 && size != 8) {
639 VM_CTR2(vhpet->vm, "hpet invalid mmio read: "
640 "offset 0x%08x, size %d", offset, size);
645 /* Access to the HPET should be naturally aligned to its width */
646 if (offset & (size - 1)) {
647 VM_CTR2(vhpet->vm, "hpet invalid mmio read: "
648 "offset 0x%08x, size %d", offset, size);
653 if (offset == HPET_CAPABILITIES || offset == HPET_CAPABILITIES + 4) {
654 data = vhpet_capabilities();
658 if (offset == HPET_CONFIG || offset == HPET_CONFIG + 4) {
659 data = vhpet->config;
663 if (offset == HPET_ISR || offset == HPET_ISR + 4) {
668 if (offset == HPET_MAIN_COUNTER || offset == HPET_MAIN_COUNTER + 4) {
669 data = vhpet_counter(vhpet, NULL);
673 for (i = 0; i < VHPET_NUM_TIMERS; i++) {
674 if (offset == HPET_TIMER_CAP_CNF(i) ||
675 offset == HPET_TIMER_CAP_CNF(i) + 4) {
676 data = vhpet->timer[i].cap_config;
680 if (offset == HPET_TIMER_COMPARATOR(i) ||
681 offset == HPET_TIMER_COMPARATOR(i) + 4) {
682 data = vhpet->timer[i].compval;
686 if (offset == HPET_TIMER_FSB_VAL(i) ||
687 offset == HPET_TIMER_FSB_ADDR(i)) {
688 data = vhpet->timer[i].msireg;
693 if (i >= VHPET_NUM_TIMERS)
/*
 * Allocate and initialize the virtual HPET of a VM.
 *
 * The state is allocated zeroed with M_WAITOK, its mutex is initialized and
 * 'freq_sbt' is derived from HPET_FREQ via FREQ2BT()/bttosbt().  The set of
 * ioapic pins a timer may be routed to depends on the ioapic pin count:
 * irqs 24-31 in the first case, the four uppermost pins when there are at
 * least 20 pins.
 *
 * Every timer starts with the permitted pins in the upper 32 bits of
 * cap_config, the periodic and FSB delivery capability bits set, a
 * comparator of 0xffffffff and an initialized callout.
 */
707 vhpet_init(struct vm *vm)
711 uint64_t allowed_irqs;
712 struct vhpet_callout_arg *arg;
715 vhpet = malloc(sizeof(struct vhpet), M_VHPET, M_WAITOK | M_ZERO);
717 mtx_init(&vhpet->mtx, "vhpet lock", NULL, MTX_DEF);
719 FREQ2BT(HPET_FREQ, &bt);
720 vhpet->freq_sbt = bttosbt(bt);
722 pincount = vioapic_pincount(vm);
724 allowed_irqs = 0xff000000; /* irqs 24-31 */
725 else if (pincount >= 20)
726 allowed_irqs = 0xf << (pincount - 4); /* 4 upper irqs */
731 * Initialize HPET timer hardware state.
733 for (i = 0; i < VHPET_NUM_TIMERS; i++) {
734 vhpet->timer[i].cap_config = allowed_irqs << 32;
735 vhpet->timer[i].cap_config |= HPET_TCAP_PER_INT;
736 vhpet->timer[i].cap_config |= HPET_TCAP_FSB_INT_DEL;
738 vhpet->timer[i].compval = 0xffffffff;
739 callout_init(&vhpet->timer[i].callout, 1);
741 arg = &vhpet->timer[i].arg;
/*
 * Tear down a virtual HPET: the callout of every timer is drained first and
 * the state is then returned to the M_VHPET malloc type.
 */
750 vhpet_cleanup(struct vhpet *vhpet)
754 for (i = 0; i < VHPET_NUM_TIMERS; i++)
755 callout_drain(&vhpet->timer[i].callout);
757 free(vhpet, M_VHPET);
/*
 * Fill in 'cap->capabilities' with the value built by vhpet_capabilities().
 */
761 vhpet_getcap(struct vm_hpet_cap *cap)
764 cap->capabilities = vhpet_capabilities();
768 #ifdef BHYVE_SNAPSHOT
/*
 * Save or restore the virtual HPET state through 'meta'.
 *
 * The frequency, the general configuration and the interrupt status are
 * passed through SNAPSHOT_VAR_OR_LEAVE() directly.  The counter is handled
 * through a local: on VM_SNAPSHOT_SAVE it is loaded with the current value
 * from vhpet_counter(), on VM_SNAPSHOT_RESTORE it is stored back into
 * 'countbase'.  For every timer the configuration, 'msireg', comparator,
 * 'comprate' and 'callout_sbt' are included.
 */
770 vhpet_snapshot(struct vhpet *vhpet, struct vm_snapshot_meta *meta)
775 SNAPSHOT_VAR_OR_LEAVE(vhpet->freq_sbt, meta, ret, done);
776 SNAPSHOT_VAR_OR_LEAVE(vhpet->config, meta, ret, done);
777 SNAPSHOT_VAR_OR_LEAVE(vhpet->isr, meta, ret, done);
779 /* at restore time the countbase should have the value it had when the
780 * snapshot was created; since the value is not directly kept in
781 * vhpet->countbase, but rather computed relative to the current system
782 * uptime using countbase_sbt, save the value retured by vhpet_counter
784 if (meta->op == VM_SNAPSHOT_SAVE)
785 countbase = vhpet_counter(vhpet, NULL);
786 SNAPSHOT_VAR_OR_LEAVE(countbase, meta, ret, done);
787 if (meta->op == VM_SNAPSHOT_RESTORE)
788 vhpet->countbase = countbase;
790 for (i = 0; i < nitems(vhpet->timer); i++) {
791 SNAPSHOT_VAR_OR_LEAVE(vhpet->timer[i].cap_config,
793 SNAPSHOT_VAR_OR_LEAVE(vhpet->timer[i].msireg, meta, ret, done);
794 SNAPSHOT_VAR_OR_LEAVE(vhpet->timer[i].compval, meta, ret, done);
795 SNAPSHOT_VAR_OR_LEAVE(vhpet->timer[i].comprate, meta, ret, done);
796 SNAPSHOT_VAR_OR_LEAVE(vhpet->timer[i].callout_sbt,
/*
 * Restart counting if the main counter is enabled; this re-bases the
 * counter on the current uptime and re-arms the timers.
 */
805 vhpet_restore_time(struct vhpet *vhpet)
807 if (vhpet_counter_enabled(vhpet))
808 vhpet_start_counting(vhpet);