]> CyberLeo.Net >> Repos - FreeBSD/FreeBSD.git/blob - sys/amd64/vmm/io/vlapic.c
vlapic code restructuring to make it easy to support hardware-assist for APIC
[FreeBSD/FreeBSD.git] / sys / amd64 / vmm / io / vlapic.c
1 /*-
2  * Copyright (c) 2011 NetApp, Inc.
3  * All rights reserved.
4  *
5  * Redistribution and use in source and binary forms, with or without
6  * modification, are permitted provided that the following conditions
7  * are met:
8  * 1. Redistributions of source code must retain the above copyright
9  *    notice, this list of conditions and the following disclaimer.
10  * 2. Redistributions in binary form must reproduce the above copyright
11  *    notice, this list of conditions and the following disclaimer in the
12  *    documentation and/or other materials provided with the distribution.
13  *
14  * THIS SOFTWARE IS PROVIDED BY NETAPP, INC ``AS IS'' AND
15  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
16  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
17  * ARE DISCLAIMED.  IN NO EVENT SHALL NETAPP, INC OR CONTRIBUTORS BE LIABLE
18  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
19  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
20  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
21  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
22  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
23  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
24  * SUCH DAMAGE.
25  *
26  * $FreeBSD$
27  */
28
29 #include <sys/cdefs.h>
30 __FBSDID("$FreeBSD$");
31
32 #include <sys/param.h>
33 #include <sys/lock.h>
34 #include <sys/kernel.h>
35 #include <sys/malloc.h>
36 #include <sys/mutex.h>
37 #include <sys/systm.h>
38 #include <sys/smp.h>
39
40 #include <x86/specialreg.h>
41 #include <x86/apicreg.h>
42
43 #include <machine/clock.h>
44 #include <machine/smp.h>
45
46 #include <machine/vmm.h>
47
48 #include "vmm_ipi.h"
49 #include "vmm_lapic.h"
50 #include "vmm_ktr.h"
51 #include "vmm_stat.h"
52
53 #include "vlapic.h"
54 #include "vlapic_priv.h"
55 #include "vioapic.h"
56
/* Per-vcpu trace wrappers that tag events with this vlapic's vm/vcpuid. */
#define VLAPIC_CTR0(vlapic, format)                                     \
	VCPU_CTR0((vlapic)->vm, (vlapic)->vcpuid, format)

#define VLAPIC_CTR1(vlapic, format, p1)                                 \
	VCPU_CTR1((vlapic)->vm, (vlapic)->vcpuid, format, p1)

#define VLAPIC_CTR2(vlapic, format, p1, p2)                             \
	VCPU_CTR2((vlapic)->vm, (vlapic)->vcpuid, format, p1, p2)

/*
 * Trace all eight 32-bit IRR registers.  Register 'i' is read at
 * irrptr[i << 2] because the APIC registers are spaced 16 bytes
 * (4 dwords) apart in the APIC page.
 */
#define VLAPIC_CTR_IRR(vlapic, msg)                                     \
do {                                                                    \
	uint32_t *irrptr = &(vlapic)->apic_page->irr0;                  \
	irrptr[0] = irrptr[0];  /* silence compiler */                  \
	VLAPIC_CTR1((vlapic), msg " irr0 0x%08x", irrptr[0 << 2]);      \
	VLAPIC_CTR1((vlapic), msg " irr1 0x%08x", irrptr[1 << 2]);      \
	VLAPIC_CTR1((vlapic), msg " irr2 0x%08x", irrptr[2 << 2]);      \
	VLAPIC_CTR1((vlapic), msg " irr3 0x%08x", irrptr[3 << 2]);      \
	VLAPIC_CTR1((vlapic), msg " irr4 0x%08x", irrptr[4 << 2]);      \
	VLAPIC_CTR1((vlapic), msg " irr5 0x%08x", irrptr[5 << 2]);      \
	VLAPIC_CTR1((vlapic), msg " irr6 0x%08x", irrptr[6 << 2]);      \
	VLAPIC_CTR1((vlapic), msg " irr7 0x%08x", irrptr[7 << 2]);      \
} while (0)

/* Same as VLAPIC_CTR_IRR but for the in-service (ISR) registers. */
#define VLAPIC_CTR_ISR(vlapic, msg)                                     \
do {                                                                    \
	uint32_t *isrptr = &(vlapic)->apic_page->isr0;                  \
	isrptr[0] = isrptr[0];  /* silence compiler */                  \
	VLAPIC_CTR1((vlapic), msg " isr0 0x%08x", isrptr[0 << 2]);      \
	VLAPIC_CTR1((vlapic), msg " isr1 0x%08x", isrptr[1 << 2]);      \
	VLAPIC_CTR1((vlapic), msg " isr2 0x%08x", isrptr[2 << 2]);      \
	VLAPIC_CTR1((vlapic), msg " isr3 0x%08x", isrptr[3 << 2]);      \
	VLAPIC_CTR1((vlapic), msg " isr4 0x%08x", isrptr[4 << 2]);      \
	VLAPIC_CTR1((vlapic), msg " isr5 0x%08x", isrptr[5 << 2]);      \
	VLAPIC_CTR1((vlapic), msg " isr6 0x%08x", isrptr[6 << 2]);      \
	VLAPIC_CTR1((vlapic), msg " isr7 0x%08x", isrptr[7 << 2]);      \
} while (0)

/* The priority class of a vector is its upper nibble. */
#define PRIO(x)                 ((x) >> 4)

#define VLAPIC_VERSION          (16)
#define VLAPIC_MAXLVT_ENTRIES   (APIC_LVT_CMCI)

#define x2apic(vlapic)  (((vlapic)->msr_apicbase & APICBASE_X2APIC) ? 1 : 0)

/*
 * The 'vlapic->timer_mtx' is used to provide mutual exclusion between the
 * vlapic_callout_handler() and vcpu accesses to the following registers:
 * - initial count register aka icr_timer
 * - current count register aka ccr_timer
 * - divide config register aka dcr_timer
 * - timer LVT register
 *
 * Note that the vlapic_callout_handler() does not write to any of these
 * registers so they can be safely read from the vcpu context without locking.
 */
#define VLAPIC_TIMER_LOCK(vlapic)       mtx_lock_spin(&((vlapic)->timer_mtx))
#define VLAPIC_TIMER_UNLOCK(vlapic)     mtx_unlock_spin(&((vlapic)->timer_mtx))
#define VLAPIC_TIMER_LOCKED(vlapic)     mtx_owned(&((vlapic)->timer_mtx))

/* The virtual timer is clocked at the TSC frequency. */
#define VLAPIC_BUS_FREQ tsc_freq
117
118 static __inline uint32_t
119 vlapic_get_id(struct vlapic *vlapic)
120 {
121
122         if (x2apic(vlapic))
123                 return (vlapic->vcpuid);
124         else
125                 return (vlapic->vcpuid << 24);
126 }
127
128 static __inline uint32_t
129 vlapic_get_ldr(struct vlapic *vlapic)
130 {
131         struct LAPIC *lapic;
132         int apicid;
133         uint32_t ldr;
134
135         lapic = vlapic->apic_page;
136         if (x2apic(vlapic)) {
137                 apicid = vlapic_get_id(vlapic);
138                 ldr = 1 << (apicid & 0xf);
139                 ldr |= (apicid & 0xffff0) << 12;
140                 return (ldr);
141         } else
142                 return (lapic->ldr);
143 }
144
145 static __inline uint32_t
146 vlapic_get_dfr(struct vlapic *vlapic)
147 {
148         struct LAPIC *lapic;
149
150         lapic = vlapic->apic_page;
151         if (x2apic(vlapic))
152                 return (0);
153         else
154                 return (lapic->dfr);
155 }
156
157 static void
158 vlapic_set_dfr(struct vlapic *vlapic, uint32_t data)
159 {
160         uint32_t dfr;
161         struct LAPIC *lapic;
162         
163         if (x2apic(vlapic)) {
164                 VM_CTR1(vlapic->vm, "write to DFR in x2apic mode: %#x", data);
165                 return;
166         }
167
168         lapic = vlapic->apic_page;
169         dfr = (lapic->dfr & APIC_DFR_RESERVED) | (data & APIC_DFR_MODEL_MASK);
170         if ((dfr & APIC_DFR_MODEL_MASK) == APIC_DFR_MODEL_FLAT)
171                 VLAPIC_CTR0(vlapic, "vlapic DFR in Flat Model");
172         else if ((dfr & APIC_DFR_MODEL_MASK) == APIC_DFR_MODEL_CLUSTER)
173                 VLAPIC_CTR0(vlapic, "vlapic DFR in Cluster Model");
174         else
175                 VLAPIC_CTR1(vlapic, "vlapic DFR in Unknown Model %#x", dfr);
176
177         lapic->dfr = dfr;
178 }
179
180 static void
181 vlapic_set_ldr(struct vlapic *vlapic, uint32_t data)
182 {
183         struct LAPIC *lapic;
184
185         /* LDR is read-only in x2apic mode */
186         if (x2apic(vlapic)) {
187                 VLAPIC_CTR1(vlapic, "write to LDR in x2apic mode: %#x", data);
188                 return;
189         }
190
191         lapic = vlapic->apic_page;
192         lapic->ldr = data & ~APIC_LDR_RESERVED;
193         VLAPIC_CTR1(vlapic, "vlapic LDR set to %#x", lapic->ldr);
194 }
195
196 static int
197 vlapic_timer_divisor(uint32_t dcr)
198 {
199         switch (dcr & 0xB) {
200         case APIC_TDCR_1:
201                 return (1);
202         case APIC_TDCR_2:
203                 return (2);
204         case APIC_TDCR_4:
205                 return (4);
206         case APIC_TDCR_8:
207                 return (8);
208         case APIC_TDCR_16:
209                 return (16);
210         case APIC_TDCR_32:
211                 return (32);
212         case APIC_TDCR_64:
213                 return (64);
214         case APIC_TDCR_128:
215                 return (128);
216         default:
217                 panic("vlapic_timer_divisor: invalid dcr 0x%08x", dcr);
218         }
219 }
220
221 static void
222 vlapic_mask_lvts(uint32_t *lvts, int num_lvt)
223 {
224         int i;
225         for (i = 0; i < num_lvt; i++) {
226                 *lvts |= APIC_LVT_M;
227                 lvts += 4;
228         }
229 }
230
#if 0
/*
 * Debug helper (currently compiled out): pretty-print a single LVT
 * register showing its vector, delivery status and mask bits.
 */
static inline void
vlapic_dump_lvt(uint32_t offset, uint32_t *lvt)
{
	printf("Offset %x: lvt %08x (V:%02x DS:%x M:%x)\n", offset,
	    *lvt, *lvt & APIC_LVTT_VECTOR, *lvt & APIC_LVTT_DS,
	    *lvt & APIC_LVTT_M);
}
#endif
240
/*
 * Return the current count register of the virtual timer, i.e. the
 * number of timer ticks remaining until the next expiration, or 0 if
 * the timer is not running.
 */
static uint32_t
vlapic_get_ccr(struct vlapic *vlapic)
{
	struct bintime bt_now, bt_rem;
	struct LAPIC *lapic;
	uint32_t ccr;

	ccr = 0;
	lapic = vlapic->apic_page;

	VLAPIC_TIMER_LOCK(vlapic);
	if (callout_active(&vlapic->callout)) {
		/*
		 * If the timer is scheduled to expire in the future then
		 * compute the value of 'ccr' based on the remaining time.
		 */
		binuptime(&bt_now);
		if (bintime_cmp(&vlapic->timer_fire_bt, &bt_now, >)) {
			bt_rem = vlapic->timer_fire_bt;
			bintime_sub(&bt_rem, &bt_now);
			/* Convert remaining bintime into timer ticks. */
			ccr += bt_rem.sec * BT2FREQ(&vlapic->timer_freq_bt);
			ccr += bt_rem.frac / vlapic->timer_freq_bt.frac;
		}
	}
	/* The current count can never exceed the initial count. */
	KASSERT(ccr <= lapic->icr_timer, ("vlapic_get_ccr: invalid ccr %#x, "
	    "icr_timer is %#x", ccr, lapic->icr_timer));
	VLAPIC_CTR2(vlapic, "vlapic ccr_timer = %#x, icr_timer = %#x",
	    ccr, lapic->icr_timer);
	VLAPIC_TIMER_UNLOCK(vlapic);
	return (ccr);
}
272
/*
 * Handle a guest write to the divide configuration register: recompute
 * the timer tick frequency and the derived timer period from the
 * current initial count.
 */
static void
vlapic_set_dcr(struct vlapic *vlapic, uint32_t dcr)
{
	struct LAPIC *lapic;
	int divisor;

	lapic = vlapic->apic_page;
	VLAPIC_TIMER_LOCK(vlapic);

	lapic->dcr_timer = dcr;
	divisor = vlapic_timer_divisor(dcr);
	VLAPIC_CTR2(vlapic, "vlapic dcr_timer=%#x, divisor=%d", dcr, divisor);

	/*
	 * Update the timer frequency and the timer period.
	 *
	 * XXX changes to the frequency divider will not take effect until
	 * the timer is reloaded.
	 */
	FREQ2BT(VLAPIC_BUS_FREQ / divisor, &vlapic->timer_freq_bt);
	vlapic->timer_period_bt = vlapic->timer_freq_bt;
	bintime_mul(&vlapic->timer_period_bt, lapic->icr_timer);

	VLAPIC_TIMER_UNLOCK(vlapic);
}
298
299 static void
300 vlapic_update_errors(struct vlapic *vlapic)
301 {
302         struct LAPIC *lapic;
303         
304         lapic = vlapic->apic_page;
305         lapic->esr = vlapic->esr_pending;
306         vlapic->esr_pending = 0;
307 }
308
/*
 * Reset the virtual APIC to its power-on state: zero the APIC page,
 * restore the architectural defaults for version/DFR/SVR, mask every
 * LVT, clear the divide configuration and initialize the boot state.
 */
static void
vlapic_reset(struct vlapic *vlapic)
{
	struct LAPIC *lapic;

	lapic = vlapic->apic_page;
	bzero(lapic, sizeof(struct LAPIC));

	lapic->version = VLAPIC_VERSION;
	lapic->version |= (VLAPIC_MAXLVT_ENTRIES << MAXLVTSHIFT);
	lapic->dfr = 0xffffffff;	/* reset value: all ones */
	lapic->svr = APIC_SVR_VECTOR;
	/* Mask the 6 contiguous LVTs starting at lvt_timer, plus CMCI. */
	vlapic_mask_lvts(&lapic->lvt_timer, 6);
	vlapic_mask_lvts(&lapic->lvt_cmci, 1);
	vlapic_set_dcr(vlapic, 0);

	/* vcpu 0 is the BSP; the APs wait to be started. */
	if (vlapic->vcpuid == 0)
		vlapic->boot_state = BS_RUNNING;	/* BSP */
	else
		vlapic->boot_state = BS_INIT;		/* AP */
}
330
/*
 * Accept interrupt 'vector' into this vlapic's IRR.  'level' records
 * the trigger mode in the TMR: set for level-triggered, clear for
 * edge-triggered.  The interrupt is dropped if the vlapic is software
 * disabled, and vectors below 16 are flagged as an illegal-vector
 * error instead of being accepted.
 */
void
vlapic_set_intr_ready(struct vlapic *vlapic, int vector, bool level)
{
	struct LAPIC	*lapic = vlapic->apic_page;
	uint32_t	*irrptr, *tmrptr, mask;
	int		idx;

	if (vector < 0 || vector >= 256)
		panic("vlapic_set_intr_ready: invalid vector %d\n", vector);

	if (!(lapic->svr & APIC_SVR_ENABLE)) {
		VLAPIC_CTR1(vlapic, "vlapic is software disabled, ignoring "
		    "interrupt %d", vector);
		return;
	}

	if (vector < 16) {
		vlapic_set_error(vlapic, APIC_ESR_RECEIVE_ILLEGAL_VECTOR);
		return;
	}

	/* IRR register 'vector / 32' is at a 4-dword stride in the page. */
	idx = (vector / 32) * 4;
	mask = 1 << (vector % 32);

	irrptr = &lapic->irr0;
	atomic_set_int(&irrptr[idx], mask);

	/*
	 * Upon acceptance of an interrupt into the IRR the corresponding
	 * TMR bit is cleared for edge-triggered interrupts and set for
	 * level-triggered interrupts.
	 */
	tmrptr = &lapic->tmr0;
	if (level)
		atomic_set_int(&tmrptr[idx], mask);
	else
		atomic_clear_int(&tmrptr[idx], mask);

	VLAPIC_CTR_IRR(vlapic, "vlapic_set_intr_ready");
}
371
372 static __inline uint32_t *
373 vlapic_get_lvtptr(struct vlapic *vlapic, uint32_t offset)
374 {
375         struct LAPIC    *lapic = vlapic->apic_page;
376         int              i;
377
378         switch (offset) {
379         case APIC_OFFSET_CMCI_LVT:
380                 return (&lapic->lvt_cmci);
381         case APIC_OFFSET_TIMER_LVT ... APIC_OFFSET_ERROR_LVT:
382                 i = (offset - APIC_OFFSET_TIMER_LVT) >> 2;
383                 return ((&lapic->lvt_timer) + i);;
384         default:
385                 panic("vlapic_get_lvt: invalid LVT\n");
386         }
387 }
388
/* Read the LVT register at 'offset'. */
static __inline uint32_t
vlapic_get_lvt(struct vlapic *vlapic, uint32_t offset)
{
	uint32_t *ptr;

	ptr = vlapic_get_lvtptr(vlapic, offset);
	return (*ptr);
}
395
/*
 * Handle a guest write to an LVT register.  Only the bits defined for
 * that particular LVT are kept; everything else reads back as zero.
 * If the vlapic is software disabled the mask bit is forced on.
 */
static void
vlapic_set_lvt(struct vlapic *vlapic, uint32_t offset, uint32_t val)
{
	uint32_t *lvtptr, mask;
	struct LAPIC *lapic;

	lapic = vlapic->apic_page;
	lvtptr = vlapic_get_lvtptr(vlapic, offset);

	/*
	 * The timer LVT is read from the callout handler so updates to
	 * it must be serialized with the timer lock.
	 */
	if (offset == APIC_OFFSET_TIMER_LVT)
		VLAPIC_TIMER_LOCK(vlapic);

	if (!(lapic->svr & APIC_SVR_ENABLE))
		val |= APIC_LVT_M;
	/* Bits common to every LVT: mask, delivery status and vector. */
	mask = APIC_LVT_M | APIC_LVT_DS | APIC_LVT_VECTOR;
	switch (offset) {
	case APIC_OFFSET_TIMER_LVT:
		mask |= APIC_LVTT_TM;
		break;
	case APIC_OFFSET_ERROR_LVT:
		break;
	case APIC_OFFSET_LINT0_LVT:
	case APIC_OFFSET_LINT1_LVT:
		/* LINT pins also have trigger mode, remote IRR, polarity. */
		mask |= APIC_LVT_TM | APIC_LVT_RIRR | APIC_LVT_IIPP;
		/* FALLTHROUGH */
	default:
		mask |= APIC_LVT_DM;
		break;
	}
	*lvtptr = val & mask;

	if (offset == APIC_OFFSET_TIMER_LVT)
		VLAPIC_TIMER_UNLOCK(vlapic);
}
430
431 static int
432 vlapic_fire_lvt(struct vlapic *vlapic, uint32_t lvt)
433 {
434         uint32_t vec, mode;
435
436         if (lvt & APIC_LVT_M)
437                 return (0);
438
439         vec = lvt & APIC_LVT_VECTOR;
440         mode = lvt & APIC_LVT_DM;
441
442         switch (mode) {
443         case APIC_LVT_DM_FIXED:
444                 if (vec < 16) {
445                         vlapic_set_error(vlapic, APIC_ESR_SEND_ILLEGAL_VECTOR);
446                         return (0);
447                 }
448                 vlapic_set_intr_ready(vlapic, vec, false);
449                 vcpu_notify_event(vlapic->vm, vlapic->vcpuid, true);
450                 break;
451         case APIC_LVT_DM_NMI:
452                 vm_inject_nmi(vlapic->vm, vlapic->vcpuid);
453                 break;
454         default:
455                 // Other modes ignored
456                 return (0);
457         }
458         return (1);
459 }
460
#if 1
/*
 * Debug helper: dump the eight ISR registers and the stack of
 * in-service vectors maintained alongside them.  Used by the sanity
 * checks in vlapic_update_ppr() before panicking.
 */
static void
dump_isrvec_stk(struct vlapic *vlapic)
{
	int i;
	uint32_t *isrptr;

	isrptr = &vlapic->apic_page->isr0;
	/* ISR registers are 4 dwords (16 bytes) apart. */
	for (i = 0; i < 8; i++)
		printf("ISR%d 0x%08x\n", i, isrptr[i * 4]);

	for (i = 0; i <= vlapic->isrvec_stk_top; i++)
		printf("isrvec_stk[%d] = %d\n", i, vlapic->isrvec_stk[i]);
}
#endif
476
/*
 * Recompute the processor priority register (PPR) from the task
 * priority register and the highest in-service vector.
 *
 * Algorithm adopted from section "Interrupt, Task and Processor Priority"
 * in Intel Architecture Manual Vol 3a.
 */
static void
vlapic_update_ppr(struct vlapic *vlapic)
{
	int isrvec, tpr, ppr;

	/*
	 * Note that the value on the stack at index 0 is always 0.
	 *
	 * This is a placeholder for the value of ISRV when none of the
	 * bits is set in the ISRx registers.
	 */
	isrvec = vlapic->isrvec_stk[vlapic->isrvec_stk_top];
	tpr = vlapic->apic_page->tpr;

#if 1
	/* Expensive consistency checks on the isrvec stack (debug only). */
	{
		int i, lastprio, curprio, vector, idx;
		uint32_t *isrptr;

		if (vlapic->isrvec_stk_top == 0 && isrvec != 0)
			panic("isrvec_stk is corrupted: %d", isrvec);

		/*
		 * Make sure that the priority of the nested interrupts is
		 * always increasing.
		 */
		lastprio = -1;
		for (i = 1; i <= vlapic->isrvec_stk_top; i++) {
			curprio = PRIO(vlapic->isrvec_stk[i]);
			if (curprio <= lastprio) {
				dump_isrvec_stk(vlapic);
				panic("isrvec_stk does not satisfy invariant");
			}
			lastprio = curprio;
		}

		/*
		 * Make sure that each bit set in the ISRx registers has a
		 * corresponding entry on the isrvec stack.
		 */
		i = 1;
		isrptr = &vlapic->apic_page->isr0;
		for (vector = 0; vector < 256; vector++) {
			idx = (vector / 32) * 4;
			if (isrptr[idx] & (1 << (vector % 32))) {
				if (i > vlapic->isrvec_stk_top ||
				    vlapic->isrvec_stk[i] != vector) {
					dump_isrvec_stk(vlapic);
					panic("ISR and isrvec_stk out of sync");
				}
				i++;
			}
		}
	}
#endif

	/* PPR is the larger (by priority class) of TPR and ISRV & 0xf0. */
	if (PRIO(tpr) >= PRIO(isrvec))
		ppr = tpr;
	else
		ppr = isrvec & 0xf0;

	vlapic->apic_page->ppr = ppr;
	VLAPIC_CTR1(vlapic, "vlapic_update_ppr 0x%02x", ppr);
}
545
/*
 * Complete the highest-priority in-service interrupt: clear its ISR
 * bit, pop the in-service vector stack, recompute the PPR and, for a
 * level-triggered interrupt (TMR bit set), forward the EOI to the
 * virtual I/O APIC.
 */
static void
vlapic_process_eoi(struct vlapic *vlapic)
{
	struct LAPIC	*lapic = vlapic->apic_page;
	uint32_t	*isrptr, *tmrptr;
	int		i, idx, bitpos, vector;

	isrptr = &lapic->isr0;
	tmrptr = &lapic->tmr0;

	/*
	 * The x86 architecture reserves the first 32 vectors for use
	 * by the processor, hence the scan stops before ISR0.
	 */
	for (i = 7; i > 0; i--) {
		idx = i * 4;
		bitpos = fls(isrptr[idx]);	/* highest set bit + 1 */
		if (bitpos-- != 0) {
			if (vlapic->isrvec_stk_top <= 0) {
				panic("invalid vlapic isrvec_stk_top %d",
				      vlapic->isrvec_stk_top);
			}
			isrptr[idx] &= ~(1 << bitpos);
			VLAPIC_CTR_ISR(vlapic, "vlapic_process_eoi");
			vlapic->isrvec_stk_top--;
			vlapic_update_ppr(vlapic);
			if ((tmrptr[idx] & (1 << bitpos)) != 0) {
				vector = i * 32 + bitpos;
				vioapic_process_eoi(vlapic->vm, vlapic->vcpuid,
				    vector);
			}
			return;
		}
	}
}
581
/* Extract the field selected by 'mask' from an LVT register value. */
static __inline int
vlapic_get_lvt_field(uint32_t lvt, uint32_t mask)
{

	return ((int)(lvt & mask));
}
588
589 static __inline int
590 vlapic_periodic_timer(struct vlapic *vlapic)
591 {
592         uint32_t lvt;
593         
594         lvt = vlapic_get_lvt(vlapic, APIC_OFFSET_TIMER_LVT);
595
596         return (vlapic_get_lvt_field(lvt, APIC_LVTT_TM_PERIODIC));
597 }
598
static VMM_STAT(VLAPIC_INTR_ERROR, "error interrupts generated by vlapic");

/*
 * Record the error bits in 'mask' as pending and deliver the error LVT.
 * 'esr_firing' guards against recursion: firing the error LVT can
 * itself raise a new error (e.g. an illegal vector in the LVT).
 */
void
vlapic_set_error(struct vlapic *vlapic, uint32_t mask)
{
	uint32_t lvt;

	vlapic->esr_pending |= mask;
	if (vlapic->esr_firing)
		return;
	vlapic->esr_firing = 1;

	/* The error LVT always uses the fixed delivery mode. */
	lvt = vlapic_get_lvt(vlapic, APIC_OFFSET_ERROR_LVT);
	if (vlapic_fire_lvt(vlapic, lvt | APIC_LVT_DM_FIXED)) {
		vmm_stat_incr(vlapic->vm, vlapic->vcpuid, VLAPIC_INTR_ERROR, 1);
	}
	vlapic->esr_firing = 0;
}
618
static VMM_STAT(VLAPIC_INTR_TIMER, "timer interrupts generated by vlapic");

/*
 * Deliver the timer LVT interrupt.  Called with the timer lock held
 * (from the callout handler), which the KASSERT enforces.
 */
static void
vlapic_fire_timer(struct vlapic *vlapic)
{
	uint32_t lvt;

	KASSERT(VLAPIC_TIMER_LOCKED(vlapic), ("vlapic_fire_timer not locked"));

	/* The timer LVT always uses the fixed delivery mode. */
	lvt = vlapic_get_lvt(vlapic, APIC_OFFSET_TIMER_LVT);
	if (vlapic_fire_lvt(vlapic, lvt | APIC_LVT_DM_FIXED)) {
		vmm_stat_incr(vlapic->vm, vlapic->vcpuid, VLAPIC_INTR_TIMER, 1);
	}
}
634
635 static VMM_STAT(VLAPIC_INTR_CMC,
636     "corrected machine check interrupts generated by vlapic");
637
638 void
639 vlapic_fire_cmci(struct vlapic *vlapic)
640 {
641         uint32_t lvt;
642
643         lvt = vlapic_get_lvt(vlapic, APIC_OFFSET_CMCI_LVT);
644         if (vlapic_fire_lvt(vlapic, lvt)) {
645                 vmm_stat_incr(vlapic->vm, vlapic->vcpuid, VLAPIC_INTR_CMC, 1);
646         }
647 }
648
static VMM_STAT_ARRAY(LVTS_TRIGGERRED, VLAPIC_MAXLVT_ENTRIES,
    "lvts triggered");

/*
 * Trigger the local interrupt identified by 'vector' (an APIC_LVT_*
 * index, not an IDT vector) through the corresponding LVT register.
 * The timer and error LVTs are delivered in fixed mode.  Returns 0 on
 * success or EINVAL for an unknown LVT index.
 */
int
vlapic_trigger_lvt(struct vlapic *vlapic, int vector)
{
	uint32_t lvt;

	switch (vector) {
	case APIC_LVT_LINT0:
		lvt = vlapic_get_lvt(vlapic, APIC_OFFSET_LINT0_LVT);
		break;
	case APIC_LVT_LINT1:
		lvt = vlapic_get_lvt(vlapic, APIC_OFFSET_LINT1_LVT);
		break;
	case APIC_LVT_TIMER:
		lvt = vlapic_get_lvt(vlapic, APIC_OFFSET_TIMER_LVT);
		lvt |= APIC_LVT_DM_FIXED;
		break;
	case APIC_LVT_ERROR:
		lvt = vlapic_get_lvt(vlapic, APIC_OFFSET_ERROR_LVT);
		lvt |= APIC_LVT_DM_FIXED;
		break;
	case APIC_LVT_PMC:
		lvt = vlapic_get_lvt(vlapic, APIC_OFFSET_PERF_LVT);
		break;
	case APIC_LVT_THERMAL:
		lvt = vlapic_get_lvt(vlapic, APIC_OFFSET_THERM_LVT);
		break;
	case APIC_LVT_CMCI:
		lvt = vlapic_get_lvt(vlapic, APIC_OFFSET_CMCI_LVT);
		break;
	default:
		return (EINVAL);
	}
	if (vlapic_fire_lvt(vlapic, lvt)) {
		vmm_stat_array_incr(vlapic->vm, vlapic->vcpuid,
		    LVTS_TRIGGERRED, vector, 1);
	}
	return (0);
}
690
/*
 * Callout handler invoked when the virtual LAPIC timer expires.  Fires
 * the timer LVT and, for a periodic timer, re-arms the callout for the
 * next period, compensating for any lateness of this expiration.
 */
static void
vlapic_callout_handler(void *arg)
{
	struct vlapic *vlapic;
	struct bintime bt, btnow;
	sbintime_t rem_sbt;

	vlapic = arg;

	VLAPIC_TIMER_LOCK(vlapic);
	if (callout_pending(&vlapic->callout))	/* callout was reset */
		goto done;

	if (!callout_active(&vlapic->callout))	/* callout was stopped */
		goto done;

	callout_deactivate(&vlapic->callout);

	KASSERT(vlapic->apic_page->icr_timer != 0, ("timer is disabled"));

	vlapic_fire_timer(vlapic);

	if (vlapic_periodic_timer(vlapic)) {
		binuptime(&btnow);
		KASSERT(bintime_cmp(&btnow, &vlapic->timer_fire_bt, >=),
		    ("vlapic callout at %#lx.%#lx, expected at %#lx.#%lx",
		    btnow.sec, btnow.frac, vlapic->timer_fire_bt.sec,
		    vlapic->timer_fire_bt.frac));

		/*
		 * Compute the delta between when the timer was supposed to
		 * fire and the present time.
		 */
		bt = btnow;
		bintime_sub(&bt, &vlapic->timer_fire_bt);

		rem_sbt = bttosbt(vlapic->timer_period_bt);
		if (bintime_cmp(&bt, &vlapic->timer_period_bt, <)) {
			/*
			 * Adjust the time until the next countdown downward
			 * to account for the lost time.
			 */
			rem_sbt -= bttosbt(bt);
		} else {
			/*
			 * If the delta is greater than the timer period then
			 * just reset our time base instead of trying to catch
			 * up.
			 */
			vlapic->timer_fire_bt = btnow;
			VLAPIC_CTR2(vlapic, "vlapic timer lagging by %lu "
			    "usecs, period is %lu usecs - resetting time base",
			    bttosbt(bt) / SBT_1US,
			    bttosbt(vlapic->timer_period_bt) / SBT_1US);
		}

		/* Next expected expiration: one period past the last one. */
		bintime_add(&vlapic->timer_fire_bt, &vlapic->timer_period_bt);
		callout_reset_sbt(&vlapic->callout, rem_sbt, 0,
		    vlapic_callout_handler, vlapic, 0);
	}
done:
	VLAPIC_TIMER_UNLOCK(vlapic);
}
754
/*
 * Handle a guest write to the timer initial count register: record the
 * new timer period and (re)arm the callout, or stop the callout when
 * the timer is disabled with an initial count of zero.
 */
static void
vlapic_set_icr_timer(struct vlapic *vlapic, uint32_t icr_timer)
{
	struct LAPIC *lapic;
	sbintime_t sbt;

	VLAPIC_TIMER_LOCK(vlapic);

	lapic = vlapic->apic_page;
	lapic->icr_timer = icr_timer;

	/* period = tick duration * initial count */
	vlapic->timer_period_bt = vlapic->timer_freq_bt;
	bintime_mul(&vlapic->timer_period_bt, icr_timer);

	if (icr_timer != 0) {
		/* Remember the absolute time of the next expiration. */
		binuptime(&vlapic->timer_fire_bt);
		bintime_add(&vlapic->timer_fire_bt, &vlapic->timer_period_bt);

		sbt = bttosbt(vlapic->timer_period_bt);
		callout_reset_sbt(&vlapic->callout, sbt, 0,
		    vlapic_callout_handler, vlapic, 0);
	} else
		callout_stop(&vlapic->callout);

	VLAPIC_TIMER_UNLOCK(vlapic);
}
781
/*
 * This function populates 'dmask' with the set of vcpus that match the
 * addressing specified by the (dest, phys, lowprio) tuple.
 *
 * 'x2apic_dest' specifies whether 'dest' is interpreted as x2APIC (32-bit)
 * or xAPIC (8-bit) destination field.
 */
static void
vlapic_calcdest(struct vm *vm, cpuset_t *dmask, uint32_t dest, bool phys,
    bool lowprio, bool x2apic_dest)
{
	struct vlapic *vlapic;
	uint32_t dfr, ldr, ldest, cluster;
	uint32_t mda_flat_ldest, mda_cluster_ldest, mda_ldest, mda_cluster_id;
	cpuset_t amask;
	int vcpuid;

	if ((x2apic_dest && dest == 0xffffffff) ||
	    (!x2apic_dest && dest == 0xff)) {
		/*
		 * Broadcast in both logical and physical modes.
		 */
		*dmask = vm_active_cpus(vm);
		return;
	}

	if (phys) {
		/*
		 * Physical mode: destination is APIC ID.
		 */
		CPU_ZERO(dmask);
		vcpuid = vm_apicid2vcpuid(vm, dest);
		if (vcpuid < VM_MAXCPU)
			CPU_SET(vcpuid, dmask);
	} else {
		/*
		 * In the "Flat Model" the MDA is interpreted as an 8-bit wide
		 * bitmask. This model is only available in the xAPIC mode.
		 */
		mda_flat_ldest = dest & 0xff;

		/*
		 * In the "Cluster Model" the MDA is used to identify a
		 * specific cluster and a set of APICs in that cluster.
		 */
		if (x2apic_dest) {
			mda_cluster_id = dest >> 16;
			mda_cluster_ldest = dest & 0xffff;
		} else {
			mda_cluster_id = (dest >> 4) & 0xf;
			mda_cluster_ldest = dest & 0xf;
		}

		/*
		 * Logical mode: match each APIC that has a bit set
		 * in its LDR that matches a bit in the ldest.
		 */
		CPU_ZERO(dmask);
		amask = vm_active_cpus(vm);
		while ((vcpuid = CPU_FFS(&amask)) != 0) {
			vcpuid--;
			CPU_CLR(vcpuid, &amask);

			vlapic = vm_lapic(vm, vcpuid);
			dfr = vlapic_get_dfr(vlapic);
			ldr = vlapic_get_ldr(vlapic);

			if ((dfr & APIC_DFR_MODEL_MASK) ==
			    APIC_DFR_MODEL_FLAT) {
				ldest = ldr >> 24;
				mda_ldest = mda_flat_ldest;
			} else if ((dfr & APIC_DFR_MODEL_MASK) ==
			    APIC_DFR_MODEL_CLUSTER) {
				/* Cluster/logical split differs by mode. */
				if (x2apic(vlapic)) {
					cluster = ldr >> 16;
					ldest = ldr & 0xffff;
				} else {
					cluster = ldr >> 28;
					ldest = (ldr >> 24) & 0xf;
				}
				if (cluster != mda_cluster_id)
					continue;
				mda_ldest = mda_cluster_ldest;
			} else {
				/*
				 * Guest has configured a bad logical
				 * model for this vcpu - skip it.
				 */
				VLAPIC_CTR1(vlapic, "vlapic has bad logical "
				    "model %x - cannot deliver interrupt", dfr);
				continue;
			}

			/* 'lowprio' delivers to only the first match. */
			if ((mda_ldest & ldest) != 0) {
				CPU_SET(vcpuid, dmask);
				if (lowprio)
					break;
			}
		}
	}
}
883
/* Per-vcpu counters of IPIs sent, exported via the vmm stats interface. */
static VMM_STAT_ARRAY(IPIS_SENT, VM_MAXCPU, "ipis sent to vcpu");

/*
 * Emulate a guest write to the Interrupt Command Register (ICR).
 *
 * 'icrval' is the full 64-bit ICR value (ICR_HI in the upper half for
 * xAPIC).  Fixed and NMI IPIs are delivered entirely in the kernel.
 * INIT and STARTUP IPIs drive the target vcpu's boot-state machine, with
 * STARTUP additionally requesting a return to userland ('*retu' = true,
 * VM_EXITCODE_SPINUP_AP) so the AP can be spun up.
 *
 * Returns 0 when the write was handled here and 1 when the emulation
 * must be completed in userland.
 */
static int
lapic_process_icr(struct vlapic *vlapic, uint64_t icrval, bool *retu)
{
        int i;
        bool phys;
        cpuset_t dmask;
        uint32_t dest, vec, mode;
        struct vlapic *vlapic2;
        struct vm_exit *vmexit;

        /*
         * The destination field is the full upper 32 bits in x2apic mode
         * but only the top 8 bits of ICR_HI in xAPIC mode.
         */
        if (x2apic(vlapic))
                dest = icrval >> 32;
        else
                dest = icrval >> (32 + 24);
        vec = icrval & APIC_VECTOR_MASK;
        mode = icrval & APIC_DELMODE_MASK;

        /* Vectors 0-15 are illegal for fixed-delivery IPIs. */
        if (mode == APIC_DELMODE_FIXED && vec < 16) {
                vlapic_set_error(vlapic, APIC_ESR_SEND_ILLEGAL_VECTOR);
                return (0);
        }

        if (mode == APIC_DELMODE_FIXED || mode == APIC_DELMODE_NMI) {
                /* Resolve the destination shorthand into a vcpu set. */
                switch (icrval & APIC_DEST_MASK) {
                case APIC_DEST_DESTFLD:
                        phys = ((icrval & APIC_DESTMODE_LOG) == 0);
                        vlapic_calcdest(vlapic->vm, &dmask, dest, phys, false,
                            x2apic(vlapic));
                        break;
                case APIC_DEST_SELF:
                        CPU_SETOF(vlapic->vcpuid, &dmask);
                        break;
                case APIC_DEST_ALLISELF:
                        dmask = vm_active_cpus(vlapic->vm);
                        break;
                case APIC_DEST_ALLESELF:
                        dmask = vm_active_cpus(vlapic->vm);
                        CPU_CLR(vlapic->vcpuid, &dmask);
                        break;
                default:
                        CPU_ZERO(&dmask);       /* satisfy gcc */
                        break;
                }

                /* Deliver the interrupt (or NMI) to each vcpu in the set. */
                while ((i = CPU_FFS(&dmask)) != 0) {
                        i--;
                        CPU_CLR(i, &dmask);
                        if (mode == APIC_DELMODE_FIXED) {
                                lapic_intr_edge(vlapic->vm, i, vec);
                                vmm_stat_array_incr(vlapic->vm, vlapic->vcpuid,
                                                    IPIS_SENT, i, 1);
                        } else
                                vm_inject_nmi(vlapic->vm, i);
                }

                return (0);     /* handled completely in the kernel */
        }

        if (mode == APIC_DELMODE_INIT) {
                if ((icrval & APIC_LEVEL_MASK) == APIC_LEVEL_DEASSERT)
                        return (0);

                /*
                 * Only the BSP (vcpu 0) may INIT an AP; 'dest' is
                 * interpreted as the target vcpuid here.
                 */
                if (vlapic->vcpuid == 0 && dest != 0 && dest < VM_MAXCPU) {
                        vlapic2 = vm_lapic(vlapic->vm, dest);

                        /* move from INIT to waiting-for-SIPI state */
                        if (vlapic2->boot_state == BS_INIT) {
                                vlapic2->boot_state = BS_SIPI;
                        }

                        return (0);
                }
        }

        if (mode == APIC_DELMODE_STARTUP) {
                if (vlapic->vcpuid == 0 && dest != 0 && dest < VM_MAXCPU) {
                        vlapic2 = vm_lapic(vlapic->vm, dest);

                        /*
                         * Ignore SIPIs in any state other than wait-for-SIPI
                         */
                        if (vlapic2->boot_state != BS_SIPI)
                                return (0);

                        /*
                         * XXX this assumes that the startup IPI always succeeds
                         */
                        vlapic2->boot_state = BS_RUNNING;
                        vm_activate_cpu(vlapic2->vm, dest);

                        /*
                         * Exit to userland so it can start the AP at the
                         * real-mode entry point encoded in the SIPI vector.
                         */
                        *retu = true;
                        vmexit = vm_exitinfo(vlapic->vm, vlapic->vcpuid);
                        vmexit->exitcode = VM_EXITCODE_SPINUP_AP;
                        vmexit->u.spinup_ap.vcpu = dest;
                        vmexit->u.spinup_ap.rip = vec << PAGE_SHIFT;

                        return (0);
                }
        }

        /*
         * This will cause a return to userland.
         */
        return (1);
}
991
992 int
993 vlapic_pending_intr(struct vlapic *vlapic)
994 {
995         struct LAPIC    *lapic = vlapic->apic_page;
996         int              idx, i, bitpos, vector;
997         uint32_t        *irrptr, val;
998
999         irrptr = &lapic->irr0;
1000
1001         /*
1002          * The x86 architecture reserves the the first 32 vectors for use
1003          * by the processor.
1004          */
1005         for (i = 7; i > 0; i--) {
1006                 idx = i * 4;
1007                 val = atomic_load_acq_int(&irrptr[idx]);
1008                 bitpos = fls(val);
1009                 if (bitpos != 0) {
1010                         vector = i * 32 + (bitpos - 1);
1011                         if (PRIO(vector) > PRIO(lapic->ppr)) {
1012                                 VLAPIC_CTR1(vlapic, "pending intr %d", vector);
1013                                 return (vector);
1014                         } else 
1015                                 break;
1016                 }
1017         }
1018         return (-1);
1019 }
1020
/*
 * Called when 'vector' has been accepted by the vcpu for servicing:
 * move it from the IRR to the ISR and recompute the processor priority.
 *
 * Panics if the in-service vector stack overflows.
 */
void
vlapic_intr_accepted(struct vlapic *vlapic, int vector)
{
        struct LAPIC    *lapic = vlapic->apic_page;
        uint32_t        *irrptr, *isrptr;
        int             idx, stk_top;

        /*
         * clear the ready bit for vector being accepted in irr 
         * and set the vector as in service in isr.
         */
        idx = (vector / 32) * 4;        /* irr/isr registers are 16 bytes apart */

        irrptr = &lapic->irr0;
        atomic_clear_int(&irrptr[idx], 1 << (vector % 32));
        VLAPIC_CTR_IRR(vlapic, "vlapic_intr_accepted");

        isrptr = &lapic->isr0;
        isrptr[idx] |= 1 << (vector % 32);
        VLAPIC_CTR_ISR(vlapic, "vlapic_intr_accepted");

        /*
         * Update the PPR: push the vector onto the in-service stack
         * that tracks nested interrupts, then recompute.
         */
        vlapic->isrvec_stk_top++;

        stk_top = vlapic->isrvec_stk_top;
        if (stk_top >= ISRVEC_STK_SIZE)
                panic("isrvec_stk_top overflow %d", stk_top);

        vlapic->isrvec_stk[stk_top] = vector;
        vlapic_update_ppr(vlapic);
}
1054
1055 static void
1056 lapic_set_svr(struct vlapic *vlapic, uint32_t new)
1057 {
1058         struct LAPIC *lapic;
1059         uint32_t old, changed;
1060
1061         lapic = vlapic->apic_page;
1062         old = lapic->svr;
1063         changed = old ^ new;
1064         if ((changed & APIC_SVR_ENABLE) != 0) {
1065                 if ((new & APIC_SVR_ENABLE) == 0) {
1066                         /*
1067                          * The apic is now disabled so stop the apic timer.
1068                          */
1069                         VLAPIC_CTR0(vlapic, "vlapic is software-disabled");
1070                         VLAPIC_TIMER_LOCK(vlapic);
1071                         callout_stop(&vlapic->callout);
1072                         VLAPIC_TIMER_UNLOCK(vlapic);
1073                 } else {
1074                         /*
1075                          * The apic is now enabled so restart the apic timer
1076                          * if it is configured in periodic mode.
1077                          */
1078                         VLAPIC_CTR0(vlapic, "vlapic is software-enabled");
1079                         if (vlapic_periodic_timer(vlapic))
1080                                 vlapic_set_icr_timer(vlapic, lapic->icr_timer);
1081                 }
1082         }
1083         lapic->svr = new;
1084 }
1085
1086 int
1087 vlapic_read(struct vlapic *vlapic, uint64_t offset, uint64_t *data, bool *retu)
1088 {
1089         struct LAPIC    *lapic = vlapic->apic_page;
1090         uint32_t        *reg;
1091         int              i;
1092
1093         if (offset > sizeof(*lapic)) {
1094                 *data = 0;
1095                 goto done;
1096         }
1097         
1098         offset &= ~3;
1099         switch(offset)
1100         {
1101                 case APIC_OFFSET_ID:
1102                         *data = vlapic_get_id(vlapic);
1103                         break;
1104                 case APIC_OFFSET_VER:
1105                         *data = lapic->version;
1106                         break;
1107                 case APIC_OFFSET_TPR:
1108                         *data = lapic->tpr;
1109                         break;
1110                 case APIC_OFFSET_APR:
1111                         *data = lapic->apr;
1112                         break;
1113                 case APIC_OFFSET_PPR:
1114                         *data = lapic->ppr;
1115                         break;
1116                 case APIC_OFFSET_EOI:
1117                         *data = lapic->eoi;
1118                         break;
1119                 case APIC_OFFSET_LDR:
1120                         *data = vlapic_get_ldr(vlapic);
1121                         break;
1122                 case APIC_OFFSET_DFR:
1123                         *data = vlapic_get_dfr(vlapic);
1124                         break;
1125                 case APIC_OFFSET_SVR:
1126                         *data = lapic->svr;
1127                         break;
1128                 case APIC_OFFSET_ISR0 ... APIC_OFFSET_ISR7:
1129                         i = (offset - APIC_OFFSET_ISR0) >> 2;
1130                         reg = &lapic->isr0;
1131                         *data = *(reg + i);
1132                         break;
1133                 case APIC_OFFSET_TMR0 ... APIC_OFFSET_TMR7:
1134                         i = (offset - APIC_OFFSET_TMR0) >> 2;
1135                         reg = &lapic->tmr0;
1136                         *data = *(reg + i);
1137                         break;
1138                 case APIC_OFFSET_IRR0 ... APIC_OFFSET_IRR7:
1139                         i = (offset - APIC_OFFSET_IRR0) >> 2;
1140                         reg = &lapic->irr0;
1141                         *data = atomic_load_acq_int(reg + i);
1142                         break;
1143                 case APIC_OFFSET_ESR:
1144                         *data = lapic->esr;
1145                         break;
1146                 case APIC_OFFSET_ICR_LOW: 
1147                         *data = lapic->icr_lo;
1148                         break;
1149                 case APIC_OFFSET_ICR_HI: 
1150                         *data = lapic->icr_hi;
1151                         break;
1152                 case APIC_OFFSET_CMCI_LVT:
1153                 case APIC_OFFSET_TIMER_LVT ... APIC_OFFSET_ERROR_LVT:
1154                         *data = vlapic_get_lvt(vlapic, offset); 
1155                         break;
1156                 case APIC_OFFSET_TIMER_ICR:
1157                         *data = lapic->icr_timer;
1158                         break;
1159                 case APIC_OFFSET_TIMER_CCR:
1160                         *data = vlapic_get_ccr(vlapic);
1161                         break;
1162                 case APIC_OFFSET_TIMER_DCR:
1163                         *data = lapic->dcr_timer;
1164                         break;
1165                 case APIC_OFFSET_RRR:
1166                 default:
1167                         *data = 0;
1168                         break;
1169         }
1170 done:
1171         VLAPIC_CTR2(vlapic, "vlapic read offset %#x, data %#lx", offset, *data);
1172         return 0;
1173 }
1174
1175 int
1176 vlapic_write(struct vlapic *vlapic, uint64_t offset, uint64_t data, bool *retu)
1177 {
1178         struct LAPIC    *lapic = vlapic->apic_page;
1179         int             retval;
1180
1181         VLAPIC_CTR2(vlapic, "vlapic write offset %#x, data %#lx", offset, data);
1182
1183         if (offset > sizeof(*lapic)) {
1184                 return 0;
1185         }
1186
1187         retval = 0;
1188         offset &= ~3;
1189         switch(offset)
1190         {
1191                 case APIC_OFFSET_ID:
1192                         break;
1193                 case APIC_OFFSET_TPR:
1194                         lapic->tpr = data & 0xff;
1195                         vlapic_update_ppr(vlapic);
1196                         break;
1197                 case APIC_OFFSET_EOI:
1198                         vlapic_process_eoi(vlapic);
1199                         break;
1200                 case APIC_OFFSET_LDR:
1201                         vlapic_set_ldr(vlapic, data);
1202                         break;
1203                 case APIC_OFFSET_DFR:
1204                         vlapic_set_dfr(vlapic, data);
1205                         break;
1206                 case APIC_OFFSET_SVR:
1207                         lapic_set_svr(vlapic, data);
1208                         break;
1209                 case APIC_OFFSET_ICR_LOW: 
1210                         if (!x2apic(vlapic)) {
1211                                 data &= 0xffffffff;
1212                                 data |= (uint64_t)lapic->icr_hi << 32;
1213                         }
1214                         retval = lapic_process_icr(vlapic, data, retu);
1215                         break;
1216                 case APIC_OFFSET_ICR_HI:
1217                         if (!x2apic(vlapic)) {
1218                                 retval = 0;
1219                                 lapic->icr_hi = data;
1220                         }
1221                         break;
1222                 case APIC_OFFSET_CMCI_LVT:
1223                 case APIC_OFFSET_TIMER_LVT ... APIC_OFFSET_ERROR_LVT:
1224                         vlapic_set_lvt(vlapic, offset, data);
1225                         break;
1226                 case APIC_OFFSET_TIMER_ICR:
1227                         vlapic_set_icr_timer(vlapic, data);
1228                         break;
1229
1230                 case APIC_OFFSET_TIMER_DCR:
1231                         vlapic_set_dcr(vlapic, data);
1232                         break;
1233
1234                 case APIC_OFFSET_ESR:
1235                         vlapic_update_errors(vlapic);
1236                         break;
1237                 case APIC_OFFSET_VER:
1238                 case APIC_OFFSET_APR:
1239                 case APIC_OFFSET_PPR:
1240                 case APIC_OFFSET_RRR:
1241                 case APIC_OFFSET_ISR0 ... APIC_OFFSET_ISR7:
1242                 case APIC_OFFSET_TMR0 ... APIC_OFFSET_TMR7:
1243                 case APIC_OFFSET_IRR0 ... APIC_OFFSET_IRR7:
1244                 case APIC_OFFSET_TIMER_CCR:
1245                 default:
1246                         // Read only.
1247                         break;
1248         }
1249
1250         return (retval);
1251 }
1252
/*
 * One-time initialization of a vcpu's virtual local APIC.
 *
 * The caller must have populated 'vm', 'vcpuid' and 'apic_page' in the
 * vlapic before calling here.  The vlapic is left in its power-on reset
 * state via vlapic_reset().
 */
void
vlapic_init(struct vlapic *vlapic)
{
        KASSERT(vlapic->vm != NULL, ("vlapic_init: vm is not initialized"));
        KASSERT(vlapic->vcpuid >= 0 && vlapic->vcpuid < VM_MAXCPU,
            ("vlapic_init: vcpuid is not initialized"));
        KASSERT(vlapic->apic_page != NULL, ("vlapic_init: apic_page is not "
            "initialized"));

        /*
         * If the vlapic is configured in x2apic mode then it will be
         * accessed in the critical section via the MSR emulation code.
         *
         * Therefore the timer mutex must be a spinlock because blockable
         * mutexes cannot be acquired in a critical section.
         */
        mtx_init(&vlapic->timer_mtx, "vlapic timer mtx", NULL, MTX_SPIN);
        callout_init(&vlapic->callout, 1);      /* mpsafe callout */

        vlapic->msr_apicbase = DEFAULT_APIC_BASE | APICBASE_ENABLED;

        /* vcpu 0 is the bootstrap processor. */
        if (vlapic->vcpuid == 0)
                vlapic->msr_apicbase |= APICBASE_BSP;

        vlapic_reset(vlapic);
}
1279
/*
 * Tear down a vlapic: wait for any in-flight timer callout to complete
 * before the vlapic memory is released by the caller.
 */
void
vlapic_cleanup(struct vlapic *vlapic)
{

        callout_drain(&vlapic->callout);
}
1286
/* Return the current value of the vcpu's IA32_APIC_BASE MSR. */
uint64_t
vlapic_get_apicbase(struct vlapic *vlapic)
{

        return (vlapic->msr_apicbase);
}
1293
1294 void
1295 vlapic_set_apicbase(struct vlapic *vlapic, uint64_t val)
1296 {
1297         int err;
1298         enum x2apic_state state;
1299
1300         err = vm_get_x2apic_state(vlapic->vm, vlapic->vcpuid, &state);
1301         if (err)
1302                 panic("vlapic_set_apicbase: err %d fetching x2apic state", err);
1303
1304         if (state == X2APIC_DISABLED)
1305                 val &= ~APICBASE_X2APIC;
1306
1307         vlapic->msr_apicbase = val;
1308 }
1309
1310 void
1311 vlapic_set_x2apic_state(struct vm *vm, int vcpuid, enum x2apic_state state)
1312 {
1313         struct vlapic *vlapic;
1314
1315         vlapic = vm_lapic(vm, vcpuid);
1316
1317         if (state == X2APIC_DISABLED)
1318                 vlapic->msr_apicbase &= ~APICBASE_X2APIC;
1319 }
1320
1321 void
1322 vlapic_deliver_intr(struct vm *vm, bool level, uint32_t dest, bool phys,
1323     int delmode, int vec)
1324 {
1325         bool lowprio;
1326         int vcpuid;
1327         cpuset_t dmask;
1328
1329         if (delmode != APIC_DELMODE_FIXED && delmode != APIC_DELMODE_LOWPRIO) {
1330                 VM_CTR1(vm, "vlapic intr invalid delmode %#x", delmode);
1331                 return;
1332         }
1333         lowprio = (delmode == APIC_DELMODE_LOWPRIO);
1334
1335         /*
1336          * We don't provide any virtual interrupt redirection hardware so
1337          * all interrupts originating from the ioapic or MSI specify the
1338          * 'dest' in the legacy xAPIC format.
1339          */
1340         vlapic_calcdest(vm, &dmask, dest, phys, lowprio, false);
1341
1342         while ((vcpuid = CPU_FFS(&dmask)) != 0) {
1343                 vcpuid--;
1344                 CPU_CLR(vcpuid, &dmask);
1345                 lapic_set_intr(vm, vcpuid, vec, level);
1346         }
1347 }
1348
void
vlapic_post_intr(struct vlapic *vlapic, int hostcpu)
{
        /*
         * Post an interrupt to the vcpu currently running on 'hostcpu'.
         *
         * This is done by leveraging features like Posted Interrupts (Intel)
         * Doorbell MSR (AMD AVIC) that avoid a VM exit.
         *
         * If neither of these features are available then fallback to
         * sending an IPI to 'hostcpu'.
         *
         * NOTE(review): the current implementation unconditionally takes
         * the IPI fallback path; hardware-assisted posting is not yet
         * wired up here.
         */
        ipi_cpu(hostcpu, vmm_ipinum);
}
1363
1364 bool
1365 vlapic_enabled(struct vlapic *vlapic)
1366 {
1367         struct LAPIC *lapic = vlapic->apic_page;
1368
1369         if ((vlapic->msr_apicbase & APICBASE_ENABLED) != 0 &&
1370             (lapic->svr & APIC_SVR_ENABLE) != 0)
1371                 return (true);
1372         else
1373                 return (false);
1374 }