]> CyberLeo.Net >> Repos - FreeBSD/FreeBSD.git/blob - sys/x86/x86/local_apic.c
sys: Remove $FreeBSD$: one-line .c pattern
[FreeBSD/FreeBSD.git] / sys / x86 / x86 / local_apic.c
1 /*-
2  * SPDX-License-Identifier: BSD-3-Clause
3  *
4  * Copyright (c) 1996, by Steve Passe
5  * All rights reserved.
6  * Copyright (c) 2003 John Baldwin <jhb@FreeBSD.org>
7  *
8  * Redistribution and use in source and binary forms, with or without
9  * modification, are permitted provided that the following conditions
10  * are met:
11  * 1. Redistributions of source code must retain the above copyright
12  *    notice, this list of conditions and the following disclaimer.
13  * 2. The name of the developer may NOT be used to endorse or promote products
14  *    derived from this software without specific prior written permission.
15  * 3. Neither the name of the author nor the names of any co-contributors
16  *    may be used to endorse or promote products derived from this software
17  *    without specific prior written permission.
18  *
19  * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
20  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
21  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
22  * ARE DISCLAIMED.  IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
23  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
24  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
25  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
26  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
27  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
28  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
29  * SUCH DAMAGE.
30  */
31
32 /*
33  * Local APIC support on Pentium and later processors.
34  */
35
36 #include <sys/cdefs.h>
37 #include "opt_atpic.h"
38 #include "opt_hwpmc_hooks.h"
39
40 #include "opt_ddb.h"
41
42 #include <sys/param.h>
43 #include <sys/systm.h>
44 #include <sys/asan.h>
45 #include <sys/bus.h>
46 #include <sys/kernel.h>
47 #include <sys/lock.h>
48 #include <sys/malloc.h>
49 #include <sys/mutex.h>
50 #include <sys/pcpu.h>
51 #include <sys/proc.h>
52 #include <sys/sched.h>
53 #include <sys/smp.h>
54 #include <sys/sysctl.h>
55 #include <sys/timeet.h>
56 #include <sys/timetc.h>
57
58 #include <vm/vm.h>
59 #include <vm/pmap.h>
60
61 #include <x86/apicreg.h>
62 #include <machine/clock.h>
63 #include <machine/cpufunc.h>
64 #include <machine/cputypes.h>
65 #include <machine/fpu.h>
66 #include <machine/frame.h>
67 #include <machine/intr_machdep.h>
68 #include <x86/apicvar.h>
69 #include <x86/mca.h>
70 #include <machine/md_var.h>
71 #include <machine/smp.h>
72 #include <machine/specialreg.h>
73 #include <x86/init.h>
74
75 #ifdef DDB
76 #include <sys/interrupt.h>
77 #include <ddb/ddb.h>
78 #endif
79
80 #ifdef __amd64__
81 #define SDT_APIC        SDT_SYSIGT
82 #define GSEL_APIC       0
83 #else
84 #define SDT_APIC        SDT_SYS386IGT
85 #define GSEL_APIC       GSEL(GCODE_SEL, SEL_KPL)
86 #endif
87
88 static MALLOC_DEFINE(M_LAPIC, "local_apic", "Local APIC items");
89
90 /* Sanity checks on IDT vectors. */
91 CTASSERT(APIC_IO_INTS + APIC_NUM_IOINTS == APIC_TIMER_INT);
92 CTASSERT(APIC_TIMER_INT < APIC_LOCAL_INTS);
93 CTASSERT(APIC_LOCAL_INTS == 240);
94 CTASSERT(IPI_STOP < APIC_SPURIOUS_INT);
95
96 /*
97  * I/O interrupts use non-negative IRQ values.  These values are used
98  * to mark unused IDT entries or IDT entries reserved for a non-I/O
99  * interrupt.
100  */
101 #define IRQ_FREE        -1
102 #define IRQ_TIMER       -2
103 #define IRQ_SYSCALL     -3
104 #define IRQ_DTRACE_RET  -4
105 #define IRQ_EVTCHN      -5
106
107 enum lat_timer_mode {
108         LAT_MODE_UNDEF =        0,
109         LAT_MODE_PERIODIC =     1,
110         LAT_MODE_ONESHOT =      2,
111         LAT_MODE_DEADLINE =     3,
112 };
113
114 /*
115  * Support for local APICs.  Local APICs manage interrupts on each
116  * individual processor as opposed to I/O APICs which receive interrupts
117  * from I/O devices and then forward them on to the local APICs.
118  *
119  * Local APICs can also send interrupts to each other thus providing the
120  * mechanism for IPIs.
121  */
122
/*
 * Software shadow of one local APIC LVT (Local Vector Table) entry.
 * lvt_mode_impl() rebuilds the hardware register value from these
 * fields.
 */
struct lvt {
	u_int lvt_edgetrigger:1;	/* 1 = edge trigger, 0 = level */
	u_int lvt_activehi:1;		/* 1 = active-high polarity */
	u_int lvt_masked:1;		/* 1 = interrupt masked */
	u_int lvt_active:1;		/* 1 = this (override) entry is in use */
	u_int lvt_mode:16;		/* APIC_LVT_DM_* delivery mode bits */
	u_int lvt_vector:8;		/* IDT vector, used for fixed mode */
};
131
/*
 * Per-CPU local APIC state; lapics[] is indexed by APIC ID.
 */
struct lapic {
	struct lvt la_lvts[APIC_LVT_MAX + 1];	/* per-CPU LVT overrides */
	struct lvt la_elvts[APIC_ELVT_MAX + 1];	/* AMD extended LVT overrides */
	u_int la_id:8;			/* local APIC ID */
	u_int la_cluster:4;		/* logical destination cluster */
	u_int la_cluster_id:2;		/* ID within the cluster */
	u_int la_present:1;		/* set once this APIC is enumerated */
	u_long *la_timer_count;		/* timer interrupt counter */
	/* Current timer period (or deadline increment — see timer code). */
	uint64_t la_timer_period;
	enum lat_timer_mode la_timer_mode;	/* LAT_MODE_* timer state */
	uint32_t lvt_timer_base;	/* template value for the timer LVT */
	uint32_t lvt_timer_last;	/* last value written to the timer LVT */
	/* Include IDT_SYSCALL to make indexing easier. */
	int la_ioint_irqs[APIC_NUM_IOINTS + 1];
} static *lapics;
147
148 /* Global defaults for local APIC LVT entries. */
149 static struct lvt lvts[APIC_LVT_MAX + 1] = {
150         { 1, 1, 1, 1, APIC_LVT_DM_EXTINT, 0 },  /* LINT0: masked ExtINT */
151         { 1, 1, 0, 1, APIC_LVT_DM_NMI, 0 },     /* LINT1: NMI */
152         { 1, 1, 1, 1, APIC_LVT_DM_FIXED, APIC_TIMER_INT },      /* Timer */
153         { 1, 1, 0, 1, APIC_LVT_DM_FIXED, APIC_ERROR_INT },      /* Error */
154         { 1, 1, 1, 1, APIC_LVT_DM_NMI, 0 },     /* PMC */
155         { 1, 1, 1, 1, APIC_LVT_DM_FIXED, APIC_THERMAL_INT },    /* Thermal */
156         { 1, 1, 1, 1, APIC_LVT_DM_FIXED, APIC_CMC_INT },        /* CMCI */
157 };
158
159 /* Global defaults for AMD local APIC ELVT entries. */
160 static struct lvt elvts[APIC_ELVT_MAX + 1] = {
161         { 1, 1, 1, 0, APIC_LVT_DM_FIXED, 0 },
162         { 1, 1, 1, 0, APIC_LVT_DM_FIXED, APIC_CMC_INT },
163         { 1, 1, 1, 0, APIC_LVT_DM_FIXED, 0 },
164         { 1, 1, 1, 0, APIC_LVT_DM_FIXED, 0 },
165 };
166
167 static inthand_t *ioint_handlers[] = {
168         NULL,                   /* 0 - 31 */
169         IDTVEC(apic_isr1),      /* 32 - 63 */
170         IDTVEC(apic_isr2),      /* 64 - 95 */
171         IDTVEC(apic_isr3),      /* 96 - 127 */
172         IDTVEC(apic_isr4),      /* 128 - 159 */
173         IDTVEC(apic_isr5),      /* 160 - 191 */
174         IDTVEC(apic_isr6),      /* 192 - 223 */
175         IDTVEC(apic_isr7),      /* 224 - 255 */
176 };
177
178 static inthand_t *ioint_pti_handlers[] = {
179         NULL,                   /* 0 - 31 */
180         IDTVEC(apic_isr1_pti),  /* 32 - 63 */
181         IDTVEC(apic_isr2_pti),  /* 64 - 95 */
182         IDTVEC(apic_isr3_pti),  /* 96 - 127 */
183         IDTVEC(apic_isr4_pti),  /* 128 - 159 */
184         IDTVEC(apic_isr5_pti),  /* 160 - 191 */
185         IDTVEC(apic_isr6_pti),  /* 192 - 223 */
186         IDTVEC(apic_isr7_pti),  /* 224 - 255 */
187 };
188
189 static u_int32_t lapic_timer_divisors[] = {
190         APIC_TDCR_1, APIC_TDCR_2, APIC_TDCR_4, APIC_TDCR_8, APIC_TDCR_16,
191         APIC_TDCR_32, APIC_TDCR_64, APIC_TDCR_128
192 };
193
194 extern inthand_t IDTVEC(rsvd_pti), IDTVEC(rsvd);
195
196 volatile char *lapic_map;
197 vm_paddr_t lapic_paddr = DEFAULT_APIC_BASE;
198 int x2apic_mode;
199 int lapic_eoi_suppression;
200 static int lapic_timer_tsc_deadline;
201 static u_long lapic_timer_divisor, count_freq;
202 static struct eventtimer lapic_et;
203 #ifdef SMP
204 static uint64_t lapic_ipi_wait_mult;
205 static int __read_mostly lapic_ds_idle_timeout = 1000000;
206 #endif
207 unsigned int max_apic_id;
208
209 SYSCTL_NODE(_hw, OID_AUTO, apic, CTLFLAG_RD | CTLFLAG_MPSAFE, 0,
210     "APIC options");
211 SYSCTL_INT(_hw_apic, OID_AUTO, x2apic_mode, CTLFLAG_RD, &x2apic_mode, 0, "");
212 SYSCTL_INT(_hw_apic, OID_AUTO, eoi_suppression, CTLFLAG_RD,
213     &lapic_eoi_suppression, 0, "");
214 SYSCTL_INT(_hw_apic, OID_AUTO, timer_tsc_deadline, CTLFLAG_RD,
215     &lapic_timer_tsc_deadline, 0, "");
216 #ifdef SMP
217 SYSCTL_INT(_hw_apic, OID_AUTO, ds_idle_timeout, CTLFLAG_RWTUN,
218     &lapic_ds_idle_timeout, 0,
219     "timeout (in us) for APIC Delivery Status to become Idle (xAPIC only)");
220 #endif
221
222 static void lapic_calibrate_initcount(struct lapic *la);
223
224 /*
225  * Use __nosanitizethread to exempt the LAPIC I/O accessors from KCSan
226  * instrumentation.  Otherwise, if x2APIC is not available, use of the global
227  * lapic_map will generate a KCSan false positive.  While the mapping is
228  * shared among all CPUs, the physical access will always take place on the
229  * local CPU's APIC, so there isn't in fact a race here.  Furthermore, the
230  * KCSan warning printf can cause a panic if issued during LAPIC access,
231  * due to attempted recursive use of event timer resources.
232  */
233
234 static uint32_t __nosanitizethread
235 lapic_read32(enum LAPIC_REGISTERS reg)
236 {
237         uint32_t res;
238
239         if (x2apic_mode) {
240                 res = rdmsr32(MSR_APIC_000 + reg);
241         } else {
242                 res = *(volatile uint32_t *)(lapic_map + reg * LAPIC_MEM_MUL);
243         }
244         return (res);
245 }
246
/*
 * Write a 32-bit local APIC register.  In x2APIC mode the register is an
 * MSR; fences are issued before the WRMSR to order it with respect to
 * preceding memory operations (x2APIC MSR writes are not serializing —
 * see the Intel SDM).  In xAPIC mode a plain memory-mapped store is
 * used.
 */
static void __nosanitizethread
lapic_write32(enum LAPIC_REGISTERS reg, uint32_t val)
{

	if (x2apic_mode) {
		mfence();
		lfence();
		wrmsr(MSR_APIC_000 + reg, val);
	} else {
		*(volatile uint32_t *)(lapic_map + reg * LAPIC_MEM_MUL) = val;
	}
}
259
260 static void __nosanitizethread
261 lapic_write32_nofence(enum LAPIC_REGISTERS reg, uint32_t val)
262 {
263
264         if (x2apic_mode) {
265                 wrmsr(MSR_APIC_000 + reg, val);
266         } else {
267                 *(volatile uint32_t *)(lapic_map + reg * LAPIC_MEM_MUL) = val;
268         }
269 }
270
271 #ifdef SMP
272 static uint64_t
273 lapic_read_icr_lo(void)
274 {
275
276         return (lapic_read32(LAPIC_ICR_LO));
277 }
278
/*
 * Write both halves of the interrupt command register.  In x2APIC mode
 * the ICR is a single 64-bit MSR written in one shot (after a fence to
 * order it against prior memory operations).  In xAPIC mode the high
 * half must be written before the low half — the ICR_LO write is what
 * dispatches the IPI — and interrupts are disabled so the two-register
 * sequence cannot be torn by a nested ICR user.
 */
static void
lapic_write_icr(uint32_t vhi, uint32_t vlo)
{
	register_t saveintr;
	uint64_t v;

	if (x2apic_mode) {
		v = ((uint64_t)vhi << 32) | vlo;
		mfence();
		wrmsr(MSR_APIC_000 + LAPIC_ICR_LO, v);
	} else {
		saveintr = intr_disable();
		lapic_write32(LAPIC_ICR_HI, vhi);
		lapic_write32(LAPIC_ICR_LO, vlo);
		intr_restore(saveintr);
	}
}
296
/*
 * Write only the low half of the interrupt command register, leaving
 * the previously programmed destination (high half) untouched.  The
 * mfence in x2APIC mode orders the MSR write after preceding memory
 * operations.
 */
static void
lapic_write_icr_lo(uint32_t vlo)
{

	if (x2apic_mode) {
		mfence();
		wrmsr(MSR_APIC_000 + LAPIC_ICR_LO, vlo);
	} else {
		lapic_write32(LAPIC_ICR_LO, vlo);
	}
}
308
/*
 * Send an IPI to the local CPU via the dedicated SELF IPI register.
 * That register only exists in x2APIC mode, hence the assertion.
 */
static void
lapic_write_self_ipi(uint32_t vector)
{

	KASSERT(x2apic_mode, ("SELF IPI write in xAPIC mode"));
	wrmsr(MSR_APIC_000 + LAPIC_SELF_IPI, vector);
}
316 #endif /* SMP */
317
318 static void
319 native_lapic_enable_x2apic(void)
320 {
321         uint64_t apic_base;
322
323         apic_base = rdmsr(MSR_APICBASE);
324         apic_base |= APICBASE_X2APIC | APICBASE_ENABLED;
325         wrmsr(MSR_APICBASE, apic_base);
326 }
327
328 static bool
329 native_lapic_is_x2apic(void)
330 {
331         uint64_t apic_base;
332
333         apic_base = rdmsr(MSR_APICBASE);
334         return ((apic_base & (APICBASE_X2APIC | APICBASE_ENABLED)) ==
335             (APICBASE_X2APIC | APICBASE_ENABLED));
336 }
337
338 static void     lapic_enable(void);
339 static void     lapic_resume(struct pic *pic, bool suspend_cancelled);
340 static void     lapic_timer_oneshot(struct lapic *);
341 static void     lapic_timer_oneshot_nointr(struct lapic *, uint32_t);
342 static void     lapic_timer_periodic(struct lapic *);
343 static void     lapic_timer_deadline(struct lapic *);
344 static void     lapic_timer_stop(struct lapic *);
345 static void     lapic_timer_set_divisor(u_int divisor);
346 static uint32_t lvt_mode(struct lapic *la, u_int pin, uint32_t value);
347 static int      lapic_et_start(struct eventtimer *et,
348                     sbintime_t first, sbintime_t period);
349 static int      lapic_et_stop(struct eventtimer *et);
350 static u_int    apic_idt_to_irq(u_int apic_id, u_int vector);
351 static void     lapic_set_tpr(u_int vector);
352
353 struct pic lapic_pic = { .pic_resume = lapic_resume };
354
355 /* Forward declarations for apic_ops */
356 static void     native_lapic_create(u_int apic_id, int boot_cpu);
357 static void     native_lapic_init(vm_paddr_t addr);
358 static void     native_lapic_xapic_mode(void);
359 static void     native_lapic_setup(int boot);
360 static void     native_lapic_dump(const char *str);
361 static void     native_lapic_disable(void);
362 static void     native_lapic_eoi(void);
363 static int      native_lapic_id(void);
364 static int      native_lapic_intr_pending(u_int vector);
365 static u_int    native_apic_cpuid(u_int apic_id);
366 static u_int    native_apic_alloc_vector(u_int apic_id, u_int irq);
367 static u_int    native_apic_alloc_vectors(u_int apic_id, u_int *irqs,
368                     u_int count, u_int align);
369 static void     native_apic_disable_vector(u_int apic_id, u_int vector);
370 static void     native_apic_enable_vector(u_int apic_id, u_int vector);
371 static void     native_apic_free_vector(u_int apic_id, u_int vector, u_int irq);
372 static void     native_lapic_set_logical_id(u_int apic_id, u_int cluster,
373                     u_int cluster_id);
374 static void     native_lapic_calibrate_timer(void);
375 static int      native_lapic_enable_pmc(void);
376 static void     native_lapic_disable_pmc(void);
377 static void     native_lapic_reenable_pmc(void);
378 static void     native_lapic_enable_cmc(void);
379 static int      native_lapic_enable_mca_elvt(void);
380 static int      native_lapic_set_lvt_mask(u_int apic_id, u_int lvt,
381                     u_char masked);
382 static int      native_lapic_set_lvt_mode(u_int apic_id, u_int lvt,
383                     uint32_t mode);
384 static int      native_lapic_set_lvt_polarity(u_int apic_id, u_int lvt,
385                     enum intr_polarity pol);
386 static int      native_lapic_set_lvt_triggermode(u_int apic_id, u_int lvt,
387                     enum intr_trigger trigger);
388 #ifdef SMP
389 static void     native_lapic_ipi_raw(register_t icrlo, u_int dest);
390 static void     native_lapic_ipi_vectored(u_int vector, int dest);
391 static int      native_lapic_ipi_wait(int delay);
392 #endif /* SMP */
393 static int      native_lapic_ipi_alloc(inthand_t *ipifunc);
394 static void     native_lapic_ipi_free(int vector);
395
396 struct apic_ops apic_ops = {
397         .create                 = native_lapic_create,
398         .init                   = native_lapic_init,
399         .xapic_mode             = native_lapic_xapic_mode,
400         .is_x2apic              = native_lapic_is_x2apic,
401         .setup                  = native_lapic_setup,
402         .dump                   = native_lapic_dump,
403         .disable                = native_lapic_disable,
404         .eoi                    = native_lapic_eoi,
405         .id                     = native_lapic_id,
406         .intr_pending           = native_lapic_intr_pending,
407         .set_logical_id         = native_lapic_set_logical_id,
408         .cpuid                  = native_apic_cpuid,
409         .alloc_vector           = native_apic_alloc_vector,
410         .alloc_vectors          = native_apic_alloc_vectors,
411         .enable_vector          = native_apic_enable_vector,
412         .disable_vector         = native_apic_disable_vector,
413         .free_vector            = native_apic_free_vector,
414         .calibrate_timer        = native_lapic_calibrate_timer,
415         .enable_pmc             = native_lapic_enable_pmc,
416         .disable_pmc            = native_lapic_disable_pmc,
417         .reenable_pmc           = native_lapic_reenable_pmc,
418         .enable_cmc             = native_lapic_enable_cmc,
419         .enable_mca_elvt        = native_lapic_enable_mca_elvt,
420 #ifdef SMP
421         .ipi_raw                = native_lapic_ipi_raw,
422         .ipi_vectored           = native_lapic_ipi_vectored,
423         .ipi_wait               = native_lapic_ipi_wait,
424 #endif
425         .ipi_alloc              = native_lapic_ipi_alloc,
426         .ipi_free               = native_lapic_ipi_free,
427         .set_lvt_mask           = native_lapic_set_lvt_mask,
428         .set_lvt_mode           = native_lapic_set_lvt_mode,
429         .set_lvt_polarity       = native_lapic_set_lvt_polarity,
430         .set_lvt_triggermode    = native_lapic_set_lvt_triggermode,
431 };
432
433 static uint32_t
434 lvt_mode_impl(struct lapic *la, struct lvt *lvt, u_int pin, uint32_t value)
435 {
436
437         value &= ~(APIC_LVT_M | APIC_LVT_TM | APIC_LVT_IIPP | APIC_LVT_DM |
438             APIC_LVT_VECTOR);
439         if (lvt->lvt_edgetrigger == 0)
440                 value |= APIC_LVT_TM;
441         if (lvt->lvt_activehi == 0)
442                 value |= APIC_LVT_IIPP_INTALO;
443         if (lvt->lvt_masked)
444                 value |= APIC_LVT_M;
445         value |= lvt->lvt_mode;
446         switch (lvt->lvt_mode) {
447         case APIC_LVT_DM_NMI:
448         case APIC_LVT_DM_SMI:
449         case APIC_LVT_DM_INIT:
450         case APIC_LVT_DM_EXTINT:
451                 if (!lvt->lvt_edgetrigger && bootverbose) {
452                         printf("lapic%u: Forcing LINT%u to edge trigger\n",
453                             la->la_id, pin);
454                         value &= ~APIC_LVT_TM;
455                 }
456                 /* Use a vector of 0. */
457                 break;
458         case APIC_LVT_DM_FIXED:
459                 value |= lvt->lvt_vector;
460                 break;
461         default:
462                 panic("bad APIC LVT delivery mode: %#x\n", value);
463         }
464         return (value);
465 }
466
467 static uint32_t
468 lvt_mode(struct lapic *la, u_int pin, uint32_t value)
469 {
470         struct lvt *lvt;
471
472         KASSERT(pin <= APIC_LVT_MAX,
473             ("%s: pin %u out of range", __func__, pin));
474         if (la->la_lvts[pin].lvt_active)
475                 lvt = &la->la_lvts[pin];
476         else
477                 lvt = &lvts[pin];
478
479         return (lvt_mode_impl(la, lvt, pin, value));
480 }
481
/*
 * Compute the AMD extended LVT register value for entry "idx" of local
 * APIC "la".  Unlike regular LVTs there is no global-default fallback:
 * the per-CPU entry must already be active, and it must be edge
 * triggered and active high.
 */
static uint32_t
elvt_mode(struct lapic *la, u_int idx, uint32_t value)
{
	struct lvt *elvt;

	KASSERT(idx <= APIC_ELVT_MAX,
	    ("%s: idx %u out of range", __func__, idx));

	elvt = &la->la_elvts[idx];
	KASSERT(elvt->lvt_active, ("%s: ELVT%u is not active", __func__, idx));
	KASSERT(elvt->lvt_edgetrigger,
	    ("%s: ELVT%u is not edge triggered", __func__, idx));
	KASSERT(elvt->lvt_activehi,
	    ("%s: ELVT%u is not active high", __func__, idx));
	return (lvt_mode_impl(la, elvt, idx, value));
}
498
/*
 * Map the local APIC and setup necessary interrupt vectors.
 * Runs once on the BSP: maps (or MSR-switches) register access, installs
 * the spurious/timer/error/CMCI IDT vectors, optionally registers the
 * LAPIC event timer, probes EOI suppression, and (SMP, xAPIC only)
 * calibrates the IPI-ack busy-wait multiplier.
 */
static void
native_lapic_init(vm_paddr_t addr)
{
#ifdef SMP
	uint64_t r, r1, r2, rx;
#endif
	uint32_t ver;
	int i;
	bool arat;

	/*
	 * Enable x2APIC mode if possible. Map the local APIC
	 * registers page.
	 *
	 * Keep the LAPIC registers page mapped uncached for x2APIC
	 * mode too, to have direct map page attribute set to
	 * uncached.  This is needed to work around CPU errata present
	 * on all Intel processors.
	 */
	KASSERT(trunc_page(addr) == addr,
	    ("local APIC not aligned on a page boundary"));
	lapic_paddr = addr;
	lapic_map = pmap_mapdev(addr, PAGE_SIZE);
	if (x2apic_mode) {
		native_lapic_enable_x2apic();
		/* All register accesses go through MSRs from here on. */
		lapic_map = NULL;
	}

	/* Setup the spurious interrupt handler. */
	setidt(APIC_SPURIOUS_INT, IDTVEC(spuriousint), SDT_APIC, SEL_KPL,
	    GSEL_APIC);

	/* Perform basic initialization of the BSP's local APIC. */
	lapic_enable();

	/* Set BSP's per-CPU local APIC ID. */
	PCPU_SET(apic_id, lapic_id());

	/* Local APIC timer interrupt. */
	setidt(APIC_TIMER_INT, pti ? IDTVEC(timerint_pti) : IDTVEC(timerint),
	    SDT_APIC, SEL_KPL, GSEL_APIC);

	/* Local APIC error interrupt. */
	setidt(APIC_ERROR_INT, pti ? IDTVEC(errorint_pti) : IDTVEC(errorint),
	    SDT_APIC, SEL_KPL, GSEL_APIC);

	/* XXX: Thermal interrupt */

	/* Local APIC CMCI. */
	setidt(APIC_CMC_INT, pti ? IDTVEC(cmcint_pti) : IDTVEC(cmcint),
	    SDT_APIC, SEL_KPL, GSEL_APIC);

	/* Register the LAPIC event timer unless disabled by the hint. */
	if ((resource_int_value("apic", 0, "clock", &i) != 0 || i != 0)) {
		/* Set if APIC timer runs in C3. */
		arat = (cpu_power_eax & CPUTPM1_ARAT);

		bzero(&lapic_et, sizeof(lapic_et));
		lapic_et.et_name = "LAPIC";
		lapic_et.et_flags = ET_FLAGS_PERIODIC | ET_FLAGS_ONESHOT |
		    ET_FLAGS_PERCPU;
		lapic_et.et_quality = 600;
		if (!arat) {
			/* Timer may stop in deep C-states: lower quality. */
			lapic_et.et_flags |= ET_FLAGS_C3STOP;
			lapic_et.et_quality = 100;
		}
		/* Prefer TSC-deadline mode when the CPU supports it. */
		if ((cpu_feature & CPUID_TSC) != 0 &&
		    (cpu_feature2 & CPUID2_TSCDLT) != 0 &&
		    tsc_is_invariant && tsc_freq != 0) {
			lapic_timer_tsc_deadline = 1;
			TUNABLE_INT_FETCH("hw.lapic_tsc_deadline",
			    &lapic_timer_tsc_deadline);
		}

		lapic_et.et_frequency = 0;
		/* We don't know frequency yet, so trying to guess. */
		lapic_et.et_min_period = 0x00001000LL;
		lapic_et.et_max_period = SBT_1S;
		lapic_et.et_start = lapic_et_start;
		lapic_et.et_stop = lapic_et_stop;
		lapic_et.et_priv = NULL;
		et_register(&lapic_et);
	}

	/*
	 * Set lapic_eoi_suppression after lapic_enable(), to not
	 * enable suppression in the hardware prematurely.  Note that
	 * we by default enable suppression even when system only has
	 * one IO-APIC, since EOI is broadcasted to all APIC agents,
	 * including CPUs, otherwise.
	 *
	 * It seems that at least some KVM versions report
	 * EOI_SUPPRESSION bit, but auto-EOI does not work.
	 */
	ver = lapic_read32(LAPIC_VERSION);
	if ((ver & APIC_VER_EOI_SUPPRESSION) != 0) {
		lapic_eoi_suppression = 1;
		if (vm_guest == VM_GUEST_KVM) {
			if (bootverbose)
				printf(
		       "KVM -- disabling lapic eoi suppression\n");
			lapic_eoi_suppression = 0;
		}
		TUNABLE_INT_FETCH("hw.lapic_eoi_suppression",
		    &lapic_eoi_suppression);
	}

#ifdef SMP
#define LOOPS	100000
	/*
	 * Calibrate the busy loop waiting for IPI ack in xAPIC mode.
	 * lapic_ipi_wait_mult contains the number of iterations which
	 * approximately delay execution for 1 microsecond (the
	 * argument to native_lapic_ipi_wait() is in microseconds).
	 *
	 * We assume that TSC is present and already measured.
	 * Possible TSC frequency jumps are irrelevant to the
	 * calibration loop below, the CPU clock management code is
	 * not yet started, and we do not enter sleep states.
	 */
	KASSERT((cpu_feature & CPUID_TSC) != 0 && tsc_freq != 0,
	    ("TSC not initialized"));
	if (!x2apic_mode) {
		r = rdtsc();
		for (rx = 0; rx < LOOPS; rx++) {
			(void)lapic_read_icr_lo();
			ia32_pause();
		}
		r = rdtsc() - r;
		r1 = tsc_freq * LOOPS;
		r2 = r * 1000000;
		/* Never allow a zero multiplier. */
		lapic_ipi_wait_mult = r1 >= r2 ? r1 / r2 : 1;
		if (bootverbose) {
			printf("LAPIC: ipi_wait() us multiplier %ju (r %ju "
			    "tsc %ju)\n", (uintmax_t)lapic_ipi_wait_mult,
			    (uintmax_t)r, (uintmax_t)tsc_freq);
		}
	}
#undef LOOPS
#endif /* SMP */
}
642
/*
 * Create a local APIC instance.
 *
 * Records the APIC with ID "apic_id" in the lapics[] array, seeding its
 * LVT/ELVT shadows from the global defaults (marked inactive, i.e. no
 * local overrides) and marking all I/O interrupt vector slots free
 * except the ones permanently reserved for the syscall gate, the local
 * timer, and (if configured) DTrace and Xen event channels.  boot_cpu
 * is nonzero for the BSP, which must never be ignored.
 */
static void
native_lapic_create(u_int apic_id, int boot_cpu)
{
	int i;

	if (apic_id > max_apic_id) {
		printf("APIC: Ignoring local APIC with ID %d\n", apic_id);
		if (boot_cpu)
			panic("Can't ignore BSP");
		return;
	}
	KASSERT(!lapics[apic_id].la_present, ("duplicate local APIC %u",
	    apic_id));

	/*
	 * Assume no local LVT overrides and a cluster of 0 and
	 * intra-cluster ID of 0.
	 */
	lapics[apic_id].la_present = 1;
	lapics[apic_id].la_id = apic_id;
	for (i = 0; i <= APIC_LVT_MAX; i++) {
		lapics[apic_id].la_lvts[i] = lvts[i];
		lapics[apic_id].la_lvts[i].lvt_active = 0;
	}
	for (i = 0; i <= APIC_ELVT_MAX; i++) {
		lapics[apic_id].la_elvts[i] = elvts[i];
		lapics[apic_id].la_elvts[i].lvt_active = 0;
	}
	/* Mark every I/O interrupt slot free, then reserve fixed vectors. */
	for (i = 0; i <= APIC_NUM_IOINTS; i++)
	    lapics[apic_id].la_ioint_irqs[i] = IRQ_FREE;
	lapics[apic_id].la_ioint_irqs[IDT_SYSCALL - APIC_IO_INTS] = IRQ_SYSCALL;
	lapics[apic_id].la_ioint_irqs[APIC_TIMER_INT - APIC_IO_INTS] =
	    IRQ_TIMER;
#ifdef KDTRACE_HOOKS
	lapics[apic_id].la_ioint_irqs[IDT_DTRACE_RET - APIC_IO_INTS] =
	    IRQ_DTRACE_RET;
#endif
#ifdef XENHVM
	lapics[apic_id].la_ioint_irqs[IDT_EVTCHN - APIC_IO_INTS] = IRQ_EVTCHN;
#endif

#ifdef SMP
	cpu_add(apic_id, boot_cpu);
#endif
}
691
692 static inline uint32_t
693 amd_read_ext_features(void)
694 {
695         uint32_t version;
696
697         if (cpu_vendor_id != CPU_VENDOR_AMD &&
698             cpu_vendor_id != CPU_VENDOR_HYGON)
699                 return (0);
700         version = lapic_read32(LAPIC_VERSION);
701         if ((version & APIC_VER_AMD_EXT_SPACE) != 0)
702                 return (lapic_read32(LAPIC_EXT_FEATURES));
703         else
704                 return (0);
705 }
706
707 static inline uint32_t
708 amd_read_elvt_count(void)
709 {
710         uint32_t extf;
711         uint32_t count;
712
713         extf = amd_read_ext_features();
714         count = (extf & APIC_EXTF_ELVT_MASK) >> APIC_EXTF_ELVT_SHIFT;
715         count = min(count, APIC_ELVT_MAX + 1);
716         return (count);
717 }
718
/*
 * Dump contents of local APIC registers
 */
static void
native_lapic_dump(const char* str)
{
	uint32_t version;
	uint32_t maxlvt;
	uint32_t extf;
	int elvt_count;
	int i;

	version = lapic_read32(LAPIC_VERSION);
	/* Index of the highest LVT entry this APIC implements. */
	maxlvt = (version & APIC_VER_MAXLVT) >> MAXLVTSHIFT;
	printf("cpu%d %s:\n", PCPU_GET(cpuid), str);
	/* DFR does not exist in x2APIC mode; print 0 in its place. */
	printf("     ID: 0x%08x   VER: 0x%08x LDR: 0x%08x DFR: 0x%08x",
	    lapic_read32(LAPIC_ID), version,
	    lapic_read32(LAPIC_LDR), x2apic_mode ? 0 : lapic_read32(LAPIC_DFR));
	if ((cpu_feature2 & CPUID2_X2APIC) != 0)
		printf(" x2APIC: %d", x2apic_mode);
	printf("\n  lint0: 0x%08x lint1: 0x%08x TPR: 0x%08x SVR: 0x%08x\n",
	    lapic_read32(LAPIC_LVT_LINT0), lapic_read32(LAPIC_LVT_LINT1),
	    lapic_read32(LAPIC_TPR), lapic_read32(LAPIC_SVR));
	printf("  timer: 0x%08x therm: 0x%08x err: 0x%08x",
	    lapic_read32(LAPIC_LVT_TIMER), lapic_read32(LAPIC_LVT_THERMAL),
	    lapic_read32(LAPIC_LVT_ERROR));
	/* PMC and CMCI LVTs only exist on sufficiently new APICs. */
	if (maxlvt >= APIC_LVT_PMC)
		printf(" pmc: 0x%08x", lapic_read32(LAPIC_LVT_PCINT));
	printf("\n");
	if (maxlvt >= APIC_LVT_CMCI)
		printf("   cmci: 0x%08x\n", lapic_read32(LAPIC_LVT_CMCI));
	/* AMD extended LVT registers, four per output line. */
	extf = amd_read_ext_features();
	if (extf != 0) {
		printf("   AMD ext features: 0x%08x", extf);
		elvt_count = amd_read_elvt_count();
		for (i = 0; i < elvt_count; i++)
			printf("%s elvt%d: 0x%08x", (i % 4) ? "" : "\n ", i,
			    lapic_read32(LAPIC_EXT_LVT0 + i));
		printf("\n");
	}
}
760
761 static void
762 native_lapic_xapic_mode(void)
763 {
764         register_t saveintr;
765
766         saveintr = intr_disable();
767         if (x2apic_mode)
768                 native_lapic_enable_x2apic();
769         intr_restore(saveintr);
770 }
771
/*
 * Program the current CPU's local APIC: TPR, spurious vector, the LVT
 * entries (LINT0/1, PMC, timer, error, CMCI) and any active AMD
 * extended LVTs.  Runs with interrupts disabled.  Called with
 * boot != 0 during initial bring-up and with boot == 0 on resume,
 * where the previously programmed timer mode is restored.
 */
static void
native_lapic_setup(int boot)
{
        struct lapic *la;
        uint32_t version;
        uint32_t maxlvt;
        register_t saveintr;
        int elvt_count;
        int i;

        saveintr = intr_disable();

        la = &lapics[lapic_id()];
        KASSERT(la->la_present, ("missing APIC structure"));
        version = lapic_read32(LAPIC_VERSION);
        /* Index of the highest LVT entry this APIC implements. */
        maxlvt = (version & APIC_VER_MAXLVT) >> MAXLVTSHIFT;

        /* Initialize the TPR to allow all interrupts. */
        lapic_set_tpr(0);

        /* Setup spurious vector and enable the local APIC. */
        lapic_enable();

        /* Program LINT[01] LVT entries. */
        lapic_write32(LAPIC_LVT_LINT0, lvt_mode(la, APIC_LVT_LINT0,
            lapic_read32(LAPIC_LVT_LINT0)));
        lapic_write32(LAPIC_LVT_LINT1, lvt_mode(la, APIC_LVT_LINT1,
            lapic_read32(LAPIC_LVT_LINT1)));

        /*
         * Program the PMC LVT entry if present.
         *
         * NOTE(review): unlike the other entries, the current-value
         * argument here is the register index LAPIC_LVT_PCINT rather
         * than lapic_read32(LAPIC_LVT_PCINT).  lvt_mode() overwrites
         * the mode/mask/polarity/vector bits it cares about, so this
         * appears harmless, but confirm it is intentional.
         */
        if (maxlvt >= APIC_LVT_PMC) {
                lapic_write32(LAPIC_LVT_PCINT, lvt_mode(la, APIC_LVT_PMC,
                    LAPIC_LVT_PCINT));
        }

        /*
         * Program the timer LVT.  Calibration is deferred until it is certain
         * that we have a reliable timecounter.
         */
        la->lvt_timer_base = lvt_mode(la, APIC_LVT_TIMER,
            lapic_read32(LAPIC_LVT_TIMER));
        la->lvt_timer_last = la->lvt_timer_base;
        lapic_write32(LAPIC_LVT_TIMER, la->lvt_timer_base);

        /* On boot the mode is chosen later; on resume rearm the timer. */
        if (boot)
                la->la_timer_mode = LAT_MODE_UNDEF;
        else if (la->la_timer_mode != LAT_MODE_UNDEF) {
                KASSERT(la->la_timer_period != 0, ("lapic%u: zero divisor",
                    lapic_id()));
                switch (la->la_timer_mode) {
                case LAT_MODE_PERIODIC:
                        lapic_timer_set_divisor(lapic_timer_divisor);
                        lapic_timer_periodic(la);
                        break;
                case LAT_MODE_ONESHOT:
                        lapic_timer_set_divisor(lapic_timer_divisor);
                        lapic_timer_oneshot(la);
                        break;
                case LAT_MODE_DEADLINE:
                        lapic_timer_deadline(la);
                        break;
                default:
                        panic("corrupted la_timer_mode %p %d", la,
                            la->la_timer_mode);
                }
        }

        /* Program error LVT and clear any existing errors. */
        lapic_write32(LAPIC_LVT_ERROR, lvt_mode(la, APIC_LVT_ERROR,
            lapic_read32(LAPIC_LVT_ERROR)));
        lapic_write32(LAPIC_ESR, 0);

        /* XXX: Thermal LVT */

        /* Program the CMCI LVT entry if present. */
        if (maxlvt >= APIC_LVT_CMCI) {
                lapic_write32(LAPIC_LVT_CMCI, lvt_mode(la, APIC_LVT_CMCI,
                    lapic_read32(LAPIC_LVT_CMCI)));
        }

        /* Reprogram any active AMD extended LVT entries. */
        elvt_count = amd_read_elvt_count();
        for (i = 0; i < elvt_count; i++) {
                if (la->la_elvts[i].lvt_active)
                        lapic_write32(LAPIC_EXT_LVT0 + i,
                            elvt_mode(la, i, lapic_read32(LAPIC_EXT_LVT0 + i)));
        }

        intr_restore(saveintr);
}
861
862 static void
863 native_lapic_intrcnt(void *dummy __unused)
864 {
865         struct pcpu *pc;
866         struct lapic *la;
867         char buf[MAXCOMLEN + 1];
868
869         /* If there are no APICs, skip this function. */
870         if (lapics == NULL)
871                 return;
872
873         STAILQ_FOREACH(pc, &cpuhead, pc_allcpu) {
874                 la = &lapics[pc->pc_apic_id];
875                 if (!la->la_present)
876                     continue;
877
878                 snprintf(buf, sizeof(buf), "cpu%d:timer", pc->pc_cpuid);
879                 intrcnt_add(buf, &la->la_timer_count);
880         }
881 }
882 SYSINIT(native_lapic_intrcnt, SI_SUB_INTR, SI_ORDER_MIDDLE, native_lapic_intrcnt,
883     NULL);
884
/* Clear the mask bit of the PMC LVT to re-enable its interrupt. */
static void
native_lapic_reenable_pmc(void)
{
#ifdef HWPMC_HOOKS
        uint32_t lvt;

        lvt = lapic_read32(LAPIC_LVT_PCINT);
        lapic_write32(LAPIC_LVT_PCINT, lvt & ~APIC_LVT_M);
#endif
}
896
#ifdef HWPMC_HOOKS
/*
 * Reprogram the PMC LVT entry of the CPU this runs on from the stored
 * LVT configuration (used as an smp_rendezvous() callback).
 */
static void
lapic_update_pmc(void *dummy)
{
        struct lapic *la = &lapics[lapic_id()];

        lapic_write32(LAPIC_LVT_PCINT, lvt_mode(la, APIC_LVT_PMC,
            lapic_read32(LAPIC_LVT_PCINT)));
}
#endif
908
909 static void
910 native_lapic_calibrate_timer(void)
911 {
912         struct lapic *la;
913         register_t intr;
914
915 #ifdef DEV_ATPIC
916         /* Fail if the local APIC is not present. */
917         if (!x2apic_mode && lapic_map == NULL)
918                 return;
919 #endif
920
921         intr = intr_disable();
922         la = &lapics[lapic_id()];
923
924         lapic_calibrate_initcount(la);
925
926         intr_restore(intr);
927
928         if (lapic_timer_tsc_deadline && bootverbose) {
929                 printf("lapic: deadline tsc mode, Frequency %ju Hz\n",
930                     (uintmax_t)tsc_freq);
931         }
932 }
933
/*
 * Unmask the performance-counter LVT entry on all CPUs so hwpmc can
 * receive counter-overflow interrupts.  Returns 1 on success and 0 if
 * the local APIC is missing, lacks a PMC LVT, or HWPMC_HOOKS is not
 * compiled in.
 */
static int
native_lapic_enable_pmc(void)
{
#ifdef HWPMC_HOOKS
	uint32_t maxlvt;	/* standard C99 type; u_int32_t is obsolete */

#ifdef DEV_ATPIC
	/* Fail if the local APIC is not present. */
	if (!x2apic_mode && lapic_map == NULL)
		return (0);
#endif

	/* Fail if the PMC LVT is not present. */
	maxlvt = (lapic_read32(LAPIC_VERSION) & APIC_VER_MAXLVT) >> MAXLVTSHIFT;
	if (maxlvt < APIC_LVT_PMC)
		return (0);

	/* Record the new default so CPUs set up later inherit it. */
	lvts[APIC_LVT_PMC].lvt_masked = 0;

#ifdef EARLY_AP_STARTUP
	MPASS(mp_ncpus == 1 || smp_started);
	smp_rendezvous(NULL, lapic_update_pmc, NULL, NULL);
#else
#ifdef SMP
	/*
	 * If hwpmc was loaded at boot time then the APs may not be
	 * started yet.  In that case, don't forward the request to
	 * them as they will program the lvt when they start.
	 */
	if (smp_started)
		smp_rendezvous(NULL, lapic_update_pmc, NULL, NULL);
	else
#endif
		lapic_update_pmc(NULL);
#endif
	return (1);
#else
	return (0);
#endif
}
974
/*
 * Mask the performance-counter LVT entry on all CPUs when hwpmc is
 * unloaded.  Silently does nothing if the local APIC or its PMC LVT
 * is absent.
 */
static void
native_lapic_disable_pmc(void)
{
#ifdef HWPMC_HOOKS
	uint32_t maxlvt;	/* standard C99 type; u_int32_t is obsolete */

#ifdef DEV_ATPIC
	/* Fail if the local APIC is not present. */
	if (!x2apic_mode && lapic_map == NULL)
		return;
#endif

	/* Fail if the PMC LVT is not present. */
	maxlvt = (lapic_read32(LAPIC_VERSION) & APIC_VER_MAXLVT) >> MAXLVTSHIFT;
	if (maxlvt < APIC_LVT_PMC)
		return;

	/* Update the default so newly set-up CPUs also mask it. */
	lvts[APIC_LVT_PMC].lvt_masked = 1;

#ifdef SMP
	/* The APs should always be started when hwpmc is unloaded. */
	KASSERT(mp_ncpus == 1 || smp_started, ("hwpmc unloaded too early"));
#endif
	smp_rendezvous(NULL, lapic_update_pmc, NULL, NULL);
#endif
}
1001
1002 static int
1003 lapic_calibrate_initcount_cpuid_vm(void)
1004 {
1005         u_int regs[4];
1006         uint64_t freq;
1007
1008         /* Get value from CPUID leaf if possible. */
1009         if (vm_guest == VM_GUEST_NO)
1010                 return (false);
1011         if (hv_high < 0x40000010)
1012                 return (false);
1013         do_cpuid(0x40000010, regs);
1014         freq = (uint64_t)(regs[1]) * 1000;
1015
1016         /* Pick timer divisor. */
1017         lapic_timer_divisor = 2;
1018         do {
1019                 if (freq / lapic_timer_divisor < APIC_TIMER_MAX_COUNT)
1020                         break;
1021                 lapic_timer_divisor <<= 1;
1022         } while (lapic_timer_divisor <= 128);
1023         if (lapic_timer_divisor > 128)
1024                 return (false);
1025
1026         /* Record divided frequency. */
1027         count_freq = freq / lapic_timer_divisor;
1028         return (count_freq != 0);
1029 }
1030
1031 static uint64_t
1032 cb_lapic_getcount(void)
1033 {
1034
1035         return (APIC_TIMER_MAX_COUNT - lapic_read32(LAPIC_CCR_TIMER));
1036 }
1037
/*
 * Determine count_freq, the frequency of the divided APIC timer clock,
 * either from a hypervisor CPUID leaf or by measuring the running
 * timer against the current timecounter via clockcalib().
 */
static void
lapic_calibrate_initcount(struct lapic *la)
{
        uint64_t freq;

        if (lapic_calibrate_initcount_cpuid_vm())
                goto done;

        /* Calibrate the APIC timer frequency. */
        lapic_timer_set_divisor(2);
        /* Run the timer with its interrupt masked while sampling the CCR. */
        lapic_timer_oneshot_nointr(la, APIC_TIMER_MAX_COUNT);
        /* clockcalib() uses FP math, so enter an FPU kernel section. */
        fpu_kern_enter(curthread, NULL, FPU_KERN_NOCTX);
        freq = clockcalib(cb_lapic_getcount, "lapic");
        fpu_kern_leave(curthread, NULL);

        /*
         * Pick a different divisor if necessary.  freq was measured with
         * divisor 2, so freq * 2 is the undivided timer frequency.
         */
        lapic_timer_divisor = 2;
        do {
                if (freq * 2 / lapic_timer_divisor < APIC_TIMER_MAX_COUNT)
                        break;
                lapic_timer_divisor <<= 1;
        } while (lapic_timer_divisor <= 128);
        if (lapic_timer_divisor > 128)
                panic("lapic: Divisor too big");
        count_freq = freq * 2 / lapic_timer_divisor;
done:
        if (bootverbose) {
                printf("lapic: Divisor %lu, Frequency %lu Hz\n",
                    lapic_timer_divisor, count_freq);
        }
}
1069
1070 static void
1071 lapic_change_mode(struct eventtimer *et, struct lapic *la,
1072     enum lat_timer_mode newmode)
1073 {
1074         if (la->la_timer_mode == newmode)
1075                 return;
1076         switch (newmode) {
1077         case LAT_MODE_PERIODIC:
1078                 lapic_timer_set_divisor(lapic_timer_divisor);
1079                 et->et_frequency = count_freq;
1080                 break;
1081         case LAT_MODE_DEADLINE:
1082                 et->et_frequency = tsc_freq;
1083                 break;
1084         case LAT_MODE_ONESHOT:
1085                 lapic_timer_set_divisor(lapic_timer_divisor);
1086                 et->et_frequency = count_freq;
1087                 break;
1088         default:
1089                 panic("lapic_change_mode %d", newmode);
1090         }
1091         la->la_timer_mode = newmode;
1092         et->et_min_period = (0x00000002LLU << 32) / et->et_frequency;
1093         et->et_max_period = (0xfffffffeLLU << 32) / et->et_frequency;
1094 }
1095
/*
 * Event timer start method.  Arms the local timer in periodic,
 * TSC-deadline or one-shot mode.  'first' and 'period' are sbintime_t
 * (32.32 fixed-point seconds), so (frequency * time) >> 32 converts a
 * time interval into timer ticks.
 */
static int
lapic_et_start(struct eventtimer *et, sbintime_t first, sbintime_t period)
{
        struct lapic *la;

        la = &lapics[PCPU_GET(apic_id)];
        if (period != 0) {
                /* Periodic mode: reload value in divided-clock ticks. */
                lapic_change_mode(et, la, LAT_MODE_PERIODIC);
                la->la_timer_period = ((uint32_t)et->et_frequency * period) >>
                    32;
                lapic_timer_periodic(la);
        } else if (lapic_timer_tsc_deadline) {
                /* One-shot via TSC deadline: period counted in TSC ticks. */
                lapic_change_mode(et, la, LAT_MODE_DEADLINE);
                la->la_timer_period = (et->et_frequency * first) >> 32;
                lapic_timer_deadline(la);
        } else {
                /* Plain one-shot on the divided timer clock. */
                lapic_change_mode(et, la, LAT_MODE_ONESHOT);
                la->la_timer_period = ((uint32_t)et->et_frequency * first) >>
                    32;
                lapic_timer_oneshot(la);
        }
        return (0);
}
1119
1120 static int
1121 lapic_et_stop(struct eventtimer *et)
1122 {
1123         struct lapic *la;
1124
1125         la = &lapics[PCPU_GET(apic_id)];
1126         lapic_timer_stop(la);
1127         la->la_timer_mode = LAT_MODE_UNDEF;
1128         return (0);
1129 }
1130
1131 static void
1132 native_lapic_disable(void)
1133 {
1134         uint32_t value;
1135
1136         /* Software disable the local APIC. */
1137         value = lapic_read32(LAPIC_SVR);
1138         value &= ~APIC_SVR_SWEN;
1139         lapic_write32(LAPIC_SVR, value);
1140 }
1141
1142 static void
1143 lapic_enable(void)
1144 {
1145         uint32_t value;
1146
1147         /* Program the spurious vector to enable the local APIC. */
1148         value = lapic_read32(LAPIC_SVR);
1149         value &= ~(APIC_SVR_VECTOR | APIC_SVR_FOCUS);
1150         value |= APIC_SVR_FEN | APIC_SVR_SWEN | APIC_SPURIOUS_INT;
1151         if (lapic_eoi_suppression)
1152                 value |= APIC_SVR_EOI_SUPPRESSION;
1153         lapic_write32(LAPIC_SVR, value);
1154 }
1155
/* Reset the local APIC on the BSP during resume. */
static void
lapic_resume(struct pic *pic, bool suspend_cancelled)
{

        /* boot == 0: lapic_setup() also restores the saved timer mode. */
        lapic_setup(0);
}
1163
1164 static int
1165 native_lapic_id(void)
1166 {
1167         uint32_t v;
1168
1169         KASSERT(x2apic_mode || lapic_map != NULL, ("local APIC is not mapped"));
1170         v = lapic_read32(LAPIC_ID);
1171         if (!x2apic_mode)
1172                 v >>= APIC_ID_SHIFT;
1173         return (v);
1174 }
1175
1176 static int
1177 native_lapic_intr_pending(u_int vector)
1178 {
1179         uint32_t irr;
1180
1181         /*
1182          * The IRR registers are an array of registers each of which
1183          * only describes 32 interrupts in the low 32 bits.  Thus, we
1184          * divide the vector by 32 to get the register index.
1185          * Finally, we modulus the vector by 32 to determine the
1186          * individual bit to test.
1187          */
1188         irr = lapic_read32(LAPIC_IRR0 + vector / 32);
1189         return (irr & 1 << (vector % 32));
1190 }
1191
1192 static void
1193 native_lapic_set_logical_id(u_int apic_id, u_int cluster, u_int cluster_id)
1194 {
1195         struct lapic *la;
1196
1197         KASSERT(lapics[apic_id].la_present, ("%s: APIC %u doesn't exist",
1198             __func__, apic_id));
1199         KASSERT(cluster <= APIC_MAX_CLUSTER, ("%s: cluster %u too big",
1200             __func__, cluster));
1201         KASSERT(cluster_id <= APIC_MAX_INTRACLUSTER_ID,
1202             ("%s: intra cluster id %u too big", __func__, cluster_id));
1203         la = &lapics[apic_id];
1204         la->la_cluster = cluster;
1205         la->la_cluster_id = cluster_id;
1206 }
1207
1208 static int
1209 native_lapic_set_lvt_mask(u_int apic_id, u_int pin, u_char masked)
1210 {
1211
1212         if (pin > APIC_LVT_MAX)
1213                 return (EINVAL);
1214         if (apic_id == APIC_ID_ALL) {
1215                 lvts[pin].lvt_masked = masked;
1216                 if (bootverbose)
1217                         printf("lapic:");
1218         } else {
1219                 KASSERT(lapics[apic_id].la_present,
1220                     ("%s: missing APIC %u", __func__, apic_id));
1221                 lapics[apic_id].la_lvts[pin].lvt_masked = masked;
1222                 lapics[apic_id].la_lvts[pin].lvt_active = 1;
1223                 if (bootverbose)
1224                         printf("lapic%u:", apic_id);
1225         }
1226         if (bootverbose)
1227                 printf(" LINT%u %s\n", pin, masked ? "masked" : "unmasked");
1228         return (0);
1229 }
1230
1231 static int
1232 native_lapic_set_lvt_mode(u_int apic_id, u_int pin, u_int32_t mode)
1233 {
1234         struct lvt *lvt;
1235
1236         if (pin > APIC_LVT_MAX)
1237                 return (EINVAL);
1238         if (apic_id == APIC_ID_ALL) {
1239                 lvt = &lvts[pin];
1240                 if (bootverbose)
1241                         printf("lapic:");
1242         } else {
1243                 KASSERT(lapics[apic_id].la_present,
1244                     ("%s: missing APIC %u", __func__, apic_id));
1245                 lvt = &lapics[apic_id].la_lvts[pin];
1246                 lvt->lvt_active = 1;
1247                 if (bootverbose)
1248                         printf("lapic%u:", apic_id);
1249         }
1250         lvt->lvt_mode = mode;
1251         switch (mode) {
1252         case APIC_LVT_DM_NMI:
1253         case APIC_LVT_DM_SMI:
1254         case APIC_LVT_DM_INIT:
1255         case APIC_LVT_DM_EXTINT:
1256                 lvt->lvt_edgetrigger = 1;
1257                 lvt->lvt_activehi = 1;
1258                 if (mode == APIC_LVT_DM_EXTINT)
1259                         lvt->lvt_masked = 1;
1260                 else
1261                         lvt->lvt_masked = 0;
1262                 break;
1263         default:
1264                 panic("Unsupported delivery mode: 0x%x\n", mode);
1265         }
1266         if (bootverbose) {
1267                 printf(" Routing ");
1268                 switch (mode) {
1269                 case APIC_LVT_DM_NMI:
1270                         printf("NMI");
1271                         break;
1272                 case APIC_LVT_DM_SMI:
1273                         printf("SMI");
1274                         break;
1275                 case APIC_LVT_DM_INIT:
1276                         printf("INIT");
1277                         break;
1278                 case APIC_LVT_DM_EXTINT:
1279                         printf("ExtINT");
1280                         break;
1281                 }
1282                 printf(" -> LINT%u\n", pin);
1283         }
1284         return (0);
1285 }
1286
1287 static int
1288 native_lapic_set_lvt_polarity(u_int apic_id, u_int pin, enum intr_polarity pol)
1289 {
1290
1291         if (pin > APIC_LVT_MAX || pol == INTR_POLARITY_CONFORM)
1292                 return (EINVAL);
1293         if (apic_id == APIC_ID_ALL) {
1294                 lvts[pin].lvt_activehi = (pol == INTR_POLARITY_HIGH);
1295                 if (bootverbose)
1296                         printf("lapic:");
1297         } else {
1298                 KASSERT(lapics[apic_id].la_present,
1299                     ("%s: missing APIC %u", __func__, apic_id));
1300                 lapics[apic_id].la_lvts[pin].lvt_active = 1;
1301                 lapics[apic_id].la_lvts[pin].lvt_activehi =
1302                     (pol == INTR_POLARITY_HIGH);
1303                 if (bootverbose)
1304                         printf("lapic%u:", apic_id);
1305         }
1306         if (bootverbose)
1307                 printf(" LINT%u polarity: %s\n", pin,
1308                     pol == INTR_POLARITY_HIGH ? "high" : "low");
1309         return (0);
1310 }
1311
1312 static int
1313 native_lapic_set_lvt_triggermode(u_int apic_id, u_int pin,
1314      enum intr_trigger trigger)
1315 {
1316
1317         if (pin > APIC_LVT_MAX || trigger == INTR_TRIGGER_CONFORM)
1318                 return (EINVAL);
1319         if (apic_id == APIC_ID_ALL) {
1320                 lvts[pin].lvt_edgetrigger = (trigger == INTR_TRIGGER_EDGE);
1321                 if (bootverbose)
1322                         printf("lapic:");
1323         } else {
1324                 KASSERT(lapics[apic_id].la_present,
1325                     ("%s: missing APIC %u", __func__, apic_id));
1326                 lapics[apic_id].la_lvts[pin].lvt_edgetrigger =
1327                     (trigger == INTR_TRIGGER_EDGE);
1328                 lapics[apic_id].la_lvts[pin].lvt_active = 1;
1329                 if (bootverbose)
1330                         printf("lapic%u:", apic_id);
1331         }
1332         if (bootverbose)
1333                 printf(" LINT%u trigger: %s\n", pin,
1334                     trigger == INTR_TRIGGER_EDGE ? "edge" : "level");
1335         return (0);
1336 }
1337
/*
 * Adjust the TPR of the current CPU so that it blocks all interrupts below
 * the passed in vector.
 */
static void
lapic_set_tpr(u_int vector)
{
#ifdef CHEAP_TPR
        /* Write the priority directly, clobbering the whole register. */
        lapic_write32(LAPIC_TPR, vector);
#else
        uint32_t tpr;

        /* Read-modify-write so the non-priority TPR bits are preserved. */
        tpr = lapic_read32(LAPIC_TPR) & ~APIC_TPR_PRIO;
        tpr |= vector;
        lapic_write32(LAPIC_TPR, tpr);
#endif
}
1355
/*
 * Signal end-of-interrupt to the local APIC, using the unfenced
 * register write variant.
 */
static void
native_lapic_eoi(void)
{

        lapic_write32_nofence(LAPIC_EOI, 0);
}
1362
/*
 * Common entry point for I/O interrupt vectors: map the vector to its
 * interrupt source and run the registered handlers.
 */
void
lapic_handle_intr(int vector, struct trapframe *frame)
{
        struct intsrc *isrc;

        /* The frame may have been written into a poisoned region. */
        kasan_mark(frame, sizeof(*frame), sizeof(*frame), 0);
        trap_check_kstack();

        /* Translate this CPU's IDT vector into the owning IRQ. */
        isrc = intr_lookup_source(apic_idt_to_irq(PCPU_GET(apic_id),
            vector));
        intr_execute_handlers(isrc, frame);
}
1376
/*
 * Interrupt handler for the local APIC timer: acknowledge the
 * interrupt, account the tick and invoke the eventtimer callback.
 */
void
lapic_handle_timer(struct trapframe *frame)
{
        struct lapic *la;
        struct trapframe *oldframe;
        struct thread *td;

        /* Send EOI first thing. */
        lapic_eoi();

        /* The frame may have been written into a poisoned region. */
        kasan_mark(frame, sizeof(*frame), sizeof(*frame), 0);
        trap_check_kstack();

#if defined(SMP) && !defined(SCHED_ULE)
        /*
         * Don't do any accounting for the disabled HTT cores, since it
         * will provide misleading numbers for the userland.
         *
         * No locking is necessary here, since even if we lose the race
         * when hlt_cpus_mask changes it is not a big deal, really.
         *
         * Don't do that for ULE, since ULE doesn't consider hlt_cpus_mask
         * and unlike other schedulers it actually schedules threads to
         * those CPUs.
         */
        if (CPU_ISSET(PCPU_GET(cpuid), &hlt_cpus_mask))
                return;
#endif

        /* Look up our local APIC structure for the tick counters. */
        la = &lapics[PCPU_GET(apic_id)];
        (*la->la_timer_count)++;
        critical_enter();
        if (lapic_et.et_active) {
                /*
                 * Expose the interrupted frame to the callback through
                 * td_intr_frame, saving and restoring the previous value
                 * to cope with nested interrupts.
                 */
                td = curthread;
                td->td_intr_nesting_level++;
                oldframe = td->td_intr_frame;
                td->td_intr_frame = frame;
                lapic_et.et_event_cb(&lapic_et, lapic_et.et_arg);
                td->td_intr_frame = oldframe;
                td->td_intr_nesting_level--;
        }
        critical_exit();
}
1422
1423 static void
1424 lapic_timer_set_divisor(u_int divisor)
1425 {
1426
1427         KASSERT(powerof2(divisor), ("lapic: invalid divisor %u", divisor));
1428         KASSERT(ffs(divisor) <= nitems(lapic_timer_divisors),
1429                 ("lapic: invalid divisor %u", divisor));
1430         lapic_write32(LAPIC_DCR_TIMER, lapic_timer_divisors[ffs(divisor) - 1]);
1431 }
1432
/*
 * Arm the timer in one-shot mode for la_timer_period ticks with the
 * timer interrupt unmasked.
 */
static void
lapic_timer_oneshot(struct lapic *la)
{
        uint32_t value;

        value = la->lvt_timer_base;
        value &= ~(APIC_LVTT_TM | APIC_LVT_M);  /* clear mode and mask bits */
        value |= APIC_LVTT_TM_ONE_SHOT;
        la->lvt_timer_last = value;
        /* Program the LVT first; writing the initial count starts it. */
        lapic_write32(LAPIC_LVT_TIMER, value);
        lapic_write32(LAPIC_ICR_TIMER, la->la_timer_period);
}
1445
/*
 * Arm the timer in one-shot mode for 'count' ticks with the timer
 * interrupt masked.  Used during calibration, when only the
 * current-count register is sampled and no interrupt is wanted.
 */
static void
lapic_timer_oneshot_nointr(struct lapic *la, uint32_t count)
{
        uint32_t value;

        value = la->lvt_timer_base;
        value &= ~APIC_LVTT_TM;
        value |= APIC_LVTT_TM_ONE_SHOT | APIC_LVT_M;    /* keep it masked */
        la->lvt_timer_last = value;
        /* Program the LVT first; writing the initial count starts it. */
        lapic_write32(LAPIC_LVT_TIMER, value);
        lapic_write32(LAPIC_ICR_TIMER, count);
}
1458
/*
 * Arm the timer in periodic mode with a reload value of
 * la_timer_period ticks and the timer interrupt unmasked.
 */
static void
lapic_timer_periodic(struct lapic *la)
{
        uint32_t value;

        value = la->lvt_timer_base;
        value &= ~(APIC_LVTT_TM | APIC_LVT_M);  /* clear mode and mask bits */
        value |= APIC_LVTT_TM_PERIODIC;
        la->lvt_timer_last = value;
        /* Program the LVT first; writing the initial count starts it. */
        lapic_write32(LAPIC_LVT_TIMER, value);
        lapic_write32(LAPIC_ICR_TIMER, la->la_timer_period);
}
1471
/*
 * Arm the timer in TSC-deadline mode to fire la_timer_period TSC
 * ticks from now.
 */
static void
lapic_timer_deadline(struct lapic *la)
{
        uint32_t value;

        value = la->lvt_timer_base;
        value &= ~(APIC_LVTT_TM | APIC_LVT_M);
        value |= APIC_LVTT_TM_TSCDLT;
        /* Only rewrite the LVT when it actually changed. */
        if (value != la->lvt_timer_last) {
                la->lvt_timer_last = value;
                lapic_write32_nofence(LAPIC_LVT_TIMER, value);
                /* Order the MMIO LVT write before the deadline MSR write. */
                if (!x2apic_mode)
                        mfence();
        }
        wrmsr(MSR_TSC_DEADLINE, la->la_timer_period + rdtsc());
}
1488
/* Disarm the local timer (clear the TSC deadline or mask the LVT). */
static void
lapic_timer_stop(struct lapic *la)
{
        uint32_t value;

        if (la->la_timer_mode == LAT_MODE_DEADLINE) {
                /* Writing a deadline of 0 disarms TSC-deadline mode. */
                wrmsr(MSR_TSC_DEADLINE, 0);
                mfence();
        } else {
                /* Mask the timer LVT so it cannot fire. */
                value = la->lvt_timer_base;
                value &= ~APIC_LVTT_TM;
                value |= APIC_LVT_M;
                la->lvt_timer_last = value;
                lapic_write32(LAPIC_LVT_TIMER, value);
        }
}
1505
/*
 * Interrupt handler for corrected machine check (CMC) interrupts:
 * acknowledge the interrupt, then let the MCA code scan the banks.
 */
void
lapic_handle_cmc(void)
{
        trap_check_kstack();

        lapic_eoi();
        cmc_intr();
}
1514
/*
 * Called from the mca_init() to activate the CMC interrupt if this CPU is
 * responsible for monitoring any MC banks for CMC events.  Since mca_init()
 * is called prior to lapic_setup() during boot, this just needs to unmask
 * this CPU's LVT_CMCI entry.
 */
static void
native_lapic_enable_cmc(void)
{
        u_int apic_id;

#ifdef DEV_ATPIC
        /* No local APIC present: nothing to unmask. */
        if (!x2apic_mode && lapic_map == NULL)
                return;
#endif
        apic_id = PCPU_GET(apic_id);
        KASSERT(lapics[apic_id].la_present,
            ("%s: missing APIC %u", __func__, apic_id));
        /* lapic_setup() programs the LVT register from these fields. */
        lapics[apic_id].la_lvts[APIC_LVT_CMCI].lvt_masked = 0;
        lapics[apic_id].la_lvts[APIC_LVT_CMCI].lvt_active = 1;
}
1536
1537 static int
1538 native_lapic_enable_mca_elvt(void)
1539 {
1540         u_int apic_id;
1541         uint32_t value;
1542         int elvt_count;
1543
1544 #ifdef DEV_ATPIC
1545         if (lapic_map == NULL)
1546                 return (-1);
1547 #endif
1548
1549         apic_id = PCPU_GET(apic_id);
1550         KASSERT(lapics[apic_id].la_present,
1551             ("%s: missing APIC %u", __func__, apic_id));
1552         elvt_count = amd_read_elvt_count();
1553         if (elvt_count <= APIC_ELVT_MCA)
1554                 return (-1);
1555
1556         value = lapic_read32(LAPIC_EXT_LVT0 + APIC_ELVT_MCA);
1557         if ((value & APIC_LVT_M) == 0) {
1558                 if (bootverbose)
1559                         printf("AMD MCE Thresholding Extended LVT is already active\n");
1560                 return (APIC_ELVT_MCA);
1561         }
1562         lapics[apic_id].la_elvts[APIC_ELVT_MCA].lvt_masked = 0;
1563         lapics[apic_id].la_elvts[APIC_ELVT_MCA].lvt_active = 1;
1564         return (APIC_ELVT_MCA);
1565 }
1566
/* Interrupt handler for the local APIC error interrupt. */
void
lapic_handle_error(void)
{
        uint32_t esr;

        trap_check_kstack();

        /*
         * Read the contents of the error status register.  Write to
         * the register first before reading from it to force the APIC
         * to update its value to indicate any errors that have
         * occurred since the previous write to the register.
         */
        lapic_write32(LAPIC_ESR, 0);
        esr = lapic_read32(LAPIC_ESR);

        printf("CPU%d: local APIC error 0x%x\n", PCPU_GET(cpuid), esr);
        lapic_eoi();
}
1586
1587 static u_int
1588 native_apic_cpuid(u_int apic_id)
1589 {
1590 #ifdef SMP
1591         return apic_cpuids[apic_id];
1592 #else
1593         return 0;
1594 #endif
1595 }
1596
1597 /* Request a free IDT vector to be used by the specified IRQ. */
1598 static u_int
1599 native_apic_alloc_vector(u_int apic_id, u_int irq)
1600 {
1601         u_int vector;
1602
1603         KASSERT(irq < num_io_irqs, ("Invalid IRQ %u", irq));
1604
1605         /*
1606          * Search for a free vector.  Currently we just use a very simple
1607          * algorithm to find the first free vector.
1608          */
1609         mtx_lock_spin(&icu_lock);
1610         for (vector = 0; vector < APIC_NUM_IOINTS; vector++) {
1611                 if (lapics[apic_id].la_ioint_irqs[vector] != IRQ_FREE)
1612                         continue;
1613                 lapics[apic_id].la_ioint_irqs[vector] = irq;
1614                 mtx_unlock_spin(&icu_lock);
1615                 return (vector + APIC_IO_INTS);
1616         }
1617         mtx_unlock_spin(&icu_lock);
1618         return (0);
1619 }
1620
/*
 * Request 'count' free contiguous IDT vectors to be used by 'count'
 * IRQs.  'count' must be a power of two and the vectors will be
 * aligned on a boundary of 'align'.  If the request cannot be
 * satisfied, 0 is returned.
 */
static u_int
native_apic_alloc_vectors(u_int apic_id, u_int *irqs, u_int count, u_int align)
{
        u_int first, run, vector;

        KASSERT(powerof2(count), ("bad count"));
        KASSERT(powerof2(align), ("bad align"));
        KASSERT(align >= count, ("align < count"));
#ifdef INVARIANTS
        for (run = 0; run < count; run++)
                KASSERT(irqs[run] < num_io_irqs, ("Invalid IRQ %u at index %u",
                    irqs[run], run));
#endif

        /*
         * Search for 'count' free vectors.  As with apic_alloc_vector(),
         * this just uses a simple first fit algorithm.
         */
        run = 0;
        first = 0;
        mtx_lock_spin(&icu_lock);
        for (vector = 0; vector < APIC_NUM_IOINTS; vector++) {
                /* Vector is in use, end run. */
                if (lapics[apic_id].la_ioint_irqs[vector] != IRQ_FREE) {
                        run = 0;
                        first = 0;
                        continue;
                }

                /* Start a new run if run == 0 and vector is aligned. */
                if (run == 0) {
                        if ((vector & (align - 1)) != 0)
                                continue;
                        first = vector;
                }
                run++;

                /* Keep looping if the run isn't long enough yet. */
                if (run < count)
                        continue;

                /* Found a run, assign IRQs and return the first vector. */
                /* (Note: 'vector' is reused here as an index into the run.) */
                for (vector = 0; vector < count; vector++)
                        lapics[apic_id].la_ioint_irqs[first + vector] =
                            irqs[vector];
                mtx_unlock_spin(&icu_lock);
                return (first + APIC_IO_INTS);
        }
        mtx_unlock_spin(&icu_lock);
        printf("APIC: Couldn't find APIC vectors for %u IRQs\n", count);
        return (0);
}
1679
/*
 * Enable a vector for a particular apic_id.  Since all lapics share idt
 * entries and ioint_handlers this enables the vector on all lapics.  lapics
 * which do not have the vector configured would report spurious interrupts
 * should it fire.
 */
static void
native_apic_enable_vector(u_int apic_id, u_int vector)
{

        KASSERT(vector != IDT_SYSCALL, ("Attempt to overwrite syscall entry"));
        KASSERT(ioint_handlers[vector / 32] != NULL,
            ("No ISR handler for vector %u", vector));
#ifdef KDTRACE_HOOKS
        KASSERT(vector != IDT_DTRACE_RET,
            ("Attempt to overwrite DTrace entry"));
#endif
        /* Install the (PTI or non-PTI) handler stub for this vector group. */
        setidt(vector, (pti ? ioint_pti_handlers : ioint_handlers)[vector / 32],
            SDT_APIC, SEL_KPL, GSEL_APIC);
}
1700
/*
 * Disable a vector.  Currently only performs sanity checks: the IDT
 * entry is deliberately left in place (see the 'notyet' block below)
 * because other CPUs may still have a valid vector at this offset.
 */
static void
native_apic_disable_vector(u_int apic_id, u_int vector)
{

        KASSERT(vector != IDT_SYSCALL, ("Attempt to overwrite syscall entry"));
#ifdef KDTRACE_HOOKS
        KASSERT(vector != IDT_DTRACE_RET,
            ("Attempt to overwrite DTrace entry"));
#endif
        KASSERT(ioint_handlers[vector / 32] != NULL,
            ("No ISR handler for vector %u", vector));
#ifdef notyet
        /*
         * We can not currently clear the idt entry because other cpus
         * may have a valid vector at this offset.
         */
        setidt(vector, pti ? &IDTVEC(rsvd_pti) : &IDTVEC(rsvd), SDT_APIC,
            SEL_KPL, GSEL_APIC);
#endif
}
1721
/* Release an APIC vector when it's no longer in use. */
static void
native_apic_free_vector(u_int apic_id, u_int vector, u_int irq)
{
        struct thread *td;

        KASSERT(vector >= APIC_IO_INTS && vector != IDT_SYSCALL &&
            vector <= APIC_IO_INTS + APIC_NUM_IOINTS,
            ("Vector %u does not map to an IRQ line", vector));
        KASSERT(irq < num_io_irqs, ("Invalid IRQ %u", irq));
        KASSERT(lapics[apic_id].la_ioint_irqs[vector - APIC_IO_INTS] ==
            irq, ("IRQ mismatch"));
#ifdef KDTRACE_HOOKS
        KASSERT(vector != IDT_DTRACE_RET,
            ("Attempt to overwrite DTrace entry"));
#endif

        /*
         * Bind us to the cpu that owned the vector before freeing it so
         * we don't lose an interrupt delivery race.
         */
        td = curthread;
        if (!rebooting) {
                thread_lock(td);
                if (sched_is_bound(td))
                        panic("apic_free_vector: Thread already bound.\n");
                sched_bind(td, apic_cpuid(apic_id));
                thread_unlock(td);
        }
        /* Release the slot while running on the owning CPU. */
        mtx_lock_spin(&icu_lock);
        lapics[apic_id].la_ioint_irqs[vector - APIC_IO_INTS] = IRQ_FREE;
        mtx_unlock_spin(&icu_lock);
        /* Drop the temporary CPU binding again. */
        if (!rebooting) {
                thread_lock(td);
                sched_unbind(td);
                thread_unlock(td);
        }
}
1760
1761 /* Map an IDT vector (APIC) to an IRQ (interrupt source). */
1762 static u_int
1763 apic_idt_to_irq(u_int apic_id, u_int vector)
1764 {
1765         int irq;
1766
1767         KASSERT(vector >= APIC_IO_INTS && vector != IDT_SYSCALL &&
1768             vector <= APIC_IO_INTS + APIC_NUM_IOINTS,
1769             ("Vector %u does not map to an IRQ line", vector));
1770 #ifdef KDTRACE_HOOKS
1771         KASSERT(vector != IDT_DTRACE_RET,
1772             ("Attempt to overwrite DTrace entry"));
1773 #endif
1774         irq = lapics[apic_id].la_ioint_irqs[vector - APIC_IO_INTS];
1775         if (irq < 0)
1776                 irq = 0;
1777         return (irq);
1778 }
1779
1780 #ifdef DDB
/*
 * Dump data about APIC IDT vector mappings.
 *
 * The DDB "show apic" command walks every present local APIC and
 * prints, for each allocated I/O interrupt vector, the IRQ bound to
 * it.  Modifier "v" additionally dumps the interrupt event for each
 * IRQ; "vv" includes per-handler detail.
 */
DB_SHOW_COMMAND(apic, db_show_apic)
{
	struct intsrc *isrc;
	int i, verbose;
	u_int apic_id;
	u_int irq;

	if (strcmp(modif, "vv") == 0)
		verbose = 2;
	else if (strcmp(modif, "v") == 0)
		verbose = 1;
	else
		verbose = 0;
	for (apic_id = 0; apic_id <= max_apic_id; apic_id++) {
		if (lapics[apic_id].la_present == 0)
			continue;
		db_printf("Interrupts bound to lapic %u\n", apic_id);
		for (i = 0; i < APIC_NUM_IOINTS + 1 && !db_pager_quit; i++) {
			irq = lapics[apic_id].la_ioint_irqs[i];
			/* Skip free slots and pseudo-IRQ markers. */
			if (irq == IRQ_FREE || irq == IRQ_SYSCALL)
				continue;
#ifdef KDTRACE_HOOKS
			if (irq == IRQ_DTRACE_RET)
				continue;
#endif
#ifdef XENHVM
			if (irq == IRQ_EVTCHN)
				continue;
#endif
			db_printf("vec 0x%2x -> ", i + APIC_IO_INTS);
			if (irq == IRQ_TIMER)
				db_printf("lapic timer\n");
			else if (irq < num_io_irqs) {
				isrc = intr_lookup_source(irq);
				if (isrc == NULL || verbose == 0)
					db_printf("IRQ %u\n", irq);
				else
					db_dump_intr_event(isrc->is_event,
					    verbose == 2);
			} else
				db_printf("IRQ %u ???\n", irq);
		}
	}
}
1828
/*
 * Print the indices of the bits set in a 32-bit APIC register image.
 *
 * For each set bit in "v" its vector number ("base" + bit index) is
 * printed in hex.  "prefix" is printed once, before the first number,
 * and a newline is emitted only if at least one bit was set.
 */
static void
dump_mask(const char *prefix, uint32_t v, int base)
{
	int i, first;

	first = 1;
	for (i = 0; i < 32; i++)
		/*
		 * Shift an unsigned constant: "1 << 31" on a 32-bit int
		 * is undefined behavior (signed overflow in left shift).
		 */
		if (v & (1U << i)) {
			if (first) {
				db_printf("%s:", prefix);
				first = 0;
			}
			db_printf(" %02x", base + i);
		}
	if (!first)
		db_printf("\n");
}
1846
/*
 * Show info from the lapic regs for this CPU.
 *
 * The DDB "show lapic" command prints the local APIC's ID, version,
 * maximum LVT index, spurious-vector register state, and task priority,
 * followed by the full ISR, TMR, and IRR bitmaps (8 x 32 bits each).
 */
DB_SHOW_COMMAND(lapic, db_show_lapic)
{
	uint32_t v;

	db_printf("lapic ID = %d\n", lapic_id());
	v = lapic_read32(LAPIC_VERSION);
	db_printf("version  = %d.%d\n", (v & APIC_VER_VERSION) >> 4,
	    v & 0xf);
	db_printf("max LVT  = %d\n", (v & APIC_VER_MAXLVT) >> MAXLVTSHIFT);
	v = lapic_read32(LAPIC_SVR);
	db_printf("SVR      = %02x (%s)\n", v & APIC_SVR_VECTOR,
	    v & APIC_SVR_ENABLE ? "enabled" : "disabled");
	db_printf("TPR      = %02x\n", lapic_read32(LAPIC_TPR));

/*
 * Dump one 32-bit slice of an ISR/TMR/IRR register bank; "index"
 * selects vectors index*32 .. index*32+31.
 */
#define dump_field(prefix, regn, index)                                 \
	dump_mask(__XSTRING(prefix ## index),                           \
	    lapic_read32(LAPIC_ ## regn ## index),                      \
	    index * 32)

	db_printf("In-service Interrupts:\n");
	dump_field(isr, ISR, 0);
	dump_field(isr, ISR, 1);
	dump_field(isr, ISR, 2);
	dump_field(isr, ISR, 3);
	dump_field(isr, ISR, 4);
	dump_field(isr, ISR, 5);
	dump_field(isr, ISR, 6);
	dump_field(isr, ISR, 7);

	db_printf("TMR Interrupts:\n");
	dump_field(tmr, TMR, 0);
	dump_field(tmr, TMR, 1);
	dump_field(tmr, TMR, 2);
	dump_field(tmr, TMR, 3);
	dump_field(tmr, TMR, 4);
	dump_field(tmr, TMR, 5);
	dump_field(tmr, TMR, 6);
	dump_field(tmr, TMR, 7);

	db_printf("IRR Interrupts:\n");
	dump_field(irr, IRR, 0);
	dump_field(irr, IRR, 1);
	dump_field(irr, IRR, 2);
	dump_field(irr, IRR, 3);
	dump_field(irr, IRR, 4);
	dump_field(irr, IRR, 5);
	dump_field(irr, IRR, 6);
	dump_field(irr, IRR, 7);

#undef dump_field
}
1899 #endif
1900
/*
 * APIC probing support code.  This includes code to manage enumerators.
 */

/* List of all registered APIC enumerators. */
static SLIST_HEAD(, apic_enumerator) enumerators =
	SLIST_HEAD_INITIALIZER(enumerators);
/* Enumerator chosen by apic_init(), or NULL if none probed successfully. */
static struct apic_enumerator *best_enum;
1908
1909 void
1910 apic_register_enumerator(struct apic_enumerator *enumerator)
1911 {
1912 #ifdef INVARIANTS
1913         struct apic_enumerator *apic_enum;
1914
1915         SLIST_FOREACH(apic_enum, &enumerators, apic_next) {
1916                 if (apic_enum == enumerator)
1917                         panic("%s: Duplicate register of %s", __func__,
1918                             enumerator->apic_name);
1919         }
1920 #endif
1921         SLIST_INSERT_HEAD(&enumerators, enumerator, apic_next);
1922 }
1923
/*
 * We have to look for CPUs very, very early because certain subsystems
 * want to know how many CPUs we have extremely early on in the boot
 * process.
 */
static void
apic_init(void *dummy __unused)
{
	struct apic_enumerator *enumerator;
	int retval, best;

	/* We only support built in local APICs. */
	if (!(cpu_feature & CPUID_APIC))
		return;

	/* Don't probe if APIC mode is disabled. */
	if (resource_disabled("apic", 0))
		return;

	/*
	 * Probe all the enumerators to find the best match.  A probe
	 * returning a positive value means "not present"; among the
	 * rest, the value closest to zero wins.
	 */
	best_enum = NULL;
	best = 0;
	SLIST_FOREACH(enumerator, &enumerators, apic_next) {
		retval = enumerator->apic_probe();
		if (retval > 0)
			continue;
		if (best_enum == NULL || best < retval) {
			best_enum = enumerator;
			best = retval;
		}
	}
	if (best_enum == NULL) {
		if (bootverbose)
			printf("APIC: Could not find any APICs.\n");
#ifndef DEV_ATPIC
		/* Without the legacy 8259A driver there is no fallback. */
		panic("running without device atpic requires a local APIC");
#endif
		return;
	}

	if (bootverbose)
		printf("APIC: Using the %s enumerator.\n",
		    best_enum->apic_name);

#ifdef I686_CPU
	/*
	 * To work around an errata, we disable the local APIC on some
	 * CPUs during early startup.  We need to turn the local APIC back
	 * on on such CPUs now.
	 */
	ppro_reenable_apic();
#endif

	/* Probe the CPUs in the system. */
	retval = best_enum->apic_probe_cpus();
	if (retval != 0)
		printf("%s: Failed to probe CPUs: returned %d\n",
		    best_enum->apic_name, retval);

}
SYSINIT(apic_init, SI_SUB_TUNABLES - 1, SI_ORDER_SECOND, apic_init, NULL);
1985
/*
 * Setup the local APIC.  We have to do this prior to starting up the APs
 * in the SMP case.
 */
static void
apic_setup_local(void *dummy __unused)
{
	int retval;

	if (best_enum == NULL)
		return;

	/* Per-APIC state table, indexed by APIC ID (0..max_apic_id). */
	lapics = malloc(sizeof(*lapics) * (max_apic_id + 1), M_LAPIC,
	    M_WAITOK | M_ZERO);

	/* Initialize the local APIC. */
	retval = best_enum->apic_setup_local();
	if (retval != 0)
		printf("%s: Failed to setup the local APIC: returned %d\n",
		    best_enum->apic_name, retval);
}
SYSINIT(apic_setup_local, SI_SUB_CPU, SI_ORDER_SECOND, apic_setup_local, NULL);
2008
/*
 * Setup the I/O APICs.  Also finishes BSP local APIC setup and enables
 * the MSI pseudo-PIC once the I/O APICs have been programmed.
 */
static void
apic_setup_io(void *dummy __unused)
{
	int retval;

	if (best_enum == NULL)
		return;

	/*
	 * Local APIC must be registered before other PICs and pseudo PICs
	 * for proper suspend/resume order.
	 */
	intr_register_pic(&lapic_pic);

	retval = best_enum->apic_setup_io();
	if (retval != 0)
		printf("%s: Failed to setup I/O APICs: returned %d\n",
		    best_enum->apic_name, retval);

	/*
	 * Finish setting up the local APIC on the BSP once we know
	 * how to properly program the LINT pins.  In particular, this
	 * enables the EOI suppression mode, if LAPIC supports it and
	 * user did not disable the mode.
	 */
	lapic_setup(1);
	if (bootverbose)
		lapic_dump("BSP");

	/* Enable the MSI "pic". */
	init_ops.msi_init();

#ifdef XENHVM
	/* Reserve IRQs for Xen event channels when running as a guest. */
	xen_intr_alloc_irqs();
#endif
}
SYSINIT(apic_setup_io, SI_SUB_INTR, SI_ORDER_THIRD, apic_setup_io, NULL);
2049
2050 #ifdef SMP
2051 /*
2052  * Inter Processor Interrupt functions.  The lapic_ipi_*() functions are
2053  * private to the MD code.  The public interface for the rest of the
2054  * kernel is defined in mp_machdep.c.
2055  */
2056
2057 /*
2058  * Wait delay microseconds for IPI to be sent.  If delay is -1, we
2059  * wait forever.
2060  */
2061 static int
2062 native_lapic_ipi_wait(int delay)
2063 {
2064         uint64_t rx;
2065
2066         /* LAPIC_ICR.APIC_DELSTAT_MASK is undefined in x2APIC mode */
2067         if (x2apic_mode)
2068                 return (1);
2069
2070         for (rx = 0; delay == -1 || rx < lapic_ipi_wait_mult * delay; rx++) {
2071                 if ((lapic_read_icr_lo() & APIC_DELSTAT_MASK) ==
2072                     APIC_DELSTAT_IDLE)
2073                         return (1);
2074                 ia32_pause();
2075         }
2076         return (0);
2077 }
2078
/*
 * Write a raw request to the local APIC ICR.  "icrlo" supplies the low
 * 32 bits (vector, delivery mode, etc.) and must have its reserved bits
 * clear.  "dest" is consulted only when icrlo selects the explicit
 * destination-field mode (APIC_DEST_DESTFLD).
 */
static void
native_lapic_ipi_raw(register_t icrlo, u_int dest)
{
	uint32_t icrhi;

	/* XXX: Need more sanity checking of icrlo? */
	KASSERT(x2apic_mode || lapic_map != NULL,
	    ("%s called too early", __func__));
	KASSERT(x2apic_mode ||
	    (dest & ~(APIC_ID_MASK >> APIC_ID_SHIFT)) == 0,
	    ("%s: invalid dest field", __func__));
	KASSERT((icrlo & APIC_ICRLO_RESV_MASK) == 0,
	    ("%s: reserved bits set in ICR LO register", __func__));

	if ((icrlo & APIC_DEST_MASK) == APIC_DEST_DESTFLD) {
		if (x2apic_mode)
			icrhi = dest;
		else
			/* xAPIC keeps the destination ID in ICR_HI's ID field. */
			icrhi = dest << APIC_ID_SHIFT;
		lapic_write_icr(icrhi, icrlo);
	} else {
		lapic_write_icr_lo(icrlo);
	}
}
2103
2104 #ifdef DETECT_DEADLOCK
2105 #define AFTER_SPIN      50
2106 #endif
2107
/*
 * Send an IPI with the given vector to "dest", which is either a
 * specific APIC ID or one of the APIC_IPI_DEST_* shorthand values.
 * Vectors at or above IPI_NMI_FIRST are pseudo-vectors requesting NMI
 * delivery mode instead of a fixed vector.
 */
static void
native_lapic_ipi_vectored(u_int vector, int dest)
{
	register_t icrlo, destfield;

	KASSERT((vector & ~APIC_VECTOR_MASK) == 0,
	    ("%s: invalid vector %d", __func__, vector));

	destfield = 0;
	switch (dest) {
	case APIC_IPI_DEST_SELF:
		/* x2APIC has a dedicated fast path for self-IPIs. */
		if (x2apic_mode && vector < IPI_NMI_FIRST) {
			lapic_write_self_ipi(vector);
			return;
		}
		icrlo = APIC_DEST_SELF;
		break;
	case APIC_IPI_DEST_ALL:
		icrlo = APIC_DEST_ALLISELF;
		break;
	case APIC_IPI_DEST_OTHERS:
		icrlo = APIC_DEST_ALLESELF;
		break;
	default:
		icrlo = 0;
		KASSERT(x2apic_mode ||
		    (dest & ~(APIC_ID_MASK >> APIC_ID_SHIFT)) == 0,
		    ("%s: invalid destination 0x%x", __func__, dest));
		destfield = dest;
	}

	/*
	 * NMI IPIs are just fake vectors used to send a NMI.  Use special rules
	 * regarding NMIs if passed, otherwise specify the vector.
	 */
	if (vector >= IPI_NMI_FIRST)
		icrlo |= APIC_DELMODE_NMI;
	else
		icrlo |= vector | APIC_DELMODE_FIXED;
	icrlo |= APIC_DESTMODE_PHY | APIC_TRIGMOD_EDGE | APIC_LEVEL_ASSERT;

	/* Wait for an earlier IPI to finish. */
	if (!lapic_ipi_wait(lapic_ds_idle_timeout)) {
		if (KERNEL_PANICKED())
			return;
		else
			panic("APIC: Previous IPI is stuck");
	}

	lapic_ipi_raw(icrlo, destfield);

#ifdef DETECT_DEADLOCK
	/* Wait for IPI to be delivered. */
	if (!lapic_ipi_wait(AFTER_SPIN)) {
#ifdef needsattention
		/*
		 * XXX FIXME:
		 *
		 * The above function waits for the message to actually be
		 * delivered.  It breaks out after an arbitrary timeout
		 * since the message should eventually be delivered (at
		 * least in theory) and that if it wasn't we would catch
		 * the failure with the check above when the next IPI is
		 * sent.
		 *
		 * We could skip this wait entirely, EXCEPT it probably
		 * protects us from other routines that assume that the
		 * message was delivered and acted upon when this function
		 * returns.
		 */
		printf("APIC: IPI might be stuck\n");
#else /* !needsattention */
		/* Wait until message is sent without a timeout. */
		while (lapic_read_icr_lo() & APIC_DELSTAT_PEND)
			ia32_pause();
#endif /* needsattention */
	}
#endif /* DETECT_DEADLOCK */
}
2187
2188 #endif /* SMP */
2189
/*
 * Since the IDT is shared by all CPUs the IPI slot update needs to be globally
 * visible.
 *
 * Consider the case where an IPI is generated immediately after allocation:
 *     vector = lapic_ipi_alloc(ipifunc);
 *     ipi_selected(other_cpus, vector);
 *
 * In xAPIC mode a write to ICR_LO has serializing semantics because the
 * APIC page is mapped as an uncached region. In x2APIC mode there is an
 * explicit 'mfence' before the ICR MSR is written. Therefore in both cases
 * the IDT slot update is globally visible before the IPI is delivered.
 */

/*
 * Allocate a vector in the dynamic IPI range and install "ipifunc" as
 * its handler.  A slot is considered free when its IDT entry still
 * points at the reserved-vector stub.  Returns the vector number, or
 * -1 if the dynamic range is exhausted.
 */
static int
native_lapic_ipi_alloc(inthand_t *ipifunc)
{
	struct gate_descriptor *ip;
	long func;
	int idx, vector;

	KASSERT(ipifunc != &IDTVEC(rsvd) && ipifunc != &IDTVEC(rsvd_pti),
	    ("invalid ipifunc %p", ipifunc));

	vector = -1;
	mtx_lock_spin(&icu_lock);
	for (idx = IPI_DYN_FIRST; idx <= IPI_DYN_LAST; idx++) {
		ip = &idt[idx];
		/* Reassemble the handler address from the gate descriptor. */
		func = (ip->gd_hioffset << 16) | ip->gd_looffset;
#ifdef __i386__
		/* Undo the displacement setidt() applies on i386. */
		func -= setidt_disp;
#endif
		if ((!pti && func == (uintptr_t)&IDTVEC(rsvd)) ||
		    (pti && func == (uintptr_t)&IDTVEC(rsvd_pti))) {
			vector = idx;
			setidt(vector, ipifunc, SDT_APIC, SEL_KPL, GSEL_APIC);
			break;
		}
	}
	mtx_unlock_spin(&icu_lock);
	return (vector);
}
2231
/*
 * Release a dynamically allocated IPI vector, pointing its IDT entry
 * back at the reserved-vector stub.  Panics (under INVARIANTS) if the
 * vector is outside the dynamic range or was not actually allocated.
 */
static void
native_lapic_ipi_free(int vector)
{
	struct gate_descriptor *ip;
	long func;

	KASSERT(vector >= IPI_DYN_FIRST && vector <= IPI_DYN_LAST,
	    ("%s: invalid vector %d", __func__, vector));

	mtx_lock_spin(&icu_lock);
	ip = &idt[vector];
	/* Reassemble the currently installed handler address. */
	func = (ip->gd_hioffset << 16) | ip->gd_looffset;
#ifdef __i386__
	/* Undo the displacement setidt() applies on i386. */
	func -= setidt_disp;
#endif
	KASSERT(func != (uintptr_t)&IDTVEC(rsvd) &&
	    func != (uintptr_t)&IDTVEC(rsvd_pti),
	    ("invalid idtfunc %#lx", func));
	setidt(vector, pti ? &IDTVEC(rsvd_pti) : &IDTVEC(rsvd), SDT_APIC,
	    SEL_KPL, GSEL_APIC);
	mtx_unlock_spin(&icu_lock);
}