]> CyberLeo.Net >> Repos - FreeBSD/FreeBSD.git/blob - sys/x86/x86/local_apic.c
x86: clean up empty lines in .c and .h files
[FreeBSD/FreeBSD.git] / sys / x86 / x86 / local_apic.c
1 /*-
2  * SPDX-License-Identifier: BSD-3-Clause
3  *
4  * Copyright (c) 1996, by Steve Passe
5  * All rights reserved.
6  * Copyright (c) 2003 John Baldwin <jhb@FreeBSD.org>
7  *
8  * Redistribution and use in source and binary forms, with or without
9  * modification, are permitted provided that the following conditions
10  * are met:
11  * 1. Redistributions of source code must retain the above copyright
12  *    notice, this list of conditions and the following disclaimer.
13  * 2. The name of the developer may NOT be used to endorse or promote products
14  *    derived from this software without specific prior written permission.
15  * 3. Neither the name of the author nor the names of any co-contributors
16  *    may be used to endorse or promote products derived from this software
17  *    without specific prior written permission.
18  *
19  * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
20  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
21  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
22  * ARE DISCLAIMED.  IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
23  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
24  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
25  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
26  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
27  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
28  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
29  * SUCH DAMAGE.
30  */
31
32 /*
33  * Local APIC support on Pentium and later processors.
34  */
35
36 #include <sys/cdefs.h>
37 __FBSDID("$FreeBSD$");
38
39 #include "opt_atpic.h"
40 #include "opt_hwpmc_hooks.h"
41
42 #include "opt_ddb.h"
43
44 #include <sys/param.h>
45 #include <sys/systm.h>
46 #include <sys/bus.h>
47 #include <sys/kernel.h>
48 #include <sys/lock.h>
49 #include <sys/malloc.h>
50 #include <sys/mutex.h>
51 #include <sys/pcpu.h>
52 #include <sys/proc.h>
53 #include <sys/sched.h>
54 #include <sys/smp.h>
55 #include <sys/sysctl.h>
56 #include <sys/timeet.h>
57
58 #include <vm/vm.h>
59 #include <vm/pmap.h>
60
61 #include <x86/apicreg.h>
62 #include <machine/clock.h>
63 #include <machine/cpufunc.h>
64 #include <machine/cputypes.h>
65 #include <machine/frame.h>
66 #include <machine/intr_machdep.h>
67 #include <x86/apicvar.h>
68 #include <x86/mca.h>
69 #include <machine/md_var.h>
70 #include <machine/smp.h>
71 #include <machine/specialreg.h>
72 #include <x86/init.h>
73
74 #ifdef DDB
75 #include <sys/interrupt.h>
76 #include <ddb/ddb.h>
77 #endif
78
/*
 * IDT gate type and code segment selector used when installing the
 * local APIC interrupt handlers; these differ between amd64 and i386.
 */
#ifdef __amd64__
#define SDT_APIC        SDT_SYSIGT
#define GSEL_APIC       0
#else
#define SDT_APIC        SDT_SYS386IGT
#define GSEL_APIC       GSEL(GCODE_SEL, SEL_KPL)
#endif

static MALLOC_DEFINE(M_LAPIC, "local_apic", "Local APIC items");

/* Sanity checks on IDT vectors. */
CTASSERT(APIC_IO_INTS + APIC_NUM_IOINTS == APIC_TIMER_INT);
CTASSERT(APIC_TIMER_INT < APIC_LOCAL_INTS);
CTASSERT(APIC_LOCAL_INTS == 240);
CTASSERT(IPI_STOP < APIC_SPURIOUS_INT);
94
/*
 * I/O interrupts use non-negative IRQ values.  These negative values
 * are used to mark unused IDT entries or IDT entries reserved for a
 * non-I/O interrupt in the la_ioint_irqs[] table.
 */
#define IRQ_FREE        -1      /* IDT vector is unused */
#define IRQ_TIMER       -2      /* reserved for the LAPIC timer */
#define IRQ_SYSCALL     -3      /* reserved for the syscall gate */
#define IRQ_DTRACE_RET  -4      /* reserved for the DTrace return probe */
#define IRQ_EVTCHN      -5      /* reserved for Xen event channels */
105
/* Programming modes for the local APIC timer. */
enum lat_timer_mode {
        LAT_MODE_UNDEF =        0,      /* timer not configured yet */
        LAT_MODE_PERIODIC =     1,      /* periodic interrupts */
        LAT_MODE_ONESHOT =      2,      /* single interrupt per arming */
        LAT_MODE_DEADLINE =     3,      /* TSC-deadline mode */
};
112
113 /*
114  * Support for local APICs.  Local APICs manage interrupts on each
115  * individual processor as opposed to I/O APICs which receive interrupts
116  * from I/O devices and then forward them on to the local APICs.
117  *
118  * Local APICs can also send interrupts to each other thus providing the
119  * mechanism for IPIs.
120  */
121
/*
 * Software description of one local vector table (LVT) entry;
 * lvt_mode_impl() translates this into the hardware register format.
 */
struct lvt {
        u_int lvt_edgetrigger:1;        /* 1 = edge, 0 = level trigger */
        u_int lvt_activehi:1;           /* 1 = active high, 0 = active low */
        u_int lvt_masked:1;             /* entry is masked */
        u_int lvt_active:1;             /* per-CPU override is in effect */
        u_int lvt_mode:16;              /* delivery mode (APIC_LVT_DM_*) */
        u_int lvt_vector:8;             /* IDT vector for fixed delivery */
};
130
/* Per-CPU local APIC state; lapics[] is indexed by APIC ID. */
struct lapic {
        struct lvt la_lvts[APIC_LVT_MAX + 1];   /* per-CPU LVT overrides */
        struct lvt la_elvts[APIC_ELVT_MAX + 1]; /* AMD extended LVT overrides */
        u_int la_id:8;                  /* local APIC ID */
        u_int la_cluster:4;             /* logical destination cluster */
        u_int la_cluster_id:2;          /* ID within the cluster */
        u_int la_present:1;             /* set once this APIC is registered */
        u_long *la_timer_count;         /* timer interrupt counter */
        uint64_t la_timer_period;       /* current timer period */
        enum lat_timer_mode la_timer_mode;      /* current timer mode */
        uint32_t lvt_timer_base;        /* template for the timer LVT */
        uint32_t lvt_timer_last;        /* last value written to timer LVT */
        /* Include IDT_SYSCALL to make indexing easier. */
        int la_ioint_irqs[APIC_NUM_IOINTS + 1];
} static *lapics;
146
/*
 * Global defaults for local APIC LVT entries.
 * Fields: edgetrigger, activehi, masked, active, mode, vector.
 */
static struct lvt lvts[APIC_LVT_MAX + 1] = {
        { 1, 1, 1, 1, APIC_LVT_DM_EXTINT, 0 },  /* LINT0: masked ExtINT */
        { 1, 1, 0, 1, APIC_LVT_DM_NMI, 0 },     /* LINT1: NMI */
        { 1, 1, 1, 1, APIC_LVT_DM_FIXED, APIC_TIMER_INT },      /* Timer */
        { 1, 1, 0, 1, APIC_LVT_DM_FIXED, APIC_ERROR_INT },      /* Error */
        { 1, 1, 1, 1, APIC_LVT_DM_NMI, 0 },     /* PMC */
        { 1, 1, 1, 1, APIC_LVT_DM_FIXED, APIC_THERMAL_INT },    /* Thermal */
        { 1, 1, 1, 1, APIC_LVT_DM_FIXED, APIC_CMC_INT },        /* CMCI */
};

/*
 * Global defaults for AMD local APIC ELVT entries.  All entries start
 * masked and inactive; per-CPU overrides enable them.
 */
static struct lvt elvts[APIC_ELVT_MAX + 1] = {
        { 1, 1, 1, 0, APIC_LVT_DM_FIXED, 0 },
        { 1, 1, 1, 0, APIC_LVT_DM_FIXED, APIC_CMC_INT },
        { 1, 1, 1, 0, APIC_LVT_DM_FIXED, 0 },
        { 1, 1, 1, 0, APIC_LVT_DM_FIXED, 0 },
};
165
/*
 * Interrupt handler entry points for the I/O interrupt vectors, one
 * per 32-vector block of the IDT.
 */
static inthand_t *ioint_handlers[] = {
        NULL,                   /* 0 - 31 */
        IDTVEC(apic_isr1),      /* 32 - 63 */
        IDTVEC(apic_isr2),      /* 64 - 95 */
        IDTVEC(apic_isr3),      /* 96 - 127 */
        IDTVEC(apic_isr4),      /* 128 - 159 */
        IDTVEC(apic_isr5),      /* 160 - 191 */
        IDTVEC(apic_isr6),      /* 192 - 223 */
        IDTVEC(apic_isr7),      /* 224 - 255 */
};

/* PTI (page-table isolation) variants of the entry points above. */
static inthand_t *ioint_pti_handlers[] = {
        NULL,                   /* 0 - 31 */
        IDTVEC(apic_isr1_pti),  /* 32 - 63 */
        IDTVEC(apic_isr2_pti),  /* 64 - 95 */
        IDTVEC(apic_isr3_pti),  /* 96 - 127 */
        IDTVEC(apic_isr4_pti),  /* 128 - 159 */
        IDTVEC(apic_isr5_pti),  /* 160 - 191 */
        IDTVEC(apic_isr6_pti),  /* 192 - 223 */
        IDTVEC(apic_isr7_pti),  /* 224 - 255 */
};

/* Timer divide configuration register values, divide-by-1 .. 128. */
static u_int32_t lapic_timer_divisors[] = {
        APIC_TDCR_1, APIC_TDCR_2, APIC_TDCR_4, APIC_TDCR_8, APIC_TDCR_16,
        APIC_TDCR_32, APIC_TDCR_64, APIC_TDCR_128
};
192
extern inthand_t IDTVEC(rsvd_pti), IDTVEC(rsvd);

volatile char *lapic_map;       /* xAPIC MMIO window; NULL in x2APIC mode */
vm_paddr_t lapic_paddr;         /* physical address of the LAPIC page */
int x2apic_mode;                /* non-zero when using the x2APIC MSRs */
int lapic_eoi_suppression;      /* non-zero when EOI suppression enabled */
static int lapic_timer_tsc_deadline;    /* use TSC-deadline timer mode */
static u_long lapic_timer_divisor, count_freq;
static struct eventtimer lapic_et;
#ifdef SMP
/* Busy-loop iterations per microsecond for xAPIC IPI-ack waits. */
static uint64_t lapic_ipi_wait_mult;
#endif
unsigned int max_apic_id;       /* highest APIC ID native_lapic_create() accepts */

SYSCTL_NODE(_hw, OID_AUTO, apic, CTLFLAG_RD | CTLFLAG_MPSAFE, 0,
    "APIC options");
SYSCTL_INT(_hw_apic, OID_AUTO, x2apic_mode, CTLFLAG_RD, &x2apic_mode, 0, "");
SYSCTL_INT(_hw_apic, OID_AUTO, eoi_suppression, CTLFLAG_RD,
    &lapic_eoi_suppression, 0, "");
SYSCTL_INT(_hw_apic, OID_AUTO, timer_tsc_deadline, CTLFLAG_RD,
    &lapic_timer_tsc_deadline, 0, "");

static void lapic_calibrate_initcount(struct lapic *la);
static void lapic_calibrate_deadline(struct lapic *la);
217
218 static uint32_t
219 lapic_read32(enum LAPIC_REGISTERS reg)
220 {
221         uint32_t res;
222
223         if (x2apic_mode) {
224                 res = rdmsr32(MSR_APIC_000 + reg);
225         } else {
226                 res = *(volatile uint32_t *)(lapic_map + reg * LAPIC_MEM_MUL);
227         }
228         return (res);
229 }
230
/*
 * Write a 32-bit register of the current CPU's local APIC.
 *
 * In x2APIC mode the MSR write is preceded by serializing fences so
 * it is not reordered with earlier memory accesses, matching the
 * ordering naturally provided by an xAPIC MMIO (uncached) write.
 */
static void
lapic_write32(enum LAPIC_REGISTERS reg, uint32_t val)
{

        if (x2apic_mode) {
                mfence();
                lfence();
                wrmsr(MSR_APIC_000 + reg, val);
        } else {
                *(volatile uint32_t *)(lapic_map + reg * LAPIC_MEM_MUL) = val;
        }
}
243
244 static void
245 lapic_write32_nofence(enum LAPIC_REGISTERS reg, uint32_t val)
246 {
247
248         if (x2apic_mode) {
249                 wrmsr(MSR_APIC_000 + reg, val);
250         } else {
251                 *(volatile uint32_t *)(lapic_map + reg * LAPIC_MEM_MUL) = val;
252         }
253 }
254
255 #ifdef SMP
256 static uint64_t
257 lapic_read_icr_lo(void)
258 {
259
260         return (lapic_read32(LAPIC_ICR_LO));
261 }
262
/*
 * Write both words of the interrupt command register.  In x2APIC mode
 * the ICR is a single 64-bit MSR written in one (fenced) operation.
 * In xAPIC mode the high and low halves are separate registers, so
 * interrupts are disabled around the pair to keep them consistent.
 */
static void
lapic_write_icr(uint32_t vhi, uint32_t vlo)
{
        register_t saveintr;
        uint64_t v;

        if (x2apic_mode) {
                v = ((uint64_t)vhi << 32) | vlo;
                mfence();
                wrmsr(MSR_APIC_000 + LAPIC_ICR_LO, v);
        } else {
                saveintr = intr_disable();
                lapic_write32(LAPIC_ICR_HI, vhi);
                lapic_write32(LAPIC_ICR_LO, vlo);
                intr_restore(saveintr);
        }
}
280
/*
 * Write only the low word of the ICR, leaving the high word
 * untouched.  The mfence orders the x2APIC MSR write after earlier
 * stores, as in lapic_write32().
 */
static void
lapic_write_icr_lo(uint32_t vlo)
{

        if (x2apic_mode) {
                mfence();
                wrmsr(MSR_APIC_000 + LAPIC_ICR_LO, vlo);
        } else {
                lapic_write32(LAPIC_ICR_LO, vlo);
        }
}
292
/*
 * Send an IPI to the local CPU via the SELF IPI register, which only
 * exists in x2APIC mode -- hence the KASSERT.
 */
static void
lapic_write_self_ipi(uint32_t vector)
{

        KASSERT(x2apic_mode, ("SELF IPI write in xAPIC mode"));
        wrmsr(MSR_APIC_000 + LAPIC_SELF_IPI, vector);
}
300 #endif /* SMP */
301
302 static void
303 native_lapic_enable_x2apic(void)
304 {
305         uint64_t apic_base;
306
307         apic_base = rdmsr(MSR_APICBASE);
308         apic_base |= APICBASE_X2APIC | APICBASE_ENABLED;
309         wrmsr(MSR_APICBASE, apic_base);
310 }
311
312 static bool
313 native_lapic_is_x2apic(void)
314 {
315         uint64_t apic_base;
316
317         apic_base = rdmsr(MSR_APICBASE);
318         return ((apic_base & (APICBASE_X2APIC | APICBASE_ENABLED)) ==
319             (APICBASE_X2APIC | APICBASE_ENABLED));
320 }
321
/* Prototypes for local routines defined later in this file. */
static void     lapic_enable(void);
static void     lapic_resume(struct pic *pic, bool suspend_cancelled);
static void     lapic_timer_oneshot(struct lapic *);
static void     lapic_timer_oneshot_nointr(struct lapic *, uint32_t);
static void     lapic_timer_periodic(struct lapic *);
static void     lapic_timer_deadline(struct lapic *);
static void     lapic_timer_stop(struct lapic *);
static void     lapic_timer_set_divisor(u_int divisor);
static uint32_t lvt_mode(struct lapic *la, u_int pin, uint32_t value);
static int      lapic_et_start(struct eventtimer *et,
                    sbintime_t first, sbintime_t period);
static int      lapic_et_stop(struct eventtimer *et);
static u_int    apic_idt_to_irq(u_int apic_id, u_int vector);
static void     lapic_set_tpr(u_int vector);

struct pic lapic_pic = { .pic_resume = lapic_resume };

/* Forward declarations for apic_ops */
static void     native_lapic_create(u_int apic_id, int boot_cpu);
static void     native_lapic_init(vm_paddr_t addr);
static void     native_lapic_xapic_mode(void);
static void     native_lapic_setup(int boot);
static void     native_lapic_dump(const char *str);
static void     native_lapic_disable(void);
static void     native_lapic_eoi(void);
static int      native_lapic_id(void);
static int      native_lapic_intr_pending(u_int vector);
static u_int    native_apic_cpuid(u_int apic_id);
static u_int    native_apic_alloc_vector(u_int apic_id, u_int irq);
static u_int    native_apic_alloc_vectors(u_int apic_id, u_int *irqs,
                    u_int count, u_int align);
static void     native_apic_disable_vector(u_int apic_id, u_int vector);
static void     native_apic_enable_vector(u_int apic_id, u_int vector);
static void     native_apic_free_vector(u_int apic_id, u_int vector, u_int irq);
static void     native_lapic_set_logical_id(u_int apic_id, u_int cluster,
                    u_int cluster_id);
static int      native_lapic_enable_pmc(void);
static void     native_lapic_disable_pmc(void);
static void     native_lapic_reenable_pmc(void);
static void     native_lapic_enable_cmc(void);
static int      native_lapic_enable_mca_elvt(void);
static int      native_lapic_set_lvt_mask(u_int apic_id, u_int lvt,
                    u_char masked);
static int      native_lapic_set_lvt_mode(u_int apic_id, u_int lvt,
                    uint32_t mode);
static int      native_lapic_set_lvt_polarity(u_int apic_id, u_int lvt,
                    enum intr_polarity pol);
static int      native_lapic_set_lvt_triggermode(u_int apic_id, u_int lvt,
                    enum intr_trigger trigger);
#ifdef SMP
static void     native_lapic_ipi_raw(register_t icrlo, u_int dest);
static void     native_lapic_ipi_vectored(u_int vector, int dest);
static int      native_lapic_ipi_wait(int delay);
#endif /* SMP */
static int      native_lapic_ipi_alloc(inthand_t *ipifunc);
static void     native_lapic_ipi_free(int vector);

/*
 * Dispatch table hooking this bare-metal local APIC implementation
 * into the kernel's apic_ops interface.
 */
struct apic_ops apic_ops = {
        .create                 = native_lapic_create,
        .init                   = native_lapic_init,
        .xapic_mode             = native_lapic_xapic_mode,
        .is_x2apic              = native_lapic_is_x2apic,
        .setup                  = native_lapic_setup,
        .dump                   = native_lapic_dump,
        .disable                = native_lapic_disable,
        .eoi                    = native_lapic_eoi,
        .id                     = native_lapic_id,
        .intr_pending           = native_lapic_intr_pending,
        .set_logical_id         = native_lapic_set_logical_id,
        .cpuid                  = native_apic_cpuid,
        .alloc_vector           = native_apic_alloc_vector,
        .alloc_vectors          = native_apic_alloc_vectors,
        .enable_vector          = native_apic_enable_vector,
        .disable_vector         = native_apic_disable_vector,
        .free_vector            = native_apic_free_vector,
        .enable_pmc             = native_lapic_enable_pmc,
        .disable_pmc            = native_lapic_disable_pmc,
        .reenable_pmc           = native_lapic_reenable_pmc,
        .enable_cmc             = native_lapic_enable_cmc,
        .enable_mca_elvt        = native_lapic_enable_mca_elvt,
#ifdef SMP
        .ipi_raw                = native_lapic_ipi_raw,
        .ipi_vectored           = native_lapic_ipi_vectored,
        .ipi_wait               = native_lapic_ipi_wait,
#endif
        .ipi_alloc              = native_lapic_ipi_alloc,
        .ipi_free               = native_lapic_ipi_free,
        .set_lvt_mask           = native_lapic_set_lvt_mask,
        .set_lvt_mode           = native_lapic_set_lvt_mode,
        .set_lvt_polarity       = native_lapic_set_lvt_polarity,
        .set_lvt_triggermode    = native_lapic_set_lvt_triggermode,
};
414
415 static uint32_t
416 lvt_mode_impl(struct lapic *la, struct lvt *lvt, u_int pin, uint32_t value)
417 {
418
419         value &= ~(APIC_LVT_M | APIC_LVT_TM | APIC_LVT_IIPP | APIC_LVT_DM |
420             APIC_LVT_VECTOR);
421         if (lvt->lvt_edgetrigger == 0)
422                 value |= APIC_LVT_TM;
423         if (lvt->lvt_activehi == 0)
424                 value |= APIC_LVT_IIPP_INTALO;
425         if (lvt->lvt_masked)
426                 value |= APIC_LVT_M;
427         value |= lvt->lvt_mode;
428         switch (lvt->lvt_mode) {
429         case APIC_LVT_DM_NMI:
430         case APIC_LVT_DM_SMI:
431         case APIC_LVT_DM_INIT:
432         case APIC_LVT_DM_EXTINT:
433                 if (!lvt->lvt_edgetrigger && bootverbose) {
434                         printf("lapic%u: Forcing LINT%u to edge trigger\n",
435                             la->la_id, pin);
436                         value &= ~APIC_LVT_TM;
437                 }
438                 /* Use a vector of 0. */
439                 break;
440         case APIC_LVT_DM_FIXED:
441                 value |= lvt->lvt_vector;
442                 break;
443         default:
444                 panic("bad APIC LVT delivery mode: %#x\n", value);
445         }
446         return (value);
447 }
448
449 static uint32_t
450 lvt_mode(struct lapic *la, u_int pin, uint32_t value)
451 {
452         struct lvt *lvt;
453
454         KASSERT(pin <= APIC_LVT_MAX,
455             ("%s: pin %u out of range", __func__, pin));
456         if (la->la_lvts[pin].lvt_active)
457                 lvt = &la->la_lvts[pin];
458         else
459                 lvt = &lvts[pin];
460
461         return (lvt_mode_impl(la, lvt, pin, value));
462 }
463
464 static uint32_t
465 elvt_mode(struct lapic *la, u_int idx, uint32_t value)
466 {
467         struct lvt *elvt;
468
469         KASSERT(idx <= APIC_ELVT_MAX,
470             ("%s: idx %u out of range", __func__, idx));
471
472         elvt = &la->la_elvts[idx];
473         KASSERT(elvt->lvt_active, ("%s: ELVT%u is not active", __func__, idx));
474         KASSERT(elvt->lvt_edgetrigger,
475             ("%s: ELVT%u is not edge triggered", __func__, idx));
476         KASSERT(elvt->lvt_activehi,
477             ("%s: ELVT%u is not active high", __func__, idx));
478         return (lvt_mode_impl(la, elvt, idx, value));
479 }
480
/*
 * Map the local APIC and setup necessary interrupt vectors.  Called
 * once on the BSP with the physical address of the LAPIC register
 * page; also registers the LAPIC event timer and calibrates the
 * xAPIC IPI-ack busy loop.
 */
static void
native_lapic_init(vm_paddr_t addr)
{
#ifdef SMP
        uint64_t r, r1, r2, rx;
#endif
        uint32_t ver;
        int i;
        bool arat;

        /*
         * Enable x2APIC mode if possible. Map the local APIC
         * registers page.
         *
         * Keep the LAPIC registers page mapped uncached for x2APIC
         * mode too, to have direct map page attribute set to
         * uncached.  This is needed to work around CPU errata present
         * on all Intel processors.
         */
        KASSERT(trunc_page(addr) == addr,
            ("local APIC not aligned on a page boundary"));
        lapic_paddr = addr;
        lapic_map = pmap_mapdev(addr, PAGE_SIZE);
        if (x2apic_mode) {
                native_lapic_enable_x2apic();
                /* Registers are reached via MSRs from here on. */
                lapic_map = NULL;
        }

        /* Setup the spurious interrupt handler. */
        setidt(APIC_SPURIOUS_INT, IDTVEC(spuriousint), SDT_APIC, SEL_KPL,
            GSEL_APIC);

        /* Perform basic initialization of the BSP's local APIC. */
        lapic_enable();

        /* Set BSP's per-CPU local APIC ID. */
        PCPU_SET(apic_id, lapic_id());

        /* Local APIC timer interrupt. */
        setidt(APIC_TIMER_INT, pti ? IDTVEC(timerint_pti) : IDTVEC(timerint),
            SDT_APIC, SEL_KPL, GSEL_APIC);

        /* Local APIC error interrupt. */
        setidt(APIC_ERROR_INT, pti ? IDTVEC(errorint_pti) : IDTVEC(errorint),
            SDT_APIC, SEL_KPL, GSEL_APIC);

        /* XXX: Thermal interrupt */

        /* Local APIC CMCI. */
        setidt(APIC_CMC_INT, pti ? IDTVEC(cmcint_pti) : IDTVEC(cmcint),
            SDT_APIC, SEL_KPL, GSEL_APIC);

        /* Register the event timer unless disabled by the hint. */
        if ((resource_int_value("apic", 0, "clock", &i) != 0 || i != 0)) {
                /* Set if APIC timer runs in C3. */
                arat = (cpu_power_eax & CPUTPM1_ARAT);

                bzero(&lapic_et, sizeof(lapic_et));
                lapic_et.et_name = "LAPIC";
                lapic_et.et_flags = ET_FLAGS_PERIODIC | ET_FLAGS_ONESHOT |
                    ET_FLAGS_PERCPU;
                lapic_et.et_quality = 600;
                if (!arat) {
                        /* Timer may stop in deep C-states; lower quality. */
                        lapic_et.et_flags |= ET_FLAGS_C3STOP;
                        lapic_et.et_quality = 100;
                }
                if ((cpu_feature & CPUID_TSC) != 0 &&
                    (cpu_feature2 & CPUID2_TSCDLT) != 0 &&
                    tsc_is_invariant && tsc_freq != 0) {
                        lapic_timer_tsc_deadline = 1;
                        TUNABLE_INT_FETCH("hw.lapic_tsc_deadline",
                            &lapic_timer_tsc_deadline);
                }

                lapic_et.et_frequency = 0;
                /* We don't know frequency yet, so trying to guess. */
                lapic_et.et_min_period = 0x00001000LL;
                lapic_et.et_max_period = SBT_1S;
                lapic_et.et_start = lapic_et_start;
                lapic_et.et_stop = lapic_et_stop;
                lapic_et.et_priv = NULL;
                et_register(&lapic_et);
        }

        /*
         * Set lapic_eoi_suppression after lapic_enable(), to not
         * enable suppression in the hardware prematurely.  Note that
         * we by default enable suppression even when system only has
         * one IO-APIC, since EOI is broadcasted to all APIC agents,
         * including CPUs, otherwise.
         *
         * It seems that at least some KVM versions report
         * EOI_SUPPRESSION bit, but auto-EOI does not work.
         */
        ver = lapic_read32(LAPIC_VERSION);
        if ((ver & APIC_VER_EOI_SUPPRESSION) != 0) {
                lapic_eoi_suppression = 1;
                if (vm_guest == VM_GUEST_KVM) {
                        if (bootverbose)
                                printf(
                       "KVM -- disabling lapic eoi suppression\n");
                        lapic_eoi_suppression = 0;
                }
                TUNABLE_INT_FETCH("hw.lapic_eoi_suppression",
                    &lapic_eoi_suppression);
        }

#ifdef SMP
#define LOOPS   100000
        /*
         * Calibrate the busy loop waiting for IPI ack in xAPIC mode.
         * lapic_ipi_wait_mult contains the number of iterations which
         * approximately delay execution for 1 microsecond (the
         * argument to native_lapic_ipi_wait() is in microseconds).
         *
         * We assume that TSC is present and already measured.
         * Possible TSC frequency jumps are irrelevant to the
         * calibration loop below, the CPU clock management code is
         * not yet started, and we do not enter sleep states.
         */
        KASSERT((cpu_feature & CPUID_TSC) != 0 && tsc_freq != 0,
            ("TSC not initialized"));
        if (!x2apic_mode) {
                r = rdtsc();
                for (rx = 0; rx < LOOPS; rx++) {
                        (void)lapic_read_icr_lo();
                        ia32_pause();
                }
                r = rdtsc() - r;
                r1 = tsc_freq * LOOPS;
                r2 = r * 1000000;
                lapic_ipi_wait_mult = r1 >= r2 ? r1 / r2 : 1;
                if (bootverbose) {
                        printf("LAPIC: ipi_wait() us multiplier %ju (r %ju "
                            "tsc %ju)\n", (uintmax_t)lapic_ipi_wait_mult,
                            (uintmax_t)r, (uintmax_t)tsc_freq);
                }
        }
#undef LOOPS
#endif /* SMP */
}
624
/*
 * Create a local APIC instance: record the APIC with ID 'apic_id' in
 * the lapics[] table, seeding it from the global LVT defaults, and
 * register the CPU with the SMP code.  'boot_cpu' is non-zero for the
 * BSP, which may not be ignored.
 */
static void
native_lapic_create(u_int apic_id, int boot_cpu)
{
        int i;

        if (apic_id > max_apic_id) {
                printf("APIC: Ignoring local APIC with ID %d\n", apic_id);
                if (boot_cpu)
                        panic("Can't ignore BSP");
                return;
        }
        KASSERT(!lapics[apic_id].la_present, ("duplicate local APIC %u",
            apic_id));

        /*
         * Assume no local LVT overrides and a cluster of 0 and
         * intra-cluster ID of 0.
         */
        lapics[apic_id].la_present = 1;
        lapics[apic_id].la_id = apic_id;
        for (i = 0; i <= APIC_LVT_MAX; i++) {
                lapics[apic_id].la_lvts[i] = lvts[i];
                lapics[apic_id].la_lvts[i].lvt_active = 0;
        }
        for (i = 0; i <= APIC_ELVT_MAX; i++) {
                lapics[apic_id].la_elvts[i] = elvts[i];
                lapics[apic_id].la_elvts[i].lvt_active = 0;
        }
        /* Mark every I/O vector free, then reserve the fixed ones. */
        for (i = 0; i <= APIC_NUM_IOINTS; i++)
            lapics[apic_id].la_ioint_irqs[i] = IRQ_FREE;
        lapics[apic_id].la_ioint_irqs[IDT_SYSCALL - APIC_IO_INTS] = IRQ_SYSCALL;
        lapics[apic_id].la_ioint_irqs[APIC_TIMER_INT - APIC_IO_INTS] =
            IRQ_TIMER;
#ifdef KDTRACE_HOOKS
        lapics[apic_id].la_ioint_irqs[IDT_DTRACE_RET - APIC_IO_INTS] =
            IRQ_DTRACE_RET;
#endif
#ifdef XENHVM
        lapics[apic_id].la_ioint_irqs[IDT_EVTCHN - APIC_IO_INTS] = IRQ_EVTCHN;
#endif

#ifdef SMP
        cpu_add(apic_id, boot_cpu);
#endif
}
673
674 static inline uint32_t
675 amd_read_ext_features(void)
676 {
677         uint32_t version;
678
679         if (cpu_vendor_id != CPU_VENDOR_AMD &&
680             cpu_vendor_id != CPU_VENDOR_HYGON)
681                 return (0);
682         version = lapic_read32(LAPIC_VERSION);
683         if ((version & APIC_VER_AMD_EXT_SPACE) != 0)
684                 return (lapic_read32(LAPIC_EXT_FEATURES));
685         else
686                 return (0);
687 }
688
689 static inline uint32_t
690 amd_read_elvt_count(void)
691 {
692         uint32_t extf;
693         uint32_t count;
694
695         extf = amd_read_ext_features();
696         count = (extf & APIC_EXTF_ELVT_MASK) >> APIC_EXTF_ELVT_SHIFT;
697         count = min(count, APIC_ELVT_MAX + 1);
698         return (count);
699 }
700
/*
 * Dump contents of local APIC registers to the console.  'str' is a
 * caller-supplied label printed together with the CPU id.
 */
static void
native_lapic_dump(const char* str)
{
        uint32_t version;
        uint32_t maxlvt;
        uint32_t extf;
        int elvt_count;
        int i;

        version = lapic_read32(LAPIC_VERSION);
        maxlvt = (version & APIC_VER_MAXLVT) >> MAXLVTSHIFT;
        printf("cpu%d %s:\n", PCPU_GET(cpuid), str);
        /* DFR does not exist in x2APIC mode; print 0 instead. */
        printf("     ID: 0x%08x   VER: 0x%08x LDR: 0x%08x DFR: 0x%08x",
            lapic_read32(LAPIC_ID), version,
            lapic_read32(LAPIC_LDR), x2apic_mode ? 0 : lapic_read32(LAPIC_DFR));
        if ((cpu_feature2 & CPUID2_X2APIC) != 0)
                printf(" x2APIC: %d", x2apic_mode);
        printf("\n  lint0: 0x%08x lint1: 0x%08x TPR: 0x%08x SVR: 0x%08x\n",
            lapic_read32(LAPIC_LVT_LINT0), lapic_read32(LAPIC_LVT_LINT1),
            lapic_read32(LAPIC_TPR), lapic_read32(LAPIC_SVR));
        printf("  timer: 0x%08x therm: 0x%08x err: 0x%08x",
            lapic_read32(LAPIC_LVT_TIMER), lapic_read32(LAPIC_LVT_THERMAL),
            lapic_read32(LAPIC_LVT_ERROR));
        if (maxlvt >= APIC_LVT_PMC)
                printf(" pmc: 0x%08x", lapic_read32(LAPIC_LVT_PCINT));
        printf("\n");
        if (maxlvt >= APIC_LVT_CMCI)
                printf("   cmci: 0x%08x\n", lapic_read32(LAPIC_LVT_CMCI));
        extf = amd_read_ext_features();
        if (extf != 0) {
                printf("   AMD ext features: 0x%08x\n", extf);
                elvt_count = amd_read_elvt_count();
                for (i = 0; i < elvt_count; i++)
                        printf("   AMD elvt%d: 0x%08x\n", i,
                            lapic_read32(LAPIC_EXT_LVT0 + i));
        }
}
741
/*
 * Bring the local APIC back to the mode the kernel selected: if the
 * kernel runs in x2APIC mode, re-enable it in the APICBASE MSR, with
 * interrupts disabled around the update.
 */
static void
native_lapic_xapic_mode(void)
{
        register_t saveintr;

        saveintr = intr_disable();
        if (x2apic_mode)
                native_lapic_enable_x2apic();
        intr_restore(saveintr);
}
752
/*
 * Program the current CPU's local APIC: TPR, LVT entries, the timer
 * and any AMD extended LVT entries with active overrides.  Runs with
 * interrupts disabled.  'boot' is non-zero during initial bring-up,
 * in which case the BSP also calibrates the timer.
 */
static void
native_lapic_setup(int boot)
{
        struct lapic *la;
        uint32_t version;
        uint32_t maxlvt;
        register_t saveintr;
        int elvt_count;
        int i;

        saveintr = intr_disable();

        la = &lapics[lapic_id()];
        KASSERT(la->la_present, ("missing APIC structure"));
        version = lapic_read32(LAPIC_VERSION);
        maxlvt = (version & APIC_VER_MAXLVT) >> MAXLVTSHIFT;

        /* Initialize the TPR to allow all interrupts. */
        lapic_set_tpr(0);

        /* Setup spurious vector and enable the local APIC. */
        lapic_enable();

        /* Program LINT[01] LVT entries. */
        lapic_write32(LAPIC_LVT_LINT0, lvt_mode(la, APIC_LVT_LINT0,
            lapic_read32(LAPIC_LVT_LINT0)));
        lapic_write32(LAPIC_LVT_LINT1, lvt_mode(la, APIC_LVT_LINT1,
            lapic_read32(LAPIC_LVT_LINT1)));

        /* Program the PMC LVT entry if present. */
        if (maxlvt >= APIC_LVT_PMC) {
                /*
                 * NOTE(review): the template passed to lvt_mode() here
                 * is the register offset LAPIC_LVT_PCINT rather than
                 * the register's current value.  lvt_mode() clears all
                 * bits the offset could contribute, so this appears
                 * harmless, but lapic_read32(LAPIC_LVT_PCINT) may have
                 * been intended -- confirm.
                 */
                lapic_write32(LAPIC_LVT_PCINT, lvt_mode(la, APIC_LVT_PMC,
                    LAPIC_LVT_PCINT));
        }

        /* Program timer LVT. */
        la->lvt_timer_base = lvt_mode(la, APIC_LVT_TIMER,
            lapic_read32(LAPIC_LVT_TIMER));
        la->lvt_timer_last = la->lvt_timer_base;
        lapic_write32(LAPIC_LVT_TIMER, la->lvt_timer_base);

        /* Calibrate the timer parameters using BSP. */
        if (boot && IS_BSP()) {
                lapic_calibrate_initcount(la);
                if (lapic_timer_tsc_deadline)
                        lapic_calibrate_deadline(la);
        }

        /* Setup the timer if configured. */
        if (la->la_timer_mode != LAT_MODE_UNDEF) {
                KASSERT(la->la_timer_period != 0, ("lapic%u: zero divisor",
                    lapic_id()));
                switch (la->la_timer_mode) {
                case LAT_MODE_PERIODIC:
                        lapic_timer_set_divisor(lapic_timer_divisor);
                        lapic_timer_periodic(la);
                        break;
                case LAT_MODE_ONESHOT:
                        lapic_timer_set_divisor(lapic_timer_divisor);
                        lapic_timer_oneshot(la);
                        break;
                case LAT_MODE_DEADLINE:
                        lapic_timer_deadline(la);
                        break;
                default:
                        panic("corrupted la_timer_mode %p %d", la,
                            la->la_timer_mode);
                }
        }

        /* Program error LVT and clear any existing errors. */
        lapic_write32(LAPIC_LVT_ERROR, lvt_mode(la, APIC_LVT_ERROR,
            lapic_read32(LAPIC_LVT_ERROR)));
        lapic_write32(LAPIC_ESR, 0);

        /* XXX: Thermal LVT */

        /* Program the CMCI LVT entry if present. */
        if (maxlvt >= APIC_LVT_CMCI) {
                lapic_write32(LAPIC_LVT_CMCI, lvt_mode(la, APIC_LVT_CMCI,
                    lapic_read32(LAPIC_LVT_CMCI)));
        }

        /* Program any AMD extended LVT entries with active overrides. */
        elvt_count = amd_read_elvt_count();
        for (i = 0; i < elvt_count; i++) {
                if (la->la_elvts[i].lvt_active)
                        lapic_write32(LAPIC_EXT_LVT0 + i,
                            elvt_mode(la, i, lapic_read32(LAPIC_EXT_LVT0 + i)));
        }

        intr_restore(saveintr);
}
845
846 static void
847 native_lapic_intrcnt(void *dummy __unused)
848 {
849         struct pcpu *pc;
850         struct lapic *la;
851         char buf[MAXCOMLEN + 1];
852
853         /* If there are no APICs, skip this function. */
854         if (lapics == NULL)
855                 return;
856
857         STAILQ_FOREACH(pc, &cpuhead, pc_allcpu) {
858                 la = &lapics[pc->pc_apic_id];
859                 if (!la->la_present)
860                     continue;
861
862                 snprintf(buf, sizeof(buf), "cpu%d:timer", pc->pc_cpuid);
863                 intrcnt_add(buf, &la->la_timer_count);
864         }
865 }
866 SYSINIT(native_lapic_intrcnt, SI_SUB_INTR, SI_ORDER_MIDDLE, native_lapic_intrcnt,
867     NULL);
868
/*
 * Clear the mask bit in this CPU's performance counter LVT so PMC
 * interrupts are delivered again.  No-op without HWPMC_HOOKS.
 */
static void
native_lapic_reenable_pmc(void)
{
#ifdef HWPMC_HOOKS
	uint32_t lvt;

	lvt = lapic_read32(LAPIC_LVT_PCINT) & ~APIC_LVT_M;
	lapic_write32(LAPIC_LVT_PCINT, lvt);
#endif
}
880
881 #ifdef HWPMC_HOOKS
882 static void
883 lapic_update_pmc(void *dummy)
884 {
885         struct lapic *la;
886
887         la = &lapics[lapic_id()];
888         lapic_write32(LAPIC_LVT_PCINT, lvt_mode(la, APIC_LVT_PMC,
889             lapic_read32(LAPIC_LVT_PCINT)));
890 }
891 #endif
892
/*
 * Unmask the performance counter LVT on all CPUs so that hwpmc(4) can
 * receive counter overflow interrupts.  Returns 1 on success and 0 when
 * the local APIC or its PMC LVT entry is unavailable (always 0 when the
 * kernel is built without HWPMC_HOOKS).
 */
static int
native_lapic_enable_pmc(void)
{
#ifdef HWPMC_HOOKS
	u_int32_t maxlvt;

	/* Fail if the local APIC is not present. */
	if (!x2apic_mode && lapic_map == NULL)
		return (0);

	/* Fail if the PMC LVT is not present. */
	maxlvt = (lapic_read32(LAPIC_VERSION) & APIC_VER_MAXLVT) >> MAXLVTSHIFT;
	if (maxlvt < APIC_LVT_PMC)
		return (0);

	/* Unmask the PMC entry in the global LVT template. */
	lvts[APIC_LVT_PMC].lvt_masked = 0;

#ifdef EARLY_AP_STARTUP
	/* APs are up before SI_SUB_KICK_SCHEDULER here, so just rendezvous. */
	MPASS(mp_ncpus == 1 || smp_started);
	smp_rendezvous(NULL, lapic_update_pmc, NULL, NULL);
#else
#ifdef SMP
	/*
	 * If hwpmc was loaded at boot time then the APs may not be
	 * started yet.  In that case, don't forward the request to
	 * them as they will program the lvt when they start.
	 */
	if (smp_started)
		smp_rendezvous(NULL, lapic_update_pmc, NULL, NULL);
	else
#endif
		lapic_update_pmc(NULL);
#endif
	return (1);
#else
	return (0);
#endif
}
931
/*
 * Mask the performance counter LVT on all CPUs, the reverse of
 * native_lapic_enable_pmc().  Called when hwpmc(4) is unloaded; silently
 * returns when the local APIC or its PMC LVT entry is unavailable.
 */
static void
native_lapic_disable_pmc(void)
{
#ifdef HWPMC_HOOKS
	u_int32_t maxlvt;

	/* Fail if the local APIC is not present. */
	if (!x2apic_mode && lapic_map == NULL)
		return;

	/* Fail if the PMC LVT is not present. */
	maxlvt = (lapic_read32(LAPIC_VERSION) & APIC_VER_MAXLVT) >> MAXLVTSHIFT;
	if (maxlvt < APIC_LVT_PMC)
		return;

	/* Mask the PMC entry in the global LVT template. */
	lvts[APIC_LVT_PMC].lvt_masked = 1;

#ifdef SMP
	/* The APs should always be started when hwpmc is unloaded. */
	KASSERT(mp_ncpus == 1 || smp_started, ("hwpmc unloaded too early"));
#endif
	/* Reprogram the (now masked) PMC LVT on every CPU. */
	smp_rendezvous(NULL, lapic_update_pmc, NULL, NULL);
#endif
}
956
/*
 * Measure the LAPIC timer tick rate (count_freq) on the BSP by letting
 * the timer count down from APIC_TIMER_MAX_COUNT for one second.  The
 * divisor is doubled and the measurement retried whenever the full
 * count elapsed within the second (the counter may have wrapped, so the
 * reading cannot be trusted).  Runs once at boot with interrupts masked
 * by the caller (lapic_setup()).
 */
static void
lapic_calibrate_initcount(struct lapic *la)
{
	u_long value;

	/* Start off with a divisor of 2 (power on reset default). */
	lapic_timer_divisor = 2;
	/* Try to calibrate the local APIC timer. */
	do {
		lapic_timer_set_divisor(lapic_timer_divisor);
		/* Masked one-shot: count down without raising an interrupt. */
		lapic_timer_oneshot_nointr(la, APIC_TIMER_MAX_COUNT);
		DELAY(1000000);
		/* Ticks elapsed in one second == frequency in Hz. */
		value = APIC_TIMER_MAX_COUNT - lapic_read32(LAPIC_CCR_TIMER);
		if (value != APIC_TIMER_MAX_COUNT)
			break;
		lapic_timer_divisor <<= 1;
	} while (lapic_timer_divisor <= 128);
	if (lapic_timer_divisor > 128)
		panic("lapic: Divisor too big");
	if (bootverbose) {
		printf("lapic: Divisor %lu, Frequency %lu Hz\n",
		    lapic_timer_divisor, value);
	}
	count_freq = value;
}
982
983 static void
984 lapic_calibrate_deadline(struct lapic *la __unused)
985 {
986
987         if (bootverbose) {
988                 printf("lapic: deadline tsc mode, Frequency %ju Hz\n",
989                     (uintmax_t)tsc_freq);
990         }
991 }
992
993 static void
994 lapic_change_mode(struct eventtimer *et, struct lapic *la,
995     enum lat_timer_mode newmode)
996 {
997
998         if (la->la_timer_mode == newmode)
999                 return;
1000         switch (newmode) {
1001         case LAT_MODE_PERIODIC:
1002                 lapic_timer_set_divisor(lapic_timer_divisor);
1003                 et->et_frequency = count_freq;
1004                 break;
1005         case LAT_MODE_DEADLINE:
1006                 et->et_frequency = tsc_freq;
1007                 break;
1008         case LAT_MODE_ONESHOT:
1009                 lapic_timer_set_divisor(lapic_timer_divisor);
1010                 et->et_frequency = count_freq;
1011                 break;
1012         default:
1013                 panic("lapic_change_mode %d", newmode);
1014         }
1015         la->la_timer_mode = newmode;
1016         et->et_min_period = (0x00000002LLU << 32) / et->et_frequency;
1017         et->et_max_period = (0xfffffffeLLU << 32) / et->et_frequency;
1018 }
1019
/*
 * Event timer start method.  A non-zero period selects periodic mode;
 * otherwise TSC-deadline mode is preferred when available, falling back
 * to one-shot.  first/period are sbintime_t values (32.32 fixed point),
 * so multiplying by the frequency and shifting right by 32 yields the
 * count in timer ticks.  Always returns 0.
 */
static int
lapic_et_start(struct eventtimer *et, sbintime_t first, sbintime_t period)
{
	struct lapic *la;

	la = &lapics[PCPU_GET(apic_id)];
	if (period != 0) {
		lapic_change_mode(et, la, LAT_MODE_PERIODIC);
		la->la_timer_period = ((uint32_t)et->et_frequency * period) >>
		    32;
		lapic_timer_periodic(la);
	} else if (lapic_timer_tsc_deadline) {
		/* Deadline mode keeps the full 64-bit frequency. */
		lapic_change_mode(et, la, LAT_MODE_DEADLINE);
		la->la_timer_period = (et->et_frequency * first) >> 32;
		lapic_timer_deadline(la);
	} else {
		lapic_change_mode(et, la, LAT_MODE_ONESHOT);
		la->la_timer_period = ((uint32_t)et->et_frequency * first) >>
		    32;
		lapic_timer_oneshot(la);
	}
	return (0);
}
1043
1044 static int
1045 lapic_et_stop(struct eventtimer *et)
1046 {
1047         struct lapic *la;
1048
1049         la = &lapics[PCPU_GET(apic_id)];
1050         lapic_timer_stop(la);
1051         la->la_timer_mode = LAT_MODE_UNDEF;
1052         return (0);
1053 }
1054
1055 static void
1056 native_lapic_disable(void)
1057 {
1058         uint32_t value;
1059
1060         /* Software disable the local APIC. */
1061         value = lapic_read32(LAPIC_SVR);
1062         value &= ~APIC_SVR_SWEN;
1063         lapic_write32(LAPIC_SVR, value);
1064 }
1065
1066 static void
1067 lapic_enable(void)
1068 {
1069         uint32_t value;
1070
1071         /* Program the spurious vector to enable the local APIC. */
1072         value = lapic_read32(LAPIC_SVR);
1073         value &= ~(APIC_SVR_VECTOR | APIC_SVR_FOCUS);
1074         value |= APIC_SVR_FEN | APIC_SVR_SWEN | APIC_SPURIOUS_INT;
1075         if (lapic_eoi_suppression)
1076                 value |= APIC_SVR_EOI_SUPPRESSION;
1077         lapic_write32(LAPIC_SVR, value);
1078 }
1079
/* Reset the local APIC on the BSP during resume. */
static void
lapic_resume(struct pic *pic, bool suspend_cancelled)
{

	/* Both arguments are unused; just re-run the LAPIC setup. */
	lapic_setup(0);
}
1087
1088 static int
1089 native_lapic_id(void)
1090 {
1091         uint32_t v;
1092
1093         KASSERT(x2apic_mode || lapic_map != NULL, ("local APIC is not mapped"));
1094         v = lapic_read32(LAPIC_ID);
1095         if (!x2apic_mode)
1096                 v >>= APIC_ID_SHIFT;
1097         return (v);
1098 }
1099
1100 static int
1101 native_lapic_intr_pending(u_int vector)
1102 {
1103         uint32_t irr;
1104
1105         /*
1106          * The IRR registers are an array of registers each of which
1107          * only describes 32 interrupts in the low 32 bits.  Thus, we
1108          * divide the vector by 32 to get the register index.
1109          * Finally, we modulus the vector by 32 to determine the
1110          * individual bit to test.
1111          */
1112         irr = lapic_read32(LAPIC_IRR0 + vector / 32);
1113         return (irr & 1 << (vector % 32));
1114 }
1115
1116 static void
1117 native_lapic_set_logical_id(u_int apic_id, u_int cluster, u_int cluster_id)
1118 {
1119         struct lapic *la;
1120
1121         KASSERT(lapics[apic_id].la_present, ("%s: APIC %u doesn't exist",
1122             __func__, apic_id));
1123         KASSERT(cluster <= APIC_MAX_CLUSTER, ("%s: cluster %u too big",
1124             __func__, cluster));
1125         KASSERT(cluster_id <= APIC_MAX_INTRACLUSTER_ID,
1126             ("%s: intra cluster id %u too big", __func__, cluster_id));
1127         la = &lapics[apic_id];
1128         la->la_cluster = cluster;
1129         la->la_cluster_id = cluster_id;
1130 }
1131
1132 static int
1133 native_lapic_set_lvt_mask(u_int apic_id, u_int pin, u_char masked)
1134 {
1135
1136         if (pin > APIC_LVT_MAX)
1137                 return (EINVAL);
1138         if (apic_id == APIC_ID_ALL) {
1139                 lvts[pin].lvt_masked = masked;
1140                 if (bootverbose)
1141                         printf("lapic:");
1142         } else {
1143                 KASSERT(lapics[apic_id].la_present,
1144                     ("%s: missing APIC %u", __func__, apic_id));
1145                 lapics[apic_id].la_lvts[pin].lvt_masked = masked;
1146                 lapics[apic_id].la_lvts[pin].lvt_active = 1;
1147                 if (bootverbose)
1148                         printf("lapic%u:", apic_id);
1149         }
1150         if (bootverbose)
1151                 printf(" LINT%u %s\n", pin, masked ? "masked" : "unmasked");
1152         return (0);
1153 }
1154
1155 static int
1156 native_lapic_set_lvt_mode(u_int apic_id, u_int pin, u_int32_t mode)
1157 {
1158         struct lvt *lvt;
1159
1160         if (pin > APIC_LVT_MAX)
1161                 return (EINVAL);
1162         if (apic_id == APIC_ID_ALL) {
1163                 lvt = &lvts[pin];
1164                 if (bootverbose)
1165                         printf("lapic:");
1166         } else {
1167                 KASSERT(lapics[apic_id].la_present,
1168                     ("%s: missing APIC %u", __func__, apic_id));
1169                 lvt = &lapics[apic_id].la_lvts[pin];
1170                 lvt->lvt_active = 1;
1171                 if (bootverbose)
1172                         printf("lapic%u:", apic_id);
1173         }
1174         lvt->lvt_mode = mode;
1175         switch (mode) {
1176         case APIC_LVT_DM_NMI:
1177         case APIC_LVT_DM_SMI:
1178         case APIC_LVT_DM_INIT:
1179         case APIC_LVT_DM_EXTINT:
1180                 lvt->lvt_edgetrigger = 1;
1181                 lvt->lvt_activehi = 1;
1182                 if (mode == APIC_LVT_DM_EXTINT)
1183                         lvt->lvt_masked = 1;
1184                 else
1185                         lvt->lvt_masked = 0;
1186                 break;
1187         default:
1188                 panic("Unsupported delivery mode: 0x%x\n", mode);
1189         }
1190         if (bootverbose) {
1191                 printf(" Routing ");
1192                 switch (mode) {
1193                 case APIC_LVT_DM_NMI:
1194                         printf("NMI");
1195                         break;
1196                 case APIC_LVT_DM_SMI:
1197                         printf("SMI");
1198                         break;
1199                 case APIC_LVT_DM_INIT:
1200                         printf("INIT");
1201                         break;
1202                 case APIC_LVT_DM_EXTINT:
1203                         printf("ExtINT");
1204                         break;
1205                 }
1206                 printf(" -> LINT%u\n", pin);
1207         }
1208         return (0);
1209 }
1210
1211 static int
1212 native_lapic_set_lvt_polarity(u_int apic_id, u_int pin, enum intr_polarity pol)
1213 {
1214
1215         if (pin > APIC_LVT_MAX || pol == INTR_POLARITY_CONFORM)
1216                 return (EINVAL);
1217         if (apic_id == APIC_ID_ALL) {
1218                 lvts[pin].lvt_activehi = (pol == INTR_POLARITY_HIGH);
1219                 if (bootverbose)
1220                         printf("lapic:");
1221         } else {
1222                 KASSERT(lapics[apic_id].la_present,
1223                     ("%s: missing APIC %u", __func__, apic_id));
1224                 lapics[apic_id].la_lvts[pin].lvt_active = 1;
1225                 lapics[apic_id].la_lvts[pin].lvt_activehi =
1226                     (pol == INTR_POLARITY_HIGH);
1227                 if (bootverbose)
1228                         printf("lapic%u:", apic_id);
1229         }
1230         if (bootverbose)
1231                 printf(" LINT%u polarity: %s\n", pin,
1232                     pol == INTR_POLARITY_HIGH ? "high" : "low");
1233         return (0);
1234 }
1235
1236 static int
1237 native_lapic_set_lvt_triggermode(u_int apic_id, u_int pin,
1238      enum intr_trigger trigger)
1239 {
1240
1241         if (pin > APIC_LVT_MAX || trigger == INTR_TRIGGER_CONFORM)
1242                 return (EINVAL);
1243         if (apic_id == APIC_ID_ALL) {
1244                 lvts[pin].lvt_edgetrigger = (trigger == INTR_TRIGGER_EDGE);
1245                 if (bootverbose)
1246                         printf("lapic:");
1247         } else {
1248                 KASSERT(lapics[apic_id].la_present,
1249                     ("%s: missing APIC %u", __func__, apic_id));
1250                 lapics[apic_id].la_lvts[pin].lvt_edgetrigger =
1251                     (trigger == INTR_TRIGGER_EDGE);
1252                 lapics[apic_id].la_lvts[pin].lvt_active = 1;
1253                 if (bootverbose)
1254                         printf("lapic%u:", apic_id);
1255         }
1256         if (bootverbose)
1257                 printf(" LINT%u trigger: %s\n", pin,
1258                     trigger == INTR_TRIGGER_EDGE ? "edge" : "level");
1259         return (0);
1260 }
1261
1262 /*
1263  * Adjust the TPR of the current CPU so that it blocks all interrupts below
1264  * the passed in vector.
1265  */
1266 static void
1267 lapic_set_tpr(u_int vector)
1268 {
1269 #ifdef CHEAP_TPR
1270         lapic_write32(LAPIC_TPR, vector);
1271 #else
1272         uint32_t tpr;
1273
1274         tpr = lapic_read32(LAPIC_TPR) & ~APIC_TPR_PRIO;
1275         tpr |= vector;
1276         lapic_write32(LAPIC_TPR, tpr);
1277 #endif
1278 }
1279
/*
 * Signal end-of-interrupt to the local APIC.  Uses the no-fence write
 * variant, presumably to avoid serialization overhead on this hot
 * interrupt-exit path -- see lapic_write32_nofence.
 */
static void
native_lapic_eoi(void)
{

	lapic_write32_nofence(LAPIC_EOI, 0);
}
1286
1287 void
1288 lapic_handle_intr(int vector, struct trapframe *frame)
1289 {
1290         struct intsrc *isrc;
1291
1292         isrc = intr_lookup_source(apic_idt_to_irq(PCPU_GET(apic_id),
1293             vector));
1294         intr_execute_handlers(isrc, frame);
1295 }
1296
/*
 * Interrupt handler for the local APIC timer.  Acknowledges the
 * interrupt, bumps this CPU's tick counter, and -- if the LAPIC event
 * timer is active -- runs the eventtimer callback inside a critical
 * section with the trap frame temporarily installed as the thread's
 * interrupt frame.
 */
void
lapic_handle_timer(struct trapframe *frame)
{
	struct lapic *la;
	struct trapframe *oldframe;
	struct thread *td;

	/* Send EOI first thing. */
	lapic_eoi();

#if defined(SMP) && !defined(SCHED_ULE)
	/*
	 * Don't do any accounting for the disabled HTT cores, since it
	 * will provide misleading numbers for the userland.
	 *
	 * No locking is necessary here, since even if we lose the race
	 * when hlt_cpus_mask changes it is not a big deal, really.
	 *
	 * Don't do that for ULE, since ULE doesn't consider hlt_cpus_mask
	 * and unlike other schedulers it actually schedules threads to
	 * those CPUs.
	 */
	if (CPU_ISSET(PCPU_GET(cpuid), &hlt_cpus_mask))
		return;
#endif

	/* Look up our local APIC structure for the tick counters. */
	la = &lapics[PCPU_GET(apic_id)];
	(*la->la_timer_count)++;
	critical_enter();
	if (lapic_et.et_active) {
		/* Expose the frame to handlers that sample it (e.g. profiling). */
		td = curthread;
		td->td_intr_nesting_level++;
		oldframe = td->td_intr_frame;
		td->td_intr_frame = frame;
		lapic_et.et_event_cb(&lapic_et, lapic_et.et_arg);
		td->td_intr_frame = oldframe;
		td->td_intr_nesting_level--;
	}
	critical_exit();
}
1338
1339 static void
1340 lapic_timer_set_divisor(u_int divisor)
1341 {
1342
1343         KASSERT(powerof2(divisor), ("lapic: invalid divisor %u", divisor));
1344         KASSERT(ffs(divisor) <= nitems(lapic_timer_divisors),
1345                 ("lapic: invalid divisor %u", divisor));
1346         lapic_write32(LAPIC_DCR_TIMER, lapic_timer_divisors[ffs(divisor) - 1]);
1347 }
1348
1349 static void
1350 lapic_timer_oneshot(struct lapic *la)
1351 {
1352         uint32_t value;
1353
1354         value = la->lvt_timer_base;
1355         value &= ~(APIC_LVTT_TM | APIC_LVT_M);
1356         value |= APIC_LVTT_TM_ONE_SHOT;
1357         la->lvt_timer_last = value;
1358         lapic_write32(LAPIC_LVT_TIMER, value);
1359         lapic_write32(LAPIC_ICR_TIMER, la->la_timer_period);
1360 }
1361
1362 static void
1363 lapic_timer_oneshot_nointr(struct lapic *la, uint32_t count)
1364 {
1365         uint32_t value;
1366
1367         value = la->lvt_timer_base;
1368         value &= ~APIC_LVTT_TM;
1369         value |= APIC_LVTT_TM_ONE_SHOT | APIC_LVT_M;
1370         la->lvt_timer_last = value;
1371         lapic_write32(LAPIC_LVT_TIMER, value);
1372         lapic_write32(LAPIC_ICR_TIMER, count);
1373 }
1374
1375 static void
1376 lapic_timer_periodic(struct lapic *la)
1377 {
1378         uint32_t value;
1379
1380         value = la->lvt_timer_base;
1381         value &= ~(APIC_LVTT_TM | APIC_LVT_M);
1382         value |= APIC_LVTT_TM_PERIODIC;
1383         la->lvt_timer_last = value;
1384         lapic_write32(LAPIC_LVT_TIMER, value);
1385         lapic_write32(LAPIC_ICR_TIMER, la->la_timer_period);
1386 }
1387
/*
 * Arm the timer in TSC-deadline mode.  The LVT is rewritten only when
 * the cached value changed; in xAPIC mode an mfence orders the MMIO LVT
 * write ahead of the MSR_TSC_DEADLINE write that actually arms the
 * timer.
 */
static void
lapic_timer_deadline(struct lapic *la)
{
	uint32_t value;

	value = la->lvt_timer_base;
	value &= ~(APIC_LVTT_TM | APIC_LVT_M);
	value |= APIC_LVTT_TM_TSCDLT;
	if (value != la->lvt_timer_last) {
		la->lvt_timer_last = value;
		lapic_write32_nofence(LAPIC_LVT_TIMER, value);
		if (!x2apic_mode)
			mfence();
	}
	/* Deadline is an absolute TSC value: now + period. */
	wrmsr(MSR_TSC_DEADLINE, la->la_timer_period + rdtsc());
}
1404
/*
 * Stop the timer.  In TSC-deadline mode writing 0 to the deadline MSR
 * disarms it (followed by an mfence); in the count-based modes the LVT
 * is switched to masked one-shot instead.
 */
static void
lapic_timer_stop(struct lapic *la)
{
	uint32_t value;

	if (la->la_timer_mode == LAT_MODE_DEADLINE) {
		wrmsr(MSR_TSC_DEADLINE, 0);
		mfence();
	} else {
		value = la->lvt_timer_base;
		value &= ~APIC_LVTT_TM;
		value |= APIC_LVT_M;
		la->lvt_timer_last = value;
		lapic_write32(LAPIC_LVT_TIMER, value);
	}
}
1421
/*
 * Interrupt handler for corrected machine check interrupts: acknowledge
 * the interrupt and hand off to the machine check code.
 */
void
lapic_handle_cmc(void)
{

	lapic_eoi();
	cmc_intr();
}
1429
1430 /*
1431  * Called from the mca_init() to activate the CMC interrupt if this CPU is
1432  * responsible for monitoring any MC banks for CMC events.  Since mca_init()
1433  * is called prior to lapic_setup() during boot, this just needs to unmask
1434  * this CPU's LVT_CMCI entry.
1435  */
1436 static void
1437 native_lapic_enable_cmc(void)
1438 {
1439         u_int apic_id;
1440
1441 #ifdef DEV_ATPIC
1442         if (!x2apic_mode && lapic_map == NULL)
1443                 return;
1444 #endif
1445         apic_id = PCPU_GET(apic_id);
1446         KASSERT(lapics[apic_id].la_present,
1447             ("%s: missing APIC %u", __func__, apic_id));
1448         lapics[apic_id].la_lvts[APIC_LVT_CMCI].lvt_masked = 0;
1449         lapics[apic_id].la_lvts[APIC_LVT_CMCI].lvt_active = 1;
1450         if (bootverbose)
1451                 printf("lapic%u: CMCI unmasked\n", apic_id);
1452 }
1453
/*
 * Activate the AMD MCE Thresholding extended LVT for this CPU.  Returns
 * the ELVT index (APIC_ELVT_MCA) on success, or -1 when the local APIC
 * is unmapped or does not provide that extended LVT.  If firmware
 * already unmasked the entry, it is left untouched.
 */
static int
native_lapic_enable_mca_elvt(void)
{
	u_int apic_id;
	uint32_t value;
	int elvt_count;

#ifdef DEV_ATPIC
	if (lapic_map == NULL)
		return (-1);
#endif

	apic_id = PCPU_GET(apic_id);
	KASSERT(lapics[apic_id].la_present,
	    ("%s: missing APIC %u", __func__, apic_id));
	elvt_count = amd_read_elvt_count();
	if (elvt_count <= APIC_ELVT_MCA)
		return (-1);

	/* Respect a pre-existing (e.g. firmware) unmasked configuration. */
	value = lapic_read32(LAPIC_EXT_LVT0 + APIC_ELVT_MCA);
	if ((value & APIC_LVT_M) == 0) {
		if (bootverbose)
			printf("AMD MCE Thresholding Extended LVT is already active\n");
		return (APIC_ELVT_MCA);
	}
	/* Record the desired state; lapic_setup() programs the register. */
	lapics[apic_id].la_elvts[APIC_ELVT_MCA].lvt_masked = 0;
	lapics[apic_id].la_elvts[APIC_ELVT_MCA].lvt_active = 1;
	if (bootverbose)
		printf("lapic%u: MCE Thresholding ELVT unmasked\n", apic_id);
	return (APIC_ELVT_MCA);
}
1485
/*
 * Interrupt handler for local APIC error interrupts: report the error
 * status register contents and acknowledge the interrupt.
 */
void
lapic_handle_error(void)
{
	uint32_t esr;

	/*
	 * Read the contents of the error status register.  Write to
	 * the register first before reading from it to force the APIC
	 * to update its value to indicate any errors that have
	 * occurred since the previous write to the register.
	 */
	lapic_write32(LAPIC_ESR, 0);
	esr = lapic_read32(LAPIC_ESR);

	printf("CPU%d: local APIC error 0x%x\n", PCPU_GET(cpuid), esr);
	lapic_eoi();
}
1503
1504 static u_int
1505 native_apic_cpuid(u_int apic_id)
1506 {
1507 #ifdef SMP
1508         return apic_cpuids[apic_id];
1509 #else
1510         return 0;
1511 #endif
1512 }
1513
1514 /* Request a free IDT vector to be used by the specified IRQ. */
1515 static u_int
1516 native_apic_alloc_vector(u_int apic_id, u_int irq)
1517 {
1518         u_int vector;
1519
1520         KASSERT(irq < num_io_irqs, ("Invalid IRQ %u", irq));
1521
1522         /*
1523          * Search for a free vector.  Currently we just use a very simple
1524          * algorithm to find the first free vector.
1525          */
1526         mtx_lock_spin(&icu_lock);
1527         for (vector = 0; vector < APIC_NUM_IOINTS; vector++) {
1528                 if (lapics[apic_id].la_ioint_irqs[vector] != IRQ_FREE)
1529                         continue;
1530                 lapics[apic_id].la_ioint_irqs[vector] = irq;
1531                 mtx_unlock_spin(&icu_lock);
1532                 return (vector + APIC_IO_INTS);
1533         }
1534         mtx_unlock_spin(&icu_lock);
1535         return (0);
1536 }
1537
1538 /*
1539  * Request 'count' free contiguous IDT vectors to be used by 'count'
1540  * IRQs.  'count' must be a power of two and the vectors will be
1541  * aligned on a boundary of 'align'.  If the request cannot be
1542  * satisfied, 0 is returned.
1543  */
1544 static u_int
1545 native_apic_alloc_vectors(u_int apic_id, u_int *irqs, u_int count, u_int align)
1546 {
1547         u_int first, run, vector;
1548
1549         KASSERT(powerof2(count), ("bad count"));
1550         KASSERT(powerof2(align), ("bad align"));
1551         KASSERT(align >= count, ("align < count"));
1552 #ifdef INVARIANTS
1553         for (run = 0; run < count; run++)
1554                 KASSERT(irqs[run] < num_io_irqs, ("Invalid IRQ %u at index %u",
1555                     irqs[run], run));
1556 #endif
1557
1558         /*
1559          * Search for 'count' free vectors.  As with apic_alloc_vector(),
1560          * this just uses a simple first fit algorithm.
1561          */
1562         run = 0;
1563         first = 0;
1564         mtx_lock_spin(&icu_lock);
1565         for (vector = 0; vector < APIC_NUM_IOINTS; vector++) {
1566                 /* Vector is in use, end run. */
1567                 if (lapics[apic_id].la_ioint_irqs[vector] != IRQ_FREE) {
1568                         run = 0;
1569                         first = 0;
1570                         continue;
1571                 }
1572
1573                 /* Start a new run if run == 0 and vector is aligned. */
1574                 if (run == 0) {
1575                         if ((vector & (align - 1)) != 0)
1576                                 continue;
1577                         first = vector;
1578                 }
1579                 run++;
1580
1581                 /* Keep looping if the run isn't long enough yet. */
1582                 if (run < count)
1583                         continue;
1584
1585                 /* Found a run, assign IRQs and return the first vector. */
1586                 for (vector = 0; vector < count; vector++)
1587                         lapics[apic_id].la_ioint_irqs[first + vector] =
1588                             irqs[vector];
1589                 mtx_unlock_spin(&icu_lock);
1590                 return (first + APIC_IO_INTS);
1591         }
1592         mtx_unlock_spin(&icu_lock);
1593         printf("APIC: Couldn't find APIC vectors for %u IRQs\n", count);
1594         return (0);
1595 }
1596
1597 /*
1598  * Enable a vector for a particular apic_id.  Since all lapics share idt
1599  * entries and ioint_handlers this enables the vector on all lapics.  lapics
1600  * which do not have the vector configured would report spurious interrupts
1601  * should it fire.
1602  */
1603 static void
1604 native_apic_enable_vector(u_int apic_id, u_int vector)
1605 {
1606
1607         KASSERT(vector != IDT_SYSCALL, ("Attempt to overwrite syscall entry"));
1608         KASSERT(ioint_handlers[vector / 32] != NULL,
1609             ("No ISR handler for vector %u", vector));
1610 #ifdef KDTRACE_HOOKS
1611         KASSERT(vector != IDT_DTRACE_RET,
1612             ("Attempt to overwrite DTrace entry"));
1613 #endif
1614         setidt(vector, (pti ? ioint_pti_handlers : ioint_handlers)[vector / 32],
1615             SDT_APIC, SEL_KPL, GSEL_APIC);
1616 }
1617
/*
 * Disable a vector; currently a no-op apart from sanity checks because
 * the shared IDT entry may still be in use by other CPUs (see below).
 */
static void
native_apic_disable_vector(u_int apic_id, u_int vector)
{

	KASSERT(vector != IDT_SYSCALL, ("Attempt to overwrite syscall entry"));
#ifdef KDTRACE_HOOKS
	KASSERT(vector != IDT_DTRACE_RET,
	    ("Attempt to overwrite DTrace entry"));
#endif
	KASSERT(ioint_handlers[vector / 32] != NULL,
	    ("No ISR handler for vector %u", vector));
#ifdef notyet
	/*
	 * We can not currently clear the idt entry because other cpus
	 * may have a valid vector at this offset.
	 */
	setidt(vector, pti ? &IDTVEC(rsvd_pti) : &IDTVEC(rsvd), SDT_APIC,
	    SEL_KPL, GSEL_APIC);
#endif
}
1638
/*
 * Release an APIC vector when it's no longer in use.  Temporarily binds
 * the calling thread to the CPU owning the vector so the free cannot
 * race with a final interrupt delivery on that CPU (skipped while
 * rebooting, when the scheduler may no longer be usable).
 */
static void
native_apic_free_vector(u_int apic_id, u_int vector, u_int irq)
{
	struct thread *td;

	KASSERT(vector >= APIC_IO_INTS && vector != IDT_SYSCALL &&
	    vector <= APIC_IO_INTS + APIC_NUM_IOINTS,
	    ("Vector %u does not map to an IRQ line", vector));
	KASSERT(irq < num_io_irqs, ("Invalid IRQ %u", irq));
	KASSERT(lapics[apic_id].la_ioint_irqs[vector - APIC_IO_INTS] ==
	    irq, ("IRQ mismatch"));
#ifdef KDTRACE_HOOKS
	KASSERT(vector != IDT_DTRACE_RET,
	    ("Attempt to overwrite DTrace entry"));
#endif

	/*
	 * Bind us to the cpu that owned the vector before freeing it so
	 * we don't lose an interrupt delivery race.
	 */
	td = curthread;
	if (!rebooting) {
		thread_lock(td);
		if (sched_is_bound(td))
			panic("apic_free_vector: Thread already bound.\n");
		sched_bind(td, apic_cpuid(apic_id));
		thread_unlock(td);
	}
	mtx_lock_spin(&icu_lock);
	lapics[apic_id].la_ioint_irqs[vector - APIC_IO_INTS] = IRQ_FREE;
	mtx_unlock_spin(&icu_lock);
	if (!rebooting) {
		thread_lock(td);
		sched_unbind(td);
		thread_unlock(td);
	}
}
1677
1678 /* Map an IDT vector (APIC) to an IRQ (interrupt source). */
1679 static u_int
1680 apic_idt_to_irq(u_int apic_id, u_int vector)
1681 {
1682         int irq;
1683
1684         KASSERT(vector >= APIC_IO_INTS && vector != IDT_SYSCALL &&
1685             vector <= APIC_IO_INTS + APIC_NUM_IOINTS,
1686             ("Vector %u does not map to an IRQ line", vector));
1687 #ifdef KDTRACE_HOOKS
1688         KASSERT(vector != IDT_DTRACE_RET,
1689             ("Attempt to overwrite DTrace entry"));
1690 #endif
1691         irq = lapics[apic_id].la_ioint_irqs[vector - APIC_IO_INTS];
1692         if (irq < 0)
1693                 irq = 0;
1694         return (irq);
1695 }
1696
#ifdef DDB
/*
 * Dump data about APIC IDT vector mappings.  The "v" modifier also
 * dumps each vector's interrupt event; "vv" dumps its handlers too.
 */
DB_SHOW_COMMAND(apic, db_show_apic)
{
	struct intsrc *isrc;
	int i, verbose;
	u_int apic_id;
	u_int irq;

	if (strcmp(modif, "vv") == 0)
		verbose = 2;
	else if (strcmp(modif, "v") == 0)
		verbose = 1;
	else
		verbose = 0;
	for (apic_id = 0; apic_id <= max_apic_id; apic_id++) {
		if (lapics[apic_id].la_present == 0)
			continue;
		db_printf("Interrupts bound to lapic %u\n", apic_id);
		for (i = 0; i < APIC_NUM_IOINTS + 1 && !db_pager_quit; i++) {
			irq = lapics[apic_id].la_ioint_irqs[i];
			/* Skip slots holding special IRQ markers. */
			if (irq == IRQ_FREE || irq == IRQ_SYSCALL)
				continue;
#ifdef KDTRACE_HOOKS
			if (irq == IRQ_DTRACE_RET)
				continue;
#endif
#ifdef XENHVM
			if (irq == IRQ_EVTCHN)
				continue;
#endif
			db_printf("vec 0x%2x -> ", i + APIC_IO_INTS);
			if (irq == IRQ_TIMER)
				db_printf("lapic timer\n");
			else if (irq < num_io_irqs) {
				isrc = intr_lookup_source(irq);
				if (isrc == NULL || verbose == 0)
					db_printf("IRQ %u\n", irq);
				else
					db_dump_intr_event(isrc->is_event,
					    verbose == 2);
			} else
				db_printf("IRQ %u ???\n", irq);
		}
	}
}
1745
/*
 * Helper for the "show lapic" DDB command: print the numbers of the bits
 * set in the 32-bit mask register value "v", offset by "base" and led by
 * "prefix:".  Prints nothing at all when no bits are set.
 */
static void
dump_mask(const char *prefix, uint32_t v, int base)
{
        int i, first;

        first = 1;
        for (i = 0; i < 32; i++)
                /* 1u: left-shifting (int)1 by 31 would be undefined. */
                if (v & (1u << i)) {
                        if (first) {
                                db_printf("%s:", prefix);
                                first = 0;
                        }
                        db_printf(" %02x", base + i);
                }
        if (!first)
                db_printf("\n");
}
1763
/* Show info from the lapic regs for this CPU. */
DB_SHOW_COMMAND(lapic, db_show_lapic)
{
        uint32_t v;

        db_printf("lapic ID = %d\n", lapic_id());
        /* Version register: version in the low byte, max LVT above it. */
        v = lapic_read32(LAPIC_VERSION);
        db_printf("version  = %d.%d\n", (v & APIC_VER_VERSION) >> 4,
            v & 0xf);
        db_printf("max LVT  = %d\n", (v & APIC_VER_MAXLVT) >> MAXLVTSHIFT);
        /* Spurious-interrupt vector register, incl. the enable bit. */
        v = lapic_read32(LAPIC_SVR);
        db_printf("SVR      = %02x (%s)\n", v & APIC_SVR_VECTOR,
            v & APIC_SVR_ENABLE ? "enabled" : "disabled");
        db_printf("TPR      = %02x\n", lapic_read32(LAPIC_TPR));

/*
 * Dump one 32-bit window of a 256-bit ISR/TMR/IRR register bank;
 * "index" selects the window (bits index*32 .. index*32+31).
 */
#define dump_field(prefix, regn, index)                                 \
        dump_mask(__XSTRING(prefix ## index),                           \
            lapic_read32(LAPIC_ ## regn ## index),                      \
            index * 32)

        db_printf("In-service Interrupts:\n");
        dump_field(isr, ISR, 0);
        dump_field(isr, ISR, 1);
        dump_field(isr, ISR, 2);
        dump_field(isr, ISR, 3);
        dump_field(isr, ISR, 4);
        dump_field(isr, ISR, 5);
        dump_field(isr, ISR, 6);
        dump_field(isr, ISR, 7);

        db_printf("TMR Interrupts:\n");
        dump_field(tmr, TMR, 0);
        dump_field(tmr, TMR, 1);
        dump_field(tmr, TMR, 2);
        dump_field(tmr, TMR, 3);
        dump_field(tmr, TMR, 4);
        dump_field(tmr, TMR, 5);
        dump_field(tmr, TMR, 6);
        dump_field(tmr, TMR, 7);

        db_printf("IRR Interrupts:\n");
        dump_field(irr, IRR, 0);
        dump_field(irr, IRR, 1);
        dump_field(irr, IRR, 2);
        dump_field(irr, IRR, 3);
        dump_field(irr, IRR, 4);
        dump_field(irr, IRR, 5);
        dump_field(irr, IRR, 6);
        dump_field(irr, IRR, 7);

#undef dump_field
}
1816 #endif
1817
1818 /*
1819  * APIC probing support code.  This includes code to manage enumerators.
1820  */
1821
1822 static SLIST_HEAD(, apic_enumerator) enumerators =
1823         SLIST_HEAD_INITIALIZER(enumerators);
1824 static struct apic_enumerator *best_enum;
1825
1826 void
1827 apic_register_enumerator(struct apic_enumerator *enumerator)
1828 {
1829 #ifdef INVARIANTS
1830         struct apic_enumerator *apic_enum;
1831
1832         SLIST_FOREACH(apic_enum, &enumerators, apic_next) {
1833                 if (apic_enum == enumerator)
1834                         panic("%s: Duplicate register of %s", __func__,
1835                             enumerator->apic_name);
1836         }
1837 #endif
1838         SLIST_INSERT_HEAD(&enumerators, enumerator, apic_next);
1839 }
1840
1841 /*
1842  * We have to look for CPU's very, very early because certain subsystems
1843  * want to know how many CPU's we have extremely early on in the boot
1844  * process.
1845  */
1846 static void
1847 apic_init(void *dummy __unused)
1848 {
1849         struct apic_enumerator *enumerator;
1850         int retval, best;
1851
1852         /* We only support built in local APICs. */
1853         if (!(cpu_feature & CPUID_APIC))
1854                 return;
1855
1856         /* Don't probe if APIC mode is disabled. */
1857         if (resource_disabled("apic", 0))
1858                 return;
1859
1860         /* Probe all the enumerators to find the best match. */
1861         best_enum = NULL;
1862         best = 0;
1863         SLIST_FOREACH(enumerator, &enumerators, apic_next) {
1864                 retval = enumerator->apic_probe();
1865                 if (retval > 0)
1866                         continue;
1867                 if (best_enum == NULL || best < retval) {
1868                         best_enum = enumerator;
1869                         best = retval;
1870                 }
1871         }
1872         if (best_enum == NULL) {
1873                 if (bootverbose)
1874                         printf("APIC: Could not find any APICs.\n");
1875 #ifndef DEV_ATPIC
1876                 panic("running without device atpic requires a local APIC");
1877 #endif
1878                 return;
1879         }
1880
1881         if (bootverbose)
1882                 printf("APIC: Using the %s enumerator.\n",
1883                     best_enum->apic_name);
1884
1885 #ifdef I686_CPU
1886         /*
1887          * To work around an errata, we disable the local APIC on some
1888          * CPUs during early startup.  We need to turn the local APIC back
1889          * on on such CPUs now.
1890          */
1891         ppro_reenable_apic();
1892 #endif
1893
1894         /* Probe the CPU's in the system. */
1895         retval = best_enum->apic_probe_cpus();
1896         if (retval != 0)
1897                 printf("%s: Failed to probe CPUs: returned %d\n",
1898                     best_enum->apic_name, retval);
1899
1900 }
1901 SYSINIT(apic_init, SI_SUB_TUNABLES - 1, SI_ORDER_SECOND, apic_init, NULL);
1902
1903 /*
1904  * Setup the local APIC.  We have to do this prior to starting up the APs
1905  * in the SMP case.
1906  */
1907 static void
1908 apic_setup_local(void *dummy __unused)
1909 {
1910         int retval;
1911
1912         if (best_enum == NULL)
1913                 return;
1914
1915         lapics = malloc(sizeof(*lapics) * (max_apic_id + 1), M_LAPIC,
1916             M_WAITOK | M_ZERO);
1917
1918         /* Initialize the local APIC. */
1919         retval = best_enum->apic_setup_local();
1920         if (retval != 0)
1921                 printf("%s: Failed to setup the local APIC: returned %d\n",
1922                     best_enum->apic_name, retval);
1923 }
1924 SYSINIT(apic_setup_local, SI_SUB_CPU, SI_ORDER_SECOND, apic_setup_local, NULL);
1925
1926 /*
1927  * Setup the I/O APICs.
1928  */
1929 static void
1930 apic_setup_io(void *dummy __unused)
1931 {
1932         int retval;
1933
1934         if (best_enum == NULL)
1935                 return;
1936
1937         /*
1938          * Local APIC must be registered before other PICs and pseudo PICs
1939          * for proper suspend/resume order.
1940          */
1941         intr_register_pic(&lapic_pic);
1942
1943         retval = best_enum->apic_setup_io();
1944         if (retval != 0)
1945                 printf("%s: Failed to setup I/O APICs: returned %d\n",
1946                     best_enum->apic_name, retval);
1947
1948         /*
1949          * Finish setting up the local APIC on the BSP once we know
1950          * how to properly program the LINT pins.  In particular, this
1951          * enables the EOI suppression mode, if LAPIC supports it and
1952          * user did not disable the mode.
1953          */
1954         lapic_setup(1);
1955         if (bootverbose)
1956                 lapic_dump("BSP");
1957
1958         /* Enable the MSI "pic". */
1959         init_ops.msi_init();
1960
1961 #ifdef XENHVM
1962         xen_intr_alloc_irqs();
1963 #endif
1964 }
1965 SYSINIT(apic_setup_io, SI_SUB_INTR, SI_ORDER_THIRD, apic_setup_io, NULL);
1966
1967 #ifdef SMP
1968 /*
1969  * Inter Processor Interrupt functions.  The lapic_ipi_*() functions are
1970  * private to the MD code.  The public interface for the rest of the
1971  * kernel is defined in mp_machdep.c.
1972  */
1973
1974 /*
1975  * Wait delay microseconds for IPI to be sent.  If delay is -1, we
1976  * wait forever.
1977  */
1978 static int
1979 native_lapic_ipi_wait(int delay)
1980 {
1981         uint64_t rx;
1982
1983         /* LAPIC_ICR.APIC_DELSTAT_MASK is undefined in x2APIC mode */
1984         if (x2apic_mode)
1985                 return (1);
1986
1987         for (rx = 0; delay == -1 || rx < lapic_ipi_wait_mult * delay; rx++) {
1988                 if ((lapic_read_icr_lo() & APIC_DELSTAT_MASK) ==
1989                     APIC_DELSTAT_IDLE)
1990                         return (1);
1991                 ia32_pause();
1992         }
1993         return (0);
1994 }
1995
/*
 * Write a caller-composed command to the local APIC's interrupt command
 * register (ICR).  "icrlo" supplies the low 32 bits (vector, delivery
 * mode, destination shorthand, ...); "dest" is the target APIC ID and
 * is only used when icrlo selects the explicit destination field.
 */
static void
native_lapic_ipi_raw(register_t icrlo, u_int dest)
{
        uint32_t icrhi;

        /* XXX: Need more sanity checking of icrlo? */
        KASSERT(x2apic_mode || lapic_map != NULL,
            ("%s called too early", __func__));
        KASSERT(x2apic_mode ||
            (dest & ~(APIC_ID_MASK >> APIC_ID_SHIFT)) == 0,
            ("%s: invalid dest field", __func__));
        KASSERT((icrlo & APIC_ICRLO_RESV_MASK) == 0,
            ("%s: reserved bits set in ICR LO register", __func__));

        if ((icrlo & APIC_DEST_MASK) == APIC_DEST_DESTFLD) {
                /*
                 * In x2APIC mode the destination is the full APIC ID;
                 * in xAPIC mode it lives in the top bits of ICR_HI.
                 */
                if (x2apic_mode)
                        icrhi = dest;
                else
                        icrhi = dest << APIC_ID_SHIFT;
                lapic_write_icr(icrhi, icrlo);
        } else {
                /* A destination shorthand is in use; skip ICR_HI. */
                lapic_write_icr_lo(icrlo);
        }
}
2020
2021 #define BEFORE_SPIN     50000
2022 #ifdef DETECT_DEADLOCK
2023 #define AFTER_SPIN      50
2024 #endif
2025
/*
 * Send an IPI with the given vector to "dest", which is either a single
 * APIC ID or one of the APIC_IPI_DEST_* shorthands.  Vectors at or above
 * IPI_NMI_FIRST are pseudo-vectors requesting NMI delivery instead.
 */
static void
native_lapic_ipi_vectored(u_int vector, int dest)
{
        register_t icrlo, destfield;

        KASSERT((vector & ~APIC_VECTOR_MASK) == 0,
            ("%s: invalid vector %d", __func__, vector));

        destfield = 0;
        switch (dest) {
        case APIC_IPI_DEST_SELF:
                /* x2APIC has a dedicated self-IPI register. */
                if (x2apic_mode && vector < IPI_NMI_FIRST) {
                        lapic_write_self_ipi(vector);
                        return;
                }
                icrlo = APIC_DEST_SELF;
                break;
        case APIC_IPI_DEST_ALL:
                icrlo = APIC_DEST_ALLISELF;
                break;
        case APIC_IPI_DEST_OTHERS:
                icrlo = APIC_DEST_ALLESELF;
                break;
        default:
                /* A concrete APIC ID goes in the destination field. */
                icrlo = 0;
                KASSERT(x2apic_mode ||
                    (dest & ~(APIC_ID_MASK >> APIC_ID_SHIFT)) == 0,
                    ("%s: invalid destination 0x%x", __func__, dest));
                destfield = dest;
        }

        /*
         * NMI IPIs are just fake vectors used to send a NMI.  Use special rules
         * regarding NMIs if passed, otherwise specify the vector.
         */
        if (vector >= IPI_NMI_FIRST)
                icrlo |= APIC_DELMODE_NMI;
        else
                icrlo |= vector | APIC_DELMODE_FIXED;
        icrlo |= APIC_DESTMODE_PHY | APIC_TRIGMOD_EDGE | APIC_LEVEL_ASSERT;

        /* Wait for an earlier IPI to finish. */
        if (!lapic_ipi_wait(BEFORE_SPIN)) {
                if (KERNEL_PANICKED())
                        return;
                else
                        panic("APIC: Previous IPI is stuck");
        }

        lapic_ipi_raw(icrlo, destfield);

#ifdef DETECT_DEADLOCK
        /* Wait for IPI to be delivered. */
        if (!lapic_ipi_wait(AFTER_SPIN)) {
#ifdef needsattention
                /*
                 * XXX FIXME:
                 *
                 * The above function waits for the message to actually be
                 * delivered.  It breaks out after an arbitrary timeout
                 * since the message should eventually be delivered (at
                 * least in theory) and that if it wasn't we would catch
                 * the failure with the check above when the next IPI is
                 * sent.
                 *
                 * We could skip this wait entirely, EXCEPT it probably
                 * protects us from other routines that assume that the
                 * message was delivered and acted upon when this function
                 * returns.
                 */
                printf("APIC: IPI might be stuck\n");
#else /* !needsattention */
                /* Wait until message is sent without a timeout. */
                while (lapic_read_icr_lo() & APIC_DELSTAT_PEND)
                        ia32_pause();
#endif /* needsattention */
        }
#endif /* DETECT_DEADLOCK */
}
2105
2106 #endif /* SMP */
2107
2108 /*
2109  * Since the IDT is shared by all CPUs the IPI slot update needs to be globally
2110  * visible.
2111  *
2112  * Consider the case where an IPI is generated immediately after allocation:
2113  *     vector = lapic_ipi_alloc(ipifunc);
2114  *     ipi_selected(other_cpus, vector);
2115  *
2116  * In xAPIC mode a write to ICR_LO has serializing semantics because the
2117  * APIC page is mapped as an uncached region. In x2APIC mode there is an
2118  * explicit 'mfence' before the ICR MSR is written. Therefore in both cases
2119  * the IDT slot update is globally visible before the IPI is delivered.
2120  */
2121 static int
2122 native_lapic_ipi_alloc(inthand_t *ipifunc)
2123 {
2124         struct gate_descriptor *ip;
2125         long func;
2126         int idx, vector;
2127
2128         KASSERT(ipifunc != &IDTVEC(rsvd) && ipifunc != &IDTVEC(rsvd_pti),
2129             ("invalid ipifunc %p", ipifunc));
2130
2131         vector = -1;
2132         mtx_lock_spin(&icu_lock);
2133         for (idx = IPI_DYN_FIRST; idx <= IPI_DYN_LAST; idx++) {
2134                 ip = &idt[idx];
2135                 func = (ip->gd_hioffset << 16) | ip->gd_looffset;
2136                 if ((!pti && func == (uintptr_t)&IDTVEC(rsvd)) ||
2137                     (pti && func == (uintptr_t)&IDTVEC(rsvd_pti))) {
2138                         vector = idx;
2139                         setidt(vector, ipifunc, SDT_APIC, SEL_KPL, GSEL_APIC);
2140                         break;
2141                 }
2142         }
2143         mtx_unlock_spin(&icu_lock);
2144         return (vector);
2145 }
2146
2147 static void
2148 native_lapic_ipi_free(int vector)
2149 {
2150         struct gate_descriptor *ip;
2151         long func;
2152
2153         KASSERT(vector >= IPI_DYN_FIRST && vector <= IPI_DYN_LAST,
2154             ("%s: invalid vector %d", __func__, vector));
2155
2156         mtx_lock_spin(&icu_lock);
2157         ip = &idt[vector];
2158         func = (ip->gd_hioffset << 16) | ip->gd_looffset;
2159         KASSERT(func != (uintptr_t)&IDTVEC(rsvd) &&
2160             func != (uintptr_t)&IDTVEC(rsvd_pti),
2161             ("invalid idtfunc %#lx", func));
2162         setidt(vector, pti ? &IDTVEC(rsvd_pti) : &IDTVEC(rsvd), SDT_APIC,
2163             SEL_KPL, GSEL_APIC);
2164         mtx_unlock_spin(&icu_lock);
2165 }