/*
 * FreeBSD: sys/x86/x86/local_apic.c
 */
1 /*-
2  * SPDX-License-Identifier: BSD-3-Clause
3  *
4  * Copyright (c) 2003 John Baldwin <jhb@FreeBSD.org>
5  * Copyright (c) 1996, by Steve Passe
6  * All rights reserved.
7  *
8  * Redistribution and use in source and binary forms, with or without
9  * modification, are permitted provided that the following conditions
10  * are met:
11  * 1. Redistributions of source code must retain the above copyright
12  *    notice, this list of conditions and the following disclaimer.
13  * 2. The name of the developer may NOT be used to endorse or promote products
14  *    derived from this software without specific prior written permission.
15  * 3. Neither the name of the author nor the names of any co-contributors
16  *    may be used to endorse or promote products derived from this software
17  *    without specific prior written permission.
18  *
19  * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
20  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
21  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
22  * ARE DISCLAIMED.  IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
23  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
24  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
25  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
26  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
27  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
28  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
29  * SUCH DAMAGE.
30  */
31
32 /*
33  * Local APIC support on Pentium and later processors.
34  */
35
36 #include <sys/cdefs.h>
37 __FBSDID("$FreeBSD$");
38
39 #include "opt_atpic.h"
40 #include "opt_hwpmc_hooks.h"
41
42 #include "opt_ddb.h"
43
44 #include <sys/param.h>
45 #include <sys/systm.h>
46 #include <sys/bus.h>
47 #include <sys/kernel.h>
48 #include <sys/lock.h>
49 #include <sys/malloc.h>
50 #include <sys/mutex.h>
51 #include <sys/pcpu.h>
52 #include <sys/proc.h>
53 #include <sys/sched.h>
54 #include <sys/smp.h>
55 #include <sys/sysctl.h>
56 #include <sys/timeet.h>
57
58 #include <vm/vm.h>
59 #include <vm/pmap.h>
60
61 #include <x86/apicreg.h>
62 #include <machine/clock.h>
63 #include <machine/cpufunc.h>
64 #include <machine/cputypes.h>
65 #include <machine/frame.h>
66 #include <machine/intr_machdep.h>
67 #include <x86/apicvar.h>
68 #include <x86/mca.h>
69 #include <machine/md_var.h>
70 #include <machine/smp.h>
71 #include <machine/specialreg.h>
72 #include <x86/init.h>
73
74 #ifdef DDB
75 #include <sys/interrupt.h>
76 #include <ddb/ddb.h>
77 #endif
78
79 #ifdef __amd64__
80 #define SDT_APIC        SDT_SYSIGT
81 #define SDT_APICT       SDT_SYSIGT
82 #define GSEL_APIC       0
83 #else
84 #define SDT_APIC        SDT_SYS386IGT
85 #define SDT_APICT       SDT_SYS386TGT
86 #define GSEL_APIC       GSEL(GCODE_SEL, SEL_KPL)
87 #endif
88
89 static MALLOC_DEFINE(M_LAPIC, "local_apic", "Local APIC items");
90
91 /* Sanity checks on IDT vectors. */
92 CTASSERT(APIC_IO_INTS + APIC_NUM_IOINTS == APIC_TIMER_INT);
93 CTASSERT(APIC_TIMER_INT < APIC_LOCAL_INTS);
94 CTASSERT(APIC_LOCAL_INTS == 240);
95 CTASSERT(IPI_STOP < APIC_SPURIOUS_INT);
96
97 /* Magic IRQ values for the timer and syscalls. */
98 #define IRQ_TIMER       (NUM_IO_INTS + 1)
99 #define IRQ_SYSCALL     (NUM_IO_INTS + 2)
100 #define IRQ_DTRACE_RET  (NUM_IO_INTS + 3)
101 #define IRQ_EVTCHN      (NUM_IO_INTS + 4)
102
/*
 * Operating modes for the local APIC timer.  LAT_MODE_UNDEF means the
 * timer has not been configured yet; native_lapic_setup() only restarts
 * the timer when the mode is something else.
 */
enum lat_timer_mode {
	LAT_MODE_UNDEF =	0,	/* timer not yet programmed */
	LAT_MODE_PERIODIC =	1,	/* periodic interrupts */
	LAT_MODE_ONESHOT =	2,	/* single count-down interrupt */
	LAT_MODE_DEADLINE =	3,	/* TSC-deadline mode */
};
109
110 /*
111  * Support for local APICs.  Local APICs manage interrupts on each
112  * individual processor as opposed to I/O APICs which receive interrupts
113  * from I/O devices and then forward them on to the local APICs.
114  *
115  * Local APICs can also send interrupts to each other thus providing the
116  * mechanism for IPIs.
117  */
118
/*
 * Software description of one local vector table (LVT) entry;
 * lvt_mode_impl() folds these fields into the hardware register format.
 */
struct lvt {
	u_int lvt_edgetrigger:1;	/* 1: edge triggered, 0: level */
	u_int lvt_activehi:1;		/* 1: active high, 0: active low */
	u_int lvt_masked:1;		/* 1: entry masked (APIC_LVT_M) */
	u_int lvt_active:1;		/* 1: per-CPU override is in effect */
	u_int lvt_mode:16;		/* delivery mode (APIC_LVT_DM_*) */
	u_int lvt_vector:8;		/* IDT vector for fixed delivery mode */
};
127
128 struct lapic {
129         struct lvt la_lvts[APIC_LVT_MAX + 1];
130         struct lvt la_elvts[APIC_ELVT_MAX + 1];;
131         u_int la_id:8;
132         u_int la_cluster:4;
133         u_int la_cluster_id:2;
134         u_int la_present:1;
135         u_long *la_timer_count;
136         uint64_t la_timer_period;
137         enum lat_timer_mode la_timer_mode;
138         uint32_t lvt_timer_base;
139         uint32_t lvt_timer_last;
140         /* Include IDT_SYSCALL to make indexing easier. */
141         int la_ioint_irqs[APIC_NUM_IOINTS + 1];
142 } static *lapics;
143
/*
 * Global defaults for local APIC LVT entries.  Initializer order matches
 * struct lvt: edgetrigger, activehi, masked, active, mode, vector.
 */
static struct lvt lvts[APIC_LVT_MAX + 1] = {
	{ 1, 1, 1, 1, APIC_LVT_DM_EXTINT, 0 },	/* LINT0: masked ExtINT */
	{ 1, 1, 0, 1, APIC_LVT_DM_NMI, 0 },	/* LINT1: NMI */
	{ 1, 1, 1, 1, APIC_LVT_DM_FIXED, APIC_TIMER_INT },	/* Timer */
	{ 1, 1, 0, 1, APIC_LVT_DM_FIXED, APIC_ERROR_INT },	/* Error */
	{ 1, 1, 1, 1, APIC_LVT_DM_NMI, 0 },	/* PMC */
	{ 1, 1, 1, 1, APIC_LVT_DM_FIXED, APIC_THERMAL_INT },	/* Thermal */
	{ 1, 1, 1, 1, APIC_LVT_DM_FIXED, APIC_CMC_INT },	/* CMCI */
};
154
/*
 * Global defaults for AMD local APIC extended LVT (ELVT) entries.  All
 * start with lvt_active == 0; entry 1 defaults to APIC_CMC_INT
 * (presumably for the MCA ELVT — see native_lapic_enable_mca_elvt()).
 */
static struct lvt elvts[APIC_ELVT_MAX + 1] = {
	{ 1, 1, 1, 0, APIC_LVT_DM_FIXED, 0 },
	{ 1, 1, 1, 0, APIC_LVT_DM_FIXED, APIC_CMC_INT },
	{ 1, 1, 1, 0, APIC_LVT_DM_FIXED, 0 },
	{ 1, 1, 1, 0, APIC_LVT_DM_FIXED, 0 },
};
162
/*
 * I/O interrupt entry points, one per 32-vector block; entry 0 is NULL
 * since vectors 0-31 are reserved (see the CTASSERTs above).
 */
static inthand_t *ioint_handlers[] = {
	NULL,			/* 0 - 31 */
	IDTVEC(apic_isr1),	/* 32 - 63 */
	IDTVEC(apic_isr2),	/* 64 - 95 */
	IDTVEC(apic_isr3),	/* 96 - 127 */
	IDTVEC(apic_isr4),	/* 128 - 159 */
	IDTVEC(apic_isr5),	/* 160 - 191 */
	IDTVEC(apic_isr6),	/* 192 - 223 */
	IDTVEC(apic_isr7),	/* 224 - 255 */
};
173
/*
 * PTI (page-table isolation) variants of ioint_handlers, selected when
 * the global 'pti' knob is set.
 */
static inthand_t *ioint_pti_handlers[] = {
	NULL,			/* 0 - 31 */
	IDTVEC(apic_isr1_pti),	/* 32 - 63 */
	IDTVEC(apic_isr2_pti),	/* 64 - 95 */
	IDTVEC(apic_isr3_pti),	/* 96 - 127 */
	IDTVEC(apic_isr4_pti),	/* 128 - 159 */
	IDTVEC(apic_isr5_pti),	/* 160 - 191 */
	IDTVEC(apic_isr6_pti),	/* 192 - 223 */
	IDTVEC(apic_isr7_pti),	/* 224 - 255 */
};
184
/* Timer divide-configuration register values, indexed by log2(divisor). */
static u_int32_t lapic_timer_divisors[] = {
	APIC_TDCR_1, APIC_TDCR_2, APIC_TDCR_4, APIC_TDCR_8, APIC_TDCR_16,
	APIC_TDCR_32, APIC_TDCR_64, APIC_TDCR_128
};
189
190 extern inthand_t IDTVEC(rsvd_pti), IDTVEC(rsvd);
191
192 volatile char *lapic_map;
193 vm_paddr_t lapic_paddr;
194 int x2apic_mode;
195 int lapic_eoi_suppression;
196 static int lapic_timer_tsc_deadline;
197 static u_long lapic_timer_divisor, count_freq;
198 static struct eventtimer lapic_et;
199 #ifdef SMP
200 static uint64_t lapic_ipi_wait_mult;
201 #endif
202 unsigned int max_apic_id;
203
204 SYSCTL_NODE(_hw, OID_AUTO, apic, CTLFLAG_RD, 0, "APIC options");
205 SYSCTL_INT(_hw_apic, OID_AUTO, x2apic_mode, CTLFLAG_RD, &x2apic_mode, 0, "");
206 SYSCTL_INT(_hw_apic, OID_AUTO, eoi_suppression, CTLFLAG_RD,
207     &lapic_eoi_suppression, 0, "");
208 SYSCTL_INT(_hw_apic, OID_AUTO, timer_tsc_deadline, CTLFLAG_RD,
209     &lapic_timer_tsc_deadline, 0, "");
210
211 static uint32_t
212 lapic_read32(enum LAPIC_REGISTERS reg)
213 {
214         uint32_t res;
215
216         if (x2apic_mode) {
217                 res = rdmsr32(MSR_APIC_000 + reg);
218         } else {
219                 res = *(volatile uint32_t *)(lapic_map + reg * LAPIC_MEM_MUL);
220         }
221         return (res);
222 }
223
224 static void
225 lapic_write32(enum LAPIC_REGISTERS reg, uint32_t val)
226 {
227
228         if (x2apic_mode) {
229                 mfence();
230                 lfence();
231                 wrmsr(MSR_APIC_000 + reg, val);
232         } else {
233                 *(volatile uint32_t *)(lapic_map + reg * LAPIC_MEM_MUL) = val;
234         }
235 }
236
237 static void
238 lapic_write32_nofence(enum LAPIC_REGISTERS reg, uint32_t val)
239 {
240
241         if (x2apic_mode) {
242                 wrmsr(MSR_APIC_000 + reg, val);
243         } else {
244                 *(volatile uint32_t *)(lapic_map + reg * LAPIC_MEM_MUL) = val;
245         }
246 }
247
248 #ifdef SMP
249 static uint64_t
250 lapic_read_icr(void)
251 {
252         uint64_t v;
253         uint32_t vhi, vlo;
254
255         if (x2apic_mode) {
256                 v = rdmsr(MSR_APIC_000 + LAPIC_ICR_LO);
257         } else {
258                 vhi = lapic_read32(LAPIC_ICR_HI);
259                 vlo = lapic_read32(LAPIC_ICR_LO);
260                 v = ((uint64_t)vhi << 32) | vlo;
261         }
262         return (v);
263 }
264
265 static uint64_t
266 lapic_read_icr_lo(void)
267 {
268
269         return (lapic_read32(LAPIC_ICR_LO));
270 }
271
272 static void
273 lapic_write_icr(uint32_t vhi, uint32_t vlo)
274 {
275         uint64_t v;
276
277         if (x2apic_mode) {
278                 v = ((uint64_t)vhi << 32) | vlo;
279                 mfence();
280                 wrmsr(MSR_APIC_000 + LAPIC_ICR_LO, v);
281         } else {
282                 lapic_write32(LAPIC_ICR_HI, vhi);
283                 lapic_write32(LAPIC_ICR_LO, vlo);
284         }
285 }
286 #endif /* SMP */
287
288 static void
289 native_lapic_enable_x2apic(void)
290 {
291         uint64_t apic_base;
292
293         apic_base = rdmsr(MSR_APICBASE);
294         apic_base |= APICBASE_X2APIC | APICBASE_ENABLED;
295         wrmsr(MSR_APICBASE, apic_base);
296 }
297
298 static bool
299 native_lapic_is_x2apic(void)
300 {
301         uint64_t apic_base;
302
303         apic_base = rdmsr(MSR_APICBASE);
304         return ((apic_base & (APICBASE_X2APIC | APICBASE_ENABLED)) ==
305             (APICBASE_X2APIC | APICBASE_ENABLED));
306 }
307
308 static void     lapic_enable(void);
309 static void     lapic_resume(struct pic *pic, bool suspend_cancelled);
310 static void     lapic_timer_oneshot(struct lapic *);
311 static void     lapic_timer_oneshot_nointr(struct lapic *, uint32_t);
312 static void     lapic_timer_periodic(struct lapic *);
313 static void     lapic_timer_deadline(struct lapic *);
314 static void     lapic_timer_stop(struct lapic *);
315 static void     lapic_timer_set_divisor(u_int divisor);
316 static uint32_t lvt_mode(struct lapic *la, u_int pin, uint32_t value);
317 static int      lapic_et_start(struct eventtimer *et,
318                     sbintime_t first, sbintime_t period);
319 static int      lapic_et_stop(struct eventtimer *et);
320 static u_int    apic_idt_to_irq(u_int apic_id, u_int vector);
321 static void     lapic_set_tpr(u_int vector);
322
323 struct pic lapic_pic = { .pic_resume = lapic_resume };
324
325 /* Forward declarations for apic_ops */
326 static void     native_lapic_create(u_int apic_id, int boot_cpu);
327 static void     native_lapic_init(vm_paddr_t addr);
328 static void     native_lapic_xapic_mode(void);
329 static void     native_lapic_setup(int boot);
330 static void     native_lapic_dump(const char *str);
331 static void     native_lapic_disable(void);
332 static void     native_lapic_eoi(void);
333 static int      native_lapic_id(void);
334 static int      native_lapic_intr_pending(u_int vector);
335 static u_int    native_apic_cpuid(u_int apic_id);
336 static u_int    native_apic_alloc_vector(u_int apic_id, u_int irq);
337 static u_int    native_apic_alloc_vectors(u_int apic_id, u_int *irqs,
338                     u_int count, u_int align);
339 static void     native_apic_disable_vector(u_int apic_id, u_int vector);
340 static void     native_apic_enable_vector(u_int apic_id, u_int vector);
341 static void     native_apic_free_vector(u_int apic_id, u_int vector, u_int irq);
342 static void     native_lapic_set_logical_id(u_int apic_id, u_int cluster,
343                     u_int cluster_id);
344 static int      native_lapic_enable_pmc(void);
345 static void     native_lapic_disable_pmc(void);
346 static void     native_lapic_reenable_pmc(void);
347 static void     native_lapic_enable_cmc(void);
348 static int      native_lapic_enable_mca_elvt(void);
349 static int      native_lapic_set_lvt_mask(u_int apic_id, u_int lvt,
350                     u_char masked);
351 static int      native_lapic_set_lvt_mode(u_int apic_id, u_int lvt,
352                     uint32_t mode);
353 static int      native_lapic_set_lvt_polarity(u_int apic_id, u_int lvt,
354                     enum intr_polarity pol);
355 static int      native_lapic_set_lvt_triggermode(u_int apic_id, u_int lvt,
356                     enum intr_trigger trigger);
357 #ifdef SMP
358 static void     native_lapic_ipi_raw(register_t icrlo, u_int dest);
359 static void     native_lapic_ipi_vectored(u_int vector, int dest);
360 static int      native_lapic_ipi_wait(int delay);
361 #endif /* SMP */
362 static int      native_lapic_ipi_alloc(inthand_t *ipifunc);
363 static void     native_lapic_ipi_free(int vector);
364
/*
 * Method table for the apic_ops indirection layer; every entry points
 * at the native (bare-metal) implementation in this file.
 */
struct apic_ops apic_ops = {
	.create			= native_lapic_create,
	.init			= native_lapic_init,
	.xapic_mode		= native_lapic_xapic_mode,
	.is_x2apic		= native_lapic_is_x2apic,
	.setup			= native_lapic_setup,
	.dump			= native_lapic_dump,
	.disable		= native_lapic_disable,
	.eoi			= native_lapic_eoi,
	.id			= native_lapic_id,
	.intr_pending		= native_lapic_intr_pending,
	.set_logical_id		= native_lapic_set_logical_id,
	.cpuid			= native_apic_cpuid,
	.alloc_vector		= native_apic_alloc_vector,
	.alloc_vectors		= native_apic_alloc_vectors,
	.enable_vector		= native_apic_enable_vector,
	.disable_vector		= native_apic_disable_vector,
	.free_vector		= native_apic_free_vector,
	.enable_pmc		= native_lapic_enable_pmc,
	.disable_pmc		= native_lapic_disable_pmc,
	.reenable_pmc		= native_lapic_reenable_pmc,
	.enable_cmc		= native_lapic_enable_cmc,
	.enable_mca_elvt	= native_lapic_enable_mca_elvt,
#ifdef SMP
	.ipi_raw		= native_lapic_ipi_raw,
	.ipi_vectored		= native_lapic_ipi_vectored,
	.ipi_wait		= native_lapic_ipi_wait,
#endif
	.ipi_alloc		= native_lapic_ipi_alloc,
	.ipi_free		= native_lapic_ipi_free,
	.set_lvt_mask		= native_lapic_set_lvt_mask,
	.set_lvt_mode		= native_lapic_set_lvt_mode,
	.set_lvt_polarity	= native_lapic_set_lvt_polarity,
	.set_lvt_triggermode	= native_lapic_set_lvt_triggermode,
};
400
401 static uint32_t
402 lvt_mode_impl(struct lapic *la, struct lvt *lvt, u_int pin, uint32_t value)
403 {
404
405         value &= ~(APIC_LVT_M | APIC_LVT_TM | APIC_LVT_IIPP | APIC_LVT_DM |
406             APIC_LVT_VECTOR);
407         if (lvt->lvt_edgetrigger == 0)
408                 value |= APIC_LVT_TM;
409         if (lvt->lvt_activehi == 0)
410                 value |= APIC_LVT_IIPP_INTALO;
411         if (lvt->lvt_masked)
412                 value |= APIC_LVT_M;
413         value |= lvt->lvt_mode;
414         switch (lvt->lvt_mode) {
415         case APIC_LVT_DM_NMI:
416         case APIC_LVT_DM_SMI:
417         case APIC_LVT_DM_INIT:
418         case APIC_LVT_DM_EXTINT:
419                 if (!lvt->lvt_edgetrigger && bootverbose) {
420                         printf("lapic%u: Forcing LINT%u to edge trigger\n",
421                             la->la_id, pin);
422                         value &= ~APIC_LVT_TM;
423                 }
424                 /* Use a vector of 0. */
425                 break;
426         case APIC_LVT_DM_FIXED:
427                 value |= lvt->lvt_vector;
428                 break;
429         default:
430                 panic("bad APIC LVT delivery mode: %#x\n", value);
431         }
432         return (value);
433 }
434
435 static uint32_t
436 lvt_mode(struct lapic *la, u_int pin, uint32_t value)
437 {
438         struct lvt *lvt;
439
440         KASSERT(pin <= APIC_LVT_MAX,
441             ("%s: pin %u out of range", __func__, pin));
442         if (la->la_lvts[pin].lvt_active)
443                 lvt = &la->la_lvts[pin];
444         else
445                 lvt = &lvts[pin];
446
447         return (lvt_mode_impl(la, lvt, pin, value));
448 }
449
/*
 * Compute the register value for AMD extended LVT entry 'idx'.  ELVTs
 * must have been explicitly activated and are required to be edge
 * triggered and active high.
 */
static uint32_t
elvt_mode(struct lapic *la, u_int idx, uint32_t value)
{
	struct lvt *elvt;

	KASSERT(idx <= APIC_ELVT_MAX,
	    ("%s: idx %u out of range", __func__, idx));

	elvt = &la->la_elvts[idx];
	KASSERT(elvt->lvt_active, ("%s: ELVT%u is not active", __func__, idx));
	KASSERT(elvt->lvt_edgetrigger,
	    ("%s: ELVT%u is not edge triggered", __func__, idx));
	KASSERT(elvt->lvt_activehi,
	    ("%s: ELVT%u is not active high", __func__, idx));
	return (lvt_mode_impl(la, elvt, idx, value));
}
466
/*
 * Map the local APIC and setup necessary interrupt vectors.
 * Runs on the BSP: maps (or switches to x2APIC access of) the register
 * page, installs the IDT entries for APIC-local interrupts, optionally
 * registers the LAPIC event timer, and calibrates the IPI wait loop.
 */
static void
native_lapic_init(vm_paddr_t addr)
{
#ifdef SMP
	uint64_t r, r1, r2, rx;
#endif
	uint32_t ver;
	u_int regs[4];
	int i, arat;

	/*
	 * Enable x2APIC mode if possible. Map the local APIC
	 * registers page.
	 *
	 * Keep the LAPIC registers page mapped uncached for x2APIC
	 * mode too, to have direct map page attribute set to
	 * uncached.  This is needed to work around CPU errata present
	 * on all Intel processors.
	 */
	KASSERT(trunc_page(addr) == addr,
	    ("local APIC not aligned on a page boundary"));
	lapic_paddr = addr;
	lapic_map = pmap_mapdev(addr, PAGE_SIZE);
	if (x2apic_mode) {
		native_lapic_enable_x2apic();
		/* Register access goes through MSRs from here on. */
		lapic_map = NULL;
	}

	/* Setup the spurious interrupt handler. */
	setidt(APIC_SPURIOUS_INT, IDTVEC(spuriousint), SDT_APIC, SEL_KPL,
	    GSEL_APIC);

	/* Perform basic initialization of the BSP's local APIC. */
	lapic_enable();

	/* Set BSP's per-CPU local APIC ID. */
	PCPU_SET(apic_id, lapic_id());

	/* Local APIC timer interrupt. */
	setidt(APIC_TIMER_INT, pti ? IDTVEC(timerint_pti) : IDTVEC(timerint),
	    SDT_APIC, SEL_KPL, GSEL_APIC);

	/* Local APIC error interrupt. */
	setidt(APIC_ERROR_INT, pti ? IDTVEC(errorint_pti) : IDTVEC(errorint),
	    SDT_APIC, SEL_KPL, GSEL_APIC);

	/* XXX: Thermal interrupt */

	/* Local APIC CMCI. */
	setidt(APIC_CMC_INT, pti ? IDTVEC(cmcint_pti) : IDTVEC(cmcint),
	    SDT_APICT, SEL_KPL, GSEL_APIC);

	/* Register the event timer unless hint "apic.0.clock" is 0. */
	if ((resource_int_value("apic", 0, "clock", &i) != 0 || i != 0)) {
		arat = 0;
		/* Intel CPUID 0x06 EAX[2] set if APIC timer runs in C3. */
		if (cpu_vendor_id == CPU_VENDOR_INTEL && cpu_high >= 6) {
			do_cpuid(0x06, regs);
			if ((regs[0] & CPUTPM1_ARAT) != 0)
				arat = 1;
		}
		bzero(&lapic_et, sizeof(lapic_et));
		lapic_et.et_name = "LAPIC";
		lapic_et.et_flags = ET_FLAGS_PERIODIC | ET_FLAGS_ONESHOT |
		    ET_FLAGS_PERCPU;
		lapic_et.et_quality = 600;
		if (!arat) {
			/* Timer may stop in deep C-states; derate it. */
			lapic_et.et_flags |= ET_FLAGS_C3STOP;
			lapic_et.et_quality = 100;
		}
		if ((cpu_feature & CPUID_TSC) != 0 &&
		    (cpu_feature2 & CPUID2_TSCDLT) != 0 &&
		    tsc_is_invariant && tsc_freq != 0) {
			/* TSC-deadline mode available; tunable can veto. */
			lapic_timer_tsc_deadline = 1;
			TUNABLE_INT_FETCH("hw.lapic_tsc_deadline",
			    &lapic_timer_tsc_deadline);
		}

		lapic_et.et_frequency = 0;
		/* We don't know frequency yet, so trying to guess. */
		lapic_et.et_min_period = 0x00001000LL;
		lapic_et.et_max_period = SBT_1S;
		lapic_et.et_start = lapic_et_start;
		lapic_et.et_stop = lapic_et_stop;
		lapic_et.et_priv = NULL;
		et_register(&lapic_et);
	}

	/*
	 * Set lapic_eoi_suppression after lapic_enable(), to not
	 * enable suppression in the hardware prematurely.  Note that
	 * we by default enable suppression even when system only has
	 * one IO-APIC, since EOI is broadcasted to all APIC agents,
	 * including CPUs, otherwise.
	 *
	 * It seems that at least some KVM versions report
	 * EOI_SUPPRESSION bit, but auto-EOI does not work.
	 */
	ver = lapic_read32(LAPIC_VERSION);
	if ((ver & APIC_VER_EOI_SUPPRESSION) != 0) {
		lapic_eoi_suppression = 1;
		if (vm_guest == VM_GUEST_KVM) {
			if (bootverbose)
				printf(
		       "KVM -- disabling lapic eoi suppression\n");
			lapic_eoi_suppression = 0;
		}
		TUNABLE_INT_FETCH("hw.lapic_eoi_suppression",
		    &lapic_eoi_suppression);
	}

#ifdef SMP
#define	LOOPS	100000
	/*
	 * Calibrate the busy loop waiting for IPI ack in xAPIC mode.
	 * lapic_ipi_wait_mult contains the number of iterations which
	 * approximately delay execution for 1 microsecond (the
	 * argument to native_lapic_ipi_wait() is in microseconds).
	 *
	 * We assume that TSC is present and already measured.
	 * Possible TSC frequency jumps are irrelevant to the
	 * calibration loop below, the CPU clock management code is
	 * not yet started, and we do not enter sleep states.
	 */
	KASSERT((cpu_feature & CPUID_TSC) != 0 && tsc_freq != 0,
	    ("TSC not initialized"));
	if (!x2apic_mode) {
		r = rdtsc();
		for (rx = 0; rx < LOOPS; rx++) {
			(void)lapic_read_icr_lo();
			ia32_pause();
		}
		r = rdtsc() - r;
		r1 = tsc_freq * LOOPS;
		r2 = r * 1000000;
		lapic_ipi_wait_mult = r1 >= r2 ? r1 / r2 : 1;
		if (bootverbose) {
			printf("LAPIC: ipi_wait() us multiplier %ju (r %ju "
			    "tsc %ju)\n", (uintmax_t)lapic_ipi_wait_mult,
			    (uintmax_t)r, (uintmax_t)tsc_freq);
		}
	}
#undef LOOPS
#endif /* SMP */
}
614
/*
 * Create a local APIC instance: record the enumerated APIC 'apic_id'
 * in the lapics[] table with default LVT/ELVT settings and the fixed
 * vector-to-IRQ mappings.  'boot_cpu' nonzero marks the BSP, which
 * cannot be skipped.
 */
static void
native_lapic_create(u_int apic_id, int boot_cpu)
{
	int i;

	/* IDs above the probed maximum cannot be tracked. */
	if (apic_id > max_apic_id) {
		printf("APIC: Ignoring local APIC with ID %d\n", apic_id);
		if (boot_cpu)
			panic("Can't ignore BSP");
		return;
	}
	KASSERT(!lapics[apic_id].la_present, ("duplicate local APIC %u",
	    apic_id));

	/*
	 * Assume no local LVT overrides and a cluster of 0 and
	 * intra-cluster ID of 0.
	 */
	lapics[apic_id].la_present = 1;
	lapics[apic_id].la_id = apic_id;
	/* Copy global defaults; per-CPU overrides start inactive. */
	for (i = 0; i <= APIC_LVT_MAX; i++) {
		lapics[apic_id].la_lvts[i] = lvts[i];
		lapics[apic_id].la_lvts[i].lvt_active = 0;
	}
	for (i = 0; i <= APIC_ELVT_MAX; i++) {
		lapics[apic_id].la_elvts[i] = elvts[i];
		lapics[apic_id].la_elvts[i].lvt_active = 0;
	}
	/* Mark all I/O interrupt vectors free, then claim the fixed ones. */
	for (i = 0; i <= APIC_NUM_IOINTS; i++)
	    lapics[apic_id].la_ioint_irqs[i] = -1;
	lapics[apic_id].la_ioint_irqs[IDT_SYSCALL - APIC_IO_INTS] = IRQ_SYSCALL;
	lapics[apic_id].la_ioint_irqs[APIC_TIMER_INT - APIC_IO_INTS] =
	    IRQ_TIMER;
#ifdef KDTRACE_HOOKS
	lapics[apic_id].la_ioint_irqs[IDT_DTRACE_RET - APIC_IO_INTS] =
	    IRQ_DTRACE_RET;
#endif
#ifdef XENHVM
	lapics[apic_id].la_ioint_irqs[IDT_EVTCHN - APIC_IO_INTS] = IRQ_EVTCHN;
#endif

#ifdef SMP
	cpu_add(apic_id, boot_cpu);
#endif
}
664
665 static inline uint32_t
666 amd_read_ext_features(void)
667 {
668         uint32_t version;
669
670         if (cpu_vendor_id != CPU_VENDOR_AMD)
671                 return (0);
672         version = lapic_read32(LAPIC_VERSION);
673         if ((version & APIC_VER_AMD_EXT_SPACE) != 0)
674                 return (lapic_read32(LAPIC_EXT_FEATURES));
675         else
676                 return (0);
677 }
678
679 static inline uint32_t
680 amd_read_elvt_count(void)
681 {
682         uint32_t extf;
683         uint32_t count;
684
685         extf = amd_read_ext_features();
686         count = (extf & APIC_EXTF_ELVT_MASK) >> APIC_EXTF_ELVT_SHIFT;
687         count = min(count, APIC_ELVT_MAX + 1);
688         return (count);
689 }
690
/*
 * Dump contents of local APIC registers for the current CPU, prefixed
 * with the caller-supplied string 'str'.  PMC/CMCI/ELVT registers are
 * printed only when the hardware advertises them.
 */
static void
native_lapic_dump(const char* str)
{
	uint32_t version;
	uint32_t maxlvt;
	uint32_t extf;
	int elvt_count;
	int i;

	version = lapic_read32(LAPIC_VERSION);
	maxlvt = (version & APIC_VER_MAXLVT) >> MAXLVTSHIFT;
	printf("cpu%d %s:\n", PCPU_GET(cpuid), str);
	/* DFR does not exist in x2APIC mode; print 0 there. */
	printf("     ID: 0x%08x   VER: 0x%08x LDR: 0x%08x DFR: 0x%08x",
	    lapic_read32(LAPIC_ID), version,
	    lapic_read32(LAPIC_LDR), x2apic_mode ? 0 : lapic_read32(LAPIC_DFR));
	if ((cpu_feature2 & CPUID2_X2APIC) != 0)
		printf(" x2APIC: %d", x2apic_mode);
	printf("\n  lint0: 0x%08x lint1: 0x%08x TPR: 0x%08x SVR: 0x%08x\n",
	    lapic_read32(LAPIC_LVT_LINT0), lapic_read32(LAPIC_LVT_LINT1),
	    lapic_read32(LAPIC_TPR), lapic_read32(LAPIC_SVR));
	printf("  timer: 0x%08x therm: 0x%08x err: 0x%08x",
	    lapic_read32(LAPIC_LVT_TIMER), lapic_read32(LAPIC_LVT_THERMAL),
	    lapic_read32(LAPIC_LVT_ERROR));
	if (maxlvt >= APIC_LVT_PMC)
		printf(" pmc: 0x%08x", lapic_read32(LAPIC_LVT_PCINT));
	printf("\n");
	if (maxlvt >= APIC_LVT_CMCI)
		printf("   cmci: 0x%08x\n", lapic_read32(LAPIC_LVT_CMCI));
	extf = amd_read_ext_features();
	if (extf != 0) {
		printf("   AMD ext features: 0x%08x\n", extf);
		elvt_count = amd_read_elvt_count();
		for (i = 0; i < elvt_count; i++)
			printf("   AMD elvt%d: 0x%08x\n", i,
			    lapic_read32(LAPIC_EXT_LVT0 + i));
	}
}
731
732 static void
733 native_lapic_xapic_mode(void)
734 {
735         register_t saveintr;
736
737         saveintr = intr_disable();
738         if (x2apic_mode)
739                 native_lapic_enable_x2apic();
740         intr_restore(saveintr);
741 }
742
/*
 * Per-CPU local APIC setup: program the TPR, spurious vector, LVT and
 * (AMD) ELVT entries for the calling CPU, and restart the timer if a
 * mode was already configured.  Runs with interrupts disabled; 'boot'
 * nonzero additionally registers the per-CPU timer interrupt counter.
 */
static void
native_lapic_setup(int boot)
{
	struct lapic *la;
	uint32_t version;
	uint32_t maxlvt;
	register_t saveintr;
	char buf[MAXCOMLEN + 1];
	int elvt_count;
	int i;

	saveintr = intr_disable();

	la = &lapics[lapic_id()];
	KASSERT(la->la_present, ("missing APIC structure"));
	version = lapic_read32(LAPIC_VERSION);
	maxlvt = (version & APIC_VER_MAXLVT) >> MAXLVTSHIFT;

	/* Initialize the TPR to allow all interrupts. */
	lapic_set_tpr(0);

	/* Setup spurious vector and enable the local APIC. */
	lapic_enable();

	/* Program LINT[01] LVT entries. */
	lapic_write32(LAPIC_LVT_LINT0, lvt_mode(la, APIC_LVT_LINT0,
	    lapic_read32(LAPIC_LVT_LINT0)));
	lapic_write32(LAPIC_LVT_LINT1, lvt_mode(la, APIC_LVT_LINT1,
	    lapic_read32(LAPIC_LVT_LINT1)));

	/* Program the PMC LVT entry if present. */
	if (maxlvt >= APIC_LVT_PMC) {
		/*
		 * NOTE(review): unlike every other entry here, this passes
		 * the register index LAPIC_LVT_PCINT as the current value
		 * instead of lapic_read32(LAPIC_LVT_PCINT).  lvt_mode()
		 * masks the bits it rebuilds either way, so the result is
		 * likely the same, but confirm this is intentional.
		 */
		lapic_write32(LAPIC_LVT_PCINT, lvt_mode(la, APIC_LVT_PMC,
		    LAPIC_LVT_PCINT));
	}

	/* Program timer LVT and setup handler. */
	la->lvt_timer_base = lvt_mode(la, APIC_LVT_TIMER,
	    lapic_read32(LAPIC_LVT_TIMER));
	la->lvt_timer_last = la->lvt_timer_base;
	lapic_write32(LAPIC_LVT_TIMER, la->lvt_timer_base);
	if (boot) {
		snprintf(buf, sizeof(buf), "cpu%d:timer", PCPU_GET(cpuid));
		intrcnt_add(buf, &la->la_timer_count);
	}

	/* Setup the timer if configured. */
	if (la->la_timer_mode != LAT_MODE_UNDEF) {
		KASSERT(la->la_timer_period != 0, ("lapic%u: zero divisor",
		    lapic_id()));
		switch (la->la_timer_mode) {
		case LAT_MODE_PERIODIC:
			lapic_timer_set_divisor(lapic_timer_divisor);
			lapic_timer_periodic(la);
			break;
		case LAT_MODE_ONESHOT:
			lapic_timer_set_divisor(lapic_timer_divisor);
			lapic_timer_oneshot(la);
			break;
		case LAT_MODE_DEADLINE:
			/* TSC-deadline mode does not use the divisor. */
			lapic_timer_deadline(la);
			break;
		default:
			panic("corrupted la_timer_mode %p %d", la,
			    la->la_timer_mode);
		}
	}

	/* Program error LVT and clear any existing errors. */
	lapic_write32(LAPIC_LVT_ERROR, lvt_mode(la, APIC_LVT_ERROR,
	    lapic_read32(LAPIC_LVT_ERROR)));
	lapic_write32(LAPIC_ESR, 0);

	/* XXX: Thermal LVT */

	/* Program the CMCI LVT entry if present. */
	if (maxlvt >= APIC_LVT_CMCI) {
		lapic_write32(LAPIC_LVT_CMCI, lvt_mode(la, APIC_LVT_CMCI,
		    lapic_read32(LAPIC_LVT_CMCI)));
	}

	/* Program any active AMD extended LVT entries. */
	elvt_count = amd_read_elvt_count();
	for (i = 0; i < elvt_count; i++) {
		if (la->la_elvts[i].lvt_active)
			lapic_write32(LAPIC_EXT_LVT0 + i,
			    elvt_mode(la, i, lapic_read32(LAPIC_EXT_LVT0 + i)));
	}

	intr_restore(saveintr);
}
833
static void
native_lapic_reenable_pmc(void)
{
#ifdef HWPMC_HOOKS
	uint32_t lvt;

	/* Clear the mask bit in the PMC LVT to re-enable PMC interrupts. */
	lvt = lapic_read32(LAPIC_LVT_PCINT);
	lapic_write32(LAPIC_LVT_PCINT, lvt & ~APIC_LVT_M);
#endif
}
845
#ifdef HWPMC_HOOKS
/*
 * Rendezvous callback: reprogram this CPU's PMC LVT entry from its
 * saved per-APIC configuration.
 */
static void
lapic_update_pmc(void *dummy)
{
	uint32_t old;

	old = lapic_read32(LAPIC_LVT_PCINT);
	lapic_write32(LAPIC_LVT_PCINT,
	    lvt_mode(&lapics[lapic_id()], APIC_LVT_PMC, old));
}
#endif
857
/*
 * Unmask the performance-counter LVT on every CPU so hwpmc can receive
 * counter-overflow interrupts.  Returns 1 on success, 0 if the local
 * APIC or its PMC LVT entry is unavailable (or HWPMC_HOOKS is off).
 */
static int
native_lapic_enable_pmc(void)
{
#ifdef HWPMC_HOOKS
	/* uint32_t rather than the deprecated u_int32_t spelling. */
	uint32_t maxlvt;

	/* Fail if the local APIC is not present. */
	if (!x2apic_mode && lapic_map == NULL)
		return (0);

	/* Fail if the PMC LVT is not present. */
	maxlvt = (lapic_read32(LAPIC_VERSION) & APIC_VER_MAXLVT) >> MAXLVTSHIFT;
	if (maxlvt < APIC_LVT_PMC)
		return (0);

	/* Unmask the template entry used when (re)programming each CPU. */
	lvts[APIC_LVT_PMC].lvt_masked = 0;

#ifdef EARLY_AP_STARTUP
	MPASS(mp_ncpus == 1 || smp_started);
	smp_rendezvous(NULL, lapic_update_pmc, NULL, NULL);
#else
#ifdef SMP
	/*
	 * If hwpmc was loaded at boot time then the APs may not be
	 * started yet.  In that case, don't forward the request to
	 * them as they will program the lvt when they start.
	 */
	if (smp_started)
		smp_rendezvous(NULL, lapic_update_pmc, NULL, NULL);
	else
#endif
		lapic_update_pmc(NULL);
#endif
	return (1);
#else
	return (0);
#endif
}
896
/*
 * Mask the performance-counter LVT on every CPU when hwpmc is unloaded.
 * Silently returns if the local APIC or its PMC LVT is unavailable.
 */
static void
native_lapic_disable_pmc(void)
{
#ifdef HWPMC_HOOKS
	/* uint32_t rather than the deprecated u_int32_t spelling. */
	uint32_t maxlvt;

	/* Fail if the local APIC is not present. */
	if (!x2apic_mode && lapic_map == NULL)
		return;

	/* Fail if the PMC LVT is not present. */
	maxlvt = (lapic_read32(LAPIC_VERSION) & APIC_VER_MAXLVT) >> MAXLVTSHIFT;
	if (maxlvt < APIC_LVT_PMC)
		return;

	/* Mask the template entry, then push it out to all CPUs. */
	lvts[APIC_LVT_PMC].lvt_masked = 1;

#ifdef SMP
	/* The APs should always be started when hwpmc is unloaded. */
	KASSERT(mp_ncpus == 1 || smp_started, ("hwpmc unloaded too early"));
#endif
	smp_rendezvous(NULL, lapic_update_pmc, NULL, NULL);
#endif
}
921
/*
 * Measure the local APIC timer frequency by running it masked for one
 * second and reading back how far it counted.  Sets the file-scope
 * lapic_timer_divisor and count_freq as a side effect.
 */
static void
lapic_calibrate_initcount(struct eventtimer *et, struct lapic *la)
{
	u_long value;

	/* Start off with a divisor of 2 (power on reset default). */
	lapic_timer_divisor = 2;
	/* Try to calibrate the local APIC timer. */
	do {
		lapic_timer_set_divisor(lapic_timer_divisor);
		lapic_timer_oneshot_nointr(la, APIC_TIMER_MAX_COUNT);
		DELAY(1000000);
		/* Ticks consumed in one second == frequency in Hz. */
		value = APIC_TIMER_MAX_COUNT - lapic_read32(LAPIC_CCR_TIMER);
		/*
		 * value == APIC_TIMER_MAX_COUNT means the counter ran
		 * all the way to zero within the delay; the clock is
		 * too fast for this divisor, so double it and retry.
		 */
		if (value != APIC_TIMER_MAX_COUNT)
			break;
		lapic_timer_divisor <<= 1;
	} while (lapic_timer_divisor <= 128);
	if (lapic_timer_divisor > 128)
		panic("lapic: Divisor too big");
	if (bootverbose) {
		printf("lapic: Divisor %lu, Frequency %lu Hz\n",
		    lapic_timer_divisor, value);
	}
	count_freq = value;
}
947
948 static void
949 lapic_calibrate_deadline(struct eventtimer *et, struct lapic *la __unused)
950 {
951
952         if (bootverbose) {
953                 printf("lapic: deadline tsc mode, Frequency %ju Hz\n",
954                     (uintmax_t)tsc_freq);
955         }
956 }
957
958 static void
959 lapic_change_mode(struct eventtimer *et, struct lapic *la,
960     enum lat_timer_mode newmode)
961 {
962
963         if (la->la_timer_mode == newmode)
964                 return;
965         switch (newmode) {
966         case LAT_MODE_PERIODIC:
967                 lapic_timer_set_divisor(lapic_timer_divisor);
968                 et->et_frequency = count_freq;
969                 break;
970         case LAT_MODE_DEADLINE:
971                 et->et_frequency = tsc_freq;
972                 break;
973         case LAT_MODE_ONESHOT:
974                 lapic_timer_set_divisor(lapic_timer_divisor);
975                 et->et_frequency = count_freq;
976                 break;
977         default:
978                 panic("lapic_change_mode %d", newmode);
979         }
980         la->la_timer_mode = newmode;
981         et->et_min_period = (0x00000002LLU << 32) / et->et_frequency;
982         et->et_max_period = (0xfffffffeLLU << 32) / et->et_frequency;
983 }
984
/*
 * eventtimer start method: program this CPU's APIC timer for a
 * periodic interval or a one-shot, calibrating on first use.
 * 'first' and 'period' are 32.32 fixed-point sbintime_t values.
 */
static int
lapic_et_start(struct eventtimer *et, sbintime_t first, sbintime_t period)
{
	struct lapic *la;

	la = &lapics[PCPU_GET(apic_id)];
	/* Frequency of 0 means the timer has never been calibrated. */
	if (et->et_frequency == 0) {
		lapic_calibrate_initcount(et, la);
		if (lapic_timer_tsc_deadline)
			lapic_calibrate_deadline(et, la);
	}
	if (period != 0) {
		/*
		 * ticks = freq * period (32.32): the cast keeps the
		 * product in range for count_freq-based modes.
		 */
		lapic_change_mode(et, la, LAT_MODE_PERIODIC);
		la->la_timer_period = ((uint32_t)et->et_frequency * period) >>
		    32;
		lapic_timer_periodic(la);
	} else if (lapic_timer_tsc_deadline) {
		/* One-shot via TSC deadline; no 32-bit truncation here. */
		lapic_change_mode(et, la, LAT_MODE_DEADLINE);
		la->la_timer_period = (et->et_frequency * first) >> 32;
		lapic_timer_deadline(la);
	} else {
		/* One-shot via the APIC count-down timer. */
		lapic_change_mode(et, la, LAT_MODE_ONESHOT);
		la->la_timer_period = ((uint32_t)et->et_frequency * first) >>
		    32;
		lapic_timer_oneshot(la);
	}
	return (0);
}
1013
1014 static int
1015 lapic_et_stop(struct eventtimer *et)
1016 {
1017         struct lapic *la;
1018
1019         la = &lapics[PCPU_GET(apic_id)];
1020         lapic_timer_stop(la);
1021         la->la_timer_mode = LAT_MODE_UNDEF;
1022         return (0);
1023 }
1024
1025 static void
1026 native_lapic_disable(void)
1027 {
1028         uint32_t value;
1029
1030         /* Software disable the local APIC. */
1031         value = lapic_read32(LAPIC_SVR);
1032         value &= ~APIC_SVR_SWEN;
1033         lapic_write32(LAPIC_SVR, value);
1034 }
1035
1036 static void
1037 lapic_enable(void)
1038 {
1039         uint32_t value;
1040
1041         /* Program the spurious vector to enable the local APIC. */
1042         value = lapic_read32(LAPIC_SVR);
1043         value &= ~(APIC_SVR_VECTOR | APIC_SVR_FOCUS);
1044         value |= APIC_SVR_FEN | APIC_SVR_SWEN | APIC_SPURIOUS_INT;
1045         if (lapic_eoi_suppression)
1046                 value |= APIC_SVR_EOI_SUPPRESSION;
1047         lapic_write32(LAPIC_SVR, value);
1048 }
1049
1050 /* Reset the local APIC on the BSP during resume. */
static void
lapic_resume(struct pic *pic, bool suspend_cancelled)
{

	/* Full reprogramming; 'boot' is 0 so counters aren't re-added. */
	lapic_setup(0);
}
1057
1058 static int
1059 native_lapic_id(void)
1060 {
1061         uint32_t v;
1062
1063         KASSERT(x2apic_mode || lapic_map != NULL, ("local APIC is not mapped"));
1064         v = lapic_read32(LAPIC_ID);
1065         if (!x2apic_mode)
1066                 v >>= APIC_ID_SHIFT;
1067         return (v);
1068 }
1069
1070 static int
1071 native_lapic_intr_pending(u_int vector)
1072 {
1073         uint32_t irr;
1074
1075         /*
1076          * The IRR registers are an array of registers each of which
1077          * only describes 32 interrupts in the low 32 bits.  Thus, we
1078          * divide the vector by 32 to get the register index.
1079          * Finally, we modulus the vector by 32 to determine the
1080          * individual bit to test.
1081          */
1082         irr = lapic_read32(LAPIC_IRR0 + vector / 32);
1083         return (irr & 1 << (vector % 32));
1084 }
1085
1086 static void
1087 native_lapic_set_logical_id(u_int apic_id, u_int cluster, u_int cluster_id)
1088 {
1089         struct lapic *la;
1090
1091         KASSERT(lapics[apic_id].la_present, ("%s: APIC %u doesn't exist",
1092             __func__, apic_id));
1093         KASSERT(cluster <= APIC_MAX_CLUSTER, ("%s: cluster %u too big",
1094             __func__, cluster));
1095         KASSERT(cluster_id <= APIC_MAX_INTRACLUSTER_ID,
1096             ("%s: intra cluster id %u too big", __func__, cluster_id));
1097         la = &lapics[apic_id];
1098         la->la_cluster = cluster;
1099         la->la_cluster_id = cluster_id;
1100 }
1101
1102 static int
1103 native_lapic_set_lvt_mask(u_int apic_id, u_int pin, u_char masked)
1104 {
1105
1106         if (pin > APIC_LVT_MAX)
1107                 return (EINVAL);
1108         if (apic_id == APIC_ID_ALL) {
1109                 lvts[pin].lvt_masked = masked;
1110                 if (bootverbose)
1111                         printf("lapic:");
1112         } else {
1113                 KASSERT(lapics[apic_id].la_present,
1114                     ("%s: missing APIC %u", __func__, apic_id));
1115                 lapics[apic_id].la_lvts[pin].lvt_masked = masked;
1116                 lapics[apic_id].la_lvts[pin].lvt_active = 1;
1117                 if (bootverbose)
1118                         printf("lapic%u:", apic_id);
1119         }
1120         if (bootverbose)
1121                 printf(" LINT%u %s\n", pin, masked ? "masked" : "unmasked");
1122         return (0);
1123 }
1124
/*
 * Set the delivery mode of an LVT pin, globally (APIC_ID_ALL) or for
 * one local APIC.  Only NMI/SMI/INIT/ExtINT are supported; they are
 * forced edge-triggered and active-high, and ExtINT starts masked.
 */
static int
native_lapic_set_lvt_mode(u_int apic_id, u_int pin, u_int32_t mode)
{
	struct lvt *lvt;

	if (pin > APIC_LVT_MAX)
		return (EINVAL);
	if (apic_id == APIC_ID_ALL) {
		/* Update the template shared by all local APICs. */
		lvt = &lvts[pin];
		if (bootverbose)
			printf("lapic:");
	} else {
		/* Update (and activate) the per-APIC override. */
		KASSERT(lapics[apic_id].la_present,
		    ("%s: missing APIC %u", __func__, apic_id));
		lvt = &lapics[apic_id].la_lvts[pin];
		lvt->lvt_active = 1;
		if (bootverbose)
			printf("lapic%u:", apic_id);
	}
	lvt->lvt_mode = mode;
	switch (mode) {
	case APIC_LVT_DM_NMI:
	case APIC_LVT_DM_SMI:
	case APIC_LVT_DM_INIT:
	case APIC_LVT_DM_EXTINT:
		lvt->lvt_edgetrigger = 1;
		lvt->lvt_activehi = 1;
		/* ExtINT is left masked until explicitly unmasked. */
		if (mode == APIC_LVT_DM_EXTINT)
			lvt->lvt_masked = 1;
		else
			lvt->lvt_masked = 0;
		break;
	default:
		panic("Unsupported delivery mode: 0x%x\n", mode);
	}
	if (bootverbose) {
		printf(" Routing ");
		switch (mode) {
		case APIC_LVT_DM_NMI:
			printf("NMI");
			break;
		case APIC_LVT_DM_SMI:
			printf("SMI");
			break;
		case APIC_LVT_DM_INIT:
			printf("INIT");
			break;
		case APIC_LVT_DM_EXTINT:
			printf("ExtINT");
			break;
		}
		printf(" -> LINT%u\n", pin);
	}
	return (0);
}
1180
1181 static int
1182 native_lapic_set_lvt_polarity(u_int apic_id, u_int pin, enum intr_polarity pol)
1183 {
1184
1185         if (pin > APIC_LVT_MAX || pol == INTR_POLARITY_CONFORM)
1186                 return (EINVAL);
1187         if (apic_id == APIC_ID_ALL) {
1188                 lvts[pin].lvt_activehi = (pol == INTR_POLARITY_HIGH);
1189                 if (bootverbose)
1190                         printf("lapic:");
1191         } else {
1192                 KASSERT(lapics[apic_id].la_present,
1193                     ("%s: missing APIC %u", __func__, apic_id));
1194                 lapics[apic_id].la_lvts[pin].lvt_active = 1;
1195                 lapics[apic_id].la_lvts[pin].lvt_activehi =
1196                     (pol == INTR_POLARITY_HIGH);
1197                 if (bootverbose)
1198                         printf("lapic%u:", apic_id);
1199         }
1200         if (bootverbose)
1201                 printf(" LINT%u polarity: %s\n", pin,
1202                     pol == INTR_POLARITY_HIGH ? "high" : "low");
1203         return (0);
1204 }
1205
1206 static int
1207 native_lapic_set_lvt_triggermode(u_int apic_id, u_int pin,
1208      enum intr_trigger trigger)
1209 {
1210
1211         if (pin > APIC_LVT_MAX || trigger == INTR_TRIGGER_CONFORM)
1212                 return (EINVAL);
1213         if (apic_id == APIC_ID_ALL) {
1214                 lvts[pin].lvt_edgetrigger = (trigger == INTR_TRIGGER_EDGE);
1215                 if (bootverbose)
1216                         printf("lapic:");
1217         } else {
1218                 KASSERT(lapics[apic_id].la_present,
1219                     ("%s: missing APIC %u", __func__, apic_id));
1220                 lapics[apic_id].la_lvts[pin].lvt_edgetrigger =
1221                     (trigger == INTR_TRIGGER_EDGE);
1222                 lapics[apic_id].la_lvts[pin].lvt_active = 1;
1223                 if (bootverbose)
1224                         printf("lapic%u:", apic_id);
1225         }
1226         if (bootverbose)
1227                 printf(" LINT%u trigger: %s\n", pin,
1228                     trigger == INTR_TRIGGER_EDGE ? "edge" : "level");
1229         return (0);
1230 }
1231
1232 /*
1233  * Adjust the TPR of the current CPU so that it blocks all interrupts below
1234  * the passed in vector.
1235  */
static void
lapic_set_tpr(u_int vector)
{
#ifdef CHEAP_TPR
	/* Write the priority directly without preserving other bits. */
	lapic_write32(LAPIC_TPR, vector);
#else
	uint32_t tpr;

	/* Read-modify-write so bits outside APIC_TPR_PRIO survive. */
	tpr = lapic_read32(LAPIC_TPR) & ~APIC_TPR_PRIO;
	tpr |= vector;
	lapic_write32(LAPIC_TPR, tpr);
#endif
}
1249
static void
native_lapic_eoi(void)
{

	/*
	 * Signal end-of-interrupt.  Uses the unfenced write variant —
	 * presumably EOI needs no ordering against other APIC accesses
	 * on this path; NOTE(review): confirm against lapic_write32().
	 */
	lapic_write32_nofence(LAPIC_EOI, 0);
}
1256
1257 void
1258 lapic_handle_intr(int vector, struct trapframe *frame)
1259 {
1260         struct intsrc *isrc;
1261
1262         isrc = intr_lookup_source(apic_idt_to_irq(PCPU_GET(apic_id),
1263             vector));
1264         intr_execute_handlers(isrc, frame);
1265 }
1266
/*
 * Local APIC timer interrupt entry point.  Acknowledges the interrupt,
 * bumps this CPU's tick counter, and invokes the eventtimer callback
 * with the interrupted trapframe temporarily installed on curthread.
 */
void
lapic_handle_timer(struct trapframe *frame)
{
	struct lapic *la;
	struct trapframe *oldframe;
	struct thread *td;

	/* Send EOI first thing. */
	lapic_eoi();

#if defined(SMP) && !defined(SCHED_ULE)
	/*
	 * Don't do any accounting for the disabled HTT cores, since it
	 * will provide misleading numbers for the userland.
	 *
	 * No locking is necessary here, since even if we lose the race
	 * when hlt_cpus_mask changes it is not a big deal, really.
	 *
	 * Don't do that for ULE, since ULE doesn't consider hlt_cpus_mask
	 * and unlike other schedulers it actually schedules threads to
	 * those CPUs.
	 */
	if (CPU_ISSET(PCPU_GET(cpuid), &hlt_cpus_mask))
		return;
#endif

	/* Look up our local APIC structure for the tick counters. */
	la = &lapics[PCPU_GET(apic_id)];
	(*la->la_timer_count)++;
	critical_enter();
	if (lapic_et.et_active) {
		td = curthread;
		td->td_intr_nesting_level++;
		/* Expose the interrupt frame to the callback, then restore. */
		oldframe = td->td_intr_frame;
		td->td_intr_frame = frame;
		lapic_et.et_event_cb(&lapic_et, lapic_et.et_arg);
		td->td_intr_frame = oldframe;
		td->td_intr_nesting_level--;
	}
	critical_exit();
}
1308
1309 static void
1310 lapic_timer_set_divisor(u_int divisor)
1311 {
1312
1313         KASSERT(powerof2(divisor), ("lapic: invalid divisor %u", divisor));
1314         KASSERT(ffs(divisor) <= nitems(lapic_timer_divisors),
1315                 ("lapic: invalid divisor %u", divisor));
1316         lapic_write32(LAPIC_DCR_TIMER, lapic_timer_divisors[ffs(divisor) - 1]);
1317 }
1318
1319 static void
1320 lapic_timer_oneshot(struct lapic *la)
1321 {
1322         uint32_t value;
1323
1324         value = la->lvt_timer_base;
1325         value &= ~(APIC_LVTT_TM | APIC_LVT_M);
1326         value |= APIC_LVTT_TM_ONE_SHOT;
1327         la->lvt_timer_last = value;
1328         lapic_write32(LAPIC_LVT_TIMER, value);
1329         lapic_write32(LAPIC_ICR_TIMER, la->la_timer_period);
1330 }
1331
1332 static void
1333 lapic_timer_oneshot_nointr(struct lapic *la, uint32_t count)
1334 {
1335         uint32_t value;
1336
1337         value = la->lvt_timer_base;
1338         value &= ~APIC_LVTT_TM;
1339         value |= APIC_LVTT_TM_ONE_SHOT | APIC_LVT_M;
1340         la->lvt_timer_last = value;
1341         lapic_write32(LAPIC_LVT_TIMER, value);
1342         lapic_write32(LAPIC_ICR_TIMER, count);
1343 }
1344
1345 static void
1346 lapic_timer_periodic(struct lapic *la)
1347 {
1348         uint32_t value;
1349
1350         value = la->lvt_timer_base;
1351         value &= ~(APIC_LVTT_TM | APIC_LVT_M);
1352         value |= APIC_LVTT_TM_PERIODIC;
1353         la->lvt_timer_last = value;
1354         lapic_write32(LAPIC_LVT_TIMER, value);
1355         lapic_write32(LAPIC_ICR_TIMER, la->la_timer_period);
1356 }
1357
/*
 * Arm the timer in TSC-deadline mode for la_timer_period TSC ticks
 * from now.  The LVT is only rewritten when its value changed since
 * the last programming (cached in lvt_timer_last).
 */
static void
lapic_timer_deadline(struct lapic *la)
{
	uint32_t value;

	/* TSC-deadline mode, unmasked. */
	value = la->lvt_timer_base;
	value &= ~(APIC_LVTT_TM | APIC_LVT_M);
	value |= APIC_LVTT_TM_TSCDLT;
	if (value != la->lvt_timer_last) {
		la->lvt_timer_last = value;
		lapic_write32_nofence(LAPIC_LVT_TIMER, value);
		/*
		 * xAPIC only: order the MMIO LVT write before the
		 * MSR_TSC_DEADLINE write below.
		 */
		if (!x2apic_mode)
			mfence();
	}
	wrmsr(MSR_TSC_DEADLINE, la->la_timer_period + rdtsc());
}
1374
/* Stop the timer, whichever mode it is currently running in. */
static void
lapic_timer_stop(struct lapic *la)
{
	uint32_t value;

	if (la->la_timer_mode == LAT_MODE_DEADLINE) {
		/* A zero deadline disarms TSC-deadline mode. */
		wrmsr(MSR_TSC_DEADLINE, 0);
		mfence();
	} else {
		/* Mask the timer LVT to silence count-based modes. */
		value = la->lvt_timer_base;
		value &= ~APIC_LVTT_TM;
		value |= APIC_LVT_M;
		la->lvt_timer_last = value;
		lapic_write32(LAPIC_LVT_TIMER, value);
	}
}
1391
/* CMCI (corrected machine check) interrupt entry point. */
void
lapic_handle_cmc(void)
{

	/* Acknowledge the interrupt, then let the MCA code scan banks. */
	lapic_eoi();
	cmc_intr();
}
1399
1400 /*
1401  * Called from the mca_init() to activate the CMC interrupt if this CPU is
1402  * responsible for monitoring any MC banks for CMC events.  Since mca_init()
1403  * is called prior to lapic_setup() during boot, this just needs to unmask
1404  * this CPU's LVT_CMCI entry.
1405  */
1406 static void
1407 native_lapic_enable_cmc(void)
1408 {
1409         u_int apic_id;
1410
1411 #ifdef DEV_ATPIC
1412         if (!x2apic_mode && lapic_map == NULL)
1413                 return;
1414 #endif
1415         apic_id = PCPU_GET(apic_id);
1416         KASSERT(lapics[apic_id].la_present,
1417             ("%s: missing APIC %u", __func__, apic_id));
1418         lapics[apic_id].la_lvts[APIC_LVT_CMCI].lvt_masked = 0;
1419         lapics[apic_id].la_lvts[APIC_LVT_CMCI].lvt_active = 1;
1420         if (bootverbose)
1421                 printf("lapic%u: CMCI unmasked\n", apic_id);
1422 }
1423
/*
 * Activate the AMD MCE-thresholding extended LVT for this CPU.
 * Returns the ELVT index (APIC_ELVT_MCA) on success, -1 if the local
 * APIC or the required extended LVT is unavailable.
 */
static int
native_lapic_enable_mca_elvt(void)
{
	u_int apic_id;
	uint32_t value;
	int elvt_count;

#ifdef DEV_ATPIC
	if (lapic_map == NULL)
		return (-1);
#endif

	apic_id = PCPU_GET(apic_id);
	KASSERT(lapics[apic_id].la_present,
	    ("%s: missing APIC %u", __func__, apic_id));
	elvt_count = amd_read_elvt_count();
	if (elvt_count <= APIC_ELVT_MCA)
		return (-1);

	/* If firmware already unmasked it, leave it as configured. */
	value = lapic_read32(LAPIC_EXT_LVT0 + APIC_ELVT_MCA);
	if ((value & APIC_LVT_M) == 0) {
		if (bootverbose)
			printf("AMD MCE Thresholding Extended LVT is already active\n");
		return (APIC_ELVT_MCA);
	}
	/* Otherwise record our override; lapic_setup() programs it. */
	lapics[apic_id].la_elvts[APIC_ELVT_MCA].lvt_masked = 0;
	lapics[apic_id].la_elvts[APIC_ELVT_MCA].lvt_active = 1;
	if (bootverbose)
		printf("lapic%u: MCE Thresholding ELVT unmasked\n", apic_id);
	return (APIC_ELVT_MCA);
}
1455
/* Local APIC error interrupt entry point: report and acknowledge. */
void
lapic_handle_error(void)
{
	uint32_t esr;

	/*
	 * Read the contents of the error status register.  Write to
	 * the register first before reading from it to force the APIC
	 * to update its value to indicate any errors that have
	 * occurred since the previous write to the register.
	 */
	lapic_write32(LAPIC_ESR, 0);
	esr = lapic_read32(LAPIC_ESR);

	printf("CPU%d: local APIC error 0x%x\n", PCPU_GET(cpuid), esr);
	lapic_eoi();
}
1473
1474 static u_int
1475 native_apic_cpuid(u_int apic_id)
1476 {
1477 #ifdef SMP
1478         return apic_cpuids[apic_id];
1479 #else
1480         return 0;
1481 #endif
1482 }
1483
1484 /* Request a free IDT vector to be used by the specified IRQ. */
1485 static u_int
1486 native_apic_alloc_vector(u_int apic_id, u_int irq)
1487 {
1488         u_int vector;
1489
1490         KASSERT(irq < NUM_IO_INTS, ("Invalid IRQ %u", irq));
1491
1492         /*
1493          * Search for a free vector.  Currently we just use a very simple
1494          * algorithm to find the first free vector.
1495          */
1496         mtx_lock_spin(&icu_lock);
1497         for (vector = 0; vector < APIC_NUM_IOINTS; vector++) {
1498                 if (lapics[apic_id].la_ioint_irqs[vector] != -1)
1499                         continue;
1500                 lapics[apic_id].la_ioint_irqs[vector] = irq;
1501                 mtx_unlock_spin(&icu_lock);
1502                 return (vector + APIC_IO_INTS);
1503         }
1504         mtx_unlock_spin(&icu_lock);
1505         return (0);
1506 }
1507
1508 /*
1509  * Request 'count' free contiguous IDT vectors to be used by 'count'
1510  * IRQs.  'count' must be a power of two and the vectors will be
1511  * aligned on a boundary of 'align'.  If the request cannot be
1512  * satisfied, 0 is returned.
1513  */
static u_int
native_apic_alloc_vectors(u_int apic_id, u_int *irqs, u_int count, u_int align)
{
	u_int first, run, vector;

	KASSERT(powerof2(count), ("bad count"));
	KASSERT(powerof2(align), ("bad align"));
	KASSERT(align >= count, ("align < count"));
#ifdef INVARIANTS
	for (run = 0; run < count; run++)
		KASSERT(irqs[run] < NUM_IO_INTS, ("Invalid IRQ %u at index %u",
		    irqs[run], run));
#endif

	/*
	 * Search for 'count' free vectors.  As with apic_alloc_vector(),
	 * this just uses a simple first fit algorithm.
	 */
	run = 0;
	first = 0;
	mtx_lock_spin(&icu_lock);
	for (vector = 0; vector < APIC_NUM_IOINTS; vector++) {

		/* Vector is in use, end run. */
		if (lapics[apic_id].la_ioint_irqs[vector] != -1) {
			run = 0;
			first = 0;
			continue;
		}

		/* Start a new run if run == 0 and vector is aligned. */
		if (run == 0) {
			if ((vector & (align - 1)) != 0)
				continue;
			first = vector;
		}
		run++;

		/* Keep looping if the run isn't long enough yet. */
		if (run < count)
			continue;

		/*
		 * Found a run, assign IRQs and return the first vector.
		 * Note: 'vector' is reused here as the assignment index;
		 * the function returns immediately after, so the outer
		 * loop never sees the clobbered value.
		 */
		for (vector = 0; vector < count; vector++)
			lapics[apic_id].la_ioint_irqs[first + vector] =
			    irqs[vector];
		mtx_unlock_spin(&icu_lock);
		return (first + APIC_IO_INTS);
	}
	mtx_unlock_spin(&icu_lock);
	printf("APIC: Couldn't find APIC vectors for %u IRQs\n", count);
	return (0);
}
1567
1568 /*
1569  * Enable a vector for a particular apic_id.  Since all lapics share idt
1570  * entries and ioint_handlers this enables the vector on all lapics.  lapics
1571  * which do not have the vector configured would report spurious interrupts
1572  * should it fire.
1573  */
static void
native_apic_enable_vector(u_int apic_id, u_int vector)
{

	KASSERT(vector != IDT_SYSCALL, ("Attempt to overwrite syscall entry"));
	KASSERT(ioint_handlers[vector / 32] != NULL,
	    ("No ISR handler for vector %u", vector));
#ifdef KDTRACE_HOOKS
	KASSERT(vector != IDT_DTRACE_RET,
	    ("Attempt to overwrite DTrace entry"));
#endif
	/*
	 * Install the shared ISR stub for this vector's 32-vector group,
	 * selecting the PTI trampoline variant when PTI is enabled.
	 */
	setidt(vector, (pti ? ioint_pti_handlers : ioint_handlers)[vector / 32],
	    SDT_APIC, SEL_KPL, GSEL_APIC);
}
1588
static void
native_apic_disable_vector(u_int apic_id, u_int vector)
{

	KASSERT(vector != IDT_SYSCALL, ("Attempt to overwrite syscall entry"));
#ifdef KDTRACE_HOOKS
	KASSERT(vector != IDT_DTRACE_RET,
	    ("Attempt to overwrite DTrace entry"));
#endif
	KASSERT(ioint_handlers[vector / 32] != NULL,
	    ("No ISR handler for vector %u", vector));
#ifdef notyet
	/*
	 * We can not currently clear the idt entry because other cpus
	 * may have a valid vector at this offset.
	 */
	setidt(vector, pti ? &IDTVEC(rsvd_pti) : &IDTVEC(rsvd), SDT_APICT,
	    SEL_KPL, GSEL_APIC);
#endif
	/* Intentionally a no-op beyond the assertions — see above. */
}
1609
1610 /* Release an APIC vector when it's no longer in use. */
static void
native_apic_free_vector(u_int apic_id, u_int vector, u_int irq)
{
	struct thread *td;

	KASSERT(vector >= APIC_IO_INTS && vector != IDT_SYSCALL &&
	    vector <= APIC_IO_INTS + APIC_NUM_IOINTS,
	    ("Vector %u does not map to an IRQ line", vector));
	KASSERT(irq < NUM_IO_INTS, ("Invalid IRQ %u", irq));
	KASSERT(lapics[apic_id].la_ioint_irqs[vector - APIC_IO_INTS] ==
	    irq, ("IRQ mismatch"));
#ifdef KDTRACE_HOOKS
	KASSERT(vector != IDT_DTRACE_RET,
	    ("Attempt to overwrite DTrace entry"));
#endif

	/*
	 * Bind us to the cpu that owned the vector before freeing it so
	 * we don't lose an interrupt delivery race.
	 */
	td = curthread;
	if (!rebooting) {
		thread_lock(td);
		if (sched_is_bound(td))
			panic("apic_free_vector: Thread already bound.\n");
		sched_bind(td, apic_cpuid(apic_id));
		thread_unlock(td);
	}
	/* -1 marks the slot free for apic_alloc_vector(). */
	mtx_lock_spin(&icu_lock);
	lapics[apic_id].la_ioint_irqs[vector - APIC_IO_INTS] = -1;
	mtx_unlock_spin(&icu_lock);
	/* Release the temporary CPU binding established above. */
	if (!rebooting) {
		thread_lock(td);
		sched_unbind(td);
		thread_unlock(td);
	}
}
1648
1649 /* Map an IDT vector (APIC) to an IRQ (interrupt source). */
1650 static u_int
1651 apic_idt_to_irq(u_int apic_id, u_int vector)
1652 {
1653         int irq;
1654
1655         KASSERT(vector >= APIC_IO_INTS && vector != IDT_SYSCALL &&
1656             vector <= APIC_IO_INTS + APIC_NUM_IOINTS,
1657             ("Vector %u does not map to an IRQ line", vector));
1658 #ifdef KDTRACE_HOOKS
1659         KASSERT(vector != IDT_DTRACE_RET,
1660             ("Attempt to overwrite DTrace entry"));
1661 #endif
1662         irq = lapics[apic_id].la_ioint_irqs[vector - APIC_IO_INTS];
1663         if (irq < 0)
1664                 irq = 0;
1665         return (irq);
1666 }
1667
1668 #ifdef DDB
1669 /*
1670  * Dump data about APIC IDT vector mappings.
1671  */
DB_SHOW_COMMAND(apic, db_show_apic)
{
	struct intsrc *isrc;
	int i, verbose;
	u_int apic_id;
	u_int irq;

	/* "v"/"vv" modifiers increase per-interrupt detail. */
	if (strcmp(modif, "vv") == 0)
		verbose = 2;
	else if (strcmp(modif, "v") == 0)
		verbose = 1;
	else
		verbose = 0;
	for (apic_id = 0; apic_id <= max_apic_id; apic_id++) {
		if (lapics[apic_id].la_present == 0)
			continue;
		db_printf("Interrupts bound to lapic %u\n", apic_id);
		for (i = 0; i < APIC_NUM_IOINTS + 1 && !db_pager_quit; i++) {
			irq = lapics[apic_id].la_ioint_irqs[i];
			/* -1 (free slot) wraps to UINT_MAX; comparison is
			 * still exact since both sides convert to u_int. */
			if (irq == -1 || irq == IRQ_SYSCALL)
				continue;
#ifdef KDTRACE_HOOKS
			if (irq == IRQ_DTRACE_RET)
				continue;
#endif
#ifdef XENHVM
			if (irq == IRQ_EVTCHN)
				continue;
#endif
			db_printf("vec 0x%2x -> ", i + APIC_IO_INTS);
			if (irq == IRQ_TIMER)
				db_printf("lapic timer\n");
			else if (irq < NUM_IO_INTS) {
				isrc = intr_lookup_source(irq);
				if (isrc == NULL || verbose == 0)
					db_printf("IRQ %u\n", irq);
				else
					db_dump_intr_event(isrc->is_event,
					    verbose == 2);
			} else
				db_printf("IRQ %u ???\n", irq);
		}
	}
}
1716
1717 static void
1718 dump_mask(const char *prefix, uint32_t v, int base)
1719 {
1720         int i, first;
1721
1722         first = 1;
1723         for (i = 0; i < 32; i++)
1724                 if (v & (1 << i)) {
1725                         if (first) {
1726                                 db_printf("%s:", prefix);
1727                                 first = 0;
1728                         }
1729                         db_printf(" %02x", base + i);
1730                 }
1731         if (!first)
1732                 db_printf("\n");
1733 }
1734
/* Show info from the lapic regs for this CPU. */
DB_SHOW_COMMAND(lapic, db_show_lapic)
{
	uint32_t v;

	db_printf("lapic ID = %d\n", lapic_id());
	/* Version register: bits 4-7 are the version, low nibble a minor. */
	v = lapic_read32(LAPIC_VERSION);
	db_printf("version  = %d.%d\n", (v & APIC_VER_VERSION) >> 4,
	    v & 0xf);
	db_printf("max LVT  = %d\n", (v & APIC_VER_MAXLVT) >> MAXLVTSHIFT);
	/* Spurious vector register: vector number plus the enable bit. */
	v = lapic_read32(LAPIC_SVR);
	db_printf("SVR      = %02x (%s)\n", v & APIC_SVR_VECTOR,
	    v & APIC_SVR_ENABLE ? "enabled" : "disabled");
	db_printf("TPR      = %02x\n", lapic_read32(LAPIC_TPR));

/*
 * Each of ISR/TMR/IRR covers 256 vectors spread over eight 32-bit
 * registers; dump register "index" with vector numbers offset by
 * index * 32.  __XSTRING pastes and stringifies the label.
 */
#define dump_field(prefix, regn, index)					\
	dump_mask(__XSTRING(prefix ## index),				\
	    lapic_read32(LAPIC_ ## regn ## index),			\
	    index * 32)

	db_printf("In-service Interrupts:\n");
	dump_field(isr, ISR, 0);
	dump_field(isr, ISR, 1);
	dump_field(isr, ISR, 2);
	dump_field(isr, ISR, 3);
	dump_field(isr, ISR, 4);
	dump_field(isr, ISR, 5);
	dump_field(isr, ISR, 6);
	dump_field(isr, ISR, 7);

	db_printf("TMR Interrupts:\n");
	dump_field(tmr, TMR, 0);
	dump_field(tmr, TMR, 1);
	dump_field(tmr, TMR, 2);
	dump_field(tmr, TMR, 3);
	dump_field(tmr, TMR, 4);
	dump_field(tmr, TMR, 5);
	dump_field(tmr, TMR, 6);
	dump_field(tmr, TMR, 7);

	db_printf("IRR Interrupts:\n");
	dump_field(irr, IRR, 0);
	dump_field(irr, IRR, 1);
	dump_field(irr, IRR, 2);
	dump_field(irr, IRR, 3);
	dump_field(irr, IRR, 4);
	dump_field(irr, IRR, 5);
	dump_field(irr, IRR, 6);
	dump_field(irr, IRR, 7);

#undef dump_field
}
1787 #endif
1788
1789 /*
1790  * APIC probing support code.  This includes code to manage enumerators.
1791  */
1792
1793 static SLIST_HEAD(, apic_enumerator) enumerators =
1794         SLIST_HEAD_INITIALIZER(enumerators);
1795 static struct apic_enumerator *best_enum;
1796
1797 void
1798 apic_register_enumerator(struct apic_enumerator *enumerator)
1799 {
1800 #ifdef INVARIANTS
1801         struct apic_enumerator *apic_enum;
1802
1803         SLIST_FOREACH(apic_enum, &enumerators, apic_next) {
1804                 if (apic_enum == enumerator)
1805                         panic("%s: Duplicate register of %s", __func__,
1806                             enumerator->apic_name);
1807         }
1808 #endif
1809         SLIST_INSERT_HEAD(&enumerators, enumerator, apic_next);
1810 }
1811
1812 /*
1813  * We have to look for CPU's very, very early because certain subsystems
1814  * want to know how many CPU's we have extremely early on in the boot
1815  * process.
1816  */
1817 static void
1818 apic_init(void *dummy __unused)
1819 {
1820         struct apic_enumerator *enumerator;
1821         int retval, best;
1822
1823         /* We only support built in local APICs. */
1824         if (!(cpu_feature & CPUID_APIC))
1825                 return;
1826
1827         /* Don't probe if APIC mode is disabled. */
1828         if (resource_disabled("apic", 0))
1829                 return;
1830
1831         /* Probe all the enumerators to find the best match. */
1832         best_enum = NULL;
1833         best = 0;
1834         SLIST_FOREACH(enumerator, &enumerators, apic_next) {
1835                 retval = enumerator->apic_probe();
1836                 if (retval > 0)
1837                         continue;
1838                 if (best_enum == NULL || best < retval) {
1839                         best_enum = enumerator;
1840                         best = retval;
1841                 }
1842         }
1843         if (best_enum == NULL) {
1844                 if (bootverbose)
1845                         printf("APIC: Could not find any APICs.\n");
1846 #ifndef DEV_ATPIC
1847                 panic("running without device atpic requires a local APIC");
1848 #endif
1849                 return;
1850         }
1851
1852         if (bootverbose)
1853                 printf("APIC: Using the %s enumerator.\n",
1854                     best_enum->apic_name);
1855
1856 #ifdef I686_CPU
1857         /*
1858          * To work around an errata, we disable the local APIC on some
1859          * CPUs during early startup.  We need to turn the local APIC back
1860          * on on such CPUs now.
1861          */
1862         ppro_reenable_apic();
1863 #endif
1864
1865         /* Probe the CPU's in the system. */
1866         retval = best_enum->apic_probe_cpus();
1867         if (retval != 0)
1868                 printf("%s: Failed to probe CPUs: returned %d\n",
1869                     best_enum->apic_name, retval);
1870
1871 }
1872 SYSINIT(apic_init, SI_SUB_TUNABLES - 1, SI_ORDER_SECOND, apic_init, NULL);
1873
1874 /*
1875  * Setup the local APIC.  We have to do this prior to starting up the APs
1876  * in the SMP case.
1877  */
1878 static void
1879 apic_setup_local(void *dummy __unused)
1880 {
1881         int retval;
1882
1883         if (best_enum == NULL)
1884                 return;
1885
1886         lapics = malloc(sizeof(*lapics) * (max_apic_id + 1), M_LAPIC,
1887             M_WAITOK | M_ZERO);
1888
1889         /* Initialize the local APIC. */
1890         retval = best_enum->apic_setup_local();
1891         if (retval != 0)
1892                 printf("%s: Failed to setup the local APIC: returned %d\n",
1893                     best_enum->apic_name, retval);
1894 }
1895 SYSINIT(apic_setup_local, SI_SUB_CPU, SI_ORDER_SECOND, apic_setup_local, NULL);
1896
1897 /*
1898  * Setup the I/O APICs.
1899  */
1900 static void
1901 apic_setup_io(void *dummy __unused)
1902 {
1903         int retval;
1904
1905         if (best_enum == NULL)
1906                 return;
1907
1908         /*
1909          * Local APIC must be registered before other PICs and pseudo PICs
1910          * for proper suspend/resume order.
1911          */
1912         intr_register_pic(&lapic_pic);
1913
1914         retval = best_enum->apic_setup_io();
1915         if (retval != 0)
1916                 printf("%s: Failed to setup I/O APICs: returned %d\n",
1917                     best_enum->apic_name, retval);
1918
1919         /*
1920          * Finish setting up the local APIC on the BSP once we know
1921          * how to properly program the LINT pins.  In particular, this
1922          * enables the EOI suppression mode, if LAPIC support it and
1923          * user did not disabled the mode.
1924          */
1925         lapic_setup(1);
1926         if (bootverbose)
1927                 lapic_dump("BSP");
1928
1929         /* Enable the MSI "pic". */
1930         init_ops.msi_init();
1931 }
1932 SYSINIT(apic_setup_io, SI_SUB_INTR, SI_ORDER_THIRD, apic_setup_io, NULL);
1933
1934 #ifdef SMP
1935 /*
1936  * Inter Processor Interrupt functions.  The lapic_ipi_*() functions are
1937  * private to the MD code.  The public interface for the rest of the
1938  * kernel is defined in mp_machdep.c.
1939  */
1940
1941 /*
1942  * Wait delay microseconds for IPI to be sent.  If delay is -1, we
1943  * wait forever.
1944  */
1945 static int
1946 native_lapic_ipi_wait(int delay)
1947 {
1948         uint64_t rx;
1949
1950         /* LAPIC_ICR.APIC_DELSTAT_MASK is undefined in x2APIC mode */
1951         if (x2apic_mode)
1952                 return (1);
1953
1954         for (rx = 0; delay == -1 || rx < lapic_ipi_wait_mult * delay; rx++) {
1955                 if ((lapic_read_icr_lo() & APIC_DELSTAT_MASK) ==
1956                     APIC_DELSTAT_IDLE)
1957                         return (1);
1958                 ia32_pause();
1959         }
1960         return (0);
1961 }
1962
/*
 * Program and dispatch a raw IPI.  "icrlo" supplies the low-word ICR
 * contents (vector, delivery mode, destination shorthand, etc.) and
 * "dest" the destination APIC ID, used only when icrlo selects the
 * explicit destination field.
 */
static void
native_lapic_ipi_raw(register_t icrlo, u_int dest)
{
	uint64_t icr;
	uint32_t vhi, vlo;
	register_t saveintr;

	/* XXX: Need more sanity checking of icrlo? */
	KASSERT(x2apic_mode || lapic_map != NULL,
	    ("%s called too early", __func__));
	KASSERT(x2apic_mode ||
	    (dest & ~(APIC_ID_MASK >> APIC_ID_SHIFT)) == 0,
	    ("%s: invalid dest field", __func__));
	KASSERT((icrlo & APIC_ICRLO_RESV_MASK) == 0,
	    ("%s: reserved bits set in ICR LO register", __func__));

	/* Set destination in ICR HI register if it is being used. */
	if (!x2apic_mode) {
		/*
		 * xAPIC mode needs a read-modify-write of the memory
		 * mapped ICR; disable interrupts so the two-register
		 * update cannot be torn by an IPI sent from a handler.
		 * Note: "icr" is only initialized (and only consumed)
		 * on this !x2apic path.
		 */
		saveintr = intr_disable();
		icr = lapic_read_icr();
	}

	if ((icrlo & APIC_DEST_MASK) == APIC_DEST_DESTFLD) {
		if (x2apic_mode) {
			vhi = dest;
		} else {
			/* Preserve reserved HI bits, replace the ID. */
			vhi = icr >> 32;
			vhi &= ~APIC_ID_MASK;
			vhi |= dest << APIC_ID_SHIFT;
		}
	} else {
		vhi = 0;
	}

	/* Program the contents of the IPI and dispatch it. */
	if (x2apic_mode) {
		vlo = icrlo;
	} else {
		/* Keep the reserved LO bits read back from the ICR. */
		vlo = icr;
		vlo &= APIC_ICRLO_RESV_MASK;
		vlo |= icrlo;
	}
	lapic_write_icr(vhi, vlo);
	if (!x2apic_mode)
		intr_restore(saveintr);
}
2009
2010 #define BEFORE_SPIN     50000
2011 #ifdef DETECT_DEADLOCK
2012 #define AFTER_SPIN      50
2013 #endif
2014
/*
 * Send an IPI with the given vector to "dest", which is either one of
 * the APIC_IPI_DEST_* shorthands (self / all / all-but-self) or an
 * explicit APIC ID.  Waits up to BEFORE_SPIN for any previous IPI to
 * drain first, and panics if it never does (unless already panicking).
 */
static void
native_lapic_ipi_vectored(u_int vector, int dest)
{
	register_t icrlo, destfield;

	KASSERT((vector & ~APIC_VECTOR_MASK) == 0,
	    ("%s: invalid vector %d", __func__, vector));

	icrlo = APIC_DESTMODE_PHY | APIC_TRIGMOD_EDGE | APIC_LEVEL_ASSERT;

	/*
	 * NMI IPIs are just fake vectors used to send a NMI.  Use special rules
	 * regarding NMIs if passed, otherwise specify the vector.
	 */
	if (vector >= IPI_NMI_FIRST)
		icrlo |= APIC_DELMODE_NMI;
	else
		icrlo |= vector | APIC_DELMODE_FIXED;
	destfield = 0;
	switch (dest) {
	case APIC_IPI_DEST_SELF:
		icrlo |= APIC_DEST_SELF;
		break;
	case APIC_IPI_DEST_ALL:
		icrlo |= APIC_DEST_ALLISELF;
		break;
	case APIC_IPI_DEST_OTHERS:
		icrlo |= APIC_DEST_ALLESELF;
		break;
	default:
		/* Explicit destination: "dest" is a physical APIC ID. */
		KASSERT(x2apic_mode ||
		    (dest & ~(APIC_ID_MASK >> APIC_ID_SHIFT)) == 0,
		    ("%s: invalid destination 0x%x", __func__, dest));
		destfield = dest;
	}

	/* Wait for an earlier IPI to finish. */
	if (!lapic_ipi_wait(BEFORE_SPIN)) {
		if (panicstr != NULL)
			return;
		else
			panic("APIC: Previous IPI is stuck");
	}

	lapic_ipi_raw(icrlo, destfield);

#ifdef DETECT_DEADLOCK
	/* Wait for IPI to be delivered. */
	if (!lapic_ipi_wait(AFTER_SPIN)) {
#ifdef needsattention
		/*
		 * XXX FIXME:
		 *
		 * The above function waits for the message to actually be
		 * delivered.  It breaks out after an arbitrary timeout
		 * since the message should eventually be delivered (at
		 * least in theory) and that if it wasn't we would catch
		 * the failure with the check above when the next IPI is
		 * sent.
		 *
		 * We could skip this wait entirely, EXCEPT it probably
		 * protects us from other routines that assume that the
		 * message was delivered and acted upon when this function
		 * returns.
		 */
		printf("APIC: IPI might be stuck\n");
#else /* !needsattention */
		/* Wait until the message is sent, without a timeout. */
		while (lapic_read_icr_lo() & APIC_DELSTAT_PEND)
			ia32_pause();
#endif /* needsattention */
	}
#endif /* DETECT_DEADLOCK */
}
2089
2090 #endif /* SMP */
2091
2092 /*
2093  * Since the IDT is shared by all CPUs the IPI slot update needs to be globally
2094  * visible.
2095  *
2096  * Consider the case where an IPI is generated immediately after allocation:
2097  *     vector = lapic_ipi_alloc(ipifunc);
2098  *     ipi_selected(other_cpus, vector);
2099  *
2100  * In xAPIC mode a write to ICR_LO has serializing semantics because the
2101  * APIC page is mapped as an uncached region. In x2APIC mode there is an
2102  * explicit 'mfence' before the ICR MSR is written. Therefore in both cases
2103  * the IDT slot update is globally visible before the IPI is delivered.
2104  */
2105 static int
2106 native_lapic_ipi_alloc(inthand_t *ipifunc)
2107 {
2108         struct gate_descriptor *ip;
2109         long func;
2110         int idx, vector;
2111
2112         KASSERT(ipifunc != &IDTVEC(rsvd) && ipifunc != &IDTVEC(rsvd_pti),
2113             ("invalid ipifunc %p", ipifunc));
2114
2115         vector = -1;
2116         mtx_lock_spin(&icu_lock);
2117         for (idx = IPI_DYN_FIRST; idx <= IPI_DYN_LAST; idx++) {
2118                 ip = &idt[idx];
2119                 func = (ip->gd_hioffset << 16) | ip->gd_looffset;
2120                 if (func == (uintptr_t)&IDTVEC(rsvd)) {
2121                         vector = idx;
2122                         setidt(vector, ipifunc, SDT_APIC, SEL_KPL, GSEL_APIC);
2123                         break;
2124                 }
2125         }
2126         mtx_unlock_spin(&icu_lock);
2127         return (vector);
2128 }
2129
2130 static void
2131 native_lapic_ipi_free(int vector)
2132 {
2133         struct gate_descriptor *ip;
2134         long func;
2135
2136         KASSERT(vector >= IPI_DYN_FIRST && vector <= IPI_DYN_LAST,
2137             ("%s: invalid vector %d", __func__, vector));
2138
2139         mtx_lock_spin(&icu_lock);
2140         ip = &idt[vector];
2141         func = (ip->gd_hioffset << 16) | ip->gd_looffset;
2142         KASSERT(func != (uintptr_t)&IDTVEC(rsvd) &&
2143             func != (uintptr_t)&IDTVEC(rsvd_pti),
2144             ("invalid idtfunc %#lx", func));
2145         setidt(vector, pti ? &IDTVEC(rsvd_pti) : &IDTVEC(rsvd), SDT_APICT,
2146             SEL_KPL, GSEL_APIC);
2147         mtx_unlock_spin(&icu_lock);
2148 }