2 * Copyright (c) 2015-2016 The FreeBSD Foundation
5 * This software was developed by Andrew Turner under
6 * sponsorship from the FreeBSD Foundation.
8 * Redistribution and use in source and binary forms, with or without
9 * modification, are permitted provided that the following conditions
11 * 1. Redistributions of source code must retain the above copyright
12 * notice, this list of conditions and the following disclaimer.
13 * 2. Redistributions in binary form must reproduce the above copyright
14 * notice, this list of conditions and the following disclaimer in the
15 * documentation and/or other materials provided with the distribution.
17 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
18 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
19 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
20 * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
21 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
22 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
23 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
24 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
25 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
26 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
33 #include "opt_kstack_pages.h"
34 #include "opt_platform.h"
36 #include <sys/cdefs.h>
37 __FBSDID("$FreeBSD$");
39 #include <sys/param.h>
40 #include <sys/systm.h>
44 #include <sys/domainset.h>
45 #include <sys/kernel.h>
47 #include <sys/malloc.h>
48 #include <sys/module.h>
49 #include <sys/mutex.h>
52 #include <sys/sched.h>
57 #include <vm/vm_extern.h>
58 #include <vm/vm_kern.h>
59 #include <vm/vm_map.h>
61 #include <machine/machdep.h>
62 #include <machine/debug_monitor.h>
63 #include <machine/intr.h>
64 #include <machine/smp.h>
66 #include <machine/vfp.h>
70 #include <contrib/dev/acpica/include/acpi.h>
71 #include <dev/acpica/acpivar.h>
75 #include <dev/ofw/openfirm.h>
76 #include <dev/ofw/ofw_bus.h>
77 #include <dev/ofw/ofw_bus_subr.h>
78 #include <dev/ofw/ofw_cpu.h>
81 #include <dev/psci/psci.h>
85 #define MP_QUIRK_CPULIST 0x01 /* The list of cpus may be wrong, */
86 /* don't panic if one fails to start */
87 static uint32_t mp_quirks;
94 { "arm,foundation-aarch64", MP_QUIRK_CPULIST },
95 { "arm,fvp-base", MP_QUIRK_CPULIST },
96 /* Some DTS files use this incorrect compatible string */
97 { "arm,vfp-base", MP_QUIRK_CPULIST },
102 typedef void intr_ipi_send_t(void *, cpuset_t, u_int);
103 typedef void intr_ipi_handler_t(void *);
105 #define INTR_IPI_NAMELEN (MAXCOMLEN + 1)
107 intr_ipi_handler_t * ii_handler;
108 void * ii_handler_arg;
109 intr_ipi_send_t * ii_send;
111 char ii_name[INTR_IPI_NAMELEN];
115 static struct intr_ipi ipi_sources[INTR_IPI_COUNT];
117 static struct intr_ipi *intr_ipi_lookup(u_int);
118 static void intr_pic_ipi_setup(u_int, const char *, intr_ipi_handler_t *,
121 static void ipi_ast(void *);
122 static void ipi_hardclock(void *);
123 static void ipi_preempt(void *);
124 static void ipi_rendezvous(void *);
125 static void ipi_stop(void *);
127 struct pcb stoppcbs[MAXCPU];
130 static u_int fdt_cpuid;
133 void mpentry(unsigned long cpuid);
134 void init_secondary(uint64_t);
136 /* Synchronize AP startup. */
137 static struct mtx ap_boot_mtx;
139 /* Stacks for AP initialization, discarded once idle threads are started. */
141 static void *bootstacks[MAXCPU];
143 /* Count of started APs, used to synchronize access to bootstack. */
144 static volatile int aps_started;
146 /* Set to 1 once we're ready to let the APs out of the pen. */
147 static volatile int aps_ready;
149 /* Temporary variables for init_secondary() */
150 void *dpcpu[MAXCPU - 1];
153 is_boot_cpu(uint64_t target_cpu)
156 return (cpuid_to_pcpu[0]->pc_mpidr == (target_cpu & CPU_AFF_MASK));
160 release_aps(void *dummy __unused)
164 /* Only release CPUs if they exist */
168 intr_pic_ipi_setup(IPI_AST, "ast", ipi_ast, NULL);
169 intr_pic_ipi_setup(IPI_PREEMPT, "preempt", ipi_preempt, NULL);
170 intr_pic_ipi_setup(IPI_RENDEZVOUS, "rendezvous", ipi_rendezvous, NULL);
171 intr_pic_ipi_setup(IPI_STOP, "stop", ipi_stop, NULL);
172 intr_pic_ipi_setup(IPI_STOP_HARD, "stop hard", ipi_stop, NULL);
173 intr_pic_ipi_setup(IPI_HARDCLOCK, "hardclock", ipi_hardclock, NULL);
175 atomic_store_rel_int(&aps_ready, 1);
176 /* Wake up the other CPUs */
182 printf("Release APs...");
185 for (i = 0; i < 2000; i++) {
191 * Don't time out while we are making progress. Some large
192 * systems can take a while to start all CPUs.
194 if (smp_cpus > started) {
201 printf("APs not started\n");
203 SYSINIT(start_aps, SI_SUB_SMP, SI_ORDER_FIRST, release_aps, NULL);
206 init_secondary(uint64_t cpu)
213 * Verify that the value passed in the 'cpu' argument (aka context_id)
214 * is valid. Some older U-Boot based PSCI implementations are buggy and
215 * can pass a random value in it.
217 mpidr = READ_SPECIALREG(mpidr_el1) & CPU_AFF_MASK;
218 if (cpu >= MAXCPU || cpuid_to_pcpu[cpu] == NULL ||
219 cpuid_to_pcpu[cpu]->pc_mpidr != mpidr) {
220 for (cpu = 0; cpu < mp_maxid; cpu++)
221 if (cpuid_to_pcpu[cpu] != NULL &&
222 cpuid_to_pcpu[cpu]->pc_mpidr == mpidr)
225 panic("MPIDR for this CPU is not in pcpu table");
228 pcpup = cpuid_to_pcpu[cpu];
230 * Set the pcpu pointer with a backup in tpidr_el1 to be
231 * loaded when entering the kernel from userland.
235 "msr tpidr_el1, %0" :: "r"(pcpup));
238 * Identify current CPU. This is necessary to setup
239 * affinity registers and to provide support for
240 * runtime chip identification.
242 * We need this before signalling the CPU is ready to
243 * let the boot CPU use the results.
245 pcpup->pc_midr = get_midr();
248 /* Ensure the stores in identify_cpu have completed */
249 atomic_thread_fence_acq_rel();
251 /* Signal the BSP and spin until it has released all APs. */
252 atomic_add_int(&aps_started, 1);
253 while (!atomic_load_int(&aps_ready))
254 __asm __volatile("wfe");
256 /* Initialize curthread */
257 KASSERT(PCPU_GET(idlethread) != NULL, ("no idle thread"));
258 pcpup->pc_curthread = pcpup->pc_idlethread;
260 /* Initialize curpmap to match TTBR0's current setting. */
261 pmap0 = vmspace_pmap(&vmspace0);
262 KASSERT(pmap_to_ttbr0(pmap0) == READ_SPECIALREG(ttbr0_el1),
263 ("pmap0 doesn't match cpu %ld's ttbr0", cpu));
264 pcpup->pc_curpmap = pmap0;
266 install_cpu_errata();
268 intr_pic_init_secondary();
270 /* Start per-CPU event timers. */
280 mtx_lock_spin(&ap_boot_mtx);
281 atomic_add_rel_32(&smp_cpus, 1);
282 if (smp_cpus == mp_ncpus) {
283 /* Enable IPIs, TLB shootdowns, freezes, etc. */
284 atomic_store_rel_int(&smp_started, 1);
286 mtx_unlock_spin(&ap_boot_mtx);
291 * Assert that smp_after_idle_runnable condition is reasonable.
293 MPASS(PCPU_GET(curpcb) == NULL);
295 /* Enter the scheduler */
298 panic("scheduler returned us to init_secondary");
303 smp_after_idle_runnable(void *arg __unused)
308 for (cpu = 1; cpu < mp_ncpus; cpu++) {
309 if (bootstacks[cpu] != NULL) {
311 while (atomic_load_ptr(&pc->pc_curpcb) == NULL)
313 kmem_free((vm_offset_t)bootstacks[cpu], PAGE_SIZE);
317 SYSINIT(smp_after_idle_runnable, SI_SUB_SMP, SI_ORDER_ANY,
318 smp_after_idle_runnable, NULL);
321 * Send an IPI through the interrupt controller.
324 pic_ipi_send(void *arg, cpuset_t cpus, u_int ipi)
327 KASSERT(intr_irq_root_dev != NULL, ("%s: no root attached", __func__));
330 * Ensure that this CPU's stores will be visible to IPI
331 * recipients before starting to send the interrupts.
335 PIC_IPI_SEND(intr_irq_root_dev, arg, cpus, ipi);
339 * Set up an IPI handler on the interrupt controller.
344 intr_pic_ipi_setup(u_int ipi, const char *name, intr_ipi_handler_t *hand,
347 struct intr_irqsrc *isrc;
351 KASSERT(intr_irq_root_dev != NULL, ("%s: no root attached", __func__));
352 KASSERT(hand != NULL, ("%s: ipi %u no handler", __func__, ipi));
354 error = PIC_IPI_SETUP(intr_irq_root_dev, ipi, &isrc);
358 isrc->isrc_handlers++;
360 ii = intr_ipi_lookup(ipi);
361 KASSERT(ii->ii_count == NULL, ("%s: ipi %u reused", __func__, ipi));
363 ii->ii_handler = hand;
364 ii->ii_handler_arg = arg;
365 ii->ii_send = pic_ipi_send;
366 ii->ii_send_arg = isrc;
367 strlcpy(ii->ii_name, name, INTR_IPI_NAMELEN);
368 ii->ii_count = intr_ipi_setup_counters(name);
370 PIC_ENABLE_INTR(intr_irq_root_dev, isrc);
374 intr_ipi_send(cpuset_t cpus, u_int ipi)
378 ii = intr_ipi_lookup(ipi);
379 if (ii->ii_count == NULL)
380 panic("%s: not setup IPI %u", __func__, ipi);
382 ii->ii_send(ii->ii_send_arg, cpus, ipi);
386 ipi_ast(void *dummy __unused)
389 CTR0(KTR_SMP, "IPI_AST");
393 ipi_hardclock(void *dummy __unused)
396 CTR1(KTR_SMP, "%s: IPI_HARDCLOCK", __func__);
401 ipi_preempt(void *dummy __unused)
403 CTR1(KTR_SMP, "%s: IPI_PREEMPT", __func__);
404 sched_preempt(curthread);
408 ipi_rendezvous(void *dummy __unused)
411 CTR0(KTR_SMP, "IPI_RENDEZVOUS");
412 smp_rendezvous_action();
416 ipi_stop(void *dummy __unused)
420 CTR0(KTR_SMP, "IPI_STOP");
422 cpu = PCPU_GET(cpuid);
423 savectx(&stoppcbs[cpu]);
425 /* Indicate we are stopped */
426 CPU_SET_ATOMIC(cpu, &stopped_cpus);
428 /* Wait for restart */
429 while (!CPU_ISSET(cpu, &started_cpus))
433 dbg_register_sync(NULL);
436 CPU_CLR_ATOMIC(cpu, &started_cpus);
437 CPU_CLR_ATOMIC(cpu, &stopped_cpus);
438 CTR0(KTR_SMP, "IPI_STOP (restart)");
444 struct cpu_group *dom, *root;
447 root = smp_topo_alloc(1);
448 dom = smp_topo_alloc(vm_ndomains);
450 root->cg_parent = NULL;
451 root->cg_child = dom;
452 CPU_COPY(&all_cpus, &root->cg_mask);
453 root->cg_count = mp_ncpus;
454 root->cg_children = vm_ndomains;
455 root->cg_level = CG_SHARE_NONE;
459 * Redundant layers will be collapsed by the caller so we don't need a
460 * special case for a single domain.
462 for (i = 0; i < vm_ndomains; i++, dom++) {
463 dom->cg_parent = root;
464 dom->cg_child = NULL;
465 CPU_COPY(&cpuset_domain[i], &dom->cg_mask);
466 dom->cg_count = CPU_COUNT(&dom->cg_mask);
467 dom->cg_children = 0;
468 dom->cg_level = CG_SHARE_L3;
475 /* Determine if we are running on an MP machine */
480 /* ARM64TODO: Read the u bit of mpidr_el1 to determine this */
485 * Starts a given CPU. If the CPU is already running, i.e. it is the boot CPU,
486 * do nothing. Returns true if the CPU is present and running.
489 start_cpu(u_int cpuid, uint64_t target_cpu, int domain)
495 /* Check we are able to start this cpu */
496 if (cpuid > mp_maxid)
500 if (is_boot_cpu(target_cpu))
503 KASSERT(cpuid < MAXCPU, ("Too many CPUs"));
505 pcpup = (void *)kmem_malloc_domainset(DOMAINSET_PREF(domain),
506 sizeof(*pcpup), M_WAITOK | M_ZERO);
507 pcpu_init(pcpup, cpuid, sizeof(struct pcpu));
508 pcpup->pc_mpidr = target_cpu & CPU_AFF_MASK;
510 dpcpu[cpuid - 1] = (void *)kmem_malloc_domainset(
511 DOMAINSET_PREF(domain), DPCPU_SIZE, M_WAITOK | M_ZERO);
512 dpcpu_init(dpcpu[cpuid - 1], cpuid);
514 bootstacks[cpuid] = (void *)kmem_malloc_domainset(
515 DOMAINSET_PREF(domain), PAGE_SIZE, M_WAITOK | M_ZERO);
517 naps = atomic_load_int(&aps_started);
518 bootstack = (char *)bootstacks[cpuid] + PAGE_SIZE;
520 printf("Starting CPU %u (%lx)\n", cpuid, target_cpu);
521 pa = pmap_extract(kernel_pmap, (vm_offset_t)mpentry);
522 err = psci_cpu_on(target_cpu, pa, cpuid);
523 if (err != PSCI_RETVAL_SUCCESS) {
525 * Panic here if INVARIANTS are enabled and PSCI failed to
526 * start the requested CPU. psci_cpu_on() returns PSCI_MISSING
527 * to indicate we are unable to use it to start the given CPU.
529 KASSERT(err == PSCI_MISSING ||
530 (mp_quirks & MP_QUIRK_CPULIST) == MP_QUIRK_CPULIST,
531 ("Failed to start CPU %u (%lx), error %d\n",
532 cpuid, target_cpu, err));
535 kmem_free((vm_offset_t)dpcpu[cpuid - 1], DPCPU_SIZE);
536 dpcpu[cpuid - 1] = NULL;
537 kmem_free((vm_offset_t)bootstacks[cpuid], PAGE_SIZE);
538 bootstacks[cpuid] = NULL;
543 /* Wait for the AP to switch to its boot stack. */
544 while (atomic_load_int(&aps_started) < naps + 1)
546 CPU_SET(cpuid, &all_cpus);
553 madt_handler(ACPI_SUBTABLE_HEADER *entry, void *arg)
555 ACPI_MADT_GENERIC_INTERRUPT *intr;
560 switch(entry->Type) {
561 case ACPI_MADT_TYPE_GENERIC_INTERRUPT:
562 intr = (ACPI_MADT_GENERIC_INTERRUPT *)entry;
565 if (is_boot_cpu(intr->ArmMpidr))
571 domain = acpi_pxm_get_cpu_locality(*cpuid);
575 if (start_cpu(id, intr->ArmMpidr, domain)) {
576 MPASS(cpuid_to_pcpu[id] != NULL);
577 cpuid_to_pcpu[id]->pc_acpi_id = intr->Uid;
579 * Don't increment for the boot CPU, its CPU ID is
582 if (!is_boot_cpu(intr->ArmMpidr))
595 ACPI_TABLE_MADT *madt;
599 physaddr = acpi_find_table(ACPI_SIG_MADT);
603 madt = acpi_map_table(physaddr, ACPI_SIG_MADT);
605 printf("Unable to map the MADT, not starting APs\n");
608 /* Boot CPU is always 0 */
610 acpi_walk_subtables(madt + 1, (char *)madt + madt->Header.Length,
611 madt_handler, &cpuid);
613 acpi_unmap_table(madt);
616 acpi_pxm_set_cpu_locality();
623 start_cpu_fdt(u_int id, phandle_t node, u_int addr_size, pcell_t *reg)
630 if (addr_size == 2) {
632 target_cpu |= reg[1];
635 if (is_boot_cpu(target_cpu))
640 if (!start_cpu(cpuid, target_cpu, 0))
644 * Don't increment for the boot CPU, its CPU ID is reserved.
646 if (!is_boot_cpu(target_cpu))
649 /* Try to read the numa node of this cpu */
650 if (vm_ndomains == 1 ||
651 OF_getencprop(node, "numa-node-id", &domain, sizeof(domain)) <= 0)
653 cpuid_to_pcpu[cpuid]->pc_domain = domain;
654 if (domain < MAXMEMDOM)
655 CPU_SET(cpuid, &cpuset_domain[domain]);
665 for (i = 0; fdt_quirks[i].compat != NULL; i++) {
666 if (ofw_bus_node_is_compatible(node,
667 fdt_quirks[i].compat) != 0) {
668 mp_quirks = fdt_quirks[i].quirks;
672 ofw_cpu_early_foreach(start_cpu_fdt, true);
676 /* Initialize and fire up non-boot processors */
680 mtx_init(&ap_boot_mtx, "ap boot", NULL, MTX_SPIN);
682 /* CPU 0 is always boot CPU. */
683 CPU_SET(0, &all_cpus);
684 cpuid_to_pcpu[0]->pc_mpidr = READ_SPECIALREG(mpidr_el1) & CPU_AFF_MASK;
686 switch(arm64_bus_method) {
689 mp_quirks = MP_QUIRK_CPULIST;
703 /* Introduce rest of cores to the world */
705 cpu_mp_announce(void)
711 cpu_count_acpi_handler(ACPI_SUBTABLE_HEADER *entry, void *arg)
713 ACPI_MADT_GENERIC_INTERRUPT *intr;
716 switch(entry->Type) {
717 case ACPI_MADT_TYPE_GENERIC_INTERRUPT:
718 intr = (ACPI_MADT_GENERIC_INTERRUPT *)entry;
729 ACPI_TABLE_MADT *madt;
733 physaddr = acpi_find_table(ACPI_SIG_MADT);
737 madt = acpi_map_table(physaddr, ACPI_SIG_MADT);
739 printf("Unable to map the MADT, not starting APs\n");
744 acpi_walk_subtables(madt + 1, (char *)madt + madt->Header.Length,
745 cpu_count_acpi_handler, &cores);
747 acpi_unmap_table(madt);
754 cpu_mp_setmaxid(void)
761 switch(arm64_bus_method) {
764 cores = cpu_count_acpi();
766 cores = MIN(cores, MAXCPU);
768 printf("Found %d CPUs in the ACPI tables\n",
771 mp_maxid = cores - 1;
777 cores = ofw_cpu_early_foreach(NULL, false);
779 cores = MIN(cores, MAXCPU);
781 printf("Found %d CPUs in the device tree\n",
784 mp_maxid = cores - 1;
790 printf("No CPU data, limiting to 1 core\n");
794 if (TUNABLE_INT_FETCH("hw.ncpu", &cores)) {
795 if (cores > 0 && cores < mp_ncpus) {
797 mp_maxid = cores - 1;
805 static struct intr_ipi *
806 intr_ipi_lookup(u_int ipi)
809 if (ipi >= INTR_IPI_COUNT)
810 panic("%s: no such IPI %u", __func__, ipi);
812 return (&ipi_sources[ipi]);
816 * Interrupt controller dispatch function for IPIs. It should
817 * be called straight from the interrupt controller when the associated
818 * interrupt source is learned. Or from anybody who has an interrupt
822 intr_ipi_dispatch(u_int ipi, struct trapframe *tf)
827 ii = intr_ipi_lookup(ipi);
828 if (ii->ii_count == NULL)
829 panic("%s: not setup IPI %u", __func__, ipi);
831 intr_ipi_increment_count(ii->ii_count, PCPU_GET(cpuid));
834 * Supply the IPI handler with the trapframe as its argument
835 * if no handler argument is registered.
837 arg = ii->ii_handler_arg != NULL ? ii->ii_handler_arg : tf;
843 * Map IPI into interrupt controller.
848 ipi_map(struct intr_irqsrc *isrc, u_int ipi)
853 if (ipi >= INTR_IPI_COUNT)
854 panic("%s: no such IPI %u", __func__, ipi);
856 KASSERT(intr_irq_root_dev != NULL, ("%s: no root attached", __func__));
858 isrc->isrc_type = INTR_ISRCT_NAMESPACE;
859 isrc->isrc_nspc_type = INTR_IRQ_NSPC_IPI;
860 isrc->isrc_nspc_num = ipi_next_num;
862 error = PIC_REGISTER(intr_irq_root_dev, isrc, &is_percpu);
864 isrc->isrc_dev = intr_irq_root_dev;
871 * Set up an IPI handler for an interrupt source.
873 * Note that a platform may offer other ways to send and receive
874 * IPIs, such as fast interrupts. In that case,
875 * one can call this function with the AISHF_NOALLOC flag set and then
876 * call intr_ipi_dispatch() when appropriate.
881 intr_ipi_set_handler(u_int ipi, const char *name, intr_ipi_filter_t *filter,
882 void *arg, u_int flags)
884 struct intr_irqsrc *isrc;
890 isrc = intr_ipi_lookup(ipi);
891 if (isrc->isrc_ipifilter != NULL)
894 if ((flags & AISHF_NOALLOC) == 0) {
895 error = ipi_map(isrc, ipi);
900 isrc->isrc_ipifilter = filter;
901 isrc->isrc_arg = arg;
902 isrc->isrc_handlers = 1;
903 isrc->isrc_count = intr_ipi_setup_counters(name);
904 isrc->isrc_index = 0; /* it should not be used in IPI case */
906 if (isrc->isrc_dev != NULL) {
907 PIC_ENABLE_INTR(isrc->isrc_dev, isrc);
908 PIC_ENABLE_SOURCE(isrc->isrc_dev, isrc);
916 ipi_all_but_self(u_int ipi)
921 CPU_CLR(PCPU_GET(cpuid), &cpus);
922 CTR2(KTR_SMP, "%s: ipi: %x", __func__, ipi);
923 intr_ipi_send(cpus, ipi);
927 ipi_cpu(int cpu, u_int ipi)
934 CTR3(KTR_SMP, "%s: cpu: %d, ipi: %x", __func__, cpu, ipi);
935 intr_ipi_send(cpus, ipi);
939 ipi_selected(cpuset_t cpus, u_int ipi)
942 CTR2(KTR_SMP, "%s: ipi: %x", __func__, ipi);
943 intr_ipi_send(cpus, ipi);