2 * Copyright (c) 1996, by Steve Passe
3 * Copyright (c) 2008, by Kip Macy
6 * Redistribution and use in source and binary forms, with or without
7 * modification, are permitted provided that the following conditions
9 * 1. Redistributions of source code must retain the above copyright
10 * notice, this list of conditions and the following disclaimer.
11 * 2. The name of the developer may NOT be used to endorse or promote products
12 * derived from this software without specific prior written permission.
14 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
15 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
16 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
17 * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
18 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
19 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
20 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
21 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
22 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
23 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
27 #include <sys/cdefs.h>
28 __FBSDID("$FreeBSD$");
32 #include "opt_kstack_pages.h"
33 #include "opt_mp_watchdog.h"
34 #include "opt_sched.h"
39 #error How did you get here?
43 #error The apic device is required for SMP, add "device apic" to your config file.
45 #if defined(CPU_DISABLE_CMPXCHG) && !defined(COMPILING_LINT)
46 #error SMP not supported with CPU_DISABLE_CMPXCHG
50 #include <sys/param.h>
51 #include <sys/systm.h>
53 #include <sys/cons.h> /* cngetc() */
57 #include <sys/kernel.h>
60 #include <sys/malloc.h>
61 #include <sys/memrange.h>
62 #include <sys/mutex.h>
65 #include <sys/sched.h>
67 #include <sys/sysctl.h>
70 #include <vm/vm_param.h>
72 #include <vm/vm_kern.h>
73 #include <vm/vm_extern.h>
74 #include <vm/vm_page.h>
76 #include <machine/apicreg.h>
77 #include <machine/md_var.h>
78 #include <machine/mp_watchdog.h>
79 #include <machine/pcb.h>
80 #include <machine/psl.h>
81 #include <machine/smp.h>
82 #include <machine/specialreg.h>
83 #include <machine/pcpu.h>
87 #include <machine/xen/xen-os.h>
88 #include <xen/evtchn.h>
89 #include <xen/xen_intr.h>
90 #include <xen/hypervisor.h>
91 #include <xen/interface/vcpu.h>
94 int mp_naps; /* # of Applications processors */
95 int boot_cpu_id = -1; /* designated BSP */
97 extern struct pcpu __pcpu[];
100 static union descriptor *bootAPgdt;
102 static char resched_name[NR_CPUS][15];
103 static char callfunc_name[NR_CPUS][15];
105 /* Free these after use */
106 void *bootstacks[MAXCPU];
108 /* Hotwire a 0->4MB V==P mapping */
109 extern pt_entry_t *KPTphys;
111 struct pcb stoppcbs[MAXCPU];
113 /* Variables needed for SMP tlb shootdown. */
114 vm_offset_t smp_tlb_addr1;
115 vm_offset_t smp_tlb_addr2;
116 volatile int smp_tlb_wait;
118 typedef void call_data_func_t(uintptr_t , uintptr_t);
120 static u_int logical_cpus;
122 /* used to hold the APs until we are ready to release them */
123 static struct mtx ap_boot_mtx;
125 /* Set to 1 once we're ready to let the APs out of the pen. */
126 static volatile int aps_ready = 0;
129 * Store data from cpu_add() until later in the boot when we actually set up
136 } static cpu_info[MAX_APIC_ID + 1];
137 int cpu_apic_ids[MAXCPU];
138 int apic_cpuids[MAX_APIC_ID + 1];
140 /* Holds pending bitmap based IPIs per CPU */
141 static volatile u_int cpu_ipi_pending[MAXCPU];
143 static int cpu_logical;
144 static int cpu_cores;
146 static void assign_cpu_ids(void);
147 static void set_interrupt_apic_ids(void);
148 int start_all_aps(void);
149 static int start_ap(int apic_id);
150 static void release_aps(void *dummy);
152 static u_int hyperthreading_cpus;
153 static cpumask_t hyperthreading_cpus_mask;
155 extern void Xhypervisor_callback(void);
156 extern void failsafe_callback(void);
157 extern void pmap_lazyfix_action(void);
164 if (cpu_logical == 0)
166 if (mp_ncpus % (cpu_cores * cpu_logical) != 0) {
167 printf("WARNING: Non-uniform processors.\n");
168 printf("WARNING: Using suboptimal topology.\n");
169 return (smp_topo_none());
172 * No multi-core or hyper-threaded.
174 if (cpu_logical * cpu_cores == 1)
175 return (smp_topo_none());
177 * Only HTT no multi-core.
179 if (cpu_logical > 1 && cpu_cores == 1)
180 return (smp_topo_1level(CG_SHARE_L1, cpu_logical, CG_FLAG_HTT));
182 * Only multi-core no HTT.
184 if (cpu_cores > 1 && cpu_logical == 1)
185 return (smp_topo_1level(CG_SHARE_NONE, cpu_cores, 0));
187 * Both HTT and multi-core.
189 return (smp_topo_2level(CG_SHARE_NONE, cpu_cores,
190 CG_SHARE_L1, cpu_logical, CG_FLAG_HTT));
194 * Calculate usable address in base memory for AP trampoline code.
197 mp_bootaddress(u_int basemem)
204 cpu_add(u_int apic_id, char boot_cpu)
207 if (apic_id > MAX_APIC_ID) {
208 panic("SMP: APIC ID %d too high", apic_id);
211 KASSERT(cpu_info[apic_id].cpu_present == 0, ("CPU %d added twice",
213 cpu_info[apic_id].cpu_present = 1;
215 KASSERT(boot_cpu_id == -1,
216 ("CPU %d claims to be BSP, but CPU %d already is", apic_id,
218 boot_cpu_id = apic_id;
219 cpu_info[apic_id].cpu_bsp = 1;
221 if (mp_ncpus < MAXCPU)
224 printf("SMP: Added CPU %d (%s)\n", apic_id, boot_cpu ? "BSP" :
229 cpu_mp_setmaxid(void)
232 mp_maxid = MAXCPU - 1;
240 * Always record BSP in CPU map so that the mbuf init code works
246 * No CPUs were found, so this must be a UP system. Setup
247 * the variables to represent a system with a single CPU
254 /* At least one CPU was found. */
257 * One CPU was found, so this must be a UP system with
263 /* At least two CPUs were found. */
268 * Initialize the IPI handlers and start up the AP's.
275 /* Initialize the logical ID to APIC ID table. */
276 for (i = 0; i < MAXCPU; i++) {
277 cpu_apic_ids[i] = -1;
278 cpu_ipi_pending[i] = 0;
281 /* Set boot_cpu_id if needed. */
282 if (boot_cpu_id == -1) {
283 boot_cpu_id = PCPU_GET(apic_id);
284 cpu_info[boot_cpu_id].cpu_bsp = 1;
286 KASSERT(boot_cpu_id == PCPU_GET(apic_id),
287 ("BSP's APIC ID doesn't match boot_cpu_id"));
288 cpu_apic_ids[0] = boot_cpu_id;
289 apic_cpuids[boot_cpu_id] = 0;
293 /* Start each Application Processor */
296 /* Setup the initial logical CPUs info. */
297 logical_cpus = logical_cpus_mask = 0;
298 if (cpu_feature & CPUID_HTT)
299 logical_cpus = (cpu_procinfo & CPUID_HTT_CORES) >> 16;
301 set_interrupt_apic_ids();
306 iv_rendezvous(uintptr_t a, uintptr_t b)
308 smp_rendezvous_action();
312 iv_invltlb(uintptr_t a, uintptr_t b)
318 iv_invlpg(uintptr_t a, uintptr_t b)
324 iv_invlrng(uintptr_t a, uintptr_t b)
326 vm_offset_t start = (vm_offset_t)a;
327 vm_offset_t end = (vm_offset_t)b;
329 while (start < end) {
337 iv_invlcache(uintptr_t a, uintptr_t b)
341 atomic_add_int(&smp_tlb_wait, 1);
345 iv_lazypmap(uintptr_t a, uintptr_t b)
347 pmap_lazyfix_action();
348 atomic_add_int(&smp_tlb_wait, 1);
352 * These start from "IPI offset" APIC_IPI_INTS
354 static call_data_func_t *ipi_vectors[6] =
365 * Reschedule callback. Drains this CPU's pending bitmap IPIs:
366 * IPI_PREEMPT calls sched_preempt(); IPI_AST needs no explicit work
367 * because it is handled automatically when we return from the interrupt.
370 smp_reschedule_interrupt(void *unused)
372 int cpu = PCPU_GET(cpuid);
375 ipi_bitmap = atomic_readandclear_int(&cpu_ipi_pending[cpu]);
377 if (ipi_bitmap & (1 << IPI_PREEMPT)) {
379 (*ipi_preempt_counts[cpu])++;
381 sched_preempt(curthread);
384 if (ipi_bitmap & (1 << IPI_AST)) {
386 (*ipi_ast_counts[cpu])++;
388 /* Nothing to do for AST */
390 return (FILTER_HANDLED);
402 static struct _call_data *call_data;
405 smp_call_function_interrupt(void *unused)
407 call_data_func_t *func;
408 uintptr_t arg1 = call_data->arg1;
409 uintptr_t arg2 = call_data->arg2;
410 int wait = call_data->wait;
411 atomic_t *started = &call_data->started;
412 atomic_t *finished = &call_data->finished;
414 /* We only handle function IPIs, not bitmap IPIs */
415 if (call_data->func_id < APIC_IPI_INTS || call_data->func_id > IPI_BITMAP_VECTOR)
416 panic("invalid function id %u", call_data->func_id);
418 func = ipi_vectors[call_data->func_id - APIC_IPI_INTS];
420 * Notify initiating CPU that I've grabbed the data and am
421 * about to execute the function
426 * At this point the info structure may be out of scope unless wait==1
432 atomic_inc(finished);
434 atomic_add_int(&smp_tlb_wait, 1);
435 return (FILTER_HANDLED);
439 * Print various information about the SMP system hardware and setup.
442 cpu_mp_announce(void)
447 printf(" cpu0 (BSP): APIC ID: %2d\n", boot_cpu_id);
448 for (i = 1, x = 0; x <= MAX_APIC_ID; x++) {
449 if (!cpu_info[x].cpu_present || cpu_info[x].cpu_bsp)
451 if (cpu_info[x].cpu_disabled)
452 printf(" cpu (AP): APIC ID: %2d (disabled)\n", x);
454 KASSERT(i < mp_ncpus,
455 ("mp_ncpus and actual cpus are out of whack"));
456 printf(" cpu%d (AP): APIC ID: %2d\n", i++, x);
462 xen_smp_intr_init(unsigned int cpu)
467 per_cpu(resched_irq, cpu) = per_cpu(callfunc_irq, cpu) = -1;
469 sprintf(resched_name[cpu], "resched%u", cpu);
470 rc = bind_ipi_to_irqhandler(RESCHEDULE_VECTOR,
473 smp_reschedule_interrupt,
474 INTR_FAST|INTR_TYPE_TTY|INTR_MPSAFE, &irq);
476 printf("[XEN] IPI cpu=%d irq=%d vector=RESCHEDULE_VECTOR (%d)\n",
477 cpu, irq, RESCHEDULE_VECTOR);
479 per_cpu(resched_irq, cpu) = irq;
481 sprintf(callfunc_name[cpu], "callfunc%u", cpu);
482 rc = bind_ipi_to_irqhandler(CALL_FUNCTION_VECTOR,
485 smp_call_function_interrupt,
486 INTR_FAST|INTR_TYPE_TTY|INTR_MPSAFE, &irq);
489 per_cpu(callfunc_irq, cpu) = irq;
491 printf("[XEN] IPI cpu=%d irq=%d vector=CALL_FUNCTION_VECTOR (%d)\n",
492 cpu, irq, CALL_FUNCTION_VECTOR);
495 if ((cpu != 0) && ((rc = ap_cpu_initclocks(cpu)) != 0))
501 if (per_cpu(resched_irq, cpu) >= 0)
502 unbind_from_irqhandler(per_cpu(resched_irq, cpu));
503 if (per_cpu(callfunc_irq, cpu) >= 0)
504 unbind_from_irqhandler(per_cpu(callfunc_irq, cpu));
509 xen_smp_intr_init_cpus(void *unused)
513 for (i = 0; i < mp_ncpus; i++)
514 xen_smp_intr_init(i);
517 #define MTOPSIZE (1<<(14 + PAGE_SHIFT))
520 * AP CPU's call this to initialize themselves.
529 /* bootAP is set in start_ap() to our ID. */
530 PCPU_SET(currentldt, _default_ldt);
531 gsel_tss = GSEL(GPROC0_SEL, SEL_KPL);
533 gdt[bootAP * NGDT + GPROC0_SEL].sd.sd_type = SDT_SYS386TSS;
535 PCPU_SET(common_tss.tss_esp0, 0); /* not used until after switch */
536 PCPU_SET(common_tss.tss_ss0, GSEL(GDATA_SEL, SEL_KPL));
537 PCPU_SET(common_tss.tss_ioopt, (sizeof (struct i386tss)) << 16);
539 PCPU_SET(tss_gdt, &gdt[bootAP * NGDT + GPROC0_SEL].sd);
541 PCPU_SET(common_tssd, *PCPU_GET(tss_gdt));
543 PCPU_SET(fsgs_gdt, &gdt[GUFS_SEL].sd);
546 * Set to a known state:
547 * Set by mpboot.s: CR0_PG, CR0_PE
548 * Set by cpu_setregs: CR0_NE, CR0_MP, CR0_TS, CR0_WP, CR0_AM
551 * signal our startup to the BSP.
555 /* Spin until the BSP releases the AP's. */
559 /* BSP may have changed PTD while we were waiting */
561 for (addr = 0; addr < NKPT * NBPDR - 1; addr += PAGE_SIZE)
564 /* set up FPU state on the AP */
568 /* set up SSE registers */
571 #if 0 && defined(PAE)
572 /* Enable the PTE no-execute bit. */
573 if ((amd_feature & AMDID_NX) != 0) {
576 msr = rdmsr(MSR_EFER) | EFER_NXE;
577 wrmsr(MSR_EFER, msr);
581 /* A quick check from sanity claus */
582 if (PCPU_GET(apic_id) != lapic_id()) {
583 printf("SMP: cpuid = %d\n", PCPU_GET(cpuid));
584 printf("SMP: actual apic_id = %d\n", lapic_id());
585 printf("SMP: correct apic_id = %d\n", PCPU_GET(apic_id));
586 panic("cpuid mismatch! boom!!");
590 /* Initialize curthread. */
591 KASSERT(PCPU_GET(idlethread) != NULL, ("no idle thread"));
592 PCPU_SET(curthread, PCPU_GET(idlethread));
594 mtx_lock_spin(&ap_boot_mtx);
597 /* Init local apic for irq's */
602 CTR1(KTR_SMP, "SMP: AP CPU #%d Launched", PCPU_GET(cpuid));
603 printf("SMP: AP CPU #%d Launched!\n", PCPU_GET(cpuid));
605 /* Determine if we are a logical CPU. */
606 if (logical_cpus > 1 && PCPU_GET(apic_id) % logical_cpus != 0)
607 logical_cpus_mask |= PCPU_GET(cpumask);
609 /* Determine if we are a hyperthread. */
610 if (hyperthreading_cpus > 1 &&
611 PCPU_GET(apic_id) % hyperthreading_cpus != 0)
612 hyperthreading_cpus_mask |= PCPU_GET(cpumask);
614 /* Build our map of 'other' CPUs. */
615 PCPU_SET(other_cpus, all_cpus & ~PCPU_GET(cpumask));
620 if (smp_cpus == mp_ncpus) {
621 /* enable IPI's, tlb shootdown, freezes etc */
622 atomic_store_rel_int(&smp_started, 1);
623 smp_active = 1; /* historic */
626 mtx_unlock_spin(&ap_boot_mtx);
628 /* wait until all the AP's are up */
629 while (smp_started == 0)
633 PCPU_SET(curthread, PCPU_GET(idlethread));
634 /* enter the scheduler */
637 panic("scheduler returned us to %s", __func__);
641 /*******************************************************************
642 * local functions and data
646 * We tell the I/O APIC code about all the CPUs we want to receive
647 * interrupts. If we don't want certain CPUs to receive IRQs we
648 * can simply not tell the I/O APIC code about them in this function.
649 * We also do not tell it about the BSP since it tells itself about
650 * the BSP internally to work with UP kernels and on UP machines.
653 set_interrupt_apic_ids(void)
657 for (i = 0; i < MAXCPU; i++) {
658 apic_id = cpu_apic_ids[i];
661 if (cpu_info[apic_id].cpu_bsp)
663 if (cpu_info[apic_id].cpu_disabled)
666 /* Don't let hyperthreads service interrupts. */
667 if (hyperthreading_cpus > 1 &&
668 apic_id % hyperthreading_cpus != 0)
676 * Assign logical CPU IDs to local APICs.
683 /* Check for explicitly disabled CPUs. */
684 for (i = 0; i <= MAX_APIC_ID; i++) {
685 if (!cpu_info[i].cpu_present || cpu_info[i].cpu_bsp)
688 /* Don't use this CPU if it has been disabled by a tunable. */
689 if (resource_disabled("lapic", i)) {
690 cpu_info[i].cpu_disabled = 1;
696 * Assign CPU IDs to local APIC IDs and disable any CPUs
697 * beyond MAXCPU. CPU 0 has already been assigned to the BSP,
698 * so we only have to assign IDs for APs.
701 for (i = 0; i <= MAX_APIC_ID; i++) {
702 if (!cpu_info[i].cpu_present || cpu_info[i].cpu_bsp ||
703 cpu_info[i].cpu_disabled)
706 if (mp_ncpus < MAXCPU) {
707 cpu_apic_ids[mp_ncpus] = i;
708 apic_cpuids[i] = mp_ncpus;
711 cpu_info[i].cpu_disabled = 1;
713 KASSERT(mp_maxid >= mp_ncpus - 1,
714 ("%s: counters out of sync: max %d, count %d", __func__, mp_maxid,
719 * start each AP in our list
721 /* Lowest 1MB is already mapped: don't touch */
722 #define TMPMAP_START 1
729 mtx_init(&ap_boot_mtx, "ap boot", NULL, MTX_SPIN);
731 /* set up temporary P==V mapping for AP boot */
732 /* XXX this is a hack, we should boot the AP on its own stack/PTD */
735 for (cpu = 1; cpu < mp_ncpus; cpu++) {
736 apic_id = cpu_apic_ids[cpu];
740 bootAPgdt = gdt + (512*cpu);
742 /* Get per-cpu data */
743 pc = &__pcpu[bootAP];
744 pcpu_init(pc, bootAP, sizeof(struct pcpu));
745 dpcpu_init((void *)kmem_alloc(kernel_map, DPCPU_SIZE), bootAP);
746 pc->pc_apic_id = cpu_apic_ids[bootAP];
747 pc->pc_prvspace = pc;
748 pc->pc_curthread = 0;
750 gdt_segs[GPRIV_SEL].ssd_base = (int) pc;
751 gdt_segs[GPROC0_SEL].ssd_base = (int) &pc->pc_common_tss;
753 PT_SET_MA(bootAPgdt, xpmap_ptom(VTOP(bootAPgdt)) | PG_V | PG_RW);
754 bzero(bootAPgdt, PAGE_SIZE);
755 for (x = 0; x < NGDT; x++)
756 ssdtosd(&gdt_segs[x], &bootAPgdt[x].sd);
757 PT_SET_MA(bootAPgdt, vtomach(bootAPgdt) | PG_V);
760 if (HYPERVISOR_vcpu_op(VCPUOP_get_physid, cpu, &cpu_id) == 0) {
761 apicid = xen_vcpu_physid_to_x86_apicid(cpu_id.phys_id);
762 acpiid = xen_vcpu_physid_to_x86_acpiid(cpu_id.phys_id);
765 x86_acpiid_to_apicid[acpiid] = apicid;
770 /* attempt to start the Application Processor */
771 if (!start_ap(cpu)) {
772 printf("AP #%d (PHY# %d) failed!\n", cpu, apic_id);
773 /* better panic as the AP may be running loose */
774 printf("panic y/n? [y] ");
779 all_cpus |= (1 << cpu); /* record AP in CPU map */
783 /* build our map of 'other' CPUs */
784 PCPU_SET(other_cpus, all_cpus & ~PCPU_GET(cpumask));
786 pmap_invalidate_range(kernel_pmap, 0, NKPT * NBPDR - 1);
788 /* number of APs actually started */
792 extern uint8_t *pcpu_boot_stack;
793 extern trap_info_t trap_table[];
796 smp_trap_init(trap_info_t *trap_ctxt)
798 const trap_info_t *t = trap_table;
800 for (t = trap_table; t->address; t++) {
801 trap_ctxt[t->vector].flags = t->flags;
802 trap_ctxt[t->vector].cs = t->cs;
803 trap_ctxt[t->vector].address = t->address;
809 cpu_initialize_context(unsigned int cpu)
811 /* vcpu_guest_context_t is too large to allocate on the stack.
812 * Hence we allocate statically and protect it with a lock */
814 static vcpu_guest_context_t ctxt;
815 vm_offset_t boot_stack;
817 vm_paddr_t ma[NPGPTD];
823 * Page 1, [4] boot stack
827 for (i = 0; i < NPGPTD + 2; i++) {
828 m[i] = vm_page_alloc(NULL, color++,
829 VM_ALLOC_NORMAL | VM_ALLOC_NOOBJ | VM_ALLOC_WIRED |
832 pmap_zero_page(m[i]);
835 boot_stack = kmem_alloc_nofault(kernel_map, 1);
836 newPTD = kmem_alloc_nofault(kernel_map, NPGPTD);
837 ma[0] = xpmap_ptom(VM_PAGE_TO_PHYS(m[0]))|PG_V;
840 pmap_kenter(boot_stack, VM_PAGE_TO_PHYS(m[NPGPTD + 1]));
841 for (i = 0; i < NPGPTD; i++) {
842 ((vm_paddr_t *)boot_stack)[i] =
844 xpmap_ptom(VM_PAGE_TO_PHYS(m[i]))|PG_V;
849 * Copy cpu0 IdlePTD to new IdlePTD - copying only
852 pmap_qenter(newPTD, m, 4);
854 memcpy((uint8_t *)newPTD + KPTDI*sizeof(vm_paddr_t),
855 (uint8_t *)PTOV(IdlePTD) + KPTDI*sizeof(vm_paddr_t),
856 nkpt*sizeof(vm_paddr_t));
858 pmap_qremove(newPTD, 4);
859 kmem_free(kernel_map, newPTD, 4);
861 * map actual idle stack to boot_stack
863 pmap_kenter(boot_stack, VM_PAGE_TO_PHYS(m[NPGPTD]));
866 xen_pgdpt_pin(xpmap_ptom(VM_PAGE_TO_PHYS(m[NPGPTD + 1])));
867 vm_page_lock_queues();
868 for (i = 0; i < 4; i++) {
869 int pdir = (PTDPTDI + i) / NPDEPG;
870 int curoffset = (PTDPTDI + i) % NPDEPG;
872 xen_queue_pt_update((vm_paddr_t)
873 ((ma[pdir] & ~PG_V) + (curoffset*sizeof(vm_paddr_t))),
877 vm_page_unlock_queues();
879 memset(&ctxt, 0, sizeof(ctxt));
880 ctxt.flags = VGCF_IN_KERNEL;
881 ctxt.user_regs.ds = GSEL(GDATA_SEL, SEL_KPL);
882 ctxt.user_regs.es = GSEL(GDATA_SEL, SEL_KPL);
883 ctxt.user_regs.fs = GSEL(GPRIV_SEL, SEL_KPL);
884 ctxt.user_regs.gs = GSEL(GDATA_SEL, SEL_KPL);
885 ctxt.user_regs.cs = GSEL(GCODE_SEL, SEL_KPL);
886 ctxt.user_regs.ss = GSEL(GDATA_SEL, SEL_KPL);
887 ctxt.user_regs.eip = (unsigned long)init_secondary;
888 ctxt.user_regs.eflags = PSL_KERNEL | 0x1000; /* IOPL_RING1 */
890 memset(&ctxt.fpu_ctxt, 0, sizeof(ctxt.fpu_ctxt));
892 smp_trap_init(ctxt.trap_ctxt);
895 ctxt.gdt_frames[0] = (uint32_t)((uint64_t)vtomach(bootAPgdt) >> PAGE_SHIFT);
899 ctxt.user_regs.esp = boot_stack + PAGE_SIZE;
901 ctxt.kernel_ss = GSEL(GDATA_SEL, SEL_KPL);
902 ctxt.kernel_sp = boot_stack + PAGE_SIZE;
904 ctxt.event_callback_cs = GSEL(GCODE_SEL, SEL_KPL);
905 ctxt.event_callback_eip = (unsigned long)Xhypervisor_callback;
906 ctxt.failsafe_callback_cs = GSEL(GCODE_SEL, SEL_KPL);
907 ctxt.failsafe_callback_eip = (unsigned long)failsafe_callback;
909 ctxt.ctrlreg[3] = xpmap_ptom(VM_PAGE_TO_PHYS(m[NPGPTD + 1]));
910 #else /* __x86_64__ */
911 ctxt.user_regs.esp = idle->thread.rsp0 - sizeof(struct pt_regs);
912 ctxt.kernel_ss = GSEL(GDATA_SEL, SEL_KPL);
913 ctxt.kernel_sp = idle->thread.rsp0;
915 ctxt.event_callback_eip = (unsigned long)hypervisor_callback;
916 ctxt.failsafe_callback_eip = (unsigned long)failsafe_callback;
917 ctxt.syscall_callback_eip = (unsigned long)system_call;
919 ctxt.ctrlreg[3] = xen_pfn_to_cr3(virt_to_mfn(init_level4_pgt));
921 ctxt.gs_base_kernel = (unsigned long)(cpu_pda(cpu));
924 printf("gdtpfn=%lx pdptpfn=%lx\n",
926 ctxt.ctrlreg[3] >> PAGE_SHIFT);
928 PANIC_IF(HYPERVISOR_vcpu_op(VCPUOP_initialise, cpu, &ctxt));
930 PANIC_IF(HYPERVISOR_vcpu_op(VCPUOP_up, cpu, NULL));
934 * This function starts the AP (application processor) identified
935 * by the 'apic_id' argument. It does quite a "song and dance"
936 * to accomplish this. This is necessary because of the nuances
937 * of the different hardware we might encounter. It isn't pretty,
938 * but it seems to work.
943 start_ap(int apic_id)
947 /* used as a watchpoint to signal AP startup */
950 cpu_initialize_context(apic_id);
952 /* Wait up to 5 seconds for it to start. */
953 for (ms = 0; ms < 5000; ms++) {
955 return 1; /* return SUCCESS */
958 return 0; /* return FAILURE */
962 * Flush the TLB on all other CPU's
965 smp_tlb_shootdown(u_int vector, vm_offset_t addr1, vm_offset_t addr2)
968 struct _call_data data;
970 ncpu = mp_ncpus - 1; /* does not shootdown self */
972 return; /* no other cpus */
973 if (!(read_eflags() & PSL_I))
974 panic("%s: interrupts disabled", __func__);
975 mtx_lock_spin(&smp_ipi_mtx);
976 KASSERT(call_data == NULL, ("call_data isn't null?!"));
978 call_data->func_id = vector;
979 call_data->arg1 = addr1;
980 call_data->arg2 = addr2;
981 atomic_store_rel_int(&smp_tlb_wait, 0);
982 ipi_all_but_self(vector);
983 while (smp_tlb_wait < ncpu)
986 mtx_unlock_spin(&smp_ipi_mtx);
990 smp_targeted_tlb_shootdown(cpumask_t mask, u_int vector, vm_offset_t addr1, vm_offset_t addr2)
993 struct _call_data data;
995 othercpus = mp_ncpus - 1;
996 if (mask == (u_int)-1) {
1001 mask &= ~PCPU_GET(cpumask);
1004 ncpu = bitcount32(mask);
1005 if (ncpu > othercpus) {
1006 /* XXX this should be a panic offence */
1007 printf("SMP: tlb shootdown to %d other cpus (only have %d)\n",
1011 /* XXX should be a panic, implied by mask == 0 above */
1015 if (!(read_eflags() & PSL_I))
1016 panic("%s: interrupts disabled", __func__);
1017 mtx_lock_spin(&smp_ipi_mtx);
1018 KASSERT(call_data == NULL, ("call_data isn't null?!"));
1020 call_data->func_id = vector;
1021 call_data->arg1 = addr1;
1022 call_data->arg2 = addr2;
1023 atomic_store_rel_int(&smp_tlb_wait, 0);
1024 if (mask == (u_int)-1)
1025 ipi_all_but_self(vector);
1027 ipi_selected(mask, vector);
1028 while (smp_tlb_wait < ncpu)
1031 mtx_unlock_spin(&smp_ipi_mtx);
1035 smp_cache_flush(void)
1039 smp_tlb_shootdown(IPI_INVLCACHE, 0, 0);
1047 smp_tlb_shootdown(IPI_INVLTLB, 0, 0);
1052 smp_invlpg(vm_offset_t addr)
1056 smp_tlb_shootdown(IPI_INVLPG, addr, 0);
1061 smp_invlpg_range(vm_offset_t addr1, vm_offset_t addr2)
1065 smp_tlb_shootdown(IPI_INVLRNG, addr1, addr2);
1070 smp_masked_invltlb(cpumask_t mask)
1074 smp_targeted_tlb_shootdown(mask, IPI_INVLTLB, 0, 0);
1079 smp_masked_invlpg(cpumask_t mask, vm_offset_t addr)
1083 smp_targeted_tlb_shootdown(mask, IPI_INVLPG, addr, 0);
1088 smp_masked_invlpg_range(cpumask_t mask, vm_offset_t addr1, vm_offset_t addr2)
1092 smp_targeted_tlb_shootdown(mask, IPI_INVLRNG, addr1, addr2);
1097 * send an IPI to a set of cpus.
1100 ipi_selected(cpumask_t cpus, u_int ipi)
1107 if (IPI_IS_BITMAPED(ipi)) {
1109 ipi = IPI_BITMAP_VECTOR;
1112 CTR3(KTR_SMP, "%s: cpus: %x ipi: %x", __func__, cpus, ipi);
1113 while ((cpu = ffs(cpus)) != 0) {
1115 cpus &= ~(1 << cpu);
1117 KASSERT(cpu_apic_ids[cpu] != -1,
1118 ("IPI to non-existent CPU %d", cpu));
1122 old_pending = cpu_ipi_pending[cpu];
1123 new_pending = old_pending | bitmap;
1124 } while (!atomic_cmpset_int(&cpu_ipi_pending[cpu],old_pending, new_pending));
1127 ipi_pcpu(cpu, RESCHEDULE_VECTOR);
1131 KASSERT(call_data != NULL, ("call_data not set"));
1132 ipi_pcpu(cpu, CALL_FUNCTION_VECTOR);
1138 * send an IPI to all CPUs EXCEPT myself
1141 ipi_all_but_self(u_int ipi)
1143 CTR2(KTR_SMP, "%s: ipi: %x", __func__, ipi);
1144 ipi_selected(PCPU_GET(other_cpus), ipi);
1148 * Handle an IPI_STOP by saving our current context and spinning until we
1152 cpustop_handler(void)
1154 int cpu = PCPU_GET(cpuid);
1155 int cpumask = PCPU_GET(cpumask);
1157 savectx(&stoppcbs[cpu]);
1159 /* Indicate that we are stopped */
1160 atomic_set_int(&stopped_cpus, cpumask);
1162 /* Wait for restart */
1163 while (!(started_cpus & cpumask))
1166 atomic_clear_int(&started_cpus, cpumask);
1167 atomic_clear_int(&stopped_cpus, cpumask);
1169 if (cpu == 0 && cpustop_restartfunc != NULL) {
1170 cpustop_restartfunc();
1171 cpustop_restartfunc = NULL;
1176 * This is called once the rest of the system is up and running and we're
1177 * ready to let the AP's out of the pen.
1180 release_aps(void *dummy __unused)
1185 atomic_store_rel_int(&aps_ready, 1);
1186 while (smp_started == 0)
1189 SYSINIT(start_aps, SI_SUB_SMP, SI_ORDER_FIRST, release_aps, NULL);
1190 SYSINIT(start_ipis, SI_SUB_INTR, SI_ORDER_ANY, xen_smp_intr_init_cpus, NULL);