2 * Copyright (C) 1995, 1996 Wolfgang Solfrank.
3 * Copyright (C) 1995, 1996 TooLs GmbH.
6 * Redistribution and use in source and binary forms, with or without
7 * modification, are permitted provided that the following conditions
9 * 1. Redistributions of source code must retain the above copyright
10 * notice, this list of conditions and the following disclaimer.
11 * 2. Redistributions in binary form must reproduce the above copyright
12 * notice, this list of conditions and the following disclaimer in the
13 * documentation and/or other materials provided with the distribution.
14 * 3. All advertising materials mentioning features or use of this software
15 * must display the following acknowledgement:
16 * This product includes software developed by TooLs GmbH.
17 * 4. The name of TooLs GmbH may not be used to endorse or promote products
18 * derived from this software without specific prior written permission.
20 * THIS SOFTWARE IS PROVIDED BY TOOLS GMBH ``AS IS'' AND ANY EXPRESS OR
21 * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
22 * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
23 * IN NO EVENT SHALL TOOLS GMBH BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
24 * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
25 * PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS;
26 * OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY,
27 * WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR
28 * OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF
29 * ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
32 * Copyright (C) 2001 Benno Rice
33 * All rights reserved.
35 * Redistribution and use in source and binary forms, with or without
36 * modification, are permitted provided that the following conditions
38 * 1. Redistributions of source code must retain the above copyright
39 * notice, this list of conditions and the following disclaimer.
40 * 2. Redistributions in binary form must reproduce the above copyright
41 * notice, this list of conditions and the following disclaimer in the
42 * documentation and/or other materials provided with the distribution.
44 * THIS SOFTWARE IS PROVIDED BY Benno Rice ``AS IS'' AND ANY EXPRESS OR
45 * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
46 * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
47 * IN NO EVENT SHALL TOOLS GMBH BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
48 * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
49 * PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS;
50 * OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY,
51 * WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR
52 * OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF
53 * ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
54 * $NetBSD: machdep.c,v 1.74.2.1 2000/11/01 16:13:48 tv Exp $
57 #include <sys/cdefs.h>
58 __FBSDID("$FreeBSD$");
61 #include "opt_kstack_pages.h"
62 #include "opt_platform.h"
64 #include <sys/param.h>
66 #include <sys/systm.h>
72 #include <sys/eventhandler.h>
74 #include <sys/imgact.h>
76 #include <sys/kernel.h>
78 #include <sys/linker.h>
80 #include <sys/malloc.h>
82 #include <sys/msgbuf.h>
83 #include <sys/mutex.h>
84 #include <sys/ptrace.h>
85 #include <sys/reboot.h>
86 #include <sys/rwlock.h>
87 #include <sys/signalvar.h>
88 #include <sys/syscallsubr.h>
89 #include <sys/sysctl.h>
90 #include <sys/sysent.h>
91 #include <sys/sysproto.h>
92 #include <sys/ucontext.h>
94 #include <sys/vmmeter.h>
95 #include <sys/vnode.h>
97 #include <net/netisr.h>
100 #include <vm/vm_extern.h>
101 #include <vm/vm_kern.h>
102 #include <vm/vm_page.h>
103 #include <vm/vm_phys.h>
104 #include <vm/vm_map.h>
105 #include <vm/vm_object.h>
106 #include <vm/vm_pager.h>
108 #include <machine/altivec.h>
109 #ifndef __powerpc64__
110 #include <machine/bat.h>
112 #include <machine/cpu.h>
113 #include <machine/elf.h>
114 #include <machine/fpu.h>
115 #include <machine/hid.h>
116 #include <machine/ifunc.h>
117 #include <machine/kdb.h>
118 #include <machine/md_var.h>
119 #include <machine/metadata.h>
120 #include <machine/mmuvar.h>
121 #include <machine/pcb.h>
122 #include <machine/reg.h>
123 #include <machine/sigframe.h>
124 #include <machine/spr.h>
125 #include <machine/trap.h>
126 #include <machine/vmparam.h>
127 #include <machine/ofw_machdep.h>
131 #include <dev/ofw/openfirm.h>
132 #include <dev/ofw/ofw_subr.h>
/*
 * File-scope state, sysctl exports and forward declarations used by the
 * PowerPC machine-dependent startup code in this file.
 *
 * NOTE(review): two definitions of cacheline_size (128 and 32) follow; the
 * preprocessor conditional that selects one of them is not visible in this
 * excerpt -- confirm against the full file.
 */
136 int cacheline_size = 128;
138 int cacheline_size = 32;
/* Tested by powerpc_init() before it calls load_external_symtab(). */
140 int hw_direct_map = 1;
/* Defined elsewhere; powerpc_init() subtracts it from VM_MIN_KERNEL_ADDRESS. */
143 extern vm_paddr_t kernload;
146 extern void *ap_pcpu;
/* Page-aligned per-CPU array; powerpc_init() indexes it with bsp.cr_cpuid. */
148 struct pcpu __pcpu[MAXCPU] __aligned(PAGE_SIZE);
/* Buffer handed to init_static_kenv() on the self-loading (no loader) path. */
149 static char init_kenv[2048];
/* Trap frame that powerpc_init() installs as thread0.td_frame. */
151 static struct trapframe frame0;
/* Machine name string, exported read-only (CTLFLAG_RD) under the _hw node. */
153 char machine[] = "powerpc";
154 SYSCTL_STRING(_hw, HW_MACHINE, machine, CTLFLAG_RD, machine, 0, "");
/* Register cpu_startup() with SYSINIT at SI_SUB_CPU / SI_ORDER_FIRST. */
156 static void cpu_startup(void *);
157 SYSINIT(cpu, SI_SUB_CPU, SI_ORDER_FIRST, cpu_startup, NULL);
/* Export cacheline_size read-only (CTLFLAG_RD) under the _machdep node. */
159 SYSCTL_INT(_machdep, CPU_CACHELINE, cacheline_size,
160 CTLFLAG_RD, &cacheline_size, 0, "");
/* NOTE(review): the continuation line of this prototype is not visible here. */
162 uintptr_t powerpc_init(vm_offset_t, vm_offset_t, vm_offset_t, void *,
165 static void fake_preload_metadata(void);
170 /* Default MSR values set in the AIM/Book-E early startup code */
171 register_t psl_kernset;
172 register_t psl_userset;
173 register_t psl_userstatic;
175 register_t psl_userset32;
/* Passed by address to vm_ksubmap_init() from cpu_startup(). */
178 struct kva_md_info kmi;
/*
 * cpu_startup: startup hook that this file registers through
 * SYSINIT(cpu, SI_SUB_CPU, SI_ORDER_FIRST, cpu_startup, NULL).
 *
 * Visible steps, in order:
 *  - cpu_setup() for the CPU id read with PCPU_GET(cpuid);
 *  - print the real memory size derived from physmem;
 *  - print the available KVA (virtual_end - virtual_avail);
 *  - list each physical memory chunk recorded in phys_avail[];
 *  - vm_ksubmap_init(&kmi);
 *  - print the free memory derived from vm_free_count();
 *  - vm_pager_bufferinit().
 *
 * dummy: not referenced by any line visible in this excerpt.
 *
 * NOTE(review): several lines of this function (braces, local declarations,
 * conditionals) are not visible in this excerpt -- confirm against the full
 * file before relying on the exact control flow.
 */
181 cpu_startup(void *dummy)
185 * Initialise the decrementer-based clock.
190 * Good {morning,afternoon,evening,night}.
192 cpu_setup(PCPU_GET(cpuid));
197 printf("real memory = %ju (%ju MB)\n", ptoa((uintmax_t)physmem),
198 ptoa((uintmax_t)physmem) / 1048576);
202 printf("available KVA = %zu (%zu MB)\n",
203 virtual_end - virtual_avail,
204 (virtual_end - virtual_avail) / 1048576);
207 * Display any holes after the first chunk of extended memory.
212 printf("Physical memory chunk(s):\n");
/*
 * phys_avail[] is walked two entries at a time as (start, end) pairs; the
 * walk stops at the first pair whose end entry is zero.
 */
213 for (indx = 0; phys_avail[indx + 1] != 0; indx += 2) {
215 phys_avail[indx + 1] - phys_avail[indx];
/*
 * NOTE(review): two alternative format strings follow (16-digit and 9-digit
 * hex); the conditional choosing between them is not visible here.
 */
218 printf("0x%016jx - 0x%016jx, %ju bytes (%ju pages)\n",
220 printf("0x%09jx - 0x%09jx, %ju bytes (%ju pages)\n",
222 (uintmax_t)phys_avail[indx],
223 (uintmax_t)phys_avail[indx + 1] - 1,
224 (uintmax_t)size1, (uintmax_t)size1 / PAGE_SIZE);
228 vm_ksubmap_init(&kmi);
230 printf("avail memory = %ju (%ju MB)\n",
231 ptoa((uintmax_t)vm_free_count()),
232 ptoa((uintmax_t)vm_free_count()) / 1048576);
235 * Set up buffers, so they can be used to read disk labels.
238 vm_pager_bufferinit();
/*
 * External symbols and prototypes consumed by powerpc_init().
 *
 * __startkernel / __endkernel seed the first guess at the kernel bounds.
 * __sbss_start..__sbss_end and __bss_start.._end delimit the two regions
 * that powerpc_init() passes to bzero().
 */
241 extern vm_offset_t __startkernel, __endkernel;
242 extern unsigned char __bss_start[];
243 extern unsigned char __sbss_start[];
244 extern unsigned char __sbss_end[];
245 extern unsigned char _end[];
/* Early platform init; powerpc_init() forwards all five of its arguments. */
247 void aim_early_init(vm_offset_t fdt, vm_offset_t toc, vm_offset_t ofentry,
248 void *mdp, uint32_t mdp_cookie);
249 void aim_cpu_init(vm_offset_t toc);
250 void booke_cpu_init(void);
/* Defined later in this file; called when the loader supplied no symbols. */
253 static void load_external_symtab(void);
/*
 * powerpc_init: early machine-dependent kernel initialisation.
 *
 * Parameters visible in this excerpt:
 *   fdt        - device tree address; overwritten from MODINFOMD_DTBP when
 *                loader metadata is present, then given to OF_initial_setup().
 *   toc        - forwarded to aim_early_init().
 *   ofentry    - firmware entry point, cast to a function pointer for
 *                OF_initial_setup().
 *   mdp        - loader metadata pointer, stored in preload_metadata.
 *   mdp_cookie - compared against the magic value 0xfb5d104d; the statement
 *                controlled by that test is not visible in this excerpt.
 *
 * Visible sequence:
 *   1. Seed startkernel/endkernel from __startkernel/__endkernel.
 *   2. Zero the sbss and bss regions and call aim_early_init().
 *   3. Loader path: choose md_offset, relocate the preload metadata by it,
 *      look up the record of type elf kernel, and fetch boothowto, the
 *      environment pointer, the device tree pointer, the kernel end address
 *      and the symbol table bounds.  A nonzero kernel end has md_offset
 *      added; endkernel becomes the larger of the two candidates.
 *   4. Self-loading path: fake_preload_metadata() and a static kenv backed
 *      by init_kenv.
 *   5. OF_initial_setup(), proc_linkup0(), thread0 frame setup, and a move
 *      of the thread0 address into a general register.
 *   6. load_external_symtab() when no symbols were provided and
 *      hw_direct_map is nonzero; ofw_parse_bootargs().
 *   7. Instruction cache sync over [startkernel, endkernel), platform
 *      probe, per-CPU setup for the boot CPU, link_elf_ireloc(),
 *      pmap_bootstrap(), and mtmsr() with PSL_EE cleared.
 *   8. init_param2(physmem), kernelname from the kenv, thread0 PCB placed
 *      at the 16-byte aligned top of the kernel stack, msgbufinit(), and
 *      kdb_enter() when boothowto has RB_KDB set.
 *
 * Returns an address just below thread0.td_pcb, lowered by
 * sizeof(struct callframe) - 3 * sizeof(register_t) and aligned down to 16
 * bytes.  NOTE(review): presumably the initial stack pointer for the caller;
 * confirm against the assembly entry code.
 *
 * NOTE(review): many lines (braces, preprocessor conditionals, some
 * declarations and statement continuations) are not visible in this
 * excerpt, so which of the visible statements are mutually exclusive cannot
 * be determined here.
 */
257 powerpc_init(vm_offset_t fdt, vm_offset_t toc, vm_offset_t ofentry, void *mdp,
262 vm_offset_t startkernel, endkernel;
265 bool ofw_bootargs = false;
266 bool symbols_provided = false;
268 vm_offset_t ksym_start;
269 vm_offset_t ksym_end;
273 /* First guess at start/end kernel positions */
274 startkernel = __startkernel;
275 endkernel = __endkernel;
278 * If the metadata pointer cookie is not set to the magic value,
279 * the number in mdp should be treated as nonsense.
281 if (mdp_cookie != 0xfb5d104d)
286 * On BOOKE the BSS is already cleared and some variables
287 * initialized. Do not wipe them out.
289 bzero(__sbss_start, __sbss_end - __sbss_start);
290 bzero(__bss_start, _end - __bss_start);
296 aim_early_init(fdt, toc, ofentry, mdp, mdp_cookie);
300 * At this point, we are executing in our correct memory space.
301 * Book-E started there, and AIM has done an rfi and restarted
302 * execution from _start.
304 * We may still be in real mode, however. If we are running out of
305 * the direct map on 64 bit, this is possible to do.
309 * Parse metadata if present and fetch parameters. Must be done
310 * before console is inited so cninit gets the right value of
315 * Starting up from loader.
317 * Full metadata has been provided, but we need to figure
318 * out the correct address to relocate it to.
321 uintptr_t md_offset = 0;
322 vm_paddr_t kernelstartphys, kernelendphys;
/*
 * md_offset is DMAP_BASE_ADDRESS when the address of this function lies
 * above DMAP_BASE_ADDRESS.  NOTE(review): the second assignment below is
 * presumably the alternative branch; the line separating the two is not
 * visible in this excerpt.
 */
325 if ((uintptr_t)&powerpc_init > DMAP_BASE_ADDRESS)
326 md_offset = DMAP_BASE_ADDRESS;
328 md_offset = VM_MIN_KERNEL_ADDRESS - kernload;
331 preload_metadata = mdp;
333 /* Translate phys offset into DMAP offset. */
334 preload_metadata += md_offset;
335 preload_bootstrap_relocate(md_offset);
337 kmdp = preload_search_by_type("elf kernel");
339 boothowto = MD_FETCH(kmdp, MODINFOMD_HOWTO, int);
340 envp = MD_FETCH(kmdp, MODINFOMD_ENVP, char *);
343 init_static_kenv(envp, 0);
345 fdt = MD_FETCH(kmdp, MODINFOMD_DTBP, uintptr_t);
349 kernelstartphys = MD_FETCH(kmdp, MODINFO_ADDR,
351 /* kernelstartphys is already relocated. */
352 kernelendphys = MD_FETCH(kmdp, MODINFOMD_KERNEND,
354 if (kernelendphys != 0)
355 kernelendphys += md_offset;
356 endkernel = ulmax(endkernel, kernelendphys);
358 ksym_start = MD_FETCH(kmdp, MODINFOMD_SSYM, uintptr_t);
359 ksym_end = MD_FETCH(kmdp, MODINFOMD_ESYM, uintptr_t);
/* The symbol table size is read from the first Elf_Size word at ksym_start. */
360 ksym_sz = *(Elf_Size*)ksym_start;
362 db_fetch_ksymtab(ksym_start, ksym_end, md_offset);
363 /* Symbols provided by loader. */
364 symbols_provided = true;
369 * Self-loading kernel, we have to fake up metadata.
371 * Since we are creating the metadata from the final
372 * memory space, we don't need to call
373 * preload_boostrap_relocate().
375 fake_preload_metadata();
376 kmdp = preload_search_by_type("elf kernel");
377 init_static_kenv(init_kenv, sizeof(init_kenv));
381 /* Store boot environment state */
382 OF_initial_setup((void *)fdt, NULL, (int (*)(void *))ofentry);
385 * Init params/tunables that can be overridden by the loader
390 * Start initializing proc0 and thread0.
392 proc_linkup0(&proc0, &thread0);
393 thread0.td_frame = &frame0;
395 __asm __volatile("mr 13,%0" :: "r"(&thread0));
397 __asm __volatile("mr 2,%0" :: "r"(&thread0));
401 * Init mutexes, which we use heavily in PMAP
406 * Install the OF client interface
411 if (!symbols_provided && hw_direct_map)
412 load_external_symtab();
416 ofw_parse_bootargs();
419 * Initialize the console before printing anything.
428 /* Make sure the kernel icache is valid before we go too much further */
429 __syncicache((caddr_t)startkernel, endkernel - startkernel);
433 * Choose a platform module so we can get the physical memory map.
436 platform_probe_and_attach();
439 * Set up per-cpu data for the BSP now that the platform can tell
442 if (platform_smp_get_bsp(&bsp) != 0)
444 pc = &__pcpu[bsp.cr_cpuid];
445 __asm __volatile("mtsprg 0, %0" :: "r"(pc));
446 pcpu_init(pc, bsp.cr_cpuid, sizeof(struct pcpu));
447 pc->pc_curthread = &thread0;
448 thread0.td_oncpu = bsp.cr_cpuid;
449 pc->pc_cpuid = bsp.cr_cpuid;
450 pc->pc_hwref = bsp.cr_hwref;
461 link_elf_ireloc(kmdp);
462 pmap_bootstrap(startkernel, endkernel);
463 mtmsr(psl_kernset & ~PSL_EE);
466 * Initialize params/tunables that are derived from memsize
468 init_param2(physmem);
471 * Grab booted kernel's name
473 env = kern_getenv("kernelname");
475 strlcpy(kernelname, env, sizeof(kernelname));
480 * Finish setting up thread0.
482 thread0.td_pcb = (struct pcb *)
483 ((thread0.td_kstack + thread0.td_kstack_pages * PAGE_SIZE -
484 sizeof(struct pcb)) & ~15UL);
485 bzero((void *)thread0.td_pcb, sizeof(struct pcb));
486 pc->pc_curpcb = thread0.td_pcb;
488 /* Initialise the message buffer. */
489 msgbufinit(msgbufp, msgbufsize);
492 if (boothowto & RB_KDB)
493 kdb_enter(KDB_WHY_BOOTFLAGS,
494 "Boot flags requested debugger");
497 return (((uintptr_t)thread0.td_pcb -
498 (sizeof(struct callframe) - 3*sizeof(register_t))) & ~15UL);
/*
 * load_external_symtab: locate a kernel symbol table inside the initrd
 * image described by the /chosen firmware node.
 *
 * Visible steps:
 *   - find /chosen and require both the linux,initrd-start and
 *     linux,initrd-end properties;
 *   - read each property into cell[] with OF_getencprop() and combine
 *     cell[0] (high 32 bits) with cell[1] (low 32 bits) into start and end;
 *   - require end - start to be greater than zero;
 *   - view the image through PHYS_TO_DMAP(start) for the final addresses,
 *     and either that same pointer or a pmap_early_io_map() mapping for
 *     the temporary view;
 *   - check the ELF magic with IS_ELF(), unmapping the one-page mapping
 *     when it was used;
 *   - scan the section headers for SHT_SYMTAB and record the symbol table
 *     and the string table named by its sh_link, both as temporary and as
 *     final addresses;
 *   - when both tables are nonempty and the symbol table precedes the
 *     string table, publish ksymtab, ksymtab_size, kstrtab and
 *     ksymtab_relbase (__startkernel - KERNBASE);
 *   - release the full-image temporary mapping.
 *
 * NOTE(review): the size checks applied to the OF_getencprop() results, the
 * conditionals selecting the direct-map versus temporary-map variants, and
 * the early return statements are not visible in this excerpt -- confirm
 * against the full file.
 */
503 * On powernv and some booke systems, we might not have symbols loaded via
504 * loader. However, if the user passed the kernel in as the initrd as well,
505 * we can manually load it via reinterpreting the initrd copy of the kernel.
507 * In the BOOKE case, we don't actually have a DMAP yet, so we have to use
508 * temporary maps to inspect the memory, but write DMAP addresses to the
509 * configuration variables.
512 load_external_symtab(void) {
514 vm_paddr_t start, end;
517 u_char *kernelimg; /* Temporary map */
518 u_char *kernelimg_final; /* Final location */
526 vm_offset_t ksym_start, ksym_sz, kstr_start, kstr_sz,
527 ksym_start_final, kstr_start_final;
532 chosen = OF_finddevice("/chosen");
536 if (!OF_hasprop(chosen, "linux,initrd-start") ||
537 !OF_hasprop(chosen, "linux,initrd-end"))
540 size = OF_getencprop(chosen, "linux,initrd-start", cell, sizeof(cell));
544 start = (uint64_t)cell[0] << 32 | cell[1];
548 size = OF_getencprop(chosen, "linux,initrd-end", cell, sizeof(cell));
552 end = (uint64_t)cell[0] << 32 | cell[1];
556 if (!(end - start > 0))
559 kernelimg_final = (u_char *) PHYS_TO_DMAP(start);
561 kernelimg = kernelimg_final;
563 kernelimg = (u_char *)pmap_early_io_map(start, PAGE_SIZE);
565 ehdr = (Elf_Ehdr *)kernelimg;
567 if (!IS_ELF(*ehdr)) {
569 pmap_early_io_unmap(start, PAGE_SIZE);
575 pmap_early_io_unmap(start, PAGE_SIZE);
576 kernelimg = (u_char *)pmap_early_io_map(start, (end - start));
579 phdr = (Elf_Phdr *)(kernelimg + ehdr->e_phoff);
580 shdr = (Elf_Shdr *)(kernelimg + ehdr->e_shoff);
584 ksym_start_final = 0;
587 kstr_start_final = 0;
588 for (i = 0; i < ehdr->e_shnum; i++) {
589 if (shdr[i].sh_type == SHT_SYMTAB) {
590 ksym_start = (vm_offset_t)(kernelimg +
592 ksym_start_final = (vm_offset_t)
593 (kernelimg_final + shdr[i].sh_offset);
594 ksym_sz = (vm_offset_t)(shdr[i].sh_size);
595 kstr_start = (vm_offset_t)(kernelimg +
596 shdr[shdr[i].sh_link].sh_offset);
597 kstr_start_final = (vm_offset_t)
599 shdr[shdr[i].sh_link].sh_offset);
601 kstr_sz = (vm_offset_t)
602 (shdr[shdr[i].sh_link].sh_size);
606 if (ksym_start != 0 && kstr_start != 0 && ksym_sz != 0 &&
607 kstr_sz != 0 && ksym_start < kstr_start) {
609 * We can't use db_fetch_ksymtab() here, because we need to
610 * feed in DMAP addresses that are not mapped yet on booke.
612 * Write the variables directly, where db_init() will pick
613 * them up later, after the DMAP is up.
615 ksymtab = ksym_start_final;
616 ksymtab_size = ksym_sz;
617 kstrtab = kstr_start_final;
618 ksymtab_relbase = (__startkernel - KERNBASE);
622 pmap_early_io_unmap(start, (end - start));
/*
 * fake_preload_metadata: build a minimal preload metadata blob in a static
 * array and point the global preload_metadata at it.
 *
 * The blob is a sequence of records, each written as a 32-bit tag, a 32-bit
 * length and a payload.  Records written by the visible lines:
 *   MODINFO_NAME  - the string kernel, NUL included in the length;
 *   MODINFO_TYPE  - the string elf kernel, NUL included in the length;
 *   MODINFO_ADDR  - __startkernel, stored as a vm_offset_t;
 *   MODINFO_SIZE  - __endkernel - __startkernel, stored as a vm_offset_t;
 *   a zero word marking the end of the data.
 *
 * NOTE(review): the declaration of the index i and the statements that
 * advance it past each string payload are not visible in this excerpt, nor
 * is the condition guarding the padding word -- confirm against the full
 * file before changing the array size or record layout.
 */
629 * When not being loaded from loader, we need to create our own metadata
630 * so we can interact with the kernel linker.
633 fake_preload_metadata(void) {
634 /* We depend on dword alignment here. */
635 static uint32_t fake_preload[36] __aligned(8);
/* Record: module name. */
638 fake_preload[i++] = MODINFO_NAME;
639 fake_preload[i++] = strlen("kernel") + 1;
640 strcpy((char*)&fake_preload[i], "kernel");
641 /* ['k' 'e' 'r' 'n'] ['e' 'l' '\0' ..] */
/* Record: module type, the value powerpc_init() later searches for. */
644 fake_preload[i++] = MODINFO_TYPE;
645 fake_preload[i++] = strlen("elf kernel") + 1;
646 strcpy((char*)&fake_preload[i], "elf kernel");
647 /* ['e' 'l' 'f' ' '] ['k' 'e' 'r' 'n'] ['e' 'l' '\0' ..] */
651 /* Padding -- Fields start on u_long boundaries */
652 fake_preload[i++] = 0;
/* Record: load address; the index advances by the payload size in words. */
655 fake_preload[i++] = MODINFO_ADDR;
656 fake_preload[i++] = sizeof(vm_offset_t);
657 *(vm_offset_t *)&fake_preload[i] =
658 (vm_offset_t)(__startkernel);
659 i += (sizeof(vm_offset_t) / 4);
/* Record: image size, computed from the two linker-provided bounds. */
661 fake_preload[i++] = MODINFO_SIZE;
662 fake_preload[i++] = sizeof(vm_offset_t);
663 *(vm_offset_t *)&fake_preload[i] =
664 (vm_offset_t)(__endkernel) - (vm_offset_t)(__startkernel);
665 i += (sizeof(vm_offset_t) / 4);
668 * MODINFOMD_SSYM and MODINFOMD_ESYM cannot be provided here,
669 * as the memory comes from outside the loaded ELF sections.
671 * If the symbols are being provided by other means (MFS), the
672 * tables will be loaded into the debugger directly.
675 /* Null field at end to mark end of data. */
676 fake_preload[i++] = 0;
678 preload_metadata = (void*)fake_preload;
/*
 * cpu_flush_dcache: issue a dcbf instruction followed by a sync for each
 * cache line touched by the byte range [ptr, ptr + len).
 *
 *   ptr - start of the range.
 *   len - length of the range in bytes.
 *
 * The offset of ptr within its cache line is added to len, and the sum is
 * rounded up to a multiple of cacheline_size with roundup2().  The mask
 * cacheline_size - 1 and roundup2() both rely on cacheline_size being a
 * power of two; the two values assigned in this file (128 and 32) are.
 *
 * NOTE(review): the statement that moves addr back to the start of its
 * cache line and the loop header are not visible in this excerpt --
 * confirm against the full file.
 */
682 * Flush the D-cache for non-DMA I/O so that the I-cache can
683 * be made coherent later.
686 cpu_flush_dcache(void *ptr, size_t len)
688 register_t addr, off;
691 * Align the address to a cacheline and adjust the length
692 * accordingly. Then round the length to a multiple of the
693 * cacheline for easy looping.
695 addr = (uintptr_t)ptr;
696 off = addr & (cacheline_size - 1);
698 len = roundup2(len + off, cacheline_size);
/* Flush one line, order it with sync, then step to the next line. */
701 __asm __volatile ("dcbf 0,%0" :: "r"(addr));
702 __asm __volatile ("sync");
703 addr += cacheline_size;
704 len -= cacheline_size;
/*
 * ptrace_set_pc: store addr, cast to register_t, into the srr0 field of a
 * trap frame.
 *
 *   td   - thread argument.
 *   addr - new value for srr0.
 *
 * NOTE(review): the statement that points tf at a trap frame (presumably
 * the one owned by td) and the return statement are not visible in this
 * excerpt -- confirm against the full file.
 */
709 ptrace_set_pc(struct thread *td, unsigned long addr)
711 struct trapframe *tf;
714 tf->srr0 = (register_t)addr;
726 if (td->td_md.md_spinlock_count == 0) {
728 msr = intr_disable();
729 td->td_md.md_spinlock_count = 1;
730 td->td_md.md_saved_msr = msr;
733 td->td_md.md_spinlock_count++;
743 msr = td->td_md.md_saved_msr;
744 td->td_md.md_spinlock_count--;
745 if (td->td_md.md_spinlock_count == 0) {
/*
 * db_show_spr: debugger show command that reads one special purpose
 * register and prints it.
 *
 * Visible steps:
 *   - take the register number from the command address argument and keep
 *     an unmodified copy for printing;
 *   - swap the low five bits with the next five bits of the number;
 *   - point p at the code of get_spr(), with an extra adjustment under the
 *     _CALL_ELF == 2 conditional and a dereference of p in another variant;
 *   - replace the ten instruction bits selected by the mask 0x001ff800 with
 *     the swapped number shifted left by 11;
 *   - make the instruction cache coherent for cacheline_size bytes at p;
 *   - call get_spr() and print the decimal number, hex number and value.
 *
 * NOTE(review): the statement inside the _CALL_ELF == 2 branch, the
 * matching preprocessor lines and the declaration of spr are not visible in
 * this excerpt -- confirm against the full file.
 */
753 * Simple ddb(4) command/hack to view any SPR on the running CPU.
754 * Uses a trivial asm function to perform the mfspr, and rewrites the mfspr
755 * instruction each time.
756 * XXX: Since it uses code modification, it won't work if the kernel code pages
759 extern register_t get_spr(int);
762 DB_SHOW_COMMAND(spr, db_show_spr)
765 volatile uint32_t *p;
766 int sprno, saved_sprno;
771 saved_sprno = sprno = (intptr_t) addr;
772 sprno = ((sprno & 0x3e0) >> 5) | ((sprno & 0x1f) << 5);
773 p = (uint32_t *)(void *)&get_spr;
775 #if defined(_CALL_ELF) && _CALL_ELF == 2
776 /* Account for ELFv2 function prologue. */
779 p = *(volatile uint32_t * volatile *)p;
782 *p = (*p & ~0x001ff800) | (sprno << 11);
783 __syncicache(__DEVOLATILE(uint32_t *, p), cacheline_size);
784 spr = get_spr(sprno);
786 db_printf("SPR %d(%x): %lx\n", saved_sprno, saved_sprno,
/*
 * db_show_frame: debugger show command that dumps a trap frame.
 *
 * The frame is the one at the command address when have_addr is set, and
 * the td_frame of curthread otherwise.  The visible lines print the frame
 * pointer, every entry of tf->fixreg, then lr, cr, xer, ctr, srr0, srr1,
 * exc and dar, followed by the dsisr field of the aim variant and the esr
 * and dbcr0 fields of the booke variant.
 *
 * NOTE(review): most of the assignments that load reg before each print,
 * and the conditional that selects the aim or booke fields, are not visible
 * in this excerpt -- confirm against the full file.
 */
790 DB_SHOW_COMMAND(frame, db_show_frame)
792 struct trapframe *tf;
796 tf = have_addr ? (struct trapframe *)addr : curthread->td_frame;
799 * Everything casts through long to simplify the printing.
800 * 'long' is native register size anyway.
802 db_printf("trap frame %p\n", tf);
803 for (i = 0; i < nitems(tf->fixreg); i++) {
805 db_printf(" r%d:\t%#lx (%ld)\n", i, reg, reg);
808 db_printf(" lr:\t%#lx\n", reg);
810 db_printf(" cr:\t%#lx\n", reg);
812 db_printf(" xer:\t%#lx\n", reg);
814 db_printf(" ctr:\t%#lx (%ld)\n", reg, reg);
816 db_printf(" srr0:\t%#lx\n", reg);
818 db_printf(" srr1:\t%#lx\n", reg);
820 db_printf(" exc:\t%#lx\n", reg);
822 db_printf(" dar:\t%#lx\n", reg);
824 reg = tf->cpu.aim.dsisr;
825 db_printf(" dsisr:\t%#lx\n", reg);
827 reg = tf->cpu.booke.esr;
828 db_printf(" esr:\t%#lx\n", reg);
829 reg = tf->cpu.booke.dbcr0;
830 db_printf(" dbcr0:\t%#lx\n", reg);
/*
 * bzero: zero len bytes starting at buf.
 *
 *   buf - start of the region to clear.
 *   len - number of bytes to clear.
 *
 * Visible structure, in three phases:
 *   1. loop while the pointer is not aligned to sizeof(u_long) and bytes
 *      remain;
 *   2. loop while at least eight u_long words remain, storing zero to the
 *      words at offsets 1 through 7 and advancing by eight words (the store
 *      to offset 0 is on a line not visible here);
 *   3. loop while at least one u_long word remains, consuming one word per
 *      iteration.
 *
 * NOTE(review): the declaration and initialisation of p, the byte stores of
 * phase 1, the word store of phase 3 and any trailing byte loop are not
 * visible in this excerpt -- confirm against the full file.
 */
837 bzero(void *buf, size_t len)
843 while (((vm_offset_t) p & (sizeof(u_long) - 1)) && len) {
848 while (len >= sizeof(u_long) * 8) {
850 *((u_long*) p + 1) = 0;
851 *((u_long*) p + 2) = 0;
852 *((u_long*) p + 3) = 0;
853 len -= sizeof(u_long) * 8;
854 *((u_long*) p + 4) = 0;
855 *((u_long*) p + 5) = 0;
856 *((u_long*) p + 6) = 0;
857 *((u_long*) p + 7) = 0;
858 p += sizeof(u_long) * 8;
861 while (len >= sizeof(u_long)) {
863 len -= sizeof(u_long);
873 /* __stack_chk_fail_local() is called in secure-plt (32-bit). */
874 #if !defined(__powerpc64__)
875 extern void __stack_chk_fail(void);
876 void __stack_chk_fail_local(void);
879 __stack_chk_fail_local(void)