2 * Copyright (C) 1995, 1996 Wolfgang Solfrank.
3 * Copyright (C) 1995, 1996 TooLs GmbH.
6 * Redistribution and use in source and binary forms, with or without
7 * modification, are permitted provided that the following conditions
9 * 1. Redistributions of source code must retain the above copyright
10 * notice, this list of conditions and the following disclaimer.
11 * 2. Redistributions in binary form must reproduce the above copyright
12 * notice, this list of conditions and the following disclaimer in the
13 * documentation and/or other materials provided with the distribution.
14 * 3. All advertising materials mentioning features or use of this software
15 * must display the following acknowledgement:
16 * This product includes software developed by TooLs GmbH.
17 * 4. The name of TooLs GmbH may not be used to endorse or promote products
18 * derived from this software without specific prior written permission.
20 * THIS SOFTWARE IS PROVIDED BY TOOLS GMBH ``AS IS'' AND ANY EXPRESS OR
21 * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
22 * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
23 * IN NO EVENT SHALL TOOLS GMBH BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
24 * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
25 * PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS;
26 * OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY,
27 * WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR
28 * OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF
29 * ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
32 * Copyright (C) 2001 Benno Rice
33 * All rights reserved.
35 * Redistribution and use in source and binary forms, with or without
36 * modification, are permitted provided that the following conditions
38 * 1. Redistributions of source code must retain the above copyright
39 * notice, this list of conditions and the following disclaimer.
40 * 2. Redistributions in binary form must reproduce the above copyright
41 * notice, this list of conditions and the following disclaimer in the
42 * documentation and/or other materials provided with the distribution.
44 * THIS SOFTWARE IS PROVIDED BY Benno Rice ``AS IS'' AND ANY EXPRESS OR
45 * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
46 * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
47 * IN NO EVENT SHALL TOOLS GMBH BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
48 * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
49 * PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS;
50 * OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY,
51 * WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR
52 * OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF
53 * ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
54 * $NetBSD: machdep.c,v 1.74.2.1 2000/11/01 16:13:48 tv Exp $
57 #include <sys/cdefs.h>
58 __FBSDID("$FreeBSD$");
61 #include "opt_kstack_pages.h"
62 #include "opt_platform.h"
64 #include <sys/param.h>
66 #include <sys/systm.h>
72 #include <sys/eventhandler.h>
74 #include <sys/imgact.h>
76 #include <sys/kernel.h>
78 #include <sys/linker.h>
80 #include <sys/malloc.h>
82 #include <sys/msgbuf.h>
83 #include <sys/mutex.h>
84 #include <sys/ptrace.h>
85 #include <sys/reboot.h>
86 #include <sys/rwlock.h>
87 #include <sys/signalvar.h>
88 #include <sys/syscallsubr.h>
89 #include <sys/sysctl.h>
90 #include <sys/sysent.h>
91 #include <sys/sysproto.h>
92 #include <sys/ucontext.h>
94 #include <sys/vmmeter.h>
95 #include <sys/vnode.h>
97 #include <net/netisr.h>
100 #include <vm/vm_extern.h>
101 #include <vm/vm_kern.h>
102 #include <vm/vm_page.h>
103 #include <vm/vm_phys.h>
104 #include <vm/vm_map.h>
105 #include <vm/vm_object.h>
106 #include <vm/vm_pager.h>
108 #include <machine/altivec.h>
109 #ifndef __powerpc64__
110 #include <machine/bat.h>
112 #include <machine/cpu.h>
113 #include <machine/elf.h>
114 #include <machine/fpu.h>
115 #include <machine/hid.h>
116 #include <machine/ifunc.h>
117 #include <machine/kdb.h>
118 #include <machine/md_var.h>
119 #include <machine/metadata.h>
120 #include <machine/mmuvar.h>
121 #include <machine/pcb.h>
122 #include <machine/reg.h>
123 #include <machine/sigframe.h>
124 #include <machine/spr.h>
125 #include <machine/trap.h>
126 #include <machine/vmparam.h>
127 #include <machine/ofw_machdep.h>
131 #include <dev/ofw/openfirm.h>
132 #include <dev/ofw/ofw_subr.h>
136 int cacheline_size = 128;
138 int cacheline_size = 32;
141 int hw_direct_map = -1;
143 int hw_direct_map = 1;
147 extern vm_paddr_t kernload;
150 extern void *ap_pcpu;
152 struct pcpu __pcpu[MAXCPU] __aligned(PAGE_SIZE);
153 static char init_kenv[2048];
155 static struct trapframe frame0;
157 char machine[] = "powerpc";
158 SYSCTL_STRING(_hw, HW_MACHINE, machine, CTLFLAG_RD, machine, 0, "");
160 static void cpu_startup(void *);
161 SYSINIT(cpu, SI_SUB_CPU, SI_ORDER_FIRST, cpu_startup, NULL);
163 SYSCTL_INT(_machdep, CPU_CACHELINE, cacheline_size,
164 CTLFLAG_RD, &cacheline_size, 0, "");
166 uintptr_t powerpc_init(vm_offset_t, vm_offset_t, vm_offset_t, void *,
169 static void fake_preload_metadata(void);
174 /* Default MSR values set in the AIM/Book-E early startup code */
175 register_t psl_kernset;
176 register_t psl_userset;
177 register_t psl_userstatic;
179 register_t psl_userset32;
182 struct kva_md_info kmi;
185 cpu_startup(void *dummy)
189 * Initialise the decrementer-based clock.
194 * Good {morning,afternoon,evening,night}.
196 cpu_setup(PCPU_GET(cpuid));
201 printf("real memory = %ju (%ju MB)\n", ptoa((uintmax_t)physmem),
202 ptoa((uintmax_t)physmem) / 1048576);
206 printf("available KVA = %zu (%zu MB)\n",
207 virtual_end - virtual_avail,
208 (virtual_end - virtual_avail) / 1048576);
211 * Display any holes after the first chunk of extended memory.
216 printf("Physical memory chunk(s):\n");
217 for (indx = 0; phys_avail[indx + 1] != 0; indx += 2) {
219 phys_avail[indx + 1] - phys_avail[indx];
222 printf("0x%016jx - 0x%016jx, %ju bytes (%ju pages)\n",
224 printf("0x%09jx - 0x%09jx, %ju bytes (%ju pages)\n",
226 (uintmax_t)phys_avail[indx],
227 (uintmax_t)phys_avail[indx + 1] - 1,
228 (uintmax_t)size1, (uintmax_t)size1 / PAGE_SIZE);
232 vm_ksubmap_init(&kmi);
234 printf("avail memory = %ju (%ju MB)\n",
235 ptoa((uintmax_t)vm_free_count()),
236 ptoa((uintmax_t)vm_free_count()) / 1048576);
239 * Set up buffers, so they can be used to read disk labels.
242 vm_pager_bufferinit();
245 extern vm_offset_t __startkernel, __endkernel;
246 extern unsigned char __bss_start[];
247 extern unsigned char __sbss_start[];
248 extern unsigned char __sbss_end[];
249 extern unsigned char _end[];
251 void aim_early_init(vm_offset_t fdt, vm_offset_t toc, vm_offset_t ofentry,
252 void *mdp, uint32_t mdp_cookie);
253 void aim_cpu_init(vm_offset_t toc);
254 void booke_cpu_init(void);
257 static void load_external_symtab(void);
261 powerpc_init(vm_offset_t fdt, vm_offset_t toc, vm_offset_t ofentry, void *mdp,
266 vm_offset_t startkernel, endkernel;
269 bool ofw_bootargs = false;
271 bool symbols_provided = false;
272 vm_offset_t ksym_start;
273 vm_offset_t ksym_end;
277 /* First guess at start/end kernel positions */
278 startkernel = __startkernel;
279 endkernel = __endkernel;
282 * If the metadata pointer cookie is not set to the magic value,
283 * the number in mdp should be treated as nonsense.
285 if (mdp_cookie != 0xfb5d104d)
290 * On BOOKE the BSS is already cleared and some variables
291 * initialized. Do not wipe them out.
293 bzero(__sbss_start, __sbss_end - __sbss_start);
294 bzero(__bss_start, _end - __bss_start);
300 aim_early_init(fdt, toc, ofentry, mdp, mdp_cookie);
304 * At this point, we are executing in our correct memory space.
305 * Book-E started there, and AIM has done an rfi and restarted
306 * execution from _start.
308 * We may still be in real mode at this point, however: on 64 bit this
309 * is possible when we are running out of the direct map.
313 * Parse metadata if present and fetch parameters. Must be done
314 * before console is inited so cninit gets the right value of
319 * Starting up from loader.
321 * Full metadata has been provided, but we need to figure
322 * out the correct address to relocate it to.
325 uintptr_t md_offset = 0;
326 vm_paddr_t kernelstartphys, kernelendphys;
329 if ((uintptr_t)&powerpc_init > DMAP_BASE_ADDRESS)
330 md_offset = DMAP_BASE_ADDRESS;
332 md_offset = VM_MIN_KERNEL_ADDRESS - kernload;
335 preload_metadata = mdp;
337 /* Translate phys offset into DMAP offset. */
338 preload_metadata += md_offset;
339 preload_bootstrap_relocate(md_offset);
341 kmdp = preload_search_by_type("elf kernel");
343 boothowto = MD_FETCH(kmdp, MODINFOMD_HOWTO, int);
344 envp = MD_FETCH(kmdp, MODINFOMD_ENVP, char *);
347 init_static_kenv(envp, 0);
349 fdt = MD_FETCH(kmdp, MODINFOMD_DTBP, uintptr_t);
353 kernelstartphys = MD_FETCH(kmdp, MODINFO_ADDR,
355 /* kernelstartphys is already relocated. */
356 kernelendphys = MD_FETCH(kmdp, MODINFOMD_KERNEND,
358 if (kernelendphys != 0)
359 kernelendphys += md_offset;
360 endkernel = ulmax(endkernel, kernelendphys);
362 ksym_start = MD_FETCH(kmdp, MODINFOMD_SSYM, uintptr_t);
363 ksym_end = MD_FETCH(kmdp, MODINFOMD_ESYM, uintptr_t);
364 ksym_sz = *(Elf_Size*)ksym_start;
366 db_fetch_ksymtab(ksym_start, ksym_end, md_offset);
367 /* Symbols provided by loader. */
368 symbols_provided = true;
373 * Self-loading kernel, we have to fake up metadata.
375 * Since we are creating the metadata from the final
376 * memory space, we don't need to call
377 * preload_bootstrap_relocate().
379 fake_preload_metadata();
380 kmdp = preload_search_by_type("elf kernel");
381 init_static_kenv(init_kenv, sizeof(init_kenv));
385 /* Store boot environment state */
386 OF_initial_setup((void *)fdt, NULL, (int (*)(void *))ofentry);
389 * Init params/tunables that can be overridden by the loader
394 * Start initializing proc0 and thread0.
396 proc_linkup0(&proc0, &thread0);
397 thread0.td_frame = &frame0;
399 __asm __volatile("mr 13,%0" :: "r"(&thread0));
401 __asm __volatile("mr 2,%0" :: "r"(&thread0));
405 * Init mutexes, which we use heavily in PMAP
410 * Install the OF client interface
415 if (!symbols_provided && hw_direct_map)
416 load_external_symtab();
420 ofw_parse_bootargs();
423 * Initialize the console before printing anything.
432 /* Make sure the kernel icache is valid before we go too much further */
433 __syncicache((caddr_t)startkernel, endkernel - startkernel);
437 * Choose a platform module so we can get the physical memory map.
440 platform_probe_and_attach();
443 * Set up per-cpu data for the BSP now that the platform can tell
446 if (platform_smp_get_bsp(&bsp) != 0)
448 pc = &__pcpu[bsp.cr_cpuid];
449 __asm __volatile("mtsprg 0, %0" :: "r"(pc));
450 pcpu_init(pc, bsp.cr_cpuid, sizeof(struct pcpu));
451 pc->pc_curthread = &thread0;
452 thread0.td_oncpu = bsp.cr_cpuid;
453 pc->pc_cpuid = bsp.cr_cpuid;
454 pc->pc_hwref = bsp.cr_hwref;
465 link_elf_ireloc(kmdp);
466 pmap_bootstrap(startkernel, endkernel);
467 mtmsr(psl_kernset & ~PSL_EE);
470 * Initialize params/tunables that are derived from memsize
472 init_param2(physmem);
475 * Grab booted kernel's name
477 env = kern_getenv("kernelname");
479 strlcpy(kernelname, env, sizeof(kernelname));
484 * Finish setting up thread0.
486 thread0.td_pcb = (struct pcb *)
487 ((thread0.td_kstack + thread0.td_kstack_pages * PAGE_SIZE -
488 sizeof(struct pcb)) & ~15UL);
489 bzero((void *)thread0.td_pcb, sizeof(struct pcb));
490 pc->pc_curpcb = thread0.td_pcb;
492 /* Initialise the message buffer. */
493 msgbufinit(msgbufp, msgbufsize);
496 if (boothowto & RB_KDB)
497 kdb_enter(KDB_WHY_BOOTFLAGS,
498 "Boot flags requested debugger");
501 return (((uintptr_t)thread0.td_pcb -
502 (sizeof(struct callframe) - 3*sizeof(register_t))) & ~15UL);
507 * On powernv and some booke systems, we might not have symbols loaded via
508 * loader. However, if the user passed the kernel in as the initrd as well,
509 * we can manually load it via reinterpreting the initrd copy of the kernel.
511 * In the BOOKE case, we don't actually have a DMAP yet, so we have to use
512 * temporary maps to inspect the memory, but write DMAP addresses to the
513 * configuration variables.
516 load_external_symtab(void) {
518 vm_paddr_t start, end;
521 u_char *kernelimg; /* Temporary map */
522 u_char *kernelimg_final; /* Final location */
530 vm_offset_t ksym_start, ksym_sz, kstr_start, kstr_sz,
531 ksym_start_final, kstr_start_final;
536 chosen = OF_finddevice("/chosen");
540 if (!OF_hasprop(chosen, "linux,initrd-start") ||
541 !OF_hasprop(chosen, "linux,initrd-end"))
544 size = OF_getencprop(chosen, "linux,initrd-start", cell, sizeof(cell));
548 start = (uint64_t)cell[0] << 32 | cell[1];
552 size = OF_getencprop(chosen, "linux,initrd-end", cell, sizeof(cell));
556 end = (uint64_t)cell[0] << 32 | cell[1];
560 if (!(end - start > 0))
563 kernelimg_final = (u_char *) PHYS_TO_DMAP(start);
565 kernelimg = kernelimg_final;
567 kernelimg = (u_char *)pmap_early_io_map(start, PAGE_SIZE);
569 ehdr = (Elf_Ehdr *)kernelimg;
571 if (!IS_ELF(*ehdr)) {
573 pmap_early_io_unmap(start, PAGE_SIZE);
579 pmap_early_io_unmap(start, PAGE_SIZE);
580 kernelimg = (u_char *)pmap_early_io_map(start, (end - start));
583 phdr = (Elf_Phdr *)(kernelimg + ehdr->e_phoff);
584 shdr = (Elf_Shdr *)(kernelimg + ehdr->e_shoff);
588 ksym_start_final = 0;
591 kstr_start_final = 0;
592 for (i = 0; i < ehdr->e_shnum; i++) {
593 if (shdr[i].sh_type == SHT_SYMTAB) {
594 ksym_start = (vm_offset_t)(kernelimg +
596 ksym_start_final = (vm_offset_t)
597 (kernelimg_final + shdr[i].sh_offset);
598 ksym_sz = (vm_offset_t)(shdr[i].sh_size);
599 kstr_start = (vm_offset_t)(kernelimg +
600 shdr[shdr[i].sh_link].sh_offset);
601 kstr_start_final = (vm_offset_t)
603 shdr[shdr[i].sh_link].sh_offset);
605 kstr_sz = (vm_offset_t)
606 (shdr[shdr[i].sh_link].sh_size);
610 if (ksym_start != 0 && kstr_start != 0 && ksym_sz != 0 &&
611 kstr_sz != 0 && ksym_start < kstr_start) {
613 * We can't use db_fetch_ksymtab() here, because we need to
614 * feed in DMAP addresses that are not mapped yet on booke.
616 * Write the variables directly, where db_init() will pick
617 * them up later, after the DMAP is up.
619 ksymtab = ksym_start_final;
620 ksymtab_size = ksym_sz;
621 kstrtab = kstr_start_final;
622 ksymtab_relbase = (__startkernel - KERNBASE);
626 pmap_early_io_unmap(start, (end - start));
633 * When not being loaded from loader, we need to create our own metadata
634 * so we can interact with the kernel linker.
637 fake_preload_metadata(void) {
638 /* We depend on dword alignment here. */
639 static uint32_t fake_preload[36] __aligned(8);
642 fake_preload[i++] = MODINFO_NAME;
643 fake_preload[i++] = strlen("kernel") + 1;
644 strcpy((char*)&fake_preload[i], "kernel");
645 /* ['k' 'e' 'r' 'n'] ['e' 'l' '\0' ..] */
648 fake_preload[i++] = MODINFO_TYPE;
649 fake_preload[i++] = strlen("elf kernel") + 1;
650 strcpy((char*)&fake_preload[i], "elf kernel");
651 /* ['e' 'l' 'f' ' '] ['k' 'e' 'r' 'n'] ['e' 'l' '\0' ..] */
655 /* Padding -- Fields start on u_long boundaries */
656 fake_preload[i++] = 0;
659 fake_preload[i++] = MODINFO_ADDR;
660 fake_preload[i++] = sizeof(vm_offset_t);
661 *(vm_offset_t *)&fake_preload[i] =
662 (vm_offset_t)(__startkernel);
663 i += (sizeof(vm_offset_t) / 4);
665 fake_preload[i++] = MODINFO_SIZE;
666 fake_preload[i++] = sizeof(vm_offset_t);
667 *(vm_offset_t *)&fake_preload[i] =
668 (vm_offset_t)(__endkernel) - (vm_offset_t)(__startkernel);
669 i += (sizeof(vm_offset_t) / 4);
672 * MODINFOMD_SSYM and MODINFOMD_ESYM cannot be provided here,
673 * as the memory comes from outside the loaded ELF sections.
675 * If the symbols are being provided by other means (MFS), the
676 * tables will be loaded into the debugger directly.
679 /* Null field at end to mark end of data. */
680 fake_preload[i++] = 0;
682 preload_metadata = (void*)fake_preload;
686 * Flush the D-cache for non-DMA I/O so that the I-cache can
687 * be made coherent later.
690 cpu_flush_dcache(void *ptr, size_t len)
692 register_t addr, off;
695 * Align the address to a cacheline and adjust the length
696 * accordingly. Then round the length to a multiple of the
697 * cacheline for easy looping.
699 addr = (uintptr_t)ptr;
700 off = addr & (cacheline_size - 1);
702 len = roundup2(len + off, cacheline_size);
705 __asm __volatile ("dcbf 0,%0" :: "r"(addr));
706 __asm __volatile ("sync");
707 addr += cacheline_size;
708 len -= cacheline_size;
713 ptrace_set_pc(struct thread *td, unsigned long addr)
715 struct trapframe *tf;
718 tf->srr0 = (register_t)addr;
730 if (td->td_md.md_spinlock_count == 0) {
732 msr = intr_disable();
733 td->td_md.md_spinlock_count = 1;
734 td->td_md.md_saved_msr = msr;
737 td->td_md.md_spinlock_count++;
747 msr = td->td_md.md_saved_msr;
748 td->td_md.md_spinlock_count--;
749 if (td->td_md.md_spinlock_count == 0) {
757 * Simple ddb(4) command/hack to view any SPR on the running CPU.
758 * Uses a trivial asm function to perform the mfspr, and rewrites the mfspr
759 * instruction each time.
760 * XXX: Since it uses code modification, it won't work if the kernel code pages
763 extern register_t get_spr(int);
766 DB_SHOW_COMMAND(spr, db_show_spr)
769 volatile uint32_t *p;
770 int sprno, saved_sprno;
775 saved_sprno = sprno = (intptr_t) addr;
776 sprno = ((sprno & 0x3e0) >> 5) | ((sprno & 0x1f) << 5);
777 p = (uint32_t *)(void *)&get_spr;
779 #if defined(_CALL_ELF) && _CALL_ELF == 2
780 /* Account for ELFv2 function prologue. */
783 p = *(volatile uint32_t * volatile *)p;
786 *p = (*p & ~0x001ff800) | (sprno << 11);
787 __syncicache(__DEVOLATILE(uint32_t *, p), cacheline_size);
788 spr = get_spr(sprno);
790 db_printf("SPR %d(%x): %lx\n", saved_sprno, saved_sprno,
794 DB_SHOW_COMMAND(frame, db_show_frame)
796 struct trapframe *tf;
800 tf = have_addr ? (struct trapframe *)addr : curthread->td_frame;
803 * Everything casts through long to simplify the printing.
804 * 'long' is native register size anyway.
806 db_printf("trap frame %p\n", tf);
807 for (i = 0; i < nitems(tf->fixreg); i++) {
809 db_printf(" r%d:\t%#lx (%ld)\n", i, reg, reg);
812 db_printf(" lr:\t%#lx\n", reg);
814 db_printf(" cr:\t%#lx\n", reg);
816 db_printf(" xer:\t%#lx\n", reg);
818 db_printf(" ctr:\t%#lx (%ld)\n", reg, reg);
820 db_printf(" srr0:\t%#lx\n", reg);
822 db_printf(" srr1:\t%#lx\n", reg);
824 db_printf(" exc:\t%#lx\n", reg);
826 db_printf(" dar:\t%#lx\n", reg);
828 reg = tf->cpu.aim.dsisr;
829 db_printf(" dsisr:\t%#lx\n", reg);
831 reg = tf->cpu.booke.esr;
832 db_printf(" esr:\t%#lx\n", reg);
833 reg = tf->cpu.booke.dbcr0;
834 db_printf(" dbcr0:\t%#lx\n", reg);
841 bzero(void *buf, size_t len)
847 while (((vm_offset_t) p & (sizeof(u_long) - 1)) && len) {
852 while (len >= sizeof(u_long) * 8) {
854 *((u_long*) p + 1) = 0;
855 *((u_long*) p + 2) = 0;
856 *((u_long*) p + 3) = 0;
857 len -= sizeof(u_long) * 8;
858 *((u_long*) p + 4) = 0;
859 *((u_long*) p + 5) = 0;
860 *((u_long*) p + 6) = 0;
861 *((u_long*) p + 7) = 0;
862 p += sizeof(u_long) * 8;
865 while (len >= sizeof(u_long)) {
867 len -= sizeof(u_long);
877 /* __stack_chk_fail_local() is called in secure-plt (32-bit). */
878 #if !defined(__powerpc64__)
879 extern void __stack_chk_fail(void);
880 void __stack_chk_fail_local(void);
883 __stack_chk_fail_local(void)