2 * Copyright (C) 1995, 1996 Wolfgang Solfrank.
3 * Copyright (C) 1995, 1996 TooLs GmbH.
6 * Redistribution and use in source and binary forms, with or without
7 * modification, are permitted provided that the following conditions
9 * 1. Redistributions of source code must retain the above copyright
10 * notice, this list of conditions and the following disclaimer.
11 * 2. Redistributions in binary form must reproduce the above copyright
12 * notice, this list of conditions and the following disclaimer in the
13 * documentation and/or other materials provided with the distribution.
14 * 3. All advertising materials mentioning features or use of this software
15 * must display the following acknowledgement:
16 * This product includes software developed by TooLs GmbH.
17 * 4. The name of TooLs GmbH may not be used to endorse or promote products
18 * derived from this software without specific prior written permission.
20 * THIS SOFTWARE IS PROVIDED BY TOOLS GMBH ``AS IS'' AND ANY EXPRESS OR
21 * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
22 * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
23 * IN NO EVENT SHALL TOOLS GMBH BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
24 * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
25 * PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS;
26 * OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY,
27 * WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR
28 * OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF
29 * ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
32 * Copyright (C) 2001 Benno Rice
33 * All rights reserved.
35 * Redistribution and use in source and binary forms, with or without
36 * modification, are permitted provided that the following conditions
38 * 1. Redistributions of source code must retain the above copyright
39 * notice, this list of conditions and the following disclaimer.
40 * 2. Redistributions in binary form must reproduce the above copyright
41 * notice, this list of conditions and the following disclaimer in the
42 * documentation and/or other materials provided with the distribution.
44 * THIS SOFTWARE IS PROVIDED BY Benno Rice ``AS IS'' AND ANY EXPRESS OR
45 * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
46 * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
47 * IN NO EVENT SHALL TOOLS GMBH BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
48 * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
49 * PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS;
50 * OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY,
51 * WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR
52 * OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF
53 * ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
54 * $NetBSD: machdep.c,v 1.74.2.1 2000/11/01 16:13:48 tv Exp $
57 #include <sys/cdefs.h>
58 __FBSDID("$FreeBSD$");
61 #include "opt_kstack_pages.h"
62 #include "opt_platform.h"
64 #include <sys/param.h>
66 #include <sys/systm.h>
72 #include <sys/eventhandler.h>
74 #include <sys/imgact.h>
76 #include <sys/kernel.h>
78 #include <sys/linker.h>
80 #include <sys/malloc.h>
82 #include <sys/msgbuf.h>
83 #include <sys/mutex.h>
84 #include <sys/ptrace.h>
85 #include <sys/reboot.h>
87 #include <sys/rwlock.h>
88 #include <sys/signalvar.h>
89 #include <sys/syscallsubr.h>
90 #include <sys/sysctl.h>
91 #include <sys/sysent.h>
92 #include <sys/sysproto.h>
93 #include <sys/ucontext.h>
95 #include <sys/vmmeter.h>
96 #include <sys/vnode.h>
98 #include <net/netisr.h>
101 #include <vm/vm_extern.h>
102 #include <vm/vm_kern.h>
103 #include <vm/vm_page.h>
104 #include <vm/vm_phys.h>
105 #include <vm/vm_map.h>
106 #include <vm/vm_object.h>
107 #include <vm/vm_pager.h>
109 #include <machine/altivec.h>
110 #ifndef __powerpc64__
111 #include <machine/bat.h>
113 #include <machine/cpu.h>
114 #include <machine/elf.h>
115 #include <machine/fpu.h>
116 #include <machine/hid.h>
117 #include <machine/ifunc.h>
118 #include <machine/kdb.h>
119 #include <machine/md_var.h>
120 #include <machine/metadata.h>
121 #include <machine/mmuvar.h>
122 #include <machine/pcb.h>
123 #include <machine/sigframe.h>
124 #include <machine/spr.h>
125 #include <machine/trap.h>
126 #include <machine/vmparam.h>
127 #include <machine/ofw_machdep.h>
131 #include <dev/ofw/openfirm.h>
132 #include <dev/ofw/ofw_subr.h>
/*
 * File-scope state for the PowerPC machine-dependent startup code.
 *
 * NOTE(review): cacheline_size and hw_direct_map each appear twice below with
 * different initializers; presumably the alternatives are selected by
 * preprocessor conditionals that are not visible in this view -- confirm.
 */
136 int cacheline_size = 128;
138 int cacheline_size = 32;
141 int hw_direct_map = -1;
143 int hw_direct_map = 1;
/* Used by powerpc_init() when computing the metadata relocation offset. */
147 extern vm_paddr_t kernload;
150 extern void *ap_pcpu;
/* Per-CPU structures, indexed by CPU id in powerpc_init(); page-aligned array. */
152 struct pcpu __pcpu[MAXCPU] __aligned(PAGE_SIZE);
/* Buffer passed to init_static_kenv() when the kernel fakes its own metadata. */
153 static char init_kenv[2048];
/* Trap frame that powerpc_init() installs as thread0.td_frame. */
155 static struct trapframe frame0;
/* Machine name string, exported read-only through the hw sysctl tree. */
157 char machine[] = "powerpc";
158 SYSCTL_STRING(_hw, HW_MACHINE, machine, CTLFLAG_RD | CTLFLAG_CAPRD, machine, 0, "");
/* cpu_startup() is registered to run at SI_SUB_CPU, SI_ORDER_FIRST. */
160 static void cpu_startup(void *);
161 SYSINIT(cpu, SI_SUB_CPU, SI_ORDER_FIRST, cpu_startup, NULL);
/* Read-only machdep sysctl backed directly by cacheline_size. */
163 SYSCTL_INT(_machdep, CPU_CACHELINE, cacheline_size,
164 CTLFLAG_RD, &cacheline_size, 0, "");
/* NOTE(review): the rest of this prototype is on a line not visible here. */
166 uintptr_t powerpc_init(vm_offset_t, vm_offset_t, vm_offset_t, void *,
169 static void fake_preload_metadata(void);
174 /* Default MSR values set in the AIM/Book-E early startup code */
175 register_t psl_kernset;
176 register_t psl_userset;
177 register_t psl_userstatic;
179 register_t psl_userset32;
/* Passed by address to vm_ksubmap_init() from cpu_startup(). */
182 struct kva_md_info kmi;
/*
 * cpu_startup: SYSINIT hook (registered at SI_SUB_CPU, SI_ORDER_FIRST).
 *
 * In the visible lines it calls cpu_setup() for the current CPU, prints the
 * real memory size, the available kernel virtual address range and every
 * phys_avail[] chunk, then calls vm_ksubmap_init() and vm_pager_bufferinit().
 *
 * dummy: SYSINIT argument; registered as NULL and not referenced in the
 *        visible lines.
 *
 * NOTE(review): local declarations, braces and any conditionals guarding the
 * prints are not visible in this view.
 */
185 cpu_startup(void *dummy)
189 * Initialise the decrementer-based clock.
194 * Good {morning,afternoon,evening,night}.
196 cpu_setup(PCPU_GET(cpuid));
/* physmem is a page count; ptoa() converts it to bytes, 1048576 is 1 MB. */
201 printf("real memory = %ju (%ju MB)\n", ptoa((uintmax_t)physmem),
202 ptoa((uintmax_t)physmem) / 1048576);
206 printf("available KVA = %zu (%zu MB)\n",
207 virtual_end - virtual_avail,
208 (virtual_end - virtual_avail) / 1048576);
211 * Display any holes after the first chunk of extended memory.
216 printf("Physical memory chunk(s):\n");
/*
 * phys_avail[] is walked as (start, end) pairs; a zero end address
 * terminates the list.
 */
217 for (indx = 0; phys_avail[indx + 1] != 0; indx += 2) {
219 phys_avail[indx + 1] - phys_avail[indx];
/*
 * NOTE(review): two format strings are visible (16 and 9 hex digits);
 * presumably one per build configuration, selected by conditionals that
 * are not shown -- confirm.
 */
222 printf("0x%016jx - 0x%016jx, %ju bytes (%ju pages)\n",
224 printf("0x%09jx - 0x%09jx, %ju bytes (%ju pages)\n",
226 (uintmax_t)phys_avail[indx],
227 (uintmax_t)phys_avail[indx + 1] - 1,
228 (uintmax_t)size1, (uintmax_t)size1 / PAGE_SIZE);
232 vm_ksubmap_init(&kmi);
/* vm_free_count() is converted from pages to bytes with ptoa(). */
234 printf("avail memory = %ju (%ju MB)\n",
235 ptoa((uintmax_t)vm_free_count()),
236 ptoa((uintmax_t)vm_free_count()) / 1048576);
239 * Set up buffers, so they can be used to read disk labels.
242 vm_pager_bufferinit();
/*
 * Kernel image bounds and BSS section symbols, plus prototypes for the early
 * init helpers.  powerpc_init() uses __startkernel/__endkernel as its first
 * guess at the kernel extent and the BSS symbols for its bzero() calls.
 */
245 extern vm_offset_t __startkernel, __endkernel;
246 extern unsigned char __bss_start[];
247 extern unsigned char __sbss_start[];
248 extern unsigned char __sbss_end[];
249 extern unsigned char _end[];
/* Called from powerpc_init() with its own arguments passed straight through. */
251 void aim_early_init(vm_offset_t fdt, vm_offset_t toc, vm_offset_t ofentry,
252 void *mdp, uint32_t mdp_cookie);
/* NOTE(review): no call to either of these is visible in this view. */
253 void aim_cpu_init(vm_offset_t toc);
254 void booke_cpu_init(void);
257 static void load_external_symtab(void);
/*
 * powerpc_init: early machine-dependent bootstrap.
 *
 * Visible steps, in order: take first-guess kernel bounds from
 * __startkernel/__endkernel, check the metadata cookie, zero .sbss/.bss
 * (the in-line comment says this must not happen on Book-E), run
 * aim_early_init(), then either relocate and parse loader-supplied metadata
 * or build fake metadata with fake_preload_metadata().  After that it hands
 * the firmware entry point to OF_initial_setup(), links proc0/thread0,
 * optionally calls load_external_symtab(), parses boot arguments, sets up
 * the early I/O map, syncs the icache over the kernel image, probes and
 * attaches a platform, initializes the boot CPU pcpu slot, bootstraps the
 * pmap, finishes thread0 and the message buffer, and enters the debugger
 * when RB_KDB is set in boothowto.
 *
 * Parameters (as far as the visible lines show):
 *   fdt          - device tree address; overwritten with the MODINFOMD_DTBP
 *                  value when loader metadata is parsed.
 *   toc, ofentry - forwarded to aim_early_init(); ofentry is also passed to
 *                  OF_initial_setup() as the firmware entry function.
 *   mdp          - loader metadata pointer; per the in-line comment it is
 *                  only meaningful when mdp_cookie equals 0xfb5d104d.
 *
 * Returns an address below the thread0 PCB, rounded down to 16 bytes.
 * NOTE(review): presumably the initial stack pointer for the caller --
 * confirm against the assembly entry code.
 *
 * NOTE(review): braces, several declarations and all preprocessor
 * conditionals are missing from this view, so which steps are specific to
 * AIM, Book-E or a given word size cannot be determined from here.
 */
261 powerpc_init(vm_offset_t fdt, vm_offset_t toc, vm_offset_t ofentry, void *mdp,
266 vm_offset_t startkernel, endkernel;
269 bool ofw_bootargs = false;
271 bool symbols_provided = false;
272 vm_offset_t ksym_start;
273 vm_offset_t ksym_end;
276 /* First guess at start/end kernel positions */
277 startkernel = __startkernel;
278 endkernel = __endkernel;
281 * If the metadata pointer cookie is not set to the magic value,
282 * the number in mdp should be treated as nonsense.
/* NOTE(review): the statement controlled by this test is not visible here. */
284 if (mdp_cookie != 0xfb5d104d)
289 * On BOOKE the BSS is already cleared and some variables
290 * initialized. Do not wipe them out.
292 bzero(__sbss_start, __sbss_end - __sbss_start);
293 bzero(__bss_start, _end - __bss_start);
299 aim_early_init(fdt, toc, ofentry, mdp, mdp_cookie);
303 * At this point, we are executing in our correct memory space.
304 * Book-E started there, and AIM has done an rfi and restarted
305 * execution from _start.
307 * We may still be in real mode, however. If we are running out of
308 * the direct map on 64 bit, this is possible to do.
312 * Parse metadata if present and fetch parameters. Must be done
313 * before console is inited so cninit gets the right value of
318 * Starting up from loader.
320 * Full metadata has been provided, but we need to figure
321 * out the correct address to relocate it to.
324 uintptr_t md_offset = 0;
325 vm_paddr_t kernelendphys;
/*
 * md_offset is added to the metadata pointer and passed to
 * preload_bootstrap_relocate().  It is DMAP_BASE_ADDRESS when this function
 * itself is running above the DMAP base.
 * NOTE(review): the second assignment is presumably an else branch or an
 * alternative build configuration; the selecting line is not visible.
 */
328 if ((uintptr_t)&powerpc_init > DMAP_BASE_ADDRESS)
329 md_offset = DMAP_BASE_ADDRESS;
331 md_offset = VM_MIN_KERNEL_ADDRESS - kernload;
334 preload_metadata = mdp;
336 /* Translate phys offset into DMAP offset. */
337 preload_metadata += md_offset;
338 preload_bootstrap_relocate(md_offset);
340 kmdp = preload_search_by_type("elf kernel");
342 boothowto = MD_FETCH(kmdp, MODINFOMD_HOWTO, int);
343 envp = MD_FETCH(kmdp, MODINFOMD_ENVP, char *);
346 init_static_kenv(envp, 0);
348 fdt = MD_FETCH(kmdp, MODINFOMD_DTBP, uintptr_t);
352 /* kernelstartphys is already relocated. */
353 kernelendphys = MD_FETCH(kmdp, MODINFOMD_KERNEND,
/*
 * A zero MODINFOMD_KERNEND is left as zero so that the ulmax() below keeps
 * the linker-provided endkernel; otherwise the relocated value can extend it.
 */
355 if (kernelendphys != 0)
356 kernelendphys += md_offset;
357 endkernel = ulmax(endkernel, kernelendphys);
359 ksym_start = MD_FETCH(kmdp, MODINFOMD_SSYM, uintptr_t);
360 ksym_end = MD_FETCH(kmdp, MODINFOMD_ESYM, uintptr_t);
362 db_fetch_ksymtab(ksym_start, ksym_end, md_offset);
363 /* Symbols provided by loader. */
364 symbols_provided = true;
369 * Self-loading kernel, we have to fake up metadata.
371 * Since we are creating the metadata from the final
372 * memory space, we don't need to call
373 * preload_boostrap_relocate().
375 fake_preload_metadata();
376 kmdp = preload_search_by_type("elf kernel");
377 init_static_kenv(init_kenv, sizeof(init_kenv));
381 /* Store boot environment state */
382 OF_initial_setup((void *)fdt, NULL, (int (*)(void *))ofentry);
385 * Init params/tunables that can be overridden by the loader
390 * Start initializing proc0 and thread0.
392 proc_linkup0(&proc0, &thread0);
393 thread0.td_frame = &frame0;
395 __asm __volatile("mr 13,%0" :: "r"(&thread0));
397 __asm __volatile("mr 2,%0" :: "r"(&thread0));
401 * Init mutexes, which we use heavily in PMAP
406 * Install the OF client interface
411 if (!symbols_provided && hw_direct_map)
412 load_external_symtab();
416 ofw_parse_bootargs();
420 * Early I/O map needs to be initialized before console, in order to
421 * map frame buffers properly, and after boot args have been parsed,
422 * to handle tunables properly.
424 pmap_early_io_map_init();
428 * Initialize the console before printing anything.
437 /* Make sure the kernel icache is valid before we go too much further */
438 __syncicache((caddr_t)startkernel, endkernel - startkernel);
442 * Choose a platform module so we can get the physical memory map.
445 platform_probe_and_attach();
448 * Set up per-cpu data for the BSP now that the platform can tell
451 if (platform_smp_get_bsp(&bsp) != 0)
453 pc = &__pcpu[bsp.cr_cpuid];
454 __asm __volatile("mtsprg 0, %0" :: "r"(pc));
455 pcpu_init(pc, bsp.cr_cpuid, sizeof(struct pcpu));
456 pc->pc_curthread = &thread0;
457 thread0.td_oncpu = bsp.cr_cpuid;
458 pc->pc_cpuid = bsp.cr_cpuid;
459 pc->pc_hwref = bsp.cr_hwref;
470 link_elf_ireloc(kmdp);
471 pmap_bootstrap(startkernel, endkernel);
472 mtmsr(psl_kernset & ~PSL_EE);
475 * Initialize params/tunables that are derived from memsize
477 init_param2(physmem);
480 * Grab booted kernel's name
482 env = kern_getenv("kernelname");
484 strlcpy(kernelname, env, sizeof(kernelname));
489 * Finish setting up thread0.
/*
 * The PCB is carved out of the top of the thread0 kernel stack: stack base
 * plus stack size minus sizeof(struct pcb), rounded down to 16 bytes, then
 * zeroed and published as this CPU current PCB.
 */
491 thread0.td_pcb = (struct pcb *)
492 ((thread0.td_kstack + thread0.td_kstack_pages * PAGE_SIZE -
493 sizeof(struct pcb)) & ~15UL);
494 bzero((void *)thread0.td_pcb, sizeof(struct pcb));
495 pc->pc_curpcb = thread0.td_pcb;
497 /* Initialise the message buffer. */
498 msgbufinit(msgbufp, msgbufsize);
501 if (boothowto & RB_KDB)
502 kdb_enter(KDB_WHY_BOOTFLAGS,
503 "Boot flags requested debugger");
/*
 * Return value: the PCB address minus (sizeof(struct callframe) minus three
 * registers), rounded down to a 16-byte boundary.
 */
506 return (((uintptr_t)thread0.td_pcb -
507 (sizeof(struct callframe) - 3*sizeof(register_t))) & ~15UL);
512 * On powernv and some booke systems, we might not have symbols loaded via
513 * loader. However, if the user passed the kernel in as the initrd as well,
514 * we can manually load it via reinterpreting the initrd copy of the kernel.
516 * In the BOOKE case, we don't actually have a DMAP yet, so we have to use
517 * temporary maps to inspect the memory, but write DMAP addresses to the
518 * configuration variables.
/*
 * load_external_symtab: look for an ELF image described by the
 * "linux,initrd-start"/"linux,initrd-end" properties of the /chosen node and,
 * if it contains an SHT_SYMTAB section, publish that symbol table and its
 * linked string table through ksymtab, ksymtab_size, kstrtab and
 * ksymtab_relbase.
 *
 * Two views of the image are tracked: kernelimg (the mapping used to read
 * the headers now) and kernelimg_final (the PHYS_TO_DMAP address that is
 * written to the global variables).
 *
 * NOTE(review): early-return statements, braces and the conditionals that
 * choose between the DMAP and pmap_early_io_map() paths are not visible in
 * this view.
 * NOTE(review): in the visible lines neither e_shoff nor sh_link is checked
 * against the image size or e_shnum before being used as an offset/index --
 * confirm whether validation exists in the lines not shown.
 */
521 load_external_symtab(void) {
523 vm_paddr_t start, end;
526 u_char *kernelimg; /* Temporary map */
527 u_char *kernelimg_final; /* Final location */
534 vm_offset_t ksym_start, ksym_sz, kstr_start, kstr_sz,
535 ksym_start_final, kstr_start_final;
540 chosen = OF_finddevice("/chosen");
/* Both initrd bounds must be present in /chosen. */
544 if (!OF_hasprop(chosen, "linux,initrd-start") ||
545 !OF_hasprop(chosen, "linux,initrd-end"))
548 size = OF_getencprop(chosen, "linux,initrd-start", cell, sizeof(cell));
/*
 * Two-cell form: cell[0] is the high 32 bits, cell[1] the low 32 bits.
 * NOTE(review): handling of other property sizes is not visible here.
 */
552 start = (uint64_t)cell[0] << 32 | cell[1];
556 size = OF_getencprop(chosen, "linux,initrd-end", cell, sizeof(cell));
560 end = (uint64_t)cell[0] << 32 | cell[1];
/*
 * NOTE(review): if vm_paddr_t is unsigned, this test only rejects
 * end == start; end < start would wrap to a large positive value -- confirm.
 */
564 if (!(end - start > 0))
567 kernelimg_final = (u_char *) PHYS_TO_DMAP(start);
569 kernelimg = kernelimg_final;
/* Map a single page first, just enough to inspect the ELF header. */
571 kernelimg = (u_char *)pmap_early_io_map(start, PAGE_SIZE);
573 ehdr = (Elf_Ehdr *)kernelimg;
575 if (!IS_ELF(*ehdr)) {
577 pmap_early_io_unmap(start, PAGE_SIZE);
/* Header looks like ELF: replace the one-page map with one over the image. */
583 pmap_early_io_unmap(start, PAGE_SIZE);
584 kernelimg = (u_char *)pmap_early_io_map(start, (end - start));
587 shdr = (Elf_Shdr *)(kernelimg + ehdr->e_shoff);
591 ksym_start_final = 0;
594 kstr_start_final = 0;
/*
 * Scan the section headers for a symbol table; sh_link of that section is
 * used as the index of its string table section.
 */
595 for (i = 0; i < ehdr->e_shnum; i++) {
596 if (shdr[i].sh_type == SHT_SYMTAB) {
597 ksym_start = (vm_offset_t)(kernelimg +
599 ksym_start_final = (vm_offset_t)
600 (kernelimg_final + shdr[i].sh_offset);
601 ksym_sz = (vm_offset_t)(shdr[i].sh_size);
602 kstr_start = (vm_offset_t)(kernelimg +
603 shdr[shdr[i].sh_link].sh_offset);
604 kstr_start_final = (vm_offset_t)
606 shdr[shdr[i].sh_link].sh_offset);
608 kstr_sz = (vm_offset_t)
609 (shdr[shdr[i].sh_link].sh_size);
/*
 * Publish only when both tables were found, are non-empty, and the symbol
 * table precedes the string table in the image.
 */
613 if (ksym_start != 0 && kstr_start != 0 && ksym_sz != 0 &&
614 kstr_sz != 0 && ksym_start < kstr_start) {
616 * We can't use db_fetch_ksymtab() here, because we need to
617 * feed in DMAP addresses that are not mapped yet on booke.
619 * Write the variables directly, where db_init() will pick
620 * them up later, after the DMAP is up.
622 ksymtab = ksym_start_final;
623 ksymtab_size = ksym_sz;
624 kstrtab = kstr_start_final;
625 ksymtab_relbase = (__startkernel - KERNBASE);
629 pmap_early_io_unmap(start, (end - start));
636 * When not being loaded from loader, we need to create our own metadata
637 * so we can interact with the kernel linker.
/*
 * fake_preload_metadata: build a minimal module-metadata list in a static
 * buffer and point preload_metadata at it.
 *
 * Records written, in order: MODINFO_NAME "kernel", MODINFO_TYPE
 * "elf kernel", MODINFO_ADDR (__startkernel), MODINFO_SIZE
 * (__endkernel - __startkernel), followed by a zero terminator word.
 * Each record is a 32-bit type word, a 32-bit length word, then the payload.
 *
 * NOTE(review): the declaration of i and the statements that advance i past
 * each string payload are not visible in this view; the fixed 36-word buffer
 * has to be large enough for all records -- confirm against the full source.
 */
640 fake_preload_metadata(void) {
641 /* We depend on dword alignment here. */
642 static uint32_t fake_preload[36] __aligned(8);
645 fake_preload[i++] = MODINFO_NAME;
646 fake_preload[i++] = strlen("kernel") + 1;
647 strcpy((char*)&fake_preload[i], "kernel");
648 /* ['k' 'e' 'r' 'n'] ['e' 'l' '\0' ..] */
651 fake_preload[i++] = MODINFO_TYPE;
652 fake_preload[i++] = strlen("elf kernel") + 1;
653 strcpy((char*)&fake_preload[i], "elf kernel");
654 /* ['e' 'l' 'f' ' '] ['k' 'e' 'r' 'n'] ['e' 'l' '\0' ..] */
/* NOTE(review): presumably conditional on word size; the guard is not visible. */
658 /* Padding -- Fields start on u_long boundaries */
659 fake_preload[i++] = 0;
/* Address record: payload is one vm_offset_t, so i advances by its size in words. */
662 fake_preload[i++] = MODINFO_ADDR;
663 fake_preload[i++] = sizeof(vm_offset_t);
664 *(vm_offset_t *)&fake_preload[i] =
665 (vm_offset_t)(__startkernel);
666 i += (sizeof(vm_offset_t) / 4);
/* Size record: kernel extent in bytes, stored the same way. */
668 fake_preload[i++] = MODINFO_SIZE;
669 fake_preload[i++] = sizeof(vm_offset_t);
670 *(vm_offset_t *)&fake_preload[i] =
671 (vm_offset_t)(__endkernel) - (vm_offset_t)(__startkernel);
672 i += (sizeof(vm_offset_t) / 4);
675 * MODINFOMD_SSYM and MODINFOMD_ESYM cannot be provided here,
676 * as the memory comes from outside the loaded ELF sections.
678 * If the symbols are being provided by other means (MFS), the
679 * tables will be loaded into the debugger directly.
682 /* Null field at end to mark end of data. */
683 fake_preload[i++] = 0;
685 preload_metadata = (void*)fake_preload;
689 * Flush the D-cache for non-DMA I/O so that the I-cache can
690 * be made coherent later.
/*
 * cpu_flush_dcache: issue "dcbf" followed by "sync" for each cache line in
 * the range described by ptr and len.
 *
 * ptr: start of the region.
 * len: length of the region in bytes; rounded up so that whole cache lines
 *      are covered.
 *
 * NOTE(review): the loop header and the statement that aligns addr down by
 * off are not visible in this view.
 */
693 cpu_flush_dcache(void *ptr, size_t len)
695 register_t addr, off;
698 * Align the address to a cacheline and adjust the length
699 * accordingly. Then round the length to a multiple of the
700 * cacheline for easy looping.
702 addr = (uintptr_t)ptr;
/* Offset of ptr within its cache line; the mask form needs a power-of-two size. */
703 off = addr & (cacheline_size - 1);
/* Grow len by the leading offset, then round up to whole cache lines. */
705 len = roundup2(len + off, cacheline_size);
/* One dcbf + sync per cache line, then step to the next line. */
708 __asm __volatile ("dcbf 0,%0" :: "r"(addr));
709 __asm __volatile ("sync");
710 addr += cacheline_size;
711 len -= cacheline_size;
/*
 * ptrace_set_pc: store addr into the srr0 field of a trap frame.
 *
 * td:   thread whose state is being modified.
 * addr: new value for srr0, cast to register_t.
 *
 * NOTE(review): the assignment of tf (presumably td->td_frame) and the
 * return statement are not visible in this view -- confirm.
 */
716 ptrace_set_pc(struct thread *td, unsigned long addr)
718 struct trapframe *tf;
721 tf->srr0 = (register_t)addr;
/*
 * NOTE(review): the enclosing function header and the declarations of td and
 * msr are not visible in this view; the body matches a spinlock-enter
 * routine -- confirm.
 *
 * When the per-thread spinlock count is zero, intr_disable() is called, the
 * count becomes 1 and the value returned by intr_disable() is stored in
 * md_saved_msr.  The trailing increment is presumably the else branch for
 * nested entries; the else line itself is not visible.
 */
733 if (td->td_md.md_spinlock_count == 0) {
735 msr = intr_disable();
736 td->td_md.md_spinlock_count = 1;
737 td->td_md.md_saved_msr = msr;
740 td->td_md.md_spinlock_count++;
/*
 * NOTE(review): the enclosing function header and the body of the final if
 * are not visible in this view; the code matches a spinlock-exit routine --
 * confirm.
 *
 * The saved MSR is read before the per-thread spinlock count is decremented;
 * the branch below is taken when the count reaches zero.
 */
750 msr = td->td_md.md_saved_msr;
751 td->td_md.md_spinlock_count--;
752 if (td->td_md.md_spinlock_count == 0) {
760 * Simple ddb(4) command/hack to view any SPR on the running CPU.
761 * Uses a trivial asm function to perform the mfspr, and rewrites the mfspr
762 * instruction each time.
763 * XXX: Since it uses code modification, it won't work if the kernel code pages are marked read-only.
/*
 * ddb "show spr" command: patch the instruction at get_spr() so that it
 * reads the requested SPR, call it, and print the result.
 *
 * addr (ddb command argument) is taken as the SPR number.
 *
 * NOTE(review): declarations of spr and the lines selected by the _CALL_ELF
 * conditional are only partly visible in this view.
 */
766 extern register_t get_spr(int);
769 DB_SHOW_COMMAND(spr, db_show_spr)
772 volatile uint32_t *p;
773 int sprno, saved_sprno;
778 saved_sprno = sprno = (intptr_t) addr;
/*
 * Swap the upper and lower 5-bit halves of the 10-bit SPR number.
 * NOTE(review): presumably this matches the split-field SPR encoding of the
 * mfspr instruction -- confirm against the ISA manual.
 */
779 sprno = ((sprno & 0x3e0) >> 5) | ((sprno & 0x1f) << 5);
780 p = (uint32_t *)(void *)&get_spr;
782 #if defined(_CALL_ELF) && _CALL_ELF == 2
783 /* Account for ELFv2 function prologue. */
/*
 * NOTE(review): this dereference presumably follows a function descriptor
 * to reach the real code address; the guarding conditional is not visible.
 */
786 p = *(volatile uint32_t * volatile *)p;
/* Replace bits 11..20 of the instruction word with the encoded SPR number. */
789 *p = (*p & ~0x001ff800) | (sprno << 11);
/* Sync the icache over the patched word before executing it. */
790 __syncicache(__DEVOLATILE(uint32_t *, p), cacheline_size);
791 spr = get_spr(sprno);
793 db_printf("SPR %d(%x): %lx\n", saved_sprno, saved_sprno,
/*
 * ddb "show frame" command: print the contents of a trap frame.
 *
 * When an address argument is given (have_addr) it is used as the trap frame
 * pointer; otherwise the current thread td_frame is printed.
 *
 * NOTE(review): most assignments to reg (one before each db_printf) and the
 * conditionals separating the AIM fields from the Book-E fields are not
 * visible in this view.
 */
797 DB_SHOW_COMMAND(frame, db_show_frame)
799 struct trapframe *tf;
803 tf = have_addr ? (struct trapframe *)addr : curthread->td_frame;
806 * Everything casts through long to simplify the printing.
807 * 'long' is native register size anyway.
809 db_printf("trap frame %p\n", tf);
/* One line per general-purpose register in tf->fixreg, in hex and decimal. */
810 for (i = 0; i < nitems(tf->fixreg); i++) {
812 db_printf(" r%d:\t%#lx (%ld)\n", i, reg, reg);
815 db_printf(" lr:\t%#lx\n", reg);
817 db_printf(" cr:\t%#lx\n", reg);
819 db_printf(" xer:\t%#lx\n", reg);
821 db_printf(" ctr:\t%#lx (%ld)\n", reg, reg);
823 db_printf(" srr0:\t%#lx\n", reg);
825 db_printf(" srr1:\t%#lx\n", reg);
827 db_printf(" exc:\t%#lx\n", reg);
829 db_printf(" dar:\t%#lx\n", reg);
/* AIM-specific field of the trap frame. */
831 reg = tf->cpu.aim.dsisr;
832 db_printf(" dsisr:\t%#lx\n", reg);
/* Book-E-specific fields of the trap frame. */
834 reg = tf->cpu.booke.esr;
835 db_printf(" esr:\t%#lx\n", reg);
836 reg = tf->cpu.booke.dbcr0;
837 db_printf(" dbcr0:\t%#lx\n", reg);
842 /* __stack_chk_fail_local() is called in secure-plt (32-bit). */
843 #if !defined(__powerpc64__)
844 extern void __stack_chk_fail(void);
845 void __stack_chk_fail_local(void);
848 __stack_chk_fail_local(void)