2 * Copyright (c) 2005 Olivier Houchard. All rights reserved.
4 * Redistribution and use in source and binary forms, with or without
5 * modification, are permitted provided that the following conditions
7 * 1. Redistributions of source code must retain the above copyright
8 * notice, this list of conditions and the following disclaimer.
9 * 2. Redistributions in binary form must reproduce the above copyright
10 * notice, this list of conditions and the following disclaimer in the
11 * documentation and/or other materials provided with the distribution.
13 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
14 * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
15 * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
16 * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT,
17 * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
18 * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
19 * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
20 * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
21 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
22 * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
26 * Since we are compiled outside of the normal kernel build process, we
27 * need to include opt_global.h manually.
29 #include "opt_global.h"
30 #include "opt_kernname.h"
32 #include <sys/cdefs.h>
33 __FBSDID("$FreeBSD$");
34 #include <machine/asm.h>
35 #include <sys/param.h>
36 #include <sys/elf32.h>
37 #include <sys/inflate.h>
38 #include <machine/elf.h>
39 #include <machine/pte.h>
40 #include <machine/cpufunc.h>
41 #include <machine/armreg.h>
/*
 * Bounds of the kernel image carried with this trampoline.  kernel_start
 * is the address that is tested for the gzip magic bytes and then handed
 * to inflate_kernel() or load_kernel() further down in this file.
 */
43 extern char kernel_start[];
44 extern char kernel_end[];
/*
 * Helpers defined outside this file.  The value returned by cpu_ident()
 * is masked with 0x0000f000 and compared with 0x00009000 below.  The two
 * idcache routines are the ones armadaxp_idcache_wbinv_all() chooses
 * between.  do_call() is given the address of the relocated copy of
 * load_kernel, the kernel pointer, the matching end-of-code address in
 * the copy and the new stack pointer.
 */
52 extern unsigned int cpu_ident(void);
53 extern void armv6_idcache_wbinv_all(void);
54 extern void armv7_idcache_wbinv_all(void);
55 extern void do_call(void *, void *, void *, int);
/*
 * Bind cpu_idcache_wbinv_all to the instruction/data cache routine of the
 * CPU this trampoline is built for.  Each branch declares the routine it
 * names, except CPU_ARM1176, which reuses the armv6 declaration made
 * earlier in the file.
 */
60 #define cpu_idcache_wbinv_all arm9_idcache_wbinv_all
61 extern void arm9_idcache_wbinv_all(void);
62 #elif defined(CPU_FA526)
63 #define cpu_idcache_wbinv_all fa526_idcache_wbinv_all
64 extern void fa526_idcache_wbinv_all(void);
65 #elif defined(CPU_ARM9E)
66 #define cpu_idcache_wbinv_all armv5_ec_idcache_wbinv_all
67 extern void armv5_ec_idcache_wbinv_all(void);
68 #elif defined(CPU_ARM1176)
69 #define cpu_idcache_wbinv_all armv6_idcache_wbinv_all
70 #elif defined(CPU_XSCALE_PXA2X0) || defined(CPU_XSCALE_IXP425)
71 #define cpu_idcache_wbinv_all xscale_cache_purgeID
72 extern void xscale_cache_purgeID(void);
73 #elif defined(CPU_XSCALE_81342)
74 #define cpu_idcache_wbinv_all xscalec3_cache_purgeID
75 extern void xscalec3_cache_purgeID(void);
76 #elif defined(CPU_MV_PJ4B)
/* PJ4B builds without SOC_MV_ARMADAXP use the ARMv6 routine directly. */
77 #if !defined(SOC_MV_ARMADAXP)
78 #define cpu_idcache_wbinv_all armv6_idcache_wbinv_all
79 extern void armv6_idcache_wbinv_all(void);
/*
 * Keep this an object-like macro.  Call sites are written as
 * cpu_idcache_wbinv_all(), so the name has to expand to the function
 * designator and leave the parentheses in place to form the call.  A
 * function-like definition with this replacement text swallows the
 * parentheses and leaves a bare function name, i.e. an expression
 * statement that never runs the cache maintenance code.
 */
81 #define cpu_idcache_wbinv_all armadaxp_idcache_wbinv_all
83 #endif /* CPU_MV_PJ4B */
/*
 * Bind cpu_l2cache_wbinv_all to the L2 cache routine of the configured
 * platform.  Where no routine is named the macro is function-like with an
 * empty replacement, so cpu_l2cache_wbinv_all() compiles to nothing.
 */
84 #ifdef CPU_XSCALE_81342
85 #define cpu_l2cache_wbinv_all xscalec3_l2cache_purge
86 extern void xscalec3_l2cache_purge(void);
87 #elif defined(SOC_MV_KIRKWOOD) || defined(SOC_MV_DISCOVERY)
88 #define cpu_l2cache_wbinv_all sheeva_l2cache_wbinv_all
89 extern void sheeva_l2cache_wbinv_all(void);
90 #elif defined(CPU_CORTEXA) || defined(CPU_KRAIT)
/* This branch also selects the L1 routine for Cortex-A and Krait. */
91 #define cpu_idcache_wbinv_all armv7_idcache_wbinv_all
92 #define cpu_l2cache_wbinv_all()
94 #define cpu_l2cache_wbinv_all()
97 static void armadaxp_idcache_wbinv_all(void);
/*
 * Instruction cache line size and number of ways.  They are decoded from
 * CPU_CT_ISIZE(ctype) later in this file, and only when the cache is not
 * unified.
 */
100 int arm_picache_line_size;
101 int arm_picache_ways;
/*
 * Data (or unified) cache size, line size and number of ways, decoded
 * from CPU_CT_DSIZE(ctype).  The line size reads 32 until the decoding
 * code overwrites it.
 */
103 int arm_pdcache_size; /* and unified */
104 int arm_pdcache_line_size = 32;
105 int arm_pdcache_ways;
/* Set to 1 when the CPU_CT_S bit of the cache type value is clear. */
108 int arm_pcache_unified;
/* Copy of arm_pdcache_line_size, and that value minus one. */
110 int arm_dcache_align;
111 int arm_dcache_align_mask;
/*
 * Minimum line sizes in bytes.  They keep the default of 32 unless the
 * cache type value uses the ARMv7 format, in which case they are
 * recomputed from CPU_CT_DMINLINE and CPU_CT_IMINLINE; the idcache value
 * is the smaller of the other two.
 */
113 int arm_dcache_min_line_size = 32;
114 int arm_icache_min_line_size = 32;
115 int arm_idcache_min_line_size = 32;
/*
 * arm_cache_level holds the clevel value obtained on the ARMv7 path;
 * arm_cache_type[] receives one csize value per selector value sel.
 */
117 u_int arm_cache_level;
118 u_int arm_cache_type[14];
121 /* Additional cache information local to this file. Log2 of some of the cache geometry values is kept here. */
/*
 * Exponents rather than plain counts: they are used as shift amounts
 * when the arm9_dcache_* values below are derived, for example
 * sets_inc = 1 << linesize and index_inc = 1 << (32 - assoc).
 */
123 static int arm_dcache_l2_nsets;
124 static int arm_dcache_l2_assoc;
125 static int arm_dcache_l2_linesize;
/*
 * Set/index stepping values defined outside this file.  This file fills
 * them in right after the cache type has been probed.
 */
128 extern int arm9_dcache_sets_inc;
129 extern int arm9_dcache_sets_max;
130 extern int arm9_dcache_index_max;
131 extern int arm9_dcache_index_inc;
/*
 * Local memcpy() for the trampoline, taking the usual (dst, src, len)
 * arguments.
 */
133 static __inline void *
134 memcpy(void *dst, const void *src, int len)
/*
 * Word-sized copy for 4-byte aligned pointers.  The constant 0 at the
 * front of the condition short-circuits the && chain, so this branch is
 * never taken as written.
 */
140 if (0 && len >= 4 && !((vm_offset_t)d & 3) &&
141 !((vm_offset_t)s & 3)) {
142 *(uint32_t *)d = *(uint32_t *)s;
/* Local bzero(): writes zeroes starting at addr for count bytes. */
155 bzero(void *addr, int count)
157 char *tmp = (char *)addr;
/*
 * Store a whole 32-bit zero when at least four bytes are requested and
 * tmp is 4-byte aligned.
 */
160 if (count >= 4 && !((vm_offset_t)tmp & 3)) {
161 *(uint32_t *)tmp = 0;
172 static void arm9_setup(void);
/*
 * sp starts at the 4-byte boundary that follows _end and is moved
 * further up below before it is handed to the assembly sequence.
 */
178 unsigned int sp = ((unsigned int)&_end & ~3) + 4;
179 unsigned int pc, kernphysaddr;
182 * Figure out the physical address the kernel was loaded at. This
183 * assumes the entry point (this code right here) is in the first page,
184 * which will always be the case for this trampoline code.
186 __asm __volatile("mov %0, pc\n"
188 kernphysaddr = pc & ~PAGE_MASK;
190 #if defined(FLASHADDR) && defined(PHYSADDR) && defined(LOADERRAMADDR)
/*
 * Decide from pc whether execution is on the flash side of the
 * FLASHADDR / LOADERRAMADDR split.
 */
191 if ((FLASHADDR > LOADERRAMADDR && pc >= FLASHADDR) ||
192 (FLASHADDR < LOADERRAMADDR && pc < LOADERRAMADDR)) {
194 * We're running from flash, so just copy the whole thing
195 * from flash to memory.
196 * This is far from optimal, we could do the relocation or
197 * the unzipping directly from flash to memory to avoid this
198 * needless copy, but it would require to know the flash
201 unsigned int target_addr;
/*
 * NOTE(review): + binds tighter than &, so the 0xfffff000 mask below is
 * applied to the whole sum and not only to the parenthesised offset.
 * Confirm that this is the intent.
 */
203 uint32_t src_addr = (uint32_t)&_start - PHYSADDR + FLASHADDR
204 + (pc - FLASHADDR - ((uint32_t)&_startC - PHYSADDR)) & 0xfffff000;
/* RAM address that corresponds to the link address of _start. */
206 target_addr = (unsigned int)&_start - PHYSADDR + LOADERRAMADDR;
/* Temporary stack: 1 MB above the end of the copied image. */
207 tmp_sp = target_addr + 0x100000 +
208 (unsigned int)&_end - (unsigned int)&_start;
/* Copy everything between _start and _end from flash to RAM. */
209 memcpy((char *)target_addr, (char *)src_addr,
210 (unsigned int)&_end - (unsigned int)&_start);
211 /* Temporarily set the sp and jump to the new location. */
215 : : "r" (target_addr), "r" (tmp_sp));
/*
 * Skip KERNSIZE plus 0x100 bytes, align down to an L1 table boundary and
 * then move up by two L1 table sizes.
 */
220 sp += KERNSIZE + 0x100;
221 sp &= ~(L1_TABLE_SIZE - 1);
222 sp += 2 * L1_TABLE_SIZE;
224 sp += 1024 * 1024; /* Should be enough for a stack */
/*
 * The mrc / mcr pair below is a read-modify-write of the CP15 c1 control
 * register: bit 0 (MMU) is cleared and the enable bits named in the
 * inline notes are set.
 */
226 __asm __volatile("adr %0, 2f\n"
227 "bic %0, %0, #0xff000000\n"
228 "and %1, %1, #0xff000000\n"
230 "mrc p15, 0, %1, c1, c0, 0\n"
231 "bic %1, %1, #1\n" /* Disable MMU */
232 "orr %1, %1, #(4 | 8)\n" /* Add DC enable,
234 "orr %1, %1, #0x1000\n" /* Add IC enable */
235 "orr %1, %1, #(0x800)\n" /* BPRD enable */
237 "mcr p15, 0, %1, c1, c0, 0\n"
244 : "=r" (tmp1), "+r" (kernphysaddr), "+r" (sp));
247 /* So that idcache_wbinv works; */
248 if ((cpu_ident() & 0x0000f000) == 0x00009000)
258 u_int ctype, isize, dsize, cpuid;
259 u_int clevel, csize, i, sel;
/* mrc p15, 0, Rt, c0, c0, 1 reads the CP15 Cache Type Register. */
263 __asm __volatile("mrc p15, 0, %0, c0, c0, 1"
268 * ...and thus spake the ARM ARM:
270 * If an <opcode2> value corresponding to an unimplemented or
271 * reserved ID register is encountered, the System Control
272 * processor returns the value of the main ID register.
/*
 * ARMv7 register format.  The minimum line size fields hold log2 of the
 * line length in 32-bit words, hence the + 2 to obtain bytes.
 */
277 if (CPU_CT_FORMAT(ctype) == CPU_CT_ARMV7) {
278 /* Resolve minimal cache line sizes */
279 arm_dcache_min_line_size = 1 << (CPU_CT_DMINLINE(ctype) + 2);
280 arm_icache_min_line_size = 1 << (CPU_CT_IMINLINE(ctype) + 2);
/* Keep the smaller of the two minimum line sizes. */
281 arm_idcache_min_line_size =
282 (arm_dcache_min_line_size > arm_icache_min_line_size ?
283 arm_icache_min_line_size : arm_dcache_min_line_size);
/* mrc p15, 1, Rt, c0, c0, 1 reads the Cache Level ID Register. */
285 __asm __volatile("mrc p15, 1, %0, c0, c0, 1"
287 arm_cache_level = clevel;
288 arm_cache_loc = CPU_CLIDR_LOC(arm_cache_level) + 1;
/*
 * Look at the low three bits of clevel (the cache type of one level) and
 * stop on a zero type or after seven levels.
 */
290 while ((type = (clevel & 0x7)) && i < 7) {
291 if (type == CACHE_DCACHE || type == CACHE_UNI_CACHE ||
292 type == CACHE_SEP_CACHE) {
/*
 * mcr p15, 2, Rt, c0, c0, 0 writes the Cache Size Selection Register and
 * mrc p15, 1, Rt, c0, c0, 0 reads the Cache Size ID Register of the
 * cache that was selected.
 */
294 __asm __volatile("mcr p15, 2, %0, c0, c0, 0"
296 __asm __volatile("mrc p15, 1, %0, c0, c0, 0"
298 arm_cache_type[sel] = csize;
/* Same select-then-read sequence for the instruction cache side. */
300 if (type == CACHE_ICACHE || type == CACHE_SEP_CACHE) {
302 __asm __volatile("mcr p15, 2, %0, c0, c0, 0"
304 __asm __volatile("mrc p15, 1, %0, c0, c0, 0"
306 arm_cache_type[sel] = csize;
/* A clear S bit marks the cache as unified. */
312 if ((ctype & CPU_CT_S) == 0)
313 arm_pcache_unified = 1;
316 * If you want to know how this code works, go read the ARM ARM.
319 arm_pcache_type = CPU_CT_CTYPE(ctype);
/*
 * Instruction cache fields, decoded only for a non-unified cache.  The
 * multiplier is 3 when the M bit is set and 2 otherwise; the line size is
 * 1 << (LEN + 3) bytes.
 */
321 if (arm_pcache_unified == 0) {
322 isize = CPU_CT_ISIZE(ctype);
323 multiplier = (isize & CPU_CT_xSIZE_M) ? 3 : 2;
324 arm_picache_line_size = 1U << (CPU_CT_xSIZE_LEN(isize) + 3);
325 if (CPU_CT_xSIZE_ASSOC(isize) == 0) {
326 if (isize & CPU_CT_xSIZE_M)
327 arm_picache_line_size = 0; /* not present */
329 arm_picache_ways = 1;
331 arm_picache_ways = multiplier <<
332 (CPU_CT_xSIZE_ASSOC(isize) - 1);
334 arm_picache_size = multiplier << (CPU_CT_xSIZE_SIZE(isize) + 8);
/* The same decoding applied to the data (or unified) cache fields. */
337 dsize = CPU_CT_DSIZE(ctype);
338 multiplier = (dsize & CPU_CT_xSIZE_M) ? 3 : 2;
339 arm_pdcache_line_size = 1U << (CPU_CT_xSIZE_LEN(dsize) + 3);
340 if (CPU_CT_xSIZE_ASSOC(dsize) == 0) {
341 if (dsize & CPU_CT_xSIZE_M)
342 arm_pdcache_line_size = 0; /* not present */
344 arm_pdcache_ways = 1;
346 arm_pdcache_ways = multiplier <<
347 (CPU_CT_xSIZE_ASSOC(dsize) - 1);
349 arm_pdcache_size = multiplier << (CPU_CT_xSIZE_SIZE(dsize) + 8);
351 arm_dcache_align = arm_pdcache_line_size;
/*
 * Exponent forms of associativity, line size and set count, built by
 * adding and subtracting the raw field values.
 */
353 arm_dcache_l2_assoc = CPU_CT_xSIZE_ASSOC(dsize) + multiplier - 2;
354 arm_dcache_l2_linesize = CPU_CT_xSIZE_LEN(dsize) + 3;
355 arm_dcache_l2_nsets = 6 + CPU_CT_xSIZE_SIZE(dsize) -
356 CPU_CT_xSIZE_ASSOC(dsize) - CPU_CT_xSIZE_LEN(dsize);
359 arm_dcache_align_mask = arm_dcache_align - 1;
/*
 * Probe the cache geometry, then derive the stepping values from the
 * exponents it leaves in the arm_dcache_l2_* variables.
 */
367 get_cachetype_cp15();
/* One line, in bytes. */
368 arm9_dcache_sets_inc = 1U << arm_dcache_l2_linesize;
/* Line size times number of sets, less one line. */
369 arm9_dcache_sets_max = (1U << (arm_dcache_l2_linesize +
370 arm_dcache_l2_nsets)) - arm9_dcache_sets_inc;
/* The index step sits in the top bits of the 32-bit word. */
371 arm9_dcache_index_inc = 1U << (32 - arm_dcache_l2_assoc);
/* Unsigned wrap-around: 2^32 minus one index step. */
372 arm9_dcache_index_max = 0U - arm9_dcache_index_inc;
/*
 * Cache routine used for the Armada XP configuration.  It reads a CPU
 * feature register at run time and calls the ARMv7 routine when the
 * ThumbEE field is non-zero.
 */
376 armadaxp_idcache_wbinv_all(void)
/* mrc p15, 0, Rt, c0, c1, 0 reads Processor Feature Register 0. */
380 __asm __volatile("mrc p15, 0, %0, c0, c1, 0" : "=r" (feat));
381 if (feat & ARM_PFR0_THUMBEE_MASK)
382 armv7_idcache_wbinv_all();
/* NOTE(review): presumably the alternative to the ARMv7 call; confirm. */
384 armv6_idcache_wbinv_all();
/*
 * Decompression cursors.  i_input is set to the compressed data just
 * past the gzip header, i_output to the destination address, and the
 * distance of i_input from orig_input is compared with KERNCOMPSIZE.
 */
388 static unsigned char *orig_input, *i_input, *i_output;
391 static u_int memcnt; /* Memory allocated: blocks */
392 static size_t memtot; /* Memory allocated: bytes */
394 * Library functions required by inflate().
/* Size in bytes (32 KB) of the static arena declared below. */
397 #define MEMSIZ 0x8000
400 * Allocate memory block.
406 static u_char mem[MEMSIZ];
/* Check whether this request would run past the end of the arena. */
408 if (memtot + size > MEMSIZ)
417 * Free allocated memory block.
/* True once i_input is KERNCOMPSIZE or more bytes beyond orig_input. */
435 if ((size_t)(i_input - orig_input) >= KERNCOMPSIZE) {
/*
 * Output callback installed as infl.gz_output: copies len bytes from ptr
 * to the current output position i_output.  The dummy argument is not
 * used by the copy.
 */
442 output(void *dummy, unsigned char *ptr, unsigned long len)
446 memcpy(i_output, ptr, len);
/*
 * Set up the input, output and window of the inflate state for the gzip
 * image at kernel, with the output going to startaddr.
 */
452 inflate_kernel(void *kernel, void *startaddr)
/* Window buffer, kept on the stack. */
455 unsigned char slide[GZ_WSIZE];
/* Start reading GZ_HEAD bytes into the image, past the gzip header. */
459 i_input = (unsigned char *)kernel + GZ_HEAD;
/*
 * Byte 3 of a gzip header holds the flag bits; 0x18 covers the file name
 * (0x08) and comment (0x10) flags.
 */
460 if (((char *)kernel)[3] & 0x18) {
465 i_output = startaddr;
466 bzero(&infl, sizeof(infl));
467 infl.gz_input = input;
468 infl.gz_output = output;
469 infl.gz_slide = slide;
/* i_output rounded down to a multiple of 4, plus 4. */
471 return ((char *)(((vm_offset_t)i_output & ~3) + 4));
/*
 * Work on the ELF kernel image found at kstart.  Virtual addresses taken
 * from the program headers are rebased with - KERNVIRTADDR + curaddr.
 * func_end is the scratch address where the symbol and string tables are
 * parked while the segments are copied.
 */
477 load_kernel(unsigned int kstart, unsigned int curaddr,unsigned int func_end,
481 Elf32_Phdr phdr[64] /* XXX */, *php;
482 Elf32_Shdr shdr[64] /* XXX */;
485 int symtabindex = -1;
486 int symstrindex = -1;
487 vm_offset_t lastaddr = 0;
491 eh = (Elf32_Ehdr *)kstart;
493 entry_point = (void*)eh->e_entry;
/*
 * NOTE(review): e_phnum here and e_shnum further down are not checked
 * against the 64-entry phdr and shdr arrays before the copies.  Confirm
 * that the images handled here can never exceed that.
 */
494 memcpy(phdr, (void *)(kstart + eh->e_phoff ),
495 eh->e_phnum * sizeof(phdr[0]));
497 /* Determine lastaddr. */
498 for (i = 0; i < eh->e_phnum; i++) {
499 if (lastaddr < (phdr[i].p_vaddr - KERNVIRTADDR + curaddr
501 lastaddr = phdr[i].p_vaddr - KERNVIRTADDR +
502 curaddr + phdr[i].p_memsz;
505 /* Save the symbol tables, as they are about to be overwritten. */
506 memcpy(shdr, (void *)(kstart + eh->e_shoff),
507 sizeof(*shdr) * eh->e_shnum);
508 if (eh->e_shnum * eh->e_shentsize != 0 &&
/* Look for the symbol table section and the string table it links to. */
510 for (i = 0; i < eh->e_shnum; i++) {
511 if (shdr[i].sh_type == SHT_SYMTAB) {
512 for (j = 0; j < eh->e_phnum; j++) {
513 if (phdr[j].p_type == PT_LOAD &&
520 shdr[i].sh_offset = 0;
525 if (shdr[i].sh_offset != 0 &&
526 shdr[i].sh_size != 0) {
528 symstrindex = shdr[i].sh_link;
/* Align the scratch address before anything is stored there. */
532 func_end = roundup(func_end, sizeof(long));
533 if (symtabindex >= 0 && symstrindex >= 0) {
/*
 * Park the symbol table at func_end and the string table right after it.
 */
536 memcpy((void *)func_end, (void *)(
537 shdr[symtabindex].sh_offset + kstart),
538 shdr[symtabindex].sh_size);
539 memcpy((void *)(func_end +
540 shdr[symtabindex].sh_size),
541 (void *)(shdr[symstrindex].sh_offset +
542 kstart), shdr[symstrindex].sh_size);
/*
 * Account for the space both tables will take after the last segment,
 * with the same rounding that is applied when they are written below.
 */
544 lastaddr += shdr[symtabindex].sh_size;
545 lastaddr = roundup(lastaddr,
546 sizeof(shdr[symtabindex].sh_size));
547 lastaddr += sizeof(shdr[symstrindex].sh_size);
548 lastaddr += shdr[symstrindex].sh_size;
549 lastaddr = roundup(lastaddr,
550 sizeof(shdr[symstrindex].sh_size));
556 return ((void *)lastaddr);
559 for (i = 0; i < j; i++) {
562 if (phdr[i].p_type != PT_LOAD)
/* Copy the file-backed part of the segment to its rebased address. */
564 memcpy((void *)(phdr[i].p_vaddr - KERNVIRTADDR + curaddr),
565 (void*)(kstart + phdr[i].p_offset), phdr[i].p_filesz);
566 /* Clean space from oversized segments, eg: bss. */
567 if (phdr[i].p_filesz < phdr[i].p_memsz)
568 bzero((void *)(phdr[i].p_vaddr - KERNVIRTADDR +
569 curaddr + phdr[i].p_filesz), phdr[i].p_memsz -
572 /* Now grab the symbol tables. */
573 if (symtabindex >= 0 && symstrindex >= 0) {
/*
 * Layout written at lastaddr: size word, symbol table, padding, size
 * word, string table, padding.  The string table is read back from
 * func_end plus the symbol table size, where it was parked above.
 */
574 *(Elf_Size *)lastaddr =
575 shdr[symtabindex].sh_size;
576 lastaddr += sizeof(shdr[symtabindex].sh_size);
577 memcpy((void*)lastaddr,
579 shdr[symtabindex].sh_size);
580 lastaddr += shdr[symtabindex].sh_size;
581 lastaddr = roundup(lastaddr,
582 sizeof(shdr[symtabindex].sh_size));
583 *(Elf_Size *)lastaddr =
584 shdr[symstrindex].sh_size;
585 lastaddr += sizeof(shdr[symstrindex].sh_size);
586 memcpy((void*)lastaddr,
588 shdr[symtabindex].sh_size),
589 shdr[symstrindex].sh_size);
590 lastaddr += shdr[symstrindex].sh_size;
591 lastaddr = roundup(lastaddr,
592 sizeof(shdr[symstrindex].sh_size));
/*
 * Three words at curaddr: the magic number, then ssym and lastaddr
 * converted back to kernel virtual addresses.
 */
593 *(Elf_Addr *)curaddr = MAGIC_TRAMP_NUMBER;
594 *((Elf_Addr *)curaddr + 1) = ssym - curaddr + KERNVIRTADDR;
595 *((Elf_Addr *)curaddr + 2) = lastaddr - curaddr + KERNVIRTADDR;
597 *(Elf_Addr *)curaddr = 0;
598 /* Invalidate the instruction cache. */
/*
 * c7, c5, 0 invalidates the whole instruction cache; c7, c10, 4 is the
 * drain write buffer / data synchronization barrier operation.
 */
599 __asm __volatile("mcr p15, 0, %0, c7, c5, 0\n"
600 "mcr p15, 0, %0, c7, c10, 4\n"
/* Clear bit 0 of the c1 control register, which turns the MMU off. */
602 __asm __volatile("mrc p15, 0, %0, c1, c0, 0\n"
603 "bic %0, %0, #1\n" /* MMU_ENABLE */
604 "mcr p15, 0, %0, c1, c0, 0\n"
606 /* Jump to the entry point. */
607 ((void(*)(void))(entry_point - KERNVIRTADDR + curaddr))();
/*
 * Export the func_end label.  The relocation code later in this file
 * uses it as the end of the region that starts at load_kernel.
 */
608 __asm __volatile(".globl func_end\n"
615 extern char func_end[];
618 #define PMAP_DOMAIN_KERNEL 0 /*
619 * Just define it instead of including the
620 * whole VM headers set.
/*
 * Build a flat L1 section table at pt_addr in which every section from
 * physstart up to physend maps onto itself, then load it and enable the
 * MMU.
 */
624 setup_pagetables(unsigned int pt_addr, vm_paddr_t physstart, vm_paddr_t physend,
627 unsigned int *pd = (unsigned int *)pt_addr;
/* Value written to the domain access control register below. */
629 int domain = (DOMAIN_CLIENT << (PMAP_DOMAIN_KERNEL * 2)) | DOMAIN_CLIENT;
/* Start from an empty table. */
632 bzero(pd, L1_TABLE_SIZE);
/*
 * One cacheable, kernel read/write section entry per L1_S_SIZE step; the
 * entry index and the mapped address are both derived from addr.
 */
633 for (addr = physstart; addr < physend; addr += L1_S_SIZE) {
634 pd[addr >> L1_S_SHIFT] = L1_TYPE_S|L1_S_C|L1_S_AP(AP_KRW)|
635 L1_S_DOM(PMAP_DOMAIN_KERNEL) | addr;
/* Add the B bit to the entry that was just written. */
637 pd[addr >> L1_S_SHIFT] |= L1_S_B;
/*
 * When 0xfff00000 lies outside the range mapped above, point that
 * section at physstart, without the cache bit.
 */
640 if (0xfff00000 < physstart || 0xfff00000 > physend)
641 pd[0xfff00000 >> L1_S_SHIFT] = L1_TYPE_S|L1_S_AP(AP_KRW)|
642 L1_S_DOM(PMAP_DOMAIN_KERNEL)|physstart;
/*
 * Load the table base, invalidate the TLB, set the domain access control
 * register, then set bit 0 of the c1 control register to enable the MMU.
 */
643 __asm __volatile("mcr p15, 0, %1, c2, c0, 0\n" /* set TTB */
644 "mcr p15, 0, %1, c8, c7, 0\n" /* Flush TLB */
645 "mcr p15, 0, %2, c3, c0, 0\n" /* Set DAR */
646 "mrc p15, 0, %0, c1, c0, 0\n"
647 "orr %0, %0, #1\n" /* MMU_ENABLE */
648 "mcr p15, 0, %0, c1, c0, 0\n"
649 "mrc p15, 0, %0, c2, c0, 0\n" /* CPWAIT */
652 "=r" (tmp) : "r" (pd), "r" (domain));
655 * XXX: This is the most stupid workaround I've ever written.
656 * For some reason, the KB9202 won't boot the kernel unless
657 * we access an address which is not in the
658 * 0x20000000 - 0x20ffffff range. I hope I'll understand
659 * what's going on later.
661 __hack = *(volatile int *)0xfffff21c;
669 char *kernel = (char *)&kernel_start;
673 __asm __volatile("mov %0, pc" :
/* Round the address obtained from pc down to a 1 MB boundary. */
675 curaddr = (void*)((unsigned int)curaddr & 0xfff00000);
/*
 * 0x1f 0x8b is the gzip magic number.
 * NOTE(review): kernel is a plain char pointer, so the comparison with
 * 0x8b can only match where plain char is unsigned (the ARM ABI
 * default).  Confirm for every toolchain used.
 */
677 if (*kernel == 0x1f && kernel[1] == 0x8b) {
/*
 * Page table address: the L1_TABLE_SIZE boundary that follows
 * _end + KERNSIZE + 0x100.
 */
678 pt_addr = (((int)&_end + KERNSIZE + 0x100) &
679 ~(L1_TABLE_SIZE - 1)) + L1_TABLE_SIZE;
682 /* So that idcache_wbinv works; */
683 if ((cpu_ident() & 0x0000f000) == 0x00009000)
/* Map 256 MB starting at curaddr; the last argument is 1 here. */
686 setup_pagetables(pt_addr, (vm_paddr_t)curaddr,
687 (vm_paddr_t)curaddr + 0x10000000, 1);
/* Decompress to _end and continue with the decompressed image. */
689 dst = inflate_kernel(kernel, &_end);
690 kernel = (char *)&_end;
/*
 * The scratch address handed over is 800 bytes past func_end; the same
 * 800 bytes are added when the loader code is copied below.
 */
691 altdst = 4 + load_kernel((unsigned int)kernel,
692 (unsigned int)curaddr,
693 (unsigned int)&func_end + 800 , 0);
698 * Disable MMU. Otherwise, setup_pagetables call below
699 * might overwrite the L1 table we are currently using.
/* Run the cache routines before the MMU bit is cleared. */
701 cpu_idcache_wbinv_all();
702 cpu_l2cache_wbinv_all();
703 __asm __volatile("mrc p15, 0, %0, c1, c0, 0\n"
704 "bic %0, %0, #1\n" /* MMU_DISABLE */
705 "mcr p15, 0, %0, c1, c0, 0\n"
709 dst = 4 + load_kernel((unsigned int)&kernel_start,
710 (unsigned int)curaddr,
711 (unsigned int)&func_end, 0);
/* Round dst down to a multiple of 4. */
712 dst = (void *)(((vm_offset_t)dst & ~3));
/* Page table at the L1_TABLE_SIZE boundary that follows dst. */
713 pt_addr = ((unsigned int)dst &~(L1_TABLE_SIZE - 1)) + L1_TABLE_SIZE;
714 setup_pagetables(pt_addr, (vm_paddr_t)curaddr,
715 (vm_paddr_t)curaddr + 0x10000000, 0);
/* Stack pointer: one L1 table plus 8 KB above pt_addr. */
716 sp = pt_addr + L1_TABLE_SIZE + 8192;
/*
 * Copy the code from load_kernel up to func_end plus 800 bytes to just
 * above the new stack and enter the copy through do_call().
 */
718 dst = (void *)(sp + 4);
719 memcpy((void *)dst, (void *)&load_kernel, (unsigned int)&func_end -
720 (unsigned int)&load_kernel + 800);
721 do_call(dst, kernel, dst + (unsigned int)(&func_end) -
722 (unsigned int)(&load_kernel) + 800, sp);
/*
 * ARM EABI unwinder personality routines.  The symbols have to exist,
 * but nothing in this file calls them.
 */
725 /* We need to provide these functions but never call them */
726 void __aeabi_unwind_cpp_pr0(void);
727 void __aeabi_unwind_cpp_pr1(void);
728 void __aeabi_unwind_cpp_pr2(void);
/* pr1 and pr2 are emitted as aliases of pr0, so one body serves all. */
730 __strong_reference(__aeabi_unwind_cpp_pr0, __aeabi_unwind_cpp_pr1);
731 __strong_reference(__aeabi_unwind_cpp_pr0, __aeabi_unwind_cpp_pr2);
733 __aeabi_unwind_cpp_pr0(void)