2 * Copyright (c) 2005 Olivier Houchard. All rights reserved.
4 * Redistribution and use in source and binary forms, with or without
5 * modification, are permitted provided that the following conditions
7 * 1. Redistributions of source code must retain the above copyright
8 * notice, this list of conditions and the following disclaimer.
9 * 2. Redistributions in binary form must reproduce the above copyright
10 * notice, this list of conditions and the following disclaimer in the
11 * documentation and/or other materials provided with the distribution.
13 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
14 * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
15 * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
16 * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT,
17 * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
18 * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
19 * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
20 * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
21 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
22 * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
25 #include <sys/cdefs.h>
26 __FBSDID("$FreeBSD$");
27 #include <machine/asm.h>
28 #include <sys/param.h>
29 #include <sys/elf32.h>
30 #include <sys/inflate.h>
31 #include <machine/elf.h>
32 #include <machine/pte.h>
33 #include <machine/cpufunc.h>
34 #include <machine/armreg.h>
39 * Since we are compiled outside of the normal kernel build process, we
40 * need to include opt_global.h manually.
42 #include "opt_global.h"
43 #include "opt_kernname.h"
45 extern char kernel_start[];
46 extern char kernel_end[];
/*
 * Map cpu_idcache_wbinv_all onto the cache routine that matches whichever
 * CPU_* macro is defined; each branch below picks one implementation.
 */
56 #define cpu_idcache_wbinv_all arm7tdmi_cache_flushID
57 #elif defined(CPU_ARM8)
58 #define cpu_idcache_wbinv_all arm8_cache_purgeID
59 #elif defined(CPU_ARM9)
60 #define cpu_idcache_wbinv_all arm9_idcache_wbinv_all
61 #elif defined(CPU_ARM9E)
62 #define cpu_idcache_wbinv_all armv5_ec_idcache_wbinv_all
63 #elif defined(CPU_ARM10)
64 #define cpu_idcache_wbinv_all arm10_idcache_wbinv_all
65 #elif defined(CPU_SA110) || defined(CPU_SA1110) || defined(CPU_SA1100) || \
67 #define cpu_idcache_wbinv_all sa1_cache_purgeID
68 #elif defined(CPU_XSCALE_80200) || defined(CPU_XSCALE_80321) || \
69 defined(CPU_XSCALE_PXA2X0) || defined(CPU_XSCALE_IXP425) || \
70 defined(CPU_XSCALE_80219)
71 #define cpu_idcache_wbinv_all xscale_cache_purgeID
72 #elif defined(CPU_XSCALE_81342)
73 #define cpu_idcache_wbinv_all xscalec3_cache_purgeID
/*
 * cpu_l2cache_wbinv_all resolves to xscalec3_l2cache_purge when
 * CPU_XSCALE_81342 is defined; the other definition expands to nothing.
 */
75 #ifdef CPU_XSCALE_81342
76 #define cpu_l2cache_wbinv_all xscalec3_l2cache_purge
78 #define cpu_l2cache_wbinv_all()
83 int arm_picache_line_size;
86 int arm_pdcache_size; /* and unified */
87 int arm_pdcache_line_size = 32;
91 int arm_pcache_unified;
94 int arm_dcache_align_mask;
96 /* Additional cache information local to this file. Log2 of some of the
98 static int arm_dcache_l2_nsets;
99 static int arm_dcache_l2_assoc;
100 static int arm_dcache_l2_linesize;
103 int block_userspace_access = 0;
104 extern int arm9_dcache_sets_inc;
105 extern int arm9_dcache_sets_max;
106 extern int arm9_dcache_index_max;
107 extern int arm9_dcache_index_inc;
/*
 * File-local memcpy() taking an int length.
 *
 * NOTE(review): the condition below starts with "0 &&", so the aligned
 * 32-bit word copy guarded by it can never be taken as written.
 */
109 static __inline void *
110 memcpy(void *dst, const void *src, int len)
116 if (0 && len >= 4 && !((vm_offset_t)d & 3) &&
117 !((vm_offset_t)s & 3)) {
118 *(uint32_t *)d = *(uint32_t *)s;
/*
 * File-local bzero(): clears count bytes starting at addr.
 * When at least 4 bytes remain and the cursor is 4-byte aligned, a whole
 * 32-bit zero is stored instead of a single byte.
 */
131 bzero(void *addr, int count)
133 char *tmp = (char *)addr;
136 if (count >= 4 && !((vm_offset_t)tmp & 3)) {
137 *(uint32_t *)tmp = 0;
148 static void arm9_setup(void);
153 int physaddr = KERNPHYSADDR;
155 unsigned int sp = ((unsigned int)&_end & ~3) + 4;
156 #if defined(FLASHADDR) && defined(LOADERRAMADDR)
159 __asm __volatile("adr %0, _start\n"
161 if ((FLASHADDR > LOADERRAMADDR && pc >= FLASHADDR) ||
162 (FLASHADDR < LOADERRAMADDR && pc < LOADERRAMADDR)) {
164 * We're running from flash, so just copy the whole thing
165 * from flash to memory.
166 * This is far from optimal, we could do the relocation or
167 * the unzipping directly from flash to memory to avoid this
168 * needless copy, but it would require to know the flash
171 unsigned int target_addr;
174 target_addr = (unsigned int)&_start - PHYSADDR + LOADERRAMADDR;
175 tmp_sp = target_addr + 0x100000 +
176 (unsigned int)&_end - (unsigned int)&_start;
177 memcpy((char *)target_addr, (char *)pc,
178 (unsigned int)&_end - (unsigned int)&_start);
179 /* Temporarily set the sp and jump to the new location. */
183 : : "r" (target_addr), "r" (tmp_sp));
188 sp += KERNSIZE + 0x100;
189 sp &= ~(L1_TABLE_SIZE - 1);
190 sp += 2 * L1_TABLE_SIZE;
192 sp += 1024 * 1024; /* Should be enough for a stack */
194 __asm __volatile("adr %0, 2f\n"
195 "bic %0, %0, #0xff000000\n"
196 "and %1, %1, #0xff000000\n"
198 "mrc p15, 0, %1, c1, c0, 0\n"
199 "bic %1, %1, #1\n" /* Disable MMU */
200 "orr %1, %1, #(4 | 8)\n" /* Add DC enable,
202 "orr %1, %1, #0x1000\n" /* Add IC enable */
203 "orr %1, %1, #(0x800)\n" /* BPRD enable */
205 "mcr p15, 0, %1, c1, c0, 0\n"
212 : "=r" (tmp1), "+r" (physaddr), "+r" (sp));
215 /* So that idcache_wbinv works; */
216 if ((cpufunc_id() & 0x0000f000) == 0x00009000)
219 cpu_idcache_wbinv_all();
220 cpu_l2cache_wbinv_all();
228 u_int ctype, isize, dsize;
231 __asm __volatile("mrc p15, 0, %0, c0, c0, 1"
235 * ...and thus spake the ARM ARM:
237 * If an <opcode2> value corresponding to an unimplemented or
238 * reserved ID register is encountered, the System Control
239 * processor returns the value of the main ID register.
241 if (ctype == cpufunc_id())
244 if ((ctype & CPU_CT_S) == 0)
245 arm_pcache_unified = 1;
248 * If you want to know how this code works, go read the ARM ARM.
251 arm_pcache_type = CPU_CT_CTYPE(ctype);
252 if (arm_pcache_unified == 0) {
253 isize = CPU_CT_ISIZE(ctype);
254 multiplier = (isize & CPU_CT_xSIZE_M) ? 3 : 2;
255 arm_picache_line_size = 1U << (CPU_CT_xSIZE_LEN(isize) + 3);
256 if (CPU_CT_xSIZE_ASSOC(isize) == 0) {
257 if (isize & CPU_CT_xSIZE_M)
258 arm_picache_line_size = 0; /* not present */
260 arm_picache_ways = 1;
262 arm_picache_ways = multiplier <<
263 (CPU_CT_xSIZE_ASSOC(isize) - 1);
265 arm_picache_size = multiplier << (CPU_CT_xSIZE_SIZE(isize) + 8);
268 dsize = CPU_CT_DSIZE(ctype);
269 multiplier = (dsize & CPU_CT_xSIZE_M) ? 3 : 2;
270 arm_pdcache_line_size = 1U << (CPU_CT_xSIZE_LEN(dsize) + 3);
271 if (CPU_CT_xSIZE_ASSOC(dsize) == 0) {
272 if (dsize & CPU_CT_xSIZE_M)
273 arm_pdcache_line_size = 0; /* not present */
275 arm_pdcache_ways = 1;
277 arm_pdcache_ways = multiplier <<
278 (CPU_CT_xSIZE_ASSOC(dsize) - 1);
280 arm_pdcache_size = multiplier << (CPU_CT_xSIZE_SIZE(dsize) + 8);
282 arm_dcache_align = arm_pdcache_line_size;
284 arm_dcache_l2_assoc = CPU_CT_xSIZE_ASSOC(dsize) + multiplier - 2;
285 arm_dcache_l2_linesize = CPU_CT_xSIZE_LEN(dsize) + 3;
286 arm_dcache_l2_nsets = 6 + CPU_CT_xSIZE_SIZE(dsize) -
287 CPU_CT_xSIZE_ASSOC(dsize) - CPU_CT_xSIZE_LEN(dsize);
289 arm_dcache_align_mask = arm_dcache_align - 1;
296 get_cachetype_cp15();
297 arm9_dcache_sets_inc = 1U << arm_dcache_l2_linesize;
298 arm9_dcache_sets_max = (1U << (arm_dcache_l2_linesize +
299 arm_dcache_l2_nsets)) - arm9_dcache_sets_inc;
300 arm9_dcache_index_inc = 1U << (32 - arm_dcache_l2_assoc);
301 arm9_dcache_index_max = 0U - arm9_dcache_index_inc;
306 static unsigned char *orig_input, *i_input, *i_output;
309 static u_int memcnt; /* Memory allocated: blocks */
310 static size_t memtot; /* Memory allocated: bytes */
312 * Library functions required by inflate().
315 #define MEMSIZ 0x8000
318 * Allocate memory block.
324 static u_char mem[MEMSIZ];
326 if (memtot + size > MEMSIZ)
335 * Free allocated memory block.
353 if ((size_t)(i_input - orig_input) >= KERNCOMPSIZE) {
/*
 * Output callback installed as gz_output by inflate_kernel(): copies len
 * bytes from ptr to the current output cursor i_output.  The dummy
 * argument is not referenced in the visible code.
 */
360 output(void *dummy, unsigned char *ptr, unsigned long len)
364 memcpy(i_output, ptr, len);
/*
 * Set up the inflate state for the compressed image at kernel, with the
 * output cursor starting at startaddr.
 *
 * The input cursor begins GZ_HEAD bytes into the image, and byte 3 of the
 * image is tested against the mask 0x18 (the handling of that case is not
 * visible in this excerpt).  The zeroed state is wired to the input() and
 * output() callbacks and to an on-stack sliding window of GZ_WSIZE bytes.
 *
 * Returns the final output cursor rounded down to a 4-byte boundary, plus 4.
 */
370 inflate_kernel(void *kernel, void *startaddr)
373 char slide[GZ_WSIZE];
377 i_input = (char *)kernel + GZ_HEAD;
378 if (((char *)kernel)[3] & 0x18) {
383 i_output = startaddr;
384 bzero(&infl, sizeof(infl));
385 infl.gz_input = input;
386 infl.gz_output = output;
387 infl.gz_slide = slide;
389 return ((char *)(((vm_offset_t)i_output & ~3) + 4));
/*
 * Load the ELF kernel image found at kstart so that it runs at curaddr.
 *
 * kstart   - address of the in-memory ELF image; the Elf32_Ehdr is read
 *            from here.
 * curaddr  - load base: every p_vaddr is rebased as
 *            p_vaddr - KERNVIRTADDR + curaddr.
 * func_end - rounded up to sizeof(long) and used as scratch space where the
 *            symbol table and its string table are saved before the
 *            segments are copied.
 * (The rest of the parameter list is not visible in this excerpt.)
 *
 * The visible return statement hands back lastaddr.  The end of the visible
 * body clears the MMU enable bit and calls the rebased entry point.
 *
 * NOTE(review): phdr[] and shdr[] hold 64 entries each, while the copies
 * into them are sized by eh->e_phnum and eh->e_shnum; no bound check is
 * visible here -- confirm that larger header tables cannot reach this code.
 */
395 load_kernel(unsigned int kstart, unsigned int curaddr,unsigned int func_end,
399 Elf32_Phdr phdr[64] /* XXX */, *php;
400 Elf32_Shdr shdr[64] /* XXX */;
403 int symtabindex = -1;
404 int symstrindex = -1;
405 vm_offset_t lastaddr = 0;
406 Elf_Addr ssym = 0, esym = 0;
409 eh = (Elf32_Ehdr *)kstart;
411 entry_point = (void*)eh->e_entry;
412 memcpy(phdr, (void *)(kstart + eh->e_phoff ),
413 eh->e_phnum * sizeof(phdr[0]));
415 /* Determine lastaddr. */
416 for (i = 0; i < eh->e_phnum; i++) {
417 if (lastaddr < (phdr[i].p_vaddr - KERNVIRTADDR + curaddr
419 lastaddr = phdr[i].p_vaddr - KERNVIRTADDR +
420 curaddr + phdr[i].p_memsz;
423 /* Save the symbol tables, as they're about to be scratched. */
424 memcpy(shdr, (void *)(kstart + eh->e_shoff),
425 sizeof(*shdr) * eh->e_shnum);
426 if (eh->e_shnum * eh->e_shentsize != 0 &&
428 for (i = 0; i < eh->e_shnum; i++) {
429 if (shdr[i].sh_type == SHT_SYMTAB) {
430 for (j = 0; j < eh->e_phnum; j++) {
431 if (phdr[j].p_type == PT_LOAD &&
438 shdr[i].sh_offset = 0;
443 if (shdr[i].sh_offset != 0 &&
444 shdr[i].sh_size != 0) {
446 symstrindex = shdr[i].sh_link;
/*
 * Stash the symbol table at func_end and the string table directly
 * after it, then grow lastaddr by the space both will need.
 */
450 func_end = roundup(func_end, sizeof(long));
451 if (symtabindex >= 0 && symstrindex >= 0) {
454 memcpy((void *)func_end, (void *)(
455 shdr[symtabindex].sh_offset + kstart),
456 shdr[symtabindex].sh_size);
457 memcpy((void *)(func_end +
458 shdr[symtabindex].sh_size),
459 (void *)(shdr[symstrindex].sh_offset +
460 kstart), shdr[symstrindex].sh_size);
462 lastaddr += shdr[symtabindex].sh_size;
463 lastaddr = roundup(lastaddr,
464 sizeof(shdr[symtabindex].sh_size));
465 lastaddr += sizeof(shdr[symstrindex].sh_size);
466 lastaddr += shdr[symstrindex].sh_size;
467 lastaddr = roundup(lastaddr,
468 sizeof(shdr[symstrindex].sh_size));
474 return ((void *)lastaddr);
477 for (i = 0; i < j; i++) {
480 if (phdr[i].p_type != PT_LOAD)
482 memcpy((void *)(phdr[i].p_vaddr - KERNVIRTADDR + curaddr),
483 (void*)(kstart + phdr[i].p_offset), phdr[i].p_filesz);
484 /* Clean space from oversized segments, eg: bss. */
485 if (phdr[i].p_filesz < phdr[i].p_memsz)
486 bzero((void *)(phdr[i].p_vaddr - KERNVIRTADDR +
487 curaddr + phdr[i].p_filesz), phdr[i].p_memsz -
490 /* Now grab the symbol tables. */
491 if (symtabindex >= 0 && symstrindex >= 0) {
/* Each table is written as an Elf_Size length followed by its data. */
492 *(Elf_Size *)lastaddr =
493 shdr[symtabindex].sh_size;
494 lastaddr += sizeof(shdr[symtabindex].sh_size);
495 memcpy((void*)lastaddr,
497 shdr[symtabindex].sh_size);
498 lastaddr += shdr[symtabindex].sh_size;
499 lastaddr = roundup(lastaddr,
500 sizeof(shdr[symtabindex].sh_size));
501 *(Elf_Size *)lastaddr =
502 shdr[symstrindex].sh_size;
503 lastaddr += sizeof(shdr[symstrindex].sh_size);
504 memcpy((void*)lastaddr,
506 shdr[symtabindex].sh_size),
507 shdr[symstrindex].sh_size);
508 lastaddr += shdr[symstrindex].sh_size;
509 lastaddr = roundup(lastaddr,
510 sizeof(shdr[symstrindex].sh_size));
/*
 * Publish the symbol table bounds at the load base, tagged with
 * MAGIC_TRAMP_NUMBER; both addresses are converted back to
 * KERNVIRTADDR-relative values.
 */
511 *(Elf_Addr *)curaddr = MAGIC_TRAMP_NUMBER;
512 *((Elf_Addr *)curaddr + 1) = ssym - curaddr + KERNVIRTADDR;
513 *((Elf_Addr *)curaddr + 2) = lastaddr - curaddr + KERNVIRTADDR;
515 *(Elf_Addr *)curaddr = 0;
516 /* Invalidate the instruction cache. */
517 __asm __volatile("mcr p15, 0, %0, c7, c5, 0\n"
518 "mcr p15, 0, %0, c7, c10, 4\n"
520 __asm __volatile("mrc p15, 0, %0, c1, c0, 0\n"
521 "bic %0, %0, #1\n" /* MMU_ENABLE */
522 "mcr p15, 0, %0, c1, c0, 0\n"
524 /* Jump to the entry point. */
525 ((void(*)(void))(entry_point - KERNVIRTADDR + curaddr))();
/*
 * Emit the global func_end label; elsewhere in this file it bounds the
 * range copied together with load_kernel.
 */
526 __asm __volatile(".globl func_end\n"
531 extern char func_end[];
534 #define PMAP_DOMAIN_KERNEL 0 /*
535 * Just define it instead of including the
536 * whole VM headers set.
/*
 * Build a first-level translation table at pt_addr and set the MMU enable
 * bit.
 *
 * pt_addr   - address of the table; L1_TABLE_SIZE bytes are zeroed first.
 * physstart - first address to map.
 * physend   - end of the mapped range (the loop stops before it).
 * (The rest of the parameter list is not visible in this excerpt.)
 *
 * Every L1_S_SIZE step in [physstart, physend) gets a section entry that
 * maps the address onto itself with L1_S_C and AP_KRW permissions in
 * PMAP_DOMAIN_KERNEL.
 */
540 setup_pagetables(unsigned int pt_addr, vm_paddr_t physstart, vm_paddr_t physend,
543 unsigned int *pd = (unsigned int *)pt_addr;
545 int domain = (DOMAIN_CLIENT << (PMAP_DOMAIN_KERNEL * 2)) | DOMAIN_CLIENT;
548 bzero(pd, L1_TABLE_SIZE);
549 for (addr = physstart; addr < physend; addr += L1_S_SIZE) {
550 pd[addr >> L1_S_SHIFT] = L1_TYPE_S|L1_S_C|L1_S_AP(AP_KRW)|
551 L1_S_DOM(PMAP_DOMAIN_KERNEL) | addr;
553 pd[addr >> L1_S_SHIFT] |= L1_S_B;
/*
 * If 0xfff00000 lies outside the range mapped above, point its section
 * at physstart (without L1_S_C).
 * NOTE(review): when physend == 0xfff00000 neither the loop nor this
 * test covers that section -- confirm whether ">=" was intended.
 */
556 if (0xfff00000 < physstart || 0xfff00000 > physend)
557 pd[0xfff00000 >> L1_S_SHIFT] = L1_TYPE_S|L1_S_AP(AP_KRW)|
558 L1_S_DOM(PMAP_DOMAIN_KERNEL)|physstart;
/*
 * Hand the table (pd) and the domain value to CP15, then set bit 0 of the
 * control register.
 */
559 __asm __volatile("mcr p15, 0, %1, c2, c0, 0\n" /* set TTB */
560 "mcr p15, 0, %1, c8, c7, 0\n" /* Flush TTB */
561 "mcr p15, 0, %2, c3, c0, 0\n" /* Set DAR */
562 "mrc p15, 0, %0, c1, c0, 0\n"
563 "orr %0, %0, #1\n" /* MMU_ENABLE */
564 "mcr p15, 0, %0, c1, c0, 0\n"
565 "mrc p15, 0, %0, c2, c0, 0\n" /* CPWAIT */
568 "=r" (tmp) : "r" (pd), "r" (domain));
571 * XXX: This is the most stupid workaround I've ever written.
572 * For some reason, the KB9202 won't boot the kernel unless
573 * we access an address which is not in the
574 * 0x20000000 - 0x20ffffff range. I hope I'll understand
575 * what's going on later.
577 __hack = *(volatile int *)0xfffff21c;
585 char *kernel = (char *)&kernel_start;
589 __asm __volatile("mov %0, pc" :
591 curaddr = (void*)((unsigned int)curaddr & 0xfff00000);
593 if (*kernel == 0x1f && kernel[1] == 0x8b) {
594 pt_addr = (((int)&_end + KERNSIZE + 0x100) &
595 ~(L1_TABLE_SIZE - 1)) + L1_TABLE_SIZE;
598 /* So that idcache_wbinv works; */
599 if ((cpufunc_id() & 0x0000f000) == 0x00009000)
602 setup_pagetables(pt_addr, (vm_paddr_t)curaddr,
603 (vm_paddr_t)curaddr + 0x10000000, 1);
605 dst = inflate_kernel(kernel, &_end);
606 kernel = (char *)&_end;
607 altdst = 4 + load_kernel((unsigned int)kernel,
608 (unsigned int)curaddr,
609 (unsigned int)&func_end + 800 , 0);
612 cpu_idcache_wbinv_all();
613 cpu_l2cache_wbinv_all();
614 __asm __volatile("mrc p15, 0, %0, c1, c0, 0\n"
615 "bic %0, %0, #1\n" /* MMU_ENABLE */
616 "mcr p15, 0, %0, c1, c0, 0\n"
620 dst = 4 + load_kernel((unsigned int)&kernel_start,
621 (unsigned int)curaddr,
622 (unsigned int)&func_end, 0);
623 dst = (void *)(((vm_offset_t)dst & ~3));
624 pt_addr = ((unsigned int)dst &~(L1_TABLE_SIZE - 1)) + L1_TABLE_SIZE;
625 setup_pagetables(pt_addr, (vm_paddr_t)curaddr,
626 (vm_paddr_t)curaddr + 0x10000000, 0);
627 sp = pt_addr + L1_TABLE_SIZE + 8192;
629 dst = (void *)(sp + 4);
630 memcpy((void *)dst, (void *)&load_kernel, (unsigned int)&func_end -
631 (unsigned int)&load_kernel + 800);
632 do_call(dst, kernel, dst + (unsigned int)(&func_end) -
633 (unsigned int)(&load_kernel) + 800, sp);