/*-
 * Copyright (c) 2005 Olivier Houchard.  All rights reserved.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 *
 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
 * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
 * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
 * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT,
 * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
 * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
 * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
 * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
 * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
 */
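
/*
 * A small standalone loader that wraps the kernel proper: the kernel
 * image (gzip-compressed when KZIP is set) is embedded between the
 * kernel_start and kernel_end symbols.  The code below sets up the CPU,
 * inflates the image if needed, copies the ELF segments to their linked
 * load addresses and jumps to the kernel entry point.
 */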

/*
 * Since we are compiled outside of the normal kernel build process, we
 * need to include opt_global.h manually.
 */
#include "opt_global.h"
#include "opt_kernname.h"

#include <sys/cdefs.h>
__FBSDID("$FreeBSD$");
#include <machine/asm.h>
#include <sys/param.h>
#include <sys/elf32.h>
#include <sys/inflate.h>
#include <machine/elf.h>
#include <machine/pte.h>
#include <machine/cpufunc.h>
#include <machine/armreg.h>

extern char kernel_start[];
extern char kernel_end[];

extern void *_end;

void _start(void);
void __start(void);
void _startC(void);

extern unsigned int cpufunc_id(void);
extern void armv6_idcache_wbinv_all(void);
extern void armv7_idcache_wbinv_all(void);
extern void do_call(void *, void *, void *, int);

#define GZ_HEAD 0xa

#ifdef CPU_ARM7TDMI
#define cpu_idcache_wbinv_all   arm7tdmi_cache_flushID
extern void arm7tdmi_cache_flushID(void);
#elif defined(CPU_ARM8)
#define cpu_idcache_wbinv_all   arm8_cache_purgeID
extern void arm8_cache_purgeID(void);
#elif defined(CPU_ARM9)
#define cpu_idcache_wbinv_all   arm9_idcache_wbinv_all
extern void arm9_idcache_wbinv_all(void);
#elif defined(CPU_FA526) || defined(CPU_FA626TE)
#define cpu_idcache_wbinv_all   fa526_idcache_wbinv_all
extern void fa526_idcache_wbinv_all(void);
#elif defined(CPU_ARM9E)
#define cpu_idcache_wbinv_all   armv5_ec_idcache_wbinv_all
extern void armv5_ec_idcache_wbinv_all(void);
#elif defined(CPU_ARM10)
#define cpu_idcache_wbinv_all   arm10_idcache_wbinv_all
extern void arm10_idcache_wbinv_all(void);
#elif defined(CPU_ARM1136) || defined(CPU_ARM1176)
#define cpu_idcache_wbinv_all   armv6_idcache_wbinv_all
#elif defined(CPU_SA110) || defined(CPU_SA1110) || defined(CPU_SA1100) || \
    defined(CPU_IXP12X0)
#define cpu_idcache_wbinv_all   sa1_cache_purgeID
extern void sa1_cache_purgeID(void);
#elif defined(CPU_XSCALE_80200) || defined(CPU_XSCALE_80321) || \
  defined(CPU_XSCALE_PXA2X0) || defined(CPU_XSCALE_IXP425) ||   \
  defined(CPU_XSCALE_80219)
#define cpu_idcache_wbinv_all   xscale_cache_purgeID
extern void xscale_cache_purgeID(void);
#elif defined(CPU_XSCALE_81342)
#define cpu_idcache_wbinv_all   xscalec3_cache_purgeID
extern void xscalec3_cache_purgeID(void);
#elif defined(CPU_MV_PJ4B)
#if !defined(SOC_MV_ARMADAXP)
#define cpu_idcache_wbinv_all   armv6_idcache_wbinv_all
extern void armv6_idcache_wbinv_all(void);
#else
#define cpu_idcache_wbinv_all() armadaxp_idcache_wbinv_all()
#endif
#endif /* CPU_MV_PJ4B */
#ifdef CPU_XSCALE_81342
#define cpu_l2cache_wbinv_all   xscalec3_l2cache_purge
extern void xscalec3_l2cache_purge(void);
#elif defined(SOC_MV_KIRKWOOD) || defined(SOC_MV_DISCOVERY)
#define cpu_l2cache_wbinv_all   sheeva_l2cache_wbinv_all
extern void sheeva_l2cache_wbinv_all(void);
#elif defined(CPU_CORTEXA)
#define cpu_idcache_wbinv_all   armv7_idcache_wbinv_all
#define cpu_l2cache_wbinv_all()
#else
#define cpu_l2cache_wbinv_all()
#endif

static void armadaxp_idcache_wbinv_all(void);

int     arm_picache_size;
int     arm_picache_line_size;
int     arm_picache_ways;

int     arm_pdcache_size;       /* and unified */
int     arm_pdcache_line_size = 32;
int     arm_pdcache_ways;

int     arm_pcache_type;
int     arm_pcache_unified;

int     arm_dcache_align;
int     arm_dcache_align_mask;

u_int   arm_cache_level;
u_int   arm_cache_type[14];
u_int   arm_cache_loc;

/*
 * Additional cache information local to this file.  Log2 of some of the
 * above numbers.
 */
static int      arm_dcache_l2_nsets;
static int      arm_dcache_l2_assoc;
static int      arm_dcache_l2_linesize;

int block_userspace_access = 0;
extern int arm9_dcache_sets_inc;
extern int arm9_dcache_sets_max;
extern int arm9_dcache_index_max;
extern int arm9_dcache_index_inc;

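/*
 * We run freestanding, without libkern, so carry minimal local versions
 * of memcpy() and bzero() (see the "compiled outside of the normal
 * kernel build process" note above).
 */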
static __inline void *
memcpy(void *dst, const void *src, int len)
{
        const char *s = src;
        char *d = dst;

        while (len) {
                /* The word-at-a-time fast path is intentionally disabled. */
                if (0 && len >= 4 && !((vm_offset_t)d & 3) &&
                    !((vm_offset_t)s & 3)) {
                        *(uint32_t *)d = *(uint32_t *)s;
                        s += 4;
                        d += 4;
                        len -= 4;
                } else {
                        *d++ = *s++;
                        len--;
                }
        }
        return (dst);
}

static __inline void
bzero(void *addr, int count)
{
        char *tmp = (char *)addr;

        while (count > 0) {
                if (count >= 4 && !((vm_offset_t)tmp & 3)) {
                        *(uint32_t *)tmp = 0;
                        tmp += 4;
                        count -= 4;
                } else {
                        *tmp = 0;
                        tmp++;
                        count--;
                }
        }
}

static void arm9_setup(void);

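/*
 * First C code to run, called from the _start entry point: set up a
 * scratch stack past _end (leaving room for the inflated kernel and the
 * L1 tables when KZIP is set), relocate ourselves out of flash if that
 * is where we are running, enable the caches and write buffer with the
 * MMU off, then call __start().
 */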
void
_startC(void)
{
        int physaddr = KERNPHYSADDR;
        int tmp1;
        unsigned int sp = ((unsigned int)&_end & ~3) + 4;
#if defined(FLASHADDR) && defined(LOADERRAMADDR)
        unsigned int pc;

        __asm __volatile("mov %0, pc\n"
            : "=r" (pc));
        if ((FLASHADDR > LOADERRAMADDR && pc >= FLASHADDR) ||
            (FLASHADDR < LOADERRAMADDR && pc < LOADERRAMADDR)) {
                /*
                 * We're running from flash, so just copy the whole thing
                 * from flash to memory.
                 * This is far from optimal: we could do the relocation or
                 * the unzipping directly from flash to memory to avoid this
                 * needless copy, but it would require knowing the flash
                 * physical address.
                 */
                unsigned int target_addr;
                unsigned int tmp_sp;
                uint32_t src_addr = (uint32_t)&_start - PHYSADDR + FLASHADDR
                    + (pc - FLASHADDR - ((uint32_t)&_startC - PHYSADDR)) & 0xfffff000;

                target_addr = (unsigned int)&_start - PHYSADDR + LOADERRAMADDR;
                tmp_sp = target_addr + 0x100000 +
                    (unsigned int)&_end - (unsigned int)&_start;
                memcpy((char *)target_addr, (char *)src_addr,
                    (unsigned int)&_end - (unsigned int)&_start);
                /* Temporarily set the sp and jump to the new location. */
                __asm __volatile(
                    "mov sp, %1\n"
                    "mov pc, %0\n"
                    : : "r" (target_addr), "r" (tmp_sp));
        }
#endif
#ifdef KZIP
        sp += KERNSIZE + 0x100;
        sp &= ~(L1_TABLE_SIZE - 1);
        sp += 2 * L1_TABLE_SIZE;
#endif
        sp += 1024 * 1024; /* Should be enough for a stack */

        __asm __volatile("adr %0, 2f\n"
                         "bic %0, %0, #0xff000000\n"
                         "and %1, %1, #0xff000000\n"
                         "orr %0, %0, %1\n"
                         "mrc p15, 0, %1, c1, c0, 0\n"
                         "bic %1, %1, #1\n" /* Disable MMU */
                         "orr %1, %1, #(4 | 8)\n" /* Add DC enable,
                                                     WBUF enable */
                         "orr %1, %1, #0x1000\n" /* Add IC enable */
                         "orr %1, %1, #(0x800)\n" /* BPRD enable */

                         "mcr p15, 0, %1, c1, c0, 0\n"
                         "nop\n"
                         "nop\n"
                         "nop\n"
                         "mov pc, %0\n"
                         "2: nop\n"
                         "mov sp, %2\n"
                         : "=r" (tmp1), "+r" (physaddr), "+r" (sp));
#ifndef KZIP
#ifdef CPU_ARM9
        /* So that idcache_wbinv works. */
        if ((cpufunc_id() & 0x0000f000) == 0x00009000)
                arm9_setup();
#endif
#endif
        __start();
}

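/*
 * Read the cache geometry out of CP15: the cache type register on
 * pre-ARMv7 cores, or CLIDR/CCSIDR on ARMv7, and record the results in
 * the arm_*cache variables above.
 */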
static void
get_cachetype_cp15(void)
{
        u_int ctype, isize, dsize, cpuid;
        u_int clevel, csize, i, sel;
        u_int multiplier;
        u_char type;

        __asm __volatile("mrc p15, 0, %0, c0, c0, 1"
                : "=r" (ctype));

        cpuid = cpufunc_id();
        /*
         * ...and thus spake the ARM ARM:
         *
         * If an <opcode2> value corresponding to an unimplemented or
         * reserved ID register is encountered, the System Control
         * processor returns the value of the main ID register.
         */
        if (ctype == cpuid)
                goto out;

        if (CPU_CT_FORMAT(ctype) == CPU_CT_ARMV7) {
                __asm __volatile("mrc p15, 1, %0, c0, c0, 1"
                    : "=r" (clevel));
                arm_cache_level = clevel;
                arm_cache_loc = CPU_CLIDR_LOC(arm_cache_level) + 1;
                i = 0;
                while ((type = (clevel & 0x7)) && i < 7) {
                        if (type == CACHE_DCACHE || type == CACHE_UNI_CACHE ||
                            type == CACHE_SEP_CACHE) {
                                sel = i << 1;
                                __asm __volatile("mcr p15, 2, %0, c0, c0, 0"
                                    : : "r" (sel));
                                __asm __volatile("mrc p15, 1, %0, c0, c0, 0"
                                    : "=r" (csize));
                                arm_cache_type[sel] = csize;
                        }
                        if (type == CACHE_ICACHE || type == CACHE_SEP_CACHE) {
                                sel = (i << 1) | 1;
                                __asm __volatile("mcr p15, 2, %0, c0, c0, 0"
                                    : : "r" (sel));
                                __asm __volatile("mrc p15, 1, %0, c0, c0, 0"
                                    : "=r" (csize));
                                arm_cache_type[sel] = csize;
                        }
                        i++;
                        clevel >>= 3;
                }
        } else {
                if ((ctype & CPU_CT_S) == 0)
                        arm_pcache_unified = 1;

                /*
                 * If you want to know how this code works, go read the ARM ARM.
                 */

                arm_pcache_type = CPU_CT_CTYPE(ctype);

                if (arm_pcache_unified == 0) {
                        isize = CPU_CT_ISIZE(ctype);
                        multiplier = (isize & CPU_CT_xSIZE_M) ? 3 : 2;
                        arm_picache_line_size = 1U << (CPU_CT_xSIZE_LEN(isize) + 3);
                        if (CPU_CT_xSIZE_ASSOC(isize) == 0) {
                                if (isize & CPU_CT_xSIZE_M)
                                        arm_picache_line_size = 0; /* not present */
                                else
                                        arm_picache_ways = 1;
                        } else {
                                arm_picache_ways = multiplier <<
                                    (CPU_CT_xSIZE_ASSOC(isize) - 1);
                        }
                        arm_picache_size = multiplier << (CPU_CT_xSIZE_SIZE(isize) + 8);
                }

                dsize = CPU_CT_DSIZE(ctype);
                multiplier = (dsize & CPU_CT_xSIZE_M) ? 3 : 2;
                arm_pdcache_line_size = 1U << (CPU_CT_xSIZE_LEN(dsize) + 3);
                if (CPU_CT_xSIZE_ASSOC(dsize) == 0) {
                        if (dsize & CPU_CT_xSIZE_M)
                                arm_pdcache_line_size = 0; /* not present */
                        else
                                arm_pdcache_ways = 1;
                } else {
                        arm_pdcache_ways = multiplier <<
                            (CPU_CT_xSIZE_ASSOC(dsize) - 1);
                }
                arm_pdcache_size = multiplier << (CPU_CT_xSIZE_SIZE(dsize) + 8);

                arm_dcache_align = arm_pdcache_line_size;

                arm_dcache_l2_assoc = CPU_CT_xSIZE_ASSOC(dsize) + multiplier - 2;
                arm_dcache_l2_linesize = CPU_CT_xSIZE_LEN(dsize) + 3;
                arm_dcache_l2_nsets = 6 + CPU_CT_xSIZE_SIZE(dsize) -
                    CPU_CT_xSIZE_ASSOC(dsize) - CPU_CT_xSIZE_LEN(dsize);

        out:
                arm_dcache_align_mask = arm_dcache_align - 1;
        }
}

static void
arm9_setup(void)
{

        get_cachetype_cp15();
        arm9_dcache_sets_inc = 1U << arm_dcache_l2_linesize;
        arm9_dcache_sets_max = (1U << (arm_dcache_l2_linesize +
            arm_dcache_l2_nsets)) - arm9_dcache_sets_inc;
        arm9_dcache_index_inc = 1U << (32 - arm_dcache_l2_assoc);
        arm9_dcache_index_max = 0U - arm9_dcache_index_inc;
}

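/*
 * On Armada XP, pick the right flush routine by probing ID_PFR0: cores
 * that advertise ThumbEE support take the ARMv7 path, the rest the
 * ARMv6 one.
 */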
static void
armadaxp_idcache_wbinv_all(void)
{
        uint32_t feat;

        __asm __volatile("mrc p15, 0, %0, c0, c1, 0" : "=r" (feat));
        if (feat & ARM_PFR0_THUMBEE_MASK)
                armv7_idcache_wbinv_all();
        else
                armv6_idcache_wbinv_all();
}

#ifdef KZIP
static  unsigned char *orig_input, *i_input, *i_output;

static u_int memcnt;            /* Memory allocated: blocks */
static size_t memtot;           /* Memory allocated: bytes */

/*
 * Library functions required by inflate().
 */

#define MEMSIZ 0x8000
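
/*
 * kzipmalloc() is a simple bump allocator over a static MEMSIZ-byte
 * arena; kzipfree() only resets the arena once every outstanding block
 * has been freed.  That is all inflate() needs.
 */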

/*
 * Allocate memory block.
 */
unsigned char *
kzipmalloc(int size)
{
        void *ptr;
        static u_char mem[MEMSIZ];

        if (memtot + size > MEMSIZ)
                return NULL;
        ptr = mem + memtot;
        memtot += size;
        memcnt++;
        return ptr;
}

/*
 * Free allocated memory block.
 */
void
kzipfree(void *ptr)
{
        memcnt--;
        if (!memcnt)
                memtot = 0;
}

void
putstr(char *dummy)
{
}

static int
input(void *dummy)
{
        if ((size_t)(i_input - orig_input) >= KERNCOMPSIZE) {
                return (GZ_EOF);
        }
        return *i_input++;
}

static int
output(void *dummy, unsigned char *ptr, unsigned long len)
{

        memcpy(i_output, ptr, len);
        i_output += len;
        return (0);
}

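/*
 * Inflate the gzipped kernel found at 'kernel' to 'startaddr'.  GZ_HEAD
 * is the size of the fixed gzip header; byte 3 holds the FLG field, and
 * the 0x18 test (FNAME|FCOMMENT) skips a NUL-terminated name stored
 * after the header.  Returns the first word-aligned address past the
 * inflated output.
 */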
static void *
inflate_kernel(void *kernel, void *startaddr)
{
        struct inflate infl;
        unsigned char slide[GZ_WSIZE];

        orig_input = kernel;
        memcnt = memtot = 0;
        i_input = (unsigned char *)kernel + GZ_HEAD;
        if (((char *)kernel)[3] & 0x18) {
                while (*i_input)
                        i_input++;
                i_input++;
        }
        i_output = startaddr;
        bzero(&infl, sizeof(infl));
        infl.gz_input = input;
        infl.gz_output = output;
        infl.gz_slide = slide;
        inflate(&infl);
        return ((char *)(((vm_offset_t)i_output & ~3) + 4));
}

#endif

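/*
 * Relocate the ELF kernel at kstart to curaddr.  Called twice: with
 * d == 0 it only computes and returns the first free address past the
 * loaded image (segments plus symbol tables); with d != 0 it copies the
 * segments into place, appends the symbol tables and records them via
 * MAGIC_TRAMP_NUMBER at curaddr, then disables the MMU and jumps to the
 * kernel entry point, never returning.  func_end is scratch space used
 * to stage the symbol tables.
 */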
void *
load_kernel(unsigned int kstart, unsigned int curaddr, unsigned int func_end,
    int d)
{
        Elf32_Ehdr *eh;
        Elf32_Phdr phdr[64] /* XXX */, *php;
        Elf32_Shdr shdr[64] /* XXX */;
        int i, j;
        void *entry_point;
        int symtabindex = -1;
        int symstrindex = -1;
        vm_offset_t lastaddr = 0;
        Elf_Addr ssym = 0;
        Elf_Dyn *dp;

        eh = (Elf32_Ehdr *)kstart;
        ssym = 0;
        entry_point = (void*)eh->e_entry;
        memcpy(phdr, (void *)(kstart + eh->e_phoff),
            eh->e_phnum * sizeof(phdr[0]));

        /* Determine lastaddr. */
        for (i = 0; i < eh->e_phnum; i++) {
                if (lastaddr < (phdr[i].p_vaddr - KERNVIRTADDR + curaddr
                    + phdr[i].p_memsz))
                        lastaddr = phdr[i].p_vaddr - KERNVIRTADDR +
                            curaddr + phdr[i].p_memsz;
        }

        /* Save the symbol tables, as they're about to be scratched. */
        memcpy(shdr, (void *)(kstart + eh->e_shoff),
            sizeof(*shdr) * eh->e_shnum);
        if (eh->e_shnum * eh->e_shentsize != 0 &&
            eh->e_shoff != 0) {
                for (i = 0; i < eh->e_shnum; i++) {
                        if (shdr[i].sh_type == SHT_SYMTAB) {
                                /*
                                 * Ignore the symbol table if it lies inside
                                 * a loadable segment: it would be clobbered
                                 * when that segment is copied into place.
                                 */
                                for (j = 0; j < eh->e_phnum; j++) {
                                        if (phdr[j].p_type == PT_LOAD &&
                                            shdr[i].sh_offset >=
                                            phdr[j].p_offset &&
                                            (shdr[i].sh_offset +
                                             shdr[i].sh_size <=
                                             phdr[j].p_offset +
                                             phdr[j].p_filesz)) {
                                                shdr[i].sh_offset = 0;
                                                shdr[i].sh_size = 0;
                                                j = eh->e_phnum;
                                        }
                                }
                                if (shdr[i].sh_offset != 0 &&
                                    shdr[i].sh_size != 0) {
                                        symtabindex = i;
                                        symstrindex = shdr[i].sh_link;
                                }
                        }
                }
                func_end = roundup(func_end, sizeof(long));
                if (symtabindex >= 0 && symstrindex >= 0) {
                        ssym = lastaddr;
                        if (d) {
                                /* Stage the tables just past the trampoline. */
                                memcpy((void *)func_end, (void *)(
                                    shdr[symtabindex].sh_offset + kstart),
                                    shdr[symtabindex].sh_size);
                                memcpy((void *)(func_end +
                                    shdr[symtabindex].sh_size),
                                    (void *)(shdr[symstrindex].sh_offset +
                                    kstart), shdr[symstrindex].sh_size);
                        } else {
                                /* Just account for the space they need. */
                                lastaddr += shdr[symtabindex].sh_size;
                                lastaddr = roundup(lastaddr,
                                    sizeof(shdr[symtabindex].sh_size));
                                lastaddr += sizeof(shdr[symstrindex].sh_size);
                                lastaddr += shdr[symstrindex].sh_size;
                                lastaddr = roundup(lastaddr,
                                    sizeof(shdr[symstrindex].sh_size));
                        }
                }
        }
        if (!d)
                return ((void *)lastaddr);

        j = eh->e_phnum;
        for (i = 0; i < j; i++) {
                volatile char c;

                if (phdr[i].p_type != PT_LOAD)
                        continue;
                memcpy((void *)(phdr[i].p_vaddr - KERNVIRTADDR + curaddr),
                    (void*)(kstart + phdr[i].p_offset), phdr[i].p_filesz);
                /* Zero the space left over by oversized segments, e.g. bss. */
                if (phdr[i].p_filesz < phdr[i].p_memsz)
                        bzero((void *)(phdr[i].p_vaddr - KERNVIRTADDR +
                            curaddr + phdr[i].p_filesz), phdr[i].p_memsz -
                            phdr[i].p_filesz);
        }
        /* Now grab the symbol tables. */
        if (symtabindex >= 0 && symstrindex >= 0) {
                *(Elf_Size *)lastaddr =
                    shdr[symtabindex].sh_size;
                lastaddr += sizeof(shdr[symtabindex].sh_size);
                memcpy((void*)lastaddr,
                    (void *)func_end,
                    shdr[symtabindex].sh_size);
                lastaddr += shdr[symtabindex].sh_size;
                lastaddr = roundup(lastaddr,
                    sizeof(shdr[symtabindex].sh_size));
                *(Elf_Size *)lastaddr =
                    shdr[symstrindex].sh_size;
                lastaddr += sizeof(shdr[symstrindex].sh_size);
                memcpy((void*)lastaddr,
                    (void*)(func_end +
                            shdr[symtabindex].sh_size),
                    shdr[symstrindex].sh_size);
                lastaddr += shdr[symstrindex].sh_size;
                lastaddr = roundup(lastaddr,
                    sizeof(shdr[symstrindex].sh_size));
                *(Elf_Addr *)curaddr = MAGIC_TRAMP_NUMBER;
                *((Elf_Addr *)curaddr + 1) = ssym - curaddr + KERNVIRTADDR;
                *((Elf_Addr *)curaddr + 2) = lastaddr - curaddr + KERNVIRTADDR;
        } else
                *(Elf_Addr *)curaddr = 0;
        /* Invalidate the instruction cache and drain the write buffer. */
        __asm __volatile("mcr p15, 0, %0, c7, c5, 0\n"
                         "mcr p15, 0, %0, c7, c10, 4\n"
                         : : "r" (curaddr));
        __asm __volatile("mrc p15, 0, %0, c1, c0, 0\n"
            "bic %0, %0, #1\n" /* Clear the MMU enable bit */
            "mcr p15, 0, %0, c1, c0, 0\n"
            : "=r" (ssym));
        /* Jump to the entry point. */
        ((void(*)(void))(entry_point - KERNVIRTADDR + curaddr))();
        __asm __volatile(".globl func_end\n"
            "func_end:");

        /* NOTREACHED */
        return NULL;
}

extern char func_end[];

#define PMAP_DOMAIN_KERNEL      0 /*
                                    * Just define it instead of including the
                                    * whole VM headers set.
                                    */
int __hack;
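
/*
 * Build a minimal L1 table at pt_addr that maps [physstart, physend) 1:1
 * with 1MB sections, point the TTB at it and turn the MMU on.  One extra
 * section at 0xfff00000 backs the KB9202 workaround below.
 */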
static __inline void
setup_pagetables(unsigned int pt_addr, vm_paddr_t physstart, vm_paddr_t physend,
    int write_back)
{
        unsigned int *pd = (unsigned int *)pt_addr;
        vm_paddr_t addr;
        int domain = (DOMAIN_CLIENT << (PMAP_DOMAIN_KERNEL * 2)) | DOMAIN_CLIENT;
        int tmp;

        bzero(pd, L1_TABLE_SIZE);
        for (addr = physstart; addr < physend; addr += L1_S_SIZE) {
                pd[addr >> L1_S_SHIFT] = L1_TYPE_S|L1_S_C|L1_S_AP(AP_KRW)|
                    L1_S_DOM(PMAP_DOMAIN_KERNEL) | addr;
                /* The write-back bit is intentionally disabled. */
                if (write_back && 0)
                        pd[addr >> L1_S_SHIFT] |= L1_S_B;
        }
        /* XXX: See below */
        if (0xfff00000 < physstart || 0xfff00000 > physend)
                pd[0xfff00000 >> L1_S_SHIFT] = L1_TYPE_S|L1_S_AP(AP_KRW)|
                    L1_S_DOM(PMAP_DOMAIN_KERNEL)|physstart;
        __asm __volatile("mcr p15, 0, %1, c2, c0, 0\n" /* set TTB */
                         "mcr p15, 0, %1, c8, c7, 0\n" /* Flush TTB */
                         "mcr p15, 0, %2, c3, c0, 0\n" /* Set DAR */
                         "mrc p15, 0, %0, c1, c0, 0\n"
                         "orr %0, %0, #1\n" /* MMU_ENABLE */
                         "mcr p15, 0, %0, c1, c0, 0\n"
                         "mrc p15, 0, %0, c2, c0, 0\n" /* CPWAIT */
                         "mov r0, r0\n"
                         "sub pc, pc, #4\n" :
                         "=r" (tmp) : "r" (pd), "r" (domain));

        /*
         * XXX: This is the most stupid workaround I've ever written.
         * For some reason, the KB9202 won't boot the kernel unless
         * we access an address which is not in the
         * 0x20000000 - 0x20ffffff range. I hope I'll understand
         * what's going on later.
         */
        __hack = *(volatile int *)0xfffff21c;
}

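/*
 * Main trampoline logic, still running wherever the loader put us.
 * Size the kernel (inflating it first when it is gzipped), build fresh
 * page tables above it, then copy everything between load_kernel and
 * func_end past the kernel image and re-enter load_kernel through
 * do_call(), so the final relocation cannot overwrite the code
 * performing it.
 */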
void
__start(void)
{
        void *curaddr;
        void *dst, *altdst;
        char *kernel = (char *)&kernel_start;
        int sp;
        int pt_addr;

        __asm __volatile("mov %0, pc"  :
            "=r" (curaddr));
        curaddr = (void*)((unsigned int)curaddr & 0xfff00000);
#ifdef KZIP
        if (*kernel == 0x1f && kernel[1] == 0x8b) {
                pt_addr = (((int)&_end + KERNSIZE + 0x100) &
                    ~(L1_TABLE_SIZE - 1)) + L1_TABLE_SIZE;

#ifdef CPU_ARM9
                /* So that idcache_wbinv works. */
                if ((cpufunc_id() & 0x0000f000) == 0x00009000)
                        arm9_setup();
#endif
                setup_pagetables(pt_addr, (vm_paddr_t)curaddr,
                    (vm_paddr_t)curaddr + 0x10000000, 1);
                /* Gzipped kernel */
                dst = inflate_kernel(kernel, &_end);
                kernel = (char *)&_end;
                altdst = 4 + load_kernel((unsigned int)kernel,
                    (unsigned int)curaddr,
                    (unsigned int)&func_end + 800, 0);
                if (altdst > dst)
                        dst = altdst;

                /*
                 * Disable the MMU.  Otherwise, the setup_pagetables call
                 * below might overwrite the L1 table we are currently using.
                 */
                cpu_idcache_wbinv_all();
                cpu_l2cache_wbinv_all();
                __asm __volatile("mrc p15, 0, %0, c1, c0, 0\n"
                  "bic %0, %0, #1\n" /* MMU_DISABLE */
                  "mcr p15, 0, %0, c1, c0, 0\n"
                  :"=r" (pt_addr));
        } else
#endif
                dst = 4 + load_kernel((unsigned int)&kernel_start,
                    (unsigned int)curaddr,
                    (unsigned int)&func_end, 0);
        dst = (void *)(((vm_offset_t)dst & ~3));
        pt_addr = ((unsigned int)dst & ~(L1_TABLE_SIZE - 1)) + L1_TABLE_SIZE;
        setup_pagetables(pt_addr, (vm_paddr_t)curaddr,
            (vm_paddr_t)curaddr + 0x10000000, 0);
        sp = pt_addr + L1_TABLE_SIZE + 8192;
        sp = sp & ~3;
        dst = (void *)(sp + 4);
        memcpy((void *)dst, (void *)&load_kernel, (unsigned int)&func_end -
            (unsigned int)&load_kernel + 800);
        do_call(dst, kernel, dst + (unsigned int)(&func_end) -
            (unsigned int)(&load_kernel) + 800, sp);
}

#ifdef __ARM_EABI__
/* We need to provide these functions but never call them. */
void __aeabi_unwind_cpp_pr0(void);
void __aeabi_unwind_cpp_pr1(void);
void __aeabi_unwind_cpp_pr2(void);

__strong_reference(__aeabi_unwind_cpp_pr0, __aeabi_unwind_cpp_pr1);
__strong_reference(__aeabi_unwind_cpp_pr0, __aeabi_unwind_cpp_pr2);
void
__aeabi_unwind_cpp_pr0(void)
{
}
#endif