/* sys/arm/arm/elf_trampoline.c (FreeBSD releng/10.1) */
/*-
 * Copyright (c) 2005 Olivier Houchard.  All rights reserved.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 *
 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
 * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
 * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
 * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT,
 * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
 * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
 * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
 * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
 * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
 */

/*
 * Since we are compiled outside of the normal kernel build process, we
 * need to include opt_global.h manually.
 */
#include "opt_global.h"
#include "opt_kernname.h"

#include <sys/cdefs.h>
__FBSDID("$FreeBSD$");
#include <machine/asm.h>
#include <sys/param.h>
#include <sys/elf32.h>
#include <sys/inflate.h>
#include <machine/elf.h>
#include <machine/pte.h>
#include <machine/cpufunc.h>
#include <machine/armreg.h>

extern char kernel_start[];
extern char kernel_end[];

extern void *_end;

void _start(void);
void __start(void);
void __startC(void);

extern unsigned int cpufunc_id(void);
extern void armv6_idcache_wbinv_all(void);
extern void armv7_idcache_wbinv_all(void);
extern void do_call(void *, void *, void *, int);

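/*
 * Size of the fixed part of a gzip header (10 bytes).  Optional fields
 * (e.g. the original file name) may follow and are skipped in
 * inflate_kernel().
 */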
#define GZ_HEAD 0xa

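/*
 * Pick the I/D-cache (and, where present, L2 cache) write-back-and-invalidate
 * routine matching the CPU this trampoline was configured for.  CPUs without
 * a separate L2 helper get an empty cpu_l2cache_wbinv_all().
 */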
#if defined(CPU_ARM9)
#define cpu_idcache_wbinv_all   arm9_idcache_wbinv_all
extern void arm9_idcache_wbinv_all(void);
#elif defined(CPU_FA526) || defined(CPU_FA626TE)
#define cpu_idcache_wbinv_all   fa526_idcache_wbinv_all
extern void fa526_idcache_wbinv_all(void);
#elif defined(CPU_ARM9E)
#define cpu_idcache_wbinv_all   armv5_ec_idcache_wbinv_all
extern void armv5_ec_idcache_wbinv_all(void);
#elif defined(CPU_ARM10)
#define cpu_idcache_wbinv_all   arm10_idcache_wbinv_all
extern void arm10_idcache_wbinv_all(void);
#elif defined(CPU_ARM1136) || defined(CPU_ARM1176)
#define cpu_idcache_wbinv_all   armv6_idcache_wbinv_all
#elif defined(CPU_XSCALE_80200) || defined(CPU_XSCALE_80321) || \
  defined(CPU_XSCALE_PXA2X0) || defined(CPU_XSCALE_IXP425) ||   \
  defined(CPU_XSCALE_80219)
#define cpu_idcache_wbinv_all   xscale_cache_purgeID
extern void xscale_cache_purgeID(void);
#elif defined(CPU_XSCALE_81342)
#define cpu_idcache_wbinv_all   xscalec3_cache_purgeID
extern void xscalec3_cache_purgeID(void);
#elif defined(CPU_MV_PJ4B)
#if !defined(SOC_MV_ARMADAXP)
#define cpu_idcache_wbinv_all   armv6_idcache_wbinv_all
extern void armv6_idcache_wbinv_all(void);
#else
#define cpu_idcache_wbinv_all   armadaxp_idcache_wbinv_all
#endif
#endif /* CPU_MV_PJ4B */
#ifdef CPU_XSCALE_81342
#define cpu_l2cache_wbinv_all   xscalec3_l2cache_purge
extern void xscalec3_l2cache_purge(void);
#elif defined(SOC_MV_KIRKWOOD) || defined(SOC_MV_DISCOVERY)
#define cpu_l2cache_wbinv_all   sheeva_l2cache_wbinv_all
extern void sheeva_l2cache_wbinv_all(void);
#elif defined(CPU_CORTEXA) || defined(CPU_KRAIT)
#define cpu_idcache_wbinv_all   armv7_idcache_wbinv_all
#define cpu_l2cache_wbinv_all()
#else
#define cpu_l2cache_wbinv_all()
#endif

static void armadaxp_idcache_wbinv_all(void);

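/*
 * Cache geometry discovered by get_cachetype_cp15().  These mirror the
 * globals the kernel proper sets up during CPU identification.
 */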
int     arm_picache_size;
int     arm_picache_line_size;
int     arm_picache_ways;

int     arm_pdcache_size;       /* and unified */
int     arm_pdcache_line_size = 32;
int     arm_pdcache_ways;

int     arm_pcache_type;
int     arm_pcache_unified;

int     arm_dcache_align;
int     arm_dcache_align_mask;

u_int   arm_cache_level;
u_int   arm_cache_type[14];
u_int   arm_cache_loc;

/*
 * Additional cache information local to this file.  Log2 of some of the
 * above numbers.
 */
static int      arm_dcache_l2_nsets;
static int      arm_dcache_l2_assoc;
static int      arm_dcache_l2_linesize;

int block_userspace_access = 0;
extern int arm9_dcache_sets_inc;
extern int arm9_dcache_sets_max;
extern int arm9_dcache_index_max;
extern int arm9_dcache_index_inc;

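/*
 * The trampoline is not linked against libkern, so it carries its own
 * minimal memcpy() and bzero().  Note that the word-at-a-time path in
 * memcpy() is compiled out ("0 &&"), leaving a plain byte copy.
 */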
static __inline void *
memcpy(void *dst, const void *src, int len)
{
        const char *s = src;
        char *d = dst;

        while (len) {
                if (0 && len >= 4 && !((vm_offset_t)d & 3) &&
                    !((vm_offset_t)s & 3)) {
                        *(uint32_t *)d = *(uint32_t *)s;
                        s += 4;
                        d += 4;
                        len -= 4;
                } else {
                        *d++ = *s++;
                        len--;
                }
        }
        return (dst);
}

static __inline void
bzero(void *addr, int count)
{
        char *tmp = (char *)addr;

        while (count > 0) {
                if (count >= 4 && !((vm_offset_t)tmp & 3)) {
                        *(uint32_t *)tmp = 0;
                        tmp += 4;
                        count -= 4;
                } else {
                        *tmp = 0;
                        tmp++;
                        count--;
                }
        }
}

static void arm9_setup(void);

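/*
 * C entry point of the trampoline.  Derive the physical load address from
 * the current PC, copy the image from flash into RAM if that is where we
 * are executing, carve out a temporary stack (leaving room for the
 * decompressed kernel and L1 tables when KZIP is enabled), then turn the
 * MMU off and the caches, write buffer and branch prediction on before
 * calling __start().
 */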
void
_startC(void)
{
        int tmp1;
        unsigned int sp = ((unsigned int)&_end & ~3) + 4;
        unsigned int pc, kernphysaddr;

        /*
         * Figure out the physical address the kernel was loaded at.  This
         * assumes the entry point (this code right here) is in the first page,
         * which will always be the case for this trampoline code.
         */
        __asm __volatile("mov %0, pc\n"
            : "=r" (pc));
        kernphysaddr = pc & ~PAGE_MASK;

#if defined(FLASHADDR) && defined(PHYSADDR) && defined(LOADERRAMADDR)
        if ((FLASHADDR > LOADERRAMADDR && pc >= FLASHADDR) ||
            (FLASHADDR < LOADERRAMADDR && pc < LOADERRAMADDR)) {
                /*
                 * We're running from flash, so just copy the whole thing
                 * from flash to memory.
                 * This is far from optimal; we could do the relocation or
                 * the unzipping directly from flash to memory to avoid this
                 * needless copy, but it would require knowing the flash
                 * physical address.
                 */
                unsigned int target_addr;
                unsigned int tmp_sp;
                uint32_t src_addr = (uint32_t)&_start - PHYSADDR + FLASHADDR
                    + (pc - FLASHADDR - ((uint32_t)&_startC - PHYSADDR)) & 0xfffff000;

                target_addr = (unsigned int)&_start - PHYSADDR + LOADERRAMADDR;
                tmp_sp = target_addr + 0x100000 +
                    (unsigned int)&_end - (unsigned int)&_start;
                memcpy((char *)target_addr, (char *)src_addr,
                    (unsigned int)&_end - (unsigned int)&_start);
                /* Temporarily set the sp and jump to the new location. */
                __asm __volatile(
                    "mov sp, %1\n"
                    "mov pc, %0\n"
                    : : "r" (target_addr), "r" (tmp_sp));

        }
#endif
#ifdef KZIP
        sp += KERNSIZE + 0x100;
        sp &= ~(L1_TABLE_SIZE - 1);
        sp += 2 * L1_TABLE_SIZE;
#endif
        sp += 1024 * 1024; /* Should be enough for a stack */

        __asm __volatile("adr %0, 2f\n"
                         "bic %0, %0, #0xff000000\n"
                         "and %1, %1, #0xff000000\n"
                         "orr %0, %0, %1\n"
                         "mrc p15, 0, %1, c1, c0, 0\n"
                         "bic %1, %1, #1\n" /* Disable MMU */
                         "orr %1, %1, #(4 | 8)\n" /* Add DC enable,
                                                     WBUF enable */
                         "orr %1, %1, #0x1000\n" /* Add IC enable */
                         "orr %1, %1, #(0x800)\n" /* BPRD enable */

                         "mcr p15, 0, %1, c1, c0, 0\n"
                         "nop\n"
                         "nop\n"
                         "nop\n"
                         "mov pc, %0\n"
                         "2: nop\n"
                         "mov sp, %2\n"
                         : "=r" (tmp1), "+r" (kernphysaddr), "+r" (sp));
#ifndef KZIP
#ifdef CPU_ARM9
        /* So that idcache_wbinv works; */
        if ((cpufunc_id() & 0x0000f000) == 0x00009000)
                arm9_setup();
#endif
#endif
        __start();
}

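/*
 * Read the cache configuration out of CP15.  On ARMv7-style CPUs walk the
 * cache level ID register and record each level's size register; on older
 * CPUs decode the pre-v7 cache type register fields.  The results feed the
 * ARM9 cache maintenance setup below.
 */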
static void
get_cachetype_cp15(void)
{
        u_int ctype, isize, dsize, cpuid;
        u_int clevel, csize, i, sel;
        u_int multiplier;
        u_char type;

        __asm __volatile("mrc p15, 0, %0, c0, c0, 1"
                : "=r" (ctype));

        cpuid = cpufunc_id();
        /*
         * ...and thus spake the ARM ARM:
         *
         * If an <opcode2> value corresponding to an unimplemented or
         * reserved ID register is encountered, the System Control
         * processor returns the value of the main ID register.
         */
        if (ctype == cpuid)
                goto out;

        if (CPU_CT_FORMAT(ctype) == CPU_CT_ARMV7) {
                __asm __volatile("mrc p15, 1, %0, c0, c0, 1"
                    : "=r" (clevel));
                arm_cache_level = clevel;
                arm_cache_loc = CPU_CLIDR_LOC(arm_cache_level) + 1;
                i = 0;
                while ((type = (clevel & 0x7)) && i < 7) {
                        if (type == CACHE_DCACHE || type == CACHE_UNI_CACHE ||
                            type == CACHE_SEP_CACHE) {
                                sel = i << 1;
                                __asm __volatile("mcr p15, 2, %0, c0, c0, 0"
                                    : : "r" (sel));
                                __asm __volatile("mrc p15, 1, %0, c0, c0, 0"
                                    : "=r" (csize));
                                arm_cache_type[sel] = csize;
                        }
                        if (type == CACHE_ICACHE || type == CACHE_SEP_CACHE) {
                                sel = (i << 1) | 1;
                                __asm __volatile("mcr p15, 2, %0, c0, c0, 0"
                                    : : "r" (sel));
                                __asm __volatile("mrc p15, 1, %0, c0, c0, 0"
                                    : "=r" (csize));
                                arm_cache_type[sel] = csize;
                        }
                        i++;
                        clevel >>= 3;
                }
        } else {
                if ((ctype & CPU_CT_S) == 0)
                        arm_pcache_unified = 1;

                /*
                 * If you want to know how this code works, go read the ARM ARM.
                 */

                arm_pcache_type = CPU_CT_CTYPE(ctype);

                if (arm_pcache_unified == 0) {
                        isize = CPU_CT_ISIZE(ctype);
                        multiplier = (isize & CPU_CT_xSIZE_M) ? 3 : 2;
                        arm_picache_line_size = 1U << (CPU_CT_xSIZE_LEN(isize) + 3);
                        if (CPU_CT_xSIZE_ASSOC(isize) == 0) {
                                if (isize & CPU_CT_xSIZE_M)
                                        arm_picache_line_size = 0; /* not present */
                                else
                                        arm_picache_ways = 1;
                        } else {
                                arm_picache_ways = multiplier <<
                                    (CPU_CT_xSIZE_ASSOC(isize) - 1);
                        }
                        arm_picache_size = multiplier << (CPU_CT_xSIZE_SIZE(isize) + 8);
                }

                dsize = CPU_CT_DSIZE(ctype);
                multiplier = (dsize & CPU_CT_xSIZE_M) ? 3 : 2;
                arm_pdcache_line_size = 1U << (CPU_CT_xSIZE_LEN(dsize) + 3);
                if (CPU_CT_xSIZE_ASSOC(dsize) == 0) {
                        if (dsize & CPU_CT_xSIZE_M)
                                arm_pdcache_line_size = 0; /* not present */
                        else
                                arm_pdcache_ways = 1;
                } else {
                        arm_pdcache_ways = multiplier <<
                            (CPU_CT_xSIZE_ASSOC(dsize) - 1);
                }
                arm_pdcache_size = multiplier << (CPU_CT_xSIZE_SIZE(dsize) + 8);

                arm_dcache_align = arm_pdcache_line_size;

                arm_dcache_l2_assoc = CPU_CT_xSIZE_ASSOC(dsize) + multiplier - 2;
                arm_dcache_l2_linesize = CPU_CT_xSIZE_LEN(dsize) + 3;
                arm_dcache_l2_nsets = 6 + CPU_CT_xSIZE_SIZE(dsize) -
                    CPU_CT_xSIZE_ASSOC(dsize) - CPU_CT_xSIZE_LEN(dsize);

        out:
                arm_dcache_align_mask = arm_dcache_align - 1;
        }
}

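/*
 * Derive the set/index stepping values used by the ARM9 dcache loop
 * routines from the geometry discovered above.
 */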
static void
arm9_setup(void)
{

        get_cachetype_cp15();
        arm9_dcache_sets_inc = 1U << arm_dcache_l2_linesize;
        arm9_dcache_sets_max = (1U << (arm_dcache_l2_linesize +
            arm_dcache_l2_nsets)) - arm9_dcache_sets_inc;
        arm9_dcache_index_inc = 1U << (32 - arm_dcache_l2_assoc);
        arm9_dcache_index_max = 0U - arm9_dcache_index_inc;
}

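/*
 * Choose between the ARMv7 and the ARMv6 flush routine at run time,
 * keying off the ThumbEE field of ID_PFR0 to tell the newer cores apart
 * from the older ones.
 */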
static void
armadaxp_idcache_wbinv_all(void)
{
        uint32_t feat;

        __asm __volatile("mrc p15, 0, %0, c0, c1, 0" : "=r" (feat));
        if (feat & ARM_PFR0_THUMBEE_MASK)
                armv7_idcache_wbinv_all();
        else
                armv6_idcache_wbinv_all();

}
#ifdef KZIP
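/*
 * Support for booting a gzip-compressed kernel: buffers plus the small
 * malloc/free and I/O callbacks required by the kernel's inflate()
 * (sys/inflate.h).  Allocations come from a fixed static arena and are
 * only ever released all at once.
 */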
static  unsigned char *orig_input, *i_input, *i_output;

static u_int memcnt;            /* Memory allocated: blocks */
static size_t memtot;           /* Memory allocated: bytes */
/*
 * Library functions required by inflate().
 */

#define MEMSIZ 0x8000

/*
 * Allocate memory block.
 */
unsigned char *
kzipmalloc(int size)
{
        void *ptr;
        static u_char mem[MEMSIZ];

        if (memtot + size > MEMSIZ)
                return NULL;
        ptr = mem + memtot;
        memtot += size;
        memcnt++;
        return ptr;
}

/*
 * Free allocated memory block.
 */
void
kzipfree(void *ptr)
{
        memcnt--;
        if (!memcnt)
                memtot = 0;
}

void
putstr(char *dummy)
{
}

static int
input(void *dummy)
{
        if ((size_t)(i_input - orig_input) >= KERNCOMPSIZE) {
                return (GZ_EOF);
        }
        return *i_input++;
}

static int
output(void *dummy, unsigned char *ptr, unsigned long len)
{

        memcpy(i_output, ptr, len);
        i_output += len;
        return (0);
}

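/*
 * Decompress the gzip'ed kernel image found at "kernel" to "startaddr".
 * Input starts just past the 10-byte gzip header; if the FNAME/FCOMMENT
 * flag bits (0x18) are set in the FLG byte, one NUL-terminated string is
 * skipped as well (this assumes at most one of the two optional fields is
 * present).  Returns a word-aligned pointer just past the inflated image.
 */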
static void *
inflate_kernel(void *kernel, void *startaddr)
{
        struct inflate infl;
        unsigned char slide[GZ_WSIZE];

        orig_input = kernel;
        memcnt = memtot = 0;
        i_input = (unsigned char *)kernel + GZ_HEAD;
        if (((char *)kernel)[3] & 0x18) {
                while (*i_input)
                        i_input++;
                i_input++;
        }
        i_output = startaddr;
        bzero(&infl, sizeof(infl));
        infl.gz_input = input;
        infl.gz_output = output;
        infl.gz_slide = slide;
        inflate(&infl);
        return ((char *)(((vm_offset_t)i_output & ~3) + 4));
}

#endif

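/*
 * Load the ELF kernel found at kstart to its link address, relocated into
 * the physical window starting at curaddr.  When d == 0 this is only a
 * sizing pass: nothing is copied and the address just past the loaded
 * image (including the symbol tables) is returned.  When d != 0 the
 * PT_LOAD segments are copied into place, the symbol and string tables
 * are appended (each preceded by its size) with the
 * MAGIC_TRAMP_NUMBER/ssym/esym words stored at curaddr, the MMU is
 * switched off and control jumps to the kernel entry point, never to
 * return.
 */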
void *
load_kernel(unsigned int kstart, unsigned int curaddr, unsigned int func_end,
    int d)
{
        Elf32_Ehdr *eh;
        Elf32_Phdr phdr[64] /* XXX */, *php;
        Elf32_Shdr shdr[64] /* XXX */;
        int i, j;
        void *entry_point;
        int symtabindex = -1;
        int symstrindex = -1;
        vm_offset_t lastaddr = 0;
        Elf_Addr ssym = 0;
        Elf_Dyn *dp;

        eh = (Elf32_Ehdr *)kstart;
        ssym = 0;
        entry_point = (void*)eh->e_entry;
        memcpy(phdr, (void *)(kstart + eh->e_phoff),
            eh->e_phnum * sizeof(phdr[0]));

        /* Determine lastaddr. */
        for (i = 0; i < eh->e_phnum; i++) {
                if (lastaddr < (phdr[i].p_vaddr - KERNVIRTADDR + curaddr
                    + phdr[i].p_memsz))
                        lastaddr = phdr[i].p_vaddr - KERNVIRTADDR +
                            curaddr + phdr[i].p_memsz;
        }

        /* Save the symbol tables, as they're about to be scratched. */
        memcpy(shdr, (void *)(kstart + eh->e_shoff),
            sizeof(*shdr) * eh->e_shnum);
        if (eh->e_shnum * eh->e_shentsize != 0 &&
            eh->e_shoff != 0) {
                for (i = 0; i < eh->e_shnum; i++) {
                        if (shdr[i].sh_type == SHT_SYMTAB) {
                                for (j = 0; j < eh->e_phnum; j++) {
                                        if (phdr[j].p_type == PT_LOAD &&
                                            shdr[i].sh_offset >=
                                            phdr[j].p_offset &&
                                            (shdr[i].sh_offset +
                                             shdr[i].sh_size <=
                                             phdr[j].p_offset +
                                             phdr[j].p_filesz)) {
                                                shdr[i].sh_offset = 0;
                                                shdr[i].sh_size = 0;
                                                j = eh->e_phnum;
                                        }
                                }
                                if (shdr[i].sh_offset != 0 &&
                                    shdr[i].sh_size != 0) {
                                        symtabindex = i;
                                        symstrindex = shdr[i].sh_link;
                                }
                        }
                }
                func_end = roundup(func_end, sizeof(long));
                if (symtabindex >= 0 && symstrindex >= 0) {
                        ssym = lastaddr;
                        if (d) {
                                memcpy((void *)func_end, (void *)(
                                    shdr[symtabindex].sh_offset + kstart),
                                    shdr[symtabindex].sh_size);
                                memcpy((void *)(func_end +
                                    shdr[symtabindex].sh_size),
                                    (void *)(shdr[symstrindex].sh_offset +
                                    kstart), shdr[symstrindex].sh_size);
                        } else {
                                lastaddr += shdr[symtabindex].sh_size;
                                lastaddr = roundup(lastaddr,
                                    sizeof(shdr[symtabindex].sh_size));
                                lastaddr += sizeof(shdr[symstrindex].sh_size);
                                lastaddr += shdr[symstrindex].sh_size;
                                lastaddr = roundup(lastaddr,
                                    sizeof(shdr[symstrindex].sh_size));
                        }

                }
        }
        if (!d)
                return ((void *)lastaddr);

        j = eh->e_phnum;
        for (i = 0; i < j; i++) {
                volatile char c;

                if (phdr[i].p_type != PT_LOAD)
                        continue;
                memcpy((void *)(phdr[i].p_vaddr - KERNVIRTADDR + curaddr),
                    (void*)(kstart + phdr[i].p_offset), phdr[i].p_filesz);
                /* Clean space from oversized segments, e.g. bss. */
                if (phdr[i].p_filesz < phdr[i].p_memsz)
                        bzero((void *)(phdr[i].p_vaddr - KERNVIRTADDR +
                            curaddr + phdr[i].p_filesz), phdr[i].p_memsz -
                            phdr[i].p_filesz);
        }
        /* Now grab the symbol tables. */
        if (symtabindex >= 0 && symstrindex >= 0) {
                *(Elf_Size *)lastaddr =
                    shdr[symtabindex].sh_size;
                lastaddr += sizeof(shdr[symtabindex].sh_size);
                memcpy((void*)lastaddr,
                    (void *)func_end,
                    shdr[symtabindex].sh_size);
                lastaddr += shdr[symtabindex].sh_size;
                lastaddr = roundup(lastaddr,
                    sizeof(shdr[symtabindex].sh_size));
                *(Elf_Size *)lastaddr =
                    shdr[symstrindex].sh_size;
                lastaddr += sizeof(shdr[symstrindex].sh_size);
                memcpy((void*)lastaddr,
                    (void*)(func_end +
                            shdr[symtabindex].sh_size),
                    shdr[symstrindex].sh_size);
                lastaddr += shdr[symstrindex].sh_size;
                lastaddr = roundup(lastaddr,
                    sizeof(shdr[symstrindex].sh_size));
                *(Elf_Addr *)curaddr = MAGIC_TRAMP_NUMBER;
                *((Elf_Addr *)curaddr + 1) = ssym - curaddr + KERNVIRTADDR;
                *((Elf_Addr *)curaddr + 2) = lastaddr - curaddr + KERNVIRTADDR;
        } else
                *(Elf_Addr *)curaddr = 0;
        /* Invalidate the instruction cache. */
        __asm __volatile("mcr p15, 0, %0, c7, c5, 0\n"
                         "mcr p15, 0, %0, c7, c10, 4\n"
                         : : "r" (curaddr));
        __asm __volatile("mrc p15, 0, %0, c1, c0, 0\n"
            "bic %0, %0, #1\n" /* MMU_DISABLE */
            "mcr p15, 0, %0, c1, c0, 0\n"
            : "=r" (ssym));
        /* Jump to the entry point. */
        ((void(*)(void))(entry_point - KERNVIRTADDR + curaddr))();
        __asm __volatile(".globl func_end\n"
            "func_end:");

        /* NOTREACHED */
        return NULL;
}

extern char func_end[];


/*
 * Just define it instead of including the whole VM headers set.
 */
#define PMAP_DOMAIN_KERNEL      0
int __hack;
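/*
 * Build a minimal L1 translation table at pt_addr: 1MB section entries
 * mapping [physstart, physend) one-to-one (VA == PA), cacheable, kernel
 * read/write, in the kernel domain, plus one extra section at 0xfff00000
 * (pointing at physstart) for the KB9202 workaround below.  Then load the
 * TTB and domain access register and turn the MMU on.
 */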
static __inline void
setup_pagetables(unsigned int pt_addr, vm_paddr_t physstart, vm_paddr_t physend,
    int write_back)
{
        unsigned int *pd = (unsigned int *)pt_addr;
        vm_paddr_t addr;
        int domain = (DOMAIN_CLIENT << (PMAP_DOMAIN_KERNEL * 2)) | DOMAIN_CLIENT;
        int tmp;

        bzero(pd, L1_TABLE_SIZE);
        for (addr = physstart; addr < physend; addr += L1_S_SIZE) {
                pd[addr >> L1_S_SHIFT] = L1_TYPE_S|L1_S_C|L1_S_AP(AP_KRW)|
                    L1_S_DOM(PMAP_DOMAIN_KERNEL) | addr;
                if (write_back && 0)
                        pd[addr >> L1_S_SHIFT] |= L1_S_B;
        }
        /* XXX: See below */
        if (0xfff00000 < physstart || 0xfff00000 > physend)
                pd[0xfff00000 >> L1_S_SHIFT] = L1_TYPE_S|L1_S_AP(AP_KRW)|
                    L1_S_DOM(PMAP_DOMAIN_KERNEL)|physstart;
        __asm __volatile("mcr p15, 0, %1, c2, c0, 0\n" /* set TTB */
                         "mcr p15, 0, %1, c8, c7, 0\n" /* Flush TLB */
                         "mcr p15, 0, %2, c3, c0, 0\n" /* Set DAR */
                         "mrc p15, 0, %0, c1, c0, 0\n"
                         "orr %0, %0, #1\n" /* MMU_ENABLE */
                         "mcr p15, 0, %0, c1, c0, 0\n"
                         "mrc p15, 0, %0, c2, c0, 0\n" /* CPWAIT */
                         "mov r0, r0\n"
                         "sub pc, pc, #4\n" :
                         "=r" (tmp) : "r" (pd), "r" (domain));

        /*
         * XXX: This is the most stupid workaround I've ever written.
         * For some reason, the KB9202 won't boot the kernel unless
         * we access an address which is not in the
         * 0x20000000 - 0x20ffffff range. I hope I'll understand
         * what's going on later.
         */
        __hack = *(volatile int *)0xfffff21c;
}

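/*
 * Main trampoline logic.  If the embedded kernel image is gzip'ed
 * (0x1f 0x8b magic), map memory with the caches on and decompress it past
 * _end, then run load_kernel() in sizing mode to find out how much room
 * the loaded kernel will need.  Finally build a fresh set of page tables
 * and a stack above everything, copy the load_kernel()..func_end code
 * there, and do_call() into that copy so the final relocation of the
 * kernel cannot overwrite the code that is performing it.
 */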
void
__start(void)
{
        void *curaddr;
        void *dst, *altdst;
        char *kernel = (char *)&kernel_start;
        int sp;
        int pt_addr;

        __asm __volatile("mov %0, pc"  :
            "=r" (curaddr));
        curaddr = (void*)((unsigned int)curaddr & 0xfff00000);
#ifdef KZIP
        if (*kernel == 0x1f && kernel[1] == 0x8b) {
                pt_addr = (((int)&_end + KERNSIZE + 0x100) &
                    ~(L1_TABLE_SIZE - 1)) + L1_TABLE_SIZE;

#ifdef CPU_ARM9
                /* So that idcache_wbinv works; */
                if ((cpufunc_id() & 0x0000f000) == 0x00009000)
                        arm9_setup();
#endif
                setup_pagetables(pt_addr, (vm_paddr_t)curaddr,
                    (vm_paddr_t)curaddr + 0x10000000, 1);
                /* Gzipped kernel */
                dst = inflate_kernel(kernel, &_end);
                kernel = (char *)&_end;
                altdst = 4 + load_kernel((unsigned int)kernel,
                    (unsigned int)curaddr,
                    (unsigned int)&func_end + 800, 0);
                if (altdst > dst)
                        dst = altdst;

                /*
                 * Disable MMU.  Otherwise, the setup_pagetables call below
                 * might overwrite the L1 table we are currently using.
                 */
                cpu_idcache_wbinv_all();
                cpu_l2cache_wbinv_all();
                __asm __volatile("mrc p15, 0, %0, c1, c0, 0\n"
                  "bic %0, %0, #1\n" /* MMU_DISABLE */
                  "mcr p15, 0, %0, c1, c0, 0\n"
                  :"=r" (pt_addr));
        } else
#endif
                dst = 4 + load_kernel((unsigned int)&kernel_start,
                    (unsigned int)curaddr,
                    (unsigned int)&func_end, 0);
        dst = (void *)(((vm_offset_t)dst & ~3));
        pt_addr = ((unsigned int)dst & ~(L1_TABLE_SIZE - 1)) + L1_TABLE_SIZE;
        setup_pagetables(pt_addr, (vm_paddr_t)curaddr,
            (vm_paddr_t)curaddr + 0x10000000, 0);
        sp = pt_addr + L1_TABLE_SIZE + 8192;
        sp = sp & ~3;
        dst = (void *)(sp + 4);
        memcpy((void *)dst, (void *)&load_kernel, (unsigned int)&func_end -
            (unsigned int)&load_kernel + 800);
        do_call(dst, kernel, dst + (unsigned int)(&func_end) -
            (unsigned int)(&load_kernel) + 800, sp);
}

#ifdef __ARM_EABI__
/* We need to provide these functions but never call them. */
void __aeabi_unwind_cpp_pr0(void);
void __aeabi_unwind_cpp_pr1(void);
void __aeabi_unwind_cpp_pr2(void);

__strong_reference(__aeabi_unwind_cpp_pr0, __aeabi_unwind_cpp_pr1);
__strong_reference(__aeabi_unwind_cpp_pr0, __aeabi_unwind_cpp_pr2);
void
__aeabi_unwind_cpp_pr0(void)
{
}
#endif