/*-
 * Copyright (c) 2000,2001 Doug Rabson
 * All rights reserved.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 *
 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
 * ARE DISCLAIMED.  IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
 * SUCH DAMAGE.
 */
#include "opt_compat.h"
#include "opt_msgbuf.h"

#include <sys/param.h>
#include <sys/systm.h>
#include <sys/eventhandler.h>
#include <sys/sysproto.h>
#include <sys/signalvar.h>
#include <sys/kernel.h>
#include <sys/malloc.h>
#include <sys/reboot.h>
#include <sys/vmmeter.h>
#include <sys/msgbuf.h>
#include <sys/sysctl.h>
#include <sys/linker.h>
#include <sys/random.h>
#include <sys/vnode.h>
#include <sys/ptrace.h>

#include <net/netisr.h>

#include <vm/vm_kern.h>
#include <vm/vm_page.h>
#include <vm/vm_map.h>
#include <vm/vm_extern.h>
#include <vm/vm_object.h>
#include <vm/vm_pager.h>

#include <machine/clock.h>
#include <machine/md_var.h>
#include <machine/reg.h>
#include <machine/fpu.h>
#include <machine/pal.h>
#include <machine/sal.h>
#include <machine/bootinfo.h>
#include <machine/mutex.h>
#include <machine/vmparam.h>
#include <machine/elf.h>
#include <machine/sigframe.h>
#include <machine/efi.h>
#include <machine/inst.h>
#include <machine/rse.h>
#include <machine/unwind.h>
extern void ia64_ski_init(void);

u_int64_t processor_frequency;
u_int64_t bus_frequency;
u_int64_t itc_frequency;

struct bootinfo bootinfo;

struct mtx sched_lock;

struct user *proc0uarea;
vm_offset_t proc0kstack;

extern u_int64_t kernel_text[], _end[];
extern u_int64_t _ia64_unwind_start[];
extern u_int64_t _ia64_unwind_end[];

u_int64_t ia64_port_base;

char machine[] = "ia64";
SYSCTL_STRING(_hw, HW_MACHINE, machine, CTLFLAG_RD, machine, 0, "");

static char cpu_model[128];
SYSCTL_STRING(_hw, HW_MODEL, model, CTLFLAG_RD, cpu_model, 0, "");

/* start and end of kernel symbol table */
void *ksym_start, *ksym_end;

int ia64_unaligned_print = 1;	/* warn about unaligned accesses */
int ia64_unaligned_fix = 1;	/* fix up unaligned accesses */
int ia64_unaligned_sigbus = 0;	/* don't SIGBUS on fixed-up accesses */
SYSCTL_INT(_machdep, CPU_UNALIGNED_PRINT, unaligned_print,
	CTLFLAG_RW, &ia64_unaligned_print, 0, "");

SYSCTL_INT(_machdep, CPU_UNALIGNED_FIX, unaligned_fix,
	CTLFLAG_RW, &ia64_unaligned_fix, 0, "");

SYSCTL_INT(_machdep, CPU_UNALIGNED_SIGBUS, unaligned_sigbus,
	CTLFLAG_RW, &ia64_unaligned_sigbus, 0, "");
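/*
 * Example usage (from userland): `sysctl machdep.unaligned_print=0'
 * silences the console warnings while leaving fix-ups enabled.
 */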
static void cpu_startup __P((void *));
SYSINIT(cpu, SI_SUB_CPU, SI_ORDER_FIRST, cpu_startup, NULL)

struct msgbuf *msgbufp = 0;

int bootverbose = 0, Maxmem = 0;
int totalphysmem;	/* total amount of physical memory in system */
int physmem;		/* physical memory used by the kernel + some rsvd */
int resvmem;		/* amount of memory reserved for PROM */
int unusedmem;		/* amount of memory for OS that we don't use */
int unknownmem;		/* amount of memory with an unknown use */
int ncpus;		/* number of cpus */

vm_offset_t phys_avail[10];
static int
sysctl_hw_physmem(SYSCTL_HANDLER_ARGS)
{
	int error = sysctl_handle_int(oidp, 0, ia64_ptob(physmem), req);
	return (error);
}

SYSCTL_PROC(_hw, HW_PHYSMEM, physmem, CTLTYPE_INT|CTLFLAG_RD,
	0, 0, sysctl_hw_physmem, "I", "");

static int
sysctl_hw_usermem(SYSCTL_HANDLER_ARGS)
{
	int error = sysctl_handle_int(oidp, 0,
	    ia64_ptob(physmem - cnt.v_wire_count), req);
	return (error);
}

SYSCTL_PROC(_hw, HW_USERMEM, usermem, CTLTYPE_INT|CTLFLAG_RD,
	0, 0, sysctl_hw_usermem, "I", "");

SYSCTL_INT(_hw, OID_AUTO, availpages, CTLFLAG_RD, &physmem, 0, "");
/* must be 2 less so 0 0 can signal end of chunks */
#define PHYS_AVAIL_ARRAY_END ((sizeof(phys_avail) / sizeof(vm_offset_t)) - 2)
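/*
 * phys_avail[] holds { start, end } physical address pairs and is
 * terminated by a pair of zeroes; e.g. (addresses invented for the
 * example) a machine with usable RAM at 0x200000-0x4000000 would have
 * phys_avail[0] == 0x200000, phys_avail[1] == 0x4000000 and
 * phys_avail[2] == 0.
 */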
static void identifycpu __P((void));

struct kva_md_info kmi;
static void
cpu_startup(void *dummy)
{
	register unsigned i;
	register caddr_t v;
	vm_offset_t minaddr, maxaddr;
	vm_size_t size = 0;
	vm_offset_t firstaddr;
	int physmem_est;

	/*
	 * Good {morning,afternoon,evening,night}.
	 */
	identifycpu();

	/* startrtclock(); */

	printf("real memory = %ld (%ldK bytes)\n",
	    ia64_ptob(Maxmem), ia64_ptob(Maxmem) / 1024);
	/*
	 * Display any holes after the first chunk of extended memory.
	 */
	if (bootverbose) {
		int indx;

		printf("Physical memory chunk(s):\n");
		for (indx = 0; phys_avail[indx + 1] != 0; indx += 2) {
			long size1 = phys_avail[indx + 1] - phys_avail[indx];

			printf("0x%08lx - 0x%08lx, %ld bytes (%ld pages)\n",
			    phys_avail[indx], phys_avail[indx + 1] - 1,
			    size1, size1 / PAGE_SIZE);
		}
	}
	vm_ksubmap_init(&kmi);

	/*
	 * Calculate callout wheel size
	 */
	for (callwheelsize = 1, callwheelbits = 0;
	     callwheelsize < ncallout;
	     callwheelsize <<= 1, ++callwheelbits)
		;
	callwheelmask = callwheelsize - 1;
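	/*
	 * Worked example: with, say, ncallout == 600 the loop above
	 * settles on callwheelsize == 1024 and callwheelbits == 10, so
	 * callwheelmask == 0x3ff and a timeout hashes to its wheel
	 * bucket with a simple (ticks & callwheelmask).
	 */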
	/*
	 * Allocate space for system data structures.
	 * The first available kernel virtual address is in "v".
	 * As pages of kernel virtual memory are allocated, "v" is incremented.
	 * As pages of memory are allocated and cleared,
	 * "firstaddr" is incremented.
	 * An index into the kernel page table corresponding to the
	 * virtual memory address maintained in "v" is kept in "mapaddr".
	 */

	/*
	 * Make two passes.  The first pass calculates how much memory is
	 * needed and allocates it.  The second pass assigns virtual
	 * addresses to the various data structures.
	 */
	firstaddr = 0;
again:
	v = (caddr_t)firstaddr;
#define	valloc(name, type, num) \
	(name) = (type *)v; v = (caddr_t)((name)+(num))
#define	valloclim(name, type, num, lim) \
	(name) = (type *)v; v = (caddr_t)((lim) = ((name)+(num)))
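	/*
	 * For illustration: valloc() only advances the cursor 'v', so
	 * valloc(callout, struct callout, ncallout) expands to
	 *	callout = (struct callout *)v; v = (caddr_t)(callout + ncallout);
	 * Nothing is mapped here - space is measured on the first pass
	 * and handed out on the second.
	 */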
	valloc(callout, struct callout, ncallout);
	valloc(callwheel, struct callout_tailq, callwheelsize);
	/*
	 * Discount the physical memory larger than the size of kernel_map
	 * to avoid eating up all of KVA space.
	 */
	if (kernel_map->first_free == NULL) {
		printf("Warning: no free entries in kernel_map.\n");
		physmem_est = physmem;
	} else
		physmem_est = min(physmem, btoc(kernel_map->max_offset -
		    kernel_map->min_offset));
	/*
	 * The nominal buffer size (and minimum KVA allocation) is BKVASIZE.
	 * For the first 64MB of ram nominally allocate sufficient buffers to
	 * cover 1/4 of our ram.  Beyond the first 64MB allocate additional
	 * buffers to cover 1/20 of our ram over 64MB.  When auto-sizing
	 * the buffer cache we limit the eventual kva reservation to
	 * maxbcache bytes.
	 *
	 * factor represents the 1/4 x ram conversion.
	 */
	if (nbuf == 0) {
		int factor = 4 * BKVASIZE / PAGE_SIZE;

		nbuf = 50;
		if (physmem_est > 1024)
			nbuf += min((physmem_est - 1024) / factor,
			    16384 / factor);
		if (physmem_est > 16384)
			nbuf += (physmem_est - 16384) * 2 / (factor * 5);
	}
	if (maxbcache && nbuf > maxbcache / BKVASIZE)
		nbuf = maxbcache / BKVASIZE;
	nswbuf = max(min(nbuf/4, 64), 16);
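	/*
	 * e.g. nbuf == 100 gives nswbuf == 25; the clamp keeps the
	 * swap/pager buffer pool between 16 and 64 entries no matter
	 * how nbuf was auto-sized above.
	 */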
	valloc(swbuf, struct buf, nswbuf);
	valloc(buf, struct buf, nbuf);
	/*
	 * End of first pass, size has been calculated so allocate memory
	 */
	if (firstaddr == 0) {
		size = (vm_size_t)(v - firstaddr);
		firstaddr = (vm_offset_t)kmem_alloc(kernel_map, round_page(size));
		if (firstaddr == 0)
			panic("startup: no room for tables");
		goto again;
	}

	/*
	 * End of second pass, addresses have been assigned
	 */
	if ((vm_size_t)(v - firstaddr) != size)
		panic("startup: table size inconsistency");
	clean_map = kmem_suballoc(kernel_map, &clean_sva, &clean_eva,
	    (nbuf*BKVASIZE) + (nswbuf*MAXPHYS) + pager_map_size);
	buffer_map = kmem_suballoc(clean_map, &buffer_sva, &buffer_eva,
	    (nbuf*BKVASIZE));
	buffer_map->system_map = 1;
	pager_map = kmem_suballoc(clean_map, &pager_sva, &pager_eva,
	    (nswbuf*MAXPHYS) + pager_map_size);
	pager_map->system_map = 1;
	exec_map = kmem_suballoc(kernel_map, &minaddr, &maxaddr,
	    (16*(ARG_MAX+(PAGE_SIZE*3))));
	/*
	 * Finally, allocate mbuf pool.
	 * XXX: Mbuf system machine-specific initializations should
	 * go here, if anywhere.
	 */
	/*
	 * Initialize callouts
	 */
	SLIST_INIT(&callfree);
	for (i = 0; i < ncallout; i++) {
		callout_init(&callout[i], 0);
		callout[i].c_flags = CALLOUT_LOCAL_ALLOC;
		SLIST_INSERT_HEAD(&callfree, &callout[i], c_links.sle);
	}

	for (i = 0; i < callwheelsize; i++) {
		TAILQ_INIT(&callwheel[i]);
	}

	mtx_init(&callout_lock, "callout", MTX_SPIN | MTX_RECURSE);
#if defined(USERCONFIG)
#if defined(USERCONFIG_BOOT)
	if (1)
#else
	if (boothowto & RB_CONFIG)
#endif
	{
		userconfig();
		cninit();	/* the preferred console may have changed */
	}
#endif
349 printf("avail memory = %ld (%ldK bytes)\n", ptoa(cnt.v_free_count),
350 ptoa(cnt.v_free_count) / 1024);
353 * Set up buffers, so they can be used to read disk labels.
356 vm_pager_bufferinit();
static void
identifycpu(void)
{
	char vendor[17];
	u_int64_t t;
	u_int64_t features;
	int number, revision, model, family, archrev;

	/*
	 * Assumes little-endian.
	 */
	*(u_int64_t *) &vendor[0] = ia64_get_cpuid(0);
	*(u_int64_t *) &vendor[8] = ia64_get_cpuid(1);
	vendor[16] = '\0';

	t = ia64_get_cpuid(3);
	number = (t >> 0) & 0xff;
	revision = (t >> 8) & 0xff;
	model = (t >> 16) & 0xff;
	family = (t >> 24) & 0xff;
	archrev = (t >> 32) & 0xff;

	if (family == 0x07)
		strcpy(cpu_model, "Itanium");
	else if (family == 0x1f)
		strcpy(cpu_model, "McKinley");
	else
		snprintf(cpu_model, sizeof(cpu_model), "Family=%d", family);
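	/*
	 * Illustration (made-up value): cpuid register 3 reading
	 * 0x0000000107000400 decodes as number 0x00, revision 0x04,
	 * model 0x00, family 0x07 ("Itanium") and archrev 0x01.
	 */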
	features = ia64_get_cpuid(4);

	printf("CPU: %s", cpu_model);
	if (processor_frequency)
		printf(" (%ld.%02ld-MHz)\n",
		    (processor_frequency + 4999) / 1000000,
		    ((processor_frequency + 4999) / 10000) % 100);
	else
		printf("\n");
	printf("  Origin = \"%s\"  Model = %d  Revision = %d\n",
	    vendor, model, revision);
	printf("  Features = 0x%b\n", (u_int32_t) features,
	    "\020"
	    "\001LB");
}
static void
add_kernel_unwind_tables(void *arg)
{
	/*
	 * Register the kernel's unwind table.
	 */
	ia64_add_unwind_table(kernel_text,
	    _ia64_unwind_start, _ia64_unwind_end);
}
SYSINIT(unwind, SI_SUB_KMEM, SI_ORDER_ANY, add_kernel_unwind_tables, 0);
static void
map_pal_code(void)
{
	EFI_MEMORY_DESCRIPTOR *md, *mdp;
	int mdcount, i;
	u_int64_t addr, psr;
	struct ia64_pte pte;

	if (!bootinfo.bi_systab)
		return;

	mdcount = bootinfo.bi_memmap_size / bootinfo.bi_memdesc_size;
	md = (EFI_MEMORY_DESCRIPTOR *) IA64_PHYS_TO_RR7(bootinfo.bi_memmap);

	for (i = 0, mdp = md; i < mdcount; i++,
	     mdp = NextMemoryDescriptor(mdp, bootinfo.bi_memdesc_size)) {
		if (mdp->Type == EfiPalCode)
			break;
	}
	if (i == mdcount) {
		printf("Can't find PAL Code\n");
		return;
	}

	/*
	 * We use a TR to map the first 256M of memory - this might
	 * cover the palcode too.
	 */
	if ((mdp->PhysicalStart & ~((1 << 28) - 1)) == 0) {
		printf("PAL Code is mapped by the kernel's TR\n");
		return;
	}

	addr = mdp->PhysicalStart & ~((1 << 28) - 1);

	bzero(&pte, sizeof(pte));
	pte.pte_p = 1;
	pte.pte_ma = PTE_MA_WB;
	pte.pte_a = 1;
	pte.pte_d = 1;
	pte.pte_pl = PTE_PL_KERN;
	pte.pte_ar = PTE_AR_RWX;
	pte.pte_ppn = addr >> 12;

	__asm __volatile("mov %0=psr;;" : "=r" (psr));
	__asm __volatile("rsm psr.ic|psr.i;; srlz.i;;");
	__asm __volatile("mov cr.ifa=%0" :: "r"(IA64_PHYS_TO_RR7(addr)));
	__asm __volatile("mov cr.itir=%0" :: "r"(28 << 2));
	__asm __volatile("srlz.i;;");
	__asm __volatile("itr.i itr[%0]=%1;;"
	    :: "r"(2), "r"(*(u_int64_t*)&pte));
	__asm __volatile("srlz.i;;");
	__asm __volatile("mov psr.l=%0;; srlz.i;;" :: "r" (psr));
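	/*
	 * A note on the itr insertion above: psr.ic and interrupts must
	 * be off while the translation register is written.  itir holds
	 * the page size in bits 7:2, so (28 << 2) requests a 2^28-byte
	 * (256M) translation, matching the alignment of 'addr'; itr
	 * slot 2 is assumed to be free for this use.
	 */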
}

static void
calculate_frequencies(void)
{
	struct ia64_sal_result sal;
	struct ia64_pal_result pal;

	sal = ia64_sal_entry(SAL_FREQ_BASE, 0, 0, 0, 0, 0, 0, 0);
	pal = ia64_call_pal_static(PAL_FREQ_RATIOS, 0, 0, 0);

	if (sal.sal_status == 0 && pal.pal_status == 0) {
		if (bootverbose) {
			printf("Platform clock frequency %ld Hz\n",
			    sal.sal_result[0]);
			printf("Processor ratio %ld/%ld, Bus ratio %ld/%ld, "
			    "ITC ratio %ld/%ld\n",
			    pal.pal_result[0] >> 32,
			    pal.pal_result[0] & ((1L << 32) - 1),
			    pal.pal_result[1] >> 32,
			    pal.pal_result[1] & ((1L << 32) - 1),
			    pal.pal_result[2] >> 32,
			    pal.pal_result[2] & ((1L << 32) - 1));
		}
		processor_frequency =
		    sal.sal_result[0] * (pal.pal_result[0] >> 32)
		    / (pal.pal_result[0] & ((1L << 32) - 1));
		bus_frequency =
		    sal.sal_result[0] * (pal.pal_result[1] >> 32)
		    / (pal.pal_result[1] & ((1L << 32) - 1));
		itc_frequency =
		    sal.sal_result[0] * (pal.pal_result[2] >> 32)
		    / (pal.pal_result[2] & ((1L << 32) - 1));
	}
}
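/*
 * Worked example (invented numbers): a SAL base clock of 133333333 Hz
 * with a PAL processor ratio of 6/1 yields processor_frequency ==
 * 799999998 Hz, which identifycpu()'s rounding then reports as an
 * 800.00-MHz part.
 */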
void
ia64_init(u_int64_t arg1, u_int64_t arg2)
{
	int phys_avail_cnt;
	vm_offset_t kernstart, kernend;
	vm_offset_t kernstartpfn, kernendpfn, pfn0, pfn1;
	char *p;
	EFI_MEMORY_DESCRIPTOR *md, *mdp;
	int mdcount, i;

	/* NO OUTPUT ALLOWED UNTIL FURTHER NOTICE */

	/*
	 * TODO: Disable interrupts, floating point etc.
	 * Maybe flush cache and tlb
	 */
	ia64_set_fpsr(IA64_FPSR_DEFAULT);

	/*
	 * TODO: Get critical system information (if possible, from the
	 * information provided by the boot program).
	 */

	/*
	 * Gross and disgusting hack. The bootinfo is written into
	 * memory at a fixed address.
	 */
	bootinfo = *(struct bootinfo *) 0xe000000000508000;
	if (bootinfo.bi_magic != BOOTINFO_MAGIC
	    || bootinfo.bi_version != 1) {
		bzero(&bootinfo, sizeof(bootinfo));
		bootinfo.bi_kernend = (vm_offset_t) round_page(_end);
	}
	/*
	 * Look for the I/O ports first - we need them for console
	 * output.
	 */
	mdcount = bootinfo.bi_memmap_size / bootinfo.bi_memdesc_size;
	md = (EFI_MEMORY_DESCRIPTOR *) IA64_PHYS_TO_RR7(bootinfo.bi_memmap);
	if (md == NULL || mdcount == 0) {
		static EFI_MEMORY_DESCRIPTOR ski_md[2];
		/*
		 * XXX hack for ski. In reality, the loader will probably ask
		 * EFI and pass the results to us. Possibly, we will call EFI
		 * directly.
		 */
		ski_md[0].Type = EfiConventionalMemory;
		ski_md[0].PhysicalStart = 2L*1024*1024;
		ski_md[0].VirtualStart = 0;
		ski_md[0].NumberOfPages = (64L*1024*1024)>>12;
		ski_md[0].Attribute = EFI_MEMORY_WB;

		ski_md[1].Type = EfiMemoryMappedIOPortSpace;
		ski_md[1].PhysicalStart = 0xffffc000000;
		ski_md[1].VirtualStart = 0;
		ski_md[1].NumberOfPages = (64L*1024*1024)>>12;
		ski_md[1].Attribute = EFI_MEMORY_UC;

		md = ski_md;
		mdcount = 2;
	}

	for (i = 0, mdp = md; i < mdcount; i++,
	     mdp = NextMemoryDescriptor(mdp, bootinfo.bi_memdesc_size)) {
		if (mdp->Type == EfiMemoryMappedIOPortSpace) {
			ia64_port_base = IA64_PHYS_TO_RR6(mdp->PhysicalStart);
		}
	}
	/*
	 * Look at arguments passed to us and compute boothowto.
	 */
	boothowto = bootinfo.bi_boothowto;

	/*
	 * Catch case of boot_verbose set in environment.
	 */
	if ((p = getenv("boot_verbose")) != NULL) {
		if (strcmp(p, "yes") == 0 || strcmp(p, "YES") == 0) {
			boothowto |= RB_VERBOSE;
		}
	}

	if (boothowto & RB_VERBOSE)
		bootverbose = 1;

	/*
	 * Initialize the console before we print anything out.
	 */
	cninit();

	/* OUTPUT NOW ALLOWED */

	/*
	 * Wire things up so we can call the firmware.
	 */
	map_pal_code();
	calculate_frequencies();
	/*
	 * Find the beginning and end of the kernel.
	 */
	kernstart = trunc_page(kernel_text);
	ksym_start = (void *)bootinfo.bi_symtab;
	ksym_end = (void *)bootinfo.bi_esymtab;
	kernend = (vm_offset_t)round_page(ksym_end);
	/* But if the bootstrap tells us otherwise, believe it! */
	if (bootinfo.bi_kernend)
		kernend = round_page(bootinfo.bi_kernend);
	preload_metadata = (caddr_t)bootinfo.bi_modulep;
	if (envmode == 1)
		kern_envp = static_env;
	else
		kern_envp = (caddr_t)bootinfo.bi_envp;

	/* Init basic tunables, including hz */
	init_param();

	p = getenv("kernelname");
	if (p)
		strncpy(kernelname, p, sizeof(kernelname) - 1);

	kernstartpfn = atop(IA64_RR_MASK(kernstart));
	kernendpfn = atop(IA64_RR_MASK(kernend));
	/*
	 * Size the memory regions and load phys_avail[] with the results.
	 */

	/*
	 * Find out how much memory is available, by looking at
	 * the memory descriptors.
	 */
#ifdef DEBUG
	printf("Memory descriptor count: %d\n", mdcount);
#endif

	phys_avail_cnt = 0;
	for (i = 0, mdp = md; i < mdcount; i++,
	     mdp = NextMemoryDescriptor(mdp, bootinfo.bi_memdesc_size)) {
#ifdef DEBUG
		printf("MD %d: type %d pa 0x%lx cnt 0x%lx\n", i,
		    mdp->Type, mdp->PhysicalStart, mdp->NumberOfPages);
#endif
		pfn0 = ia64_btop(round_page(mdp->PhysicalStart));
		pfn1 = ia64_btop(trunc_page(mdp->PhysicalStart
		    + mdp->NumberOfPages * 4096));
		if (pfn1 <= pfn0)
			continue;

		if (mdp->Type != EfiConventionalMemory) {
			resvmem += (pfn1 - pfn0);
			continue;
		}

		totalphysmem += (pfn1 - pfn0);

		/*
		 * We have a memory descriptor available for system
		 * software use.  We must determine if this cluster
		 * holds the kernel.
		 */
		physmem += (pfn1 - pfn0);
		if (pfn0 <= kernendpfn && kernstartpfn <= pfn1) {
			/*
			 * Must compute the location of the kernel
			 * within the segment.
			 */
#ifdef DEBUG
			printf("Descriptor %d contains kernel\n", i);
#endif
			if (pfn0 < kernstartpfn) {
				/*
				 * There is a chunk before the kernel.
				 */
#ifdef DEBUG
				printf("Loading chunk before kernel: "
				    "0x%lx / 0x%lx\n", pfn0, kernstartpfn);
#endif
				phys_avail[phys_avail_cnt] = ia64_ptob(pfn0);
				phys_avail[phys_avail_cnt+1] = ia64_ptob(kernstartpfn);
				phys_avail_cnt += 2;
			}
			if (kernendpfn < pfn1) {
				/*
				 * There is a chunk after the kernel.
				 */
#ifdef DEBUG
				printf("Loading chunk after kernel: "
				    "0x%lx / 0x%lx\n", kernendpfn, pfn1);
#endif
				phys_avail[phys_avail_cnt] = ia64_ptob(kernendpfn);
				phys_avail[phys_avail_cnt+1] = ia64_ptob(pfn1);
				phys_avail_cnt += 2;
			}
		} else {
			/*
			 * Just load this cluster as one chunk.
			 */
#ifdef DEBUG
			printf("Loading descriptor %d: 0x%lx / 0x%lx\n", i,
			    pfn0, pfn1);
#endif
			phys_avail[phys_avail_cnt] = ia64_ptob(pfn0);
			phys_avail[phys_avail_cnt+1] = ia64_ptob(pfn1);
			phys_avail_cnt += 2;
		}
	}
	phys_avail[phys_avail_cnt] = 0;
	/*
	 * Initialize error message buffer (at end of core).
	 */
	{
		size_t sz = round_page(MSGBUF_SIZE);
		int i = phys_avail_cnt - 2;

		/* shrink so that it'll fit in the last segment */
		if (phys_avail[i+1] - phys_avail[i] < sz)
			sz = phys_avail[i+1] - phys_avail[i];

		phys_avail[i+1] -= sz;
		msgbufp = (struct msgbuf*) IA64_PHYS_TO_RR7(phys_avail[i+1]);

		msgbufinit(msgbufp, sz);

		/* Remove the last segment if it now has no pages. */
		if (phys_avail[i] == phys_avail[i+1])
			phys_avail[i] = 0;

		/* warn if the message buffer had to be shrunk */
		if (sz != round_page(MSGBUF_SIZE))
			printf("WARNING: %ld bytes not available for msgbuf "
			    "in last cluster (%ld used)\n",
			    round_page(MSGBUF_SIZE), sz);
	}
	/*
	 * Init mapping for u page(s) for proc 0
	 */
	proc0uarea = (struct user *)pmap_steal_memory(UAREA_PAGES * PAGE_SIZE);
	proc0kstack = (vm_offset_t)kstack;
	proc0.p_uarea = proc0uarea;
	thread0 = &proc0.p_thread;
	thread0->td_kstack = proc0kstack;
	thread0->td_pcb = (struct pcb *)
	    (thread0->td_kstack + KSTACK_PAGES * PAGE_SIZE) - 1;
	/*
	 * Setup the global data for the bootstrap cpu.
	 */
	{
		/* This is not a 'struct user' */
		size_t sz = round_page(KSTACK_PAGES * PAGE_SIZE);
		struct globaldata *globalp;

		globalp = (struct globaldata *) pmap_steal_memory(sz);
		globaldata_init(globalp, 0, sz);
		ia64_set_k4((u_int64_t) globalp);
		PCPU_GET(next_asn) = 1;	/* 0 used for proc0 pmap */
	}
	/*
	 * Initialize the virtual memory system.
	 */
	pmap_bootstrap();

	/*
	 * Initialize the rest of proc 0's PCB.
	 *
	 * Set the kernel sp, reserving space for an (empty) trapframe,
	 * and make proc0's trapframe pointer point to it for sanity.
	 * Initialise proc0's backing store to start after u area.
	 *
	 * XXX what is all this +/- 16 stuff?
	 */
	thread0->td_frame = (struct trapframe *)thread0->td_pcb - 1;
	thread0->td_pcb->pcb_sp = (u_int64_t)thread0->td_frame - 16;
	thread0->td_pcb->pcb_bspstore = (u_int64_t)proc0kstack;
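	/*
	 * (A likely answer to the XXX above: the ia64 software
	 * conventions reserve a 16-byte scratch area immediately above
	 * sp, so sp is dropped 16 bytes below the trapframe here and
	 * again in sendsig().)
	 */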
	/* Setup curproc so that mutexes work */
	PCPU_SET(curthread, thread0);
	PCPU_SET(spinlocks, NULL);

	LIST_INIT(&thread0->td_contested);

	/*
	 * Initialise mutexes.
	 */
	mtx_init(&Giant, "Giant", MTX_DEF | MTX_RECURSE);
	mtx_init(&sched_lock, "sched lock", MTX_SPIN | MTX_RECURSE);
	mtx_init(&proc0.p_mtx, "process lock", MTX_DEF);
	/*
	 * Initialize debuggers, and break into them if appropriate.
	 */
	if (boothowto & RB_KDB) {
		printf("Boot flags requested debugger\n");
		breakpoint();
	}
}
int
ia64_running_in_simulator()
{
	return bootinfo.bi_systab == 0;
}
void
bzero(void *buf, size_t len)
{
	caddr_t p = buf;

	while (((vm_offset_t) p & (sizeof(u_long) - 1)) && len) {
		*p++ = 0;
		len--;
	}
	while (len >= sizeof(u_long) * 8) {
		*(u_long*) p = 0;
		*((u_long*) p + 1) = 0;
		*((u_long*) p + 2) = 0;
		*((u_long*) p + 3) = 0;
		*((u_long*) p + 4) = 0;
		*((u_long*) p + 5) = 0;
		*((u_long*) p + 6) = 0;
		*((u_long*) p + 7) = 0;
		len -= sizeof(u_long) * 8;
		p += sizeof(u_long) * 8;
	}
	while (len >= sizeof(u_long)) {
		*(u_long*) p = 0;
		len -= sizeof(u_long);
		p += sizeof(u_long);
	}
	while (len) {
		*p++ = 0;
		len--;
	}
}
void
DELAY(int n)
{
	u_int64_t start, end, now;

	/*
	 * XXX This can't cope with rollovers.
	 */
	start = ia64_get_itc();
	end = start + (itc_frequency * n) / 1000000;
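	/*
	 * e.g. with itc_frequency == 800000000, DELAY(10) sets 'end'
	 * 8000 ITC ticks past 'start', i.e. 10us of busy-waiting at
	 * 800MHz.
	 */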
876 /* printf("DELAY from 0x%lx to 0x%lx\n", start, end); */
878 now = ia64_get_itc();
/*
 * Send an interrupt to process.
 *
 * Stack is set up to allow sigcode stored
 * at top to call routine, followed by kcall
 * to sigreturn routine below.  After sigreturn
 * resets the signal mask, the stack, and the
 * frame pointer, it returns to the user
 * specified pc, psl.
 */
void
sendsig(sig_t catcher, int sig, sigset_t *mask, u_long code)
{
	struct proc *p;
	struct thread *td;
	struct trapframe *frame;
	struct sigacts *psp;
	struct sigframe sf, *sfp;
	u_int64_t sbs = 0;
	int oonstack, rndfsize;

	td = curthread;
	p = td->td_proc;
	psp = p->p_sigacts;
	PROC_LOCK_ASSERT(p, MA_OWNED);

	frame = td->td_frame;
	oonstack = sigonstack(frame->tf_r[FRAME_SP]);
	rndfsize = ((sizeof(sf) + 15) / 16) * 16;
	/*
	 * Make sure that we restore the entire trapframe after a
	 * signal.
	 */
	frame->tf_flags &= ~FRAME_SYSCALL;

	/* save user context */
	bzero(&sf, sizeof(struct sigframe));
	sf.sf_uc.uc_sigmask = *mask;
	sf.sf_uc.uc_stack = p->p_sigstk;
	sf.sf_uc.uc_stack.ss_flags = (p->p_flag & P_ALTSTACK)
	    ? ((oonstack) ? SS_ONSTACK : 0) : SS_DISABLE;
	sf.sf_uc.uc_mcontext.mc_flags = IA64_MC_FLAG_ONSTACK;
	sf.sf_uc.uc_mcontext.mc_onstack = (oonstack) ? 1 : 0;

	sf.sf_uc.uc_mcontext.mc_nat = 0;	/* XXX */
	sf.sf_uc.uc_mcontext.mc_sp = frame->tf_r[FRAME_SP];
	sf.sf_uc.uc_mcontext.mc_ip = (frame->tf_cr_iip
	    | ((frame->tf_cr_ipsr >> 41) & 3));
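	/*
	 * (The or'ing above works because iip is 16-byte aligned: the
	 * bundle address occupies the high bits while psr.ri - bits
	 * 41:40 of ipsr, the instruction slot number - fits in the low
	 * two bits of mc_ip.  sigreturn() below unpacks it the same
	 * way with mc_ip & ~15 and mc_ip & 15.)
	 */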
	sf.sf_uc.uc_mcontext.mc_cfm = frame->tf_cr_ifs & ~(1<<31);
	sf.sf_uc.uc_mcontext.mc_um = frame->tf_cr_ipsr & 0x1fff;
	sf.sf_uc.uc_mcontext.mc_ar_rsc = frame->tf_ar_rsc;
	sf.sf_uc.uc_mcontext.mc_ar_bsp = frame->tf_ar_bspstore;
	sf.sf_uc.uc_mcontext.mc_ar_rnat = frame->tf_ar_rnat;
	sf.sf_uc.uc_mcontext.mc_ar_ccv = frame->tf_ar_ccv;
	sf.sf_uc.uc_mcontext.mc_ar_unat = frame->tf_ar_unat;
	sf.sf_uc.uc_mcontext.mc_ar_fpsr = frame->tf_ar_fpsr;
	sf.sf_uc.uc_mcontext.mc_ar_pfs = frame->tf_ar_pfs;
	sf.sf_uc.uc_mcontext.mc_pr = frame->tf_pr;

	bcopy(&frame->tf_b[0],
	    &sf.sf_uc.uc_mcontext.mc_br[0],
	    8 * sizeof(unsigned long));
	sf.sf_uc.uc_mcontext.mc_gr[0] = 0;
	bcopy(&frame->tf_r[0],
	    &sf.sf_uc.uc_mcontext.mc_gr[1],
	    31 * sizeof(unsigned long));
	/*
	 * Allocate and validate space for the signal handler
	 * context. Note that if the stack is in P0 space, the
	 * call to grow() is a nop, and the useracc() check
	 * will fail if the process has not already allocated
	 * the space with a `brk'.
	 */
	if ((p->p_flag & P_ALTSTACK) != 0 && !oonstack &&
	    SIGISMEMBER(psp->ps_sigonstack, sig)) {
		sbs = (u_int64_t) p->p_sigstk.ss_sp;
		sfp = (struct sigframe *)((caddr_t)p->p_sigstk.ss_sp +
		    p->p_sigstk.ss_size - rndfsize);
		sbs = (sbs + 15) & ~15;
		sfp = (struct sigframe *)((u_int64_t)sfp & ~15);
#if defined(COMPAT_43) || defined(COMPAT_SUNOS)
		p->p_sigstk.ss_flags |= SS_ONSTACK;
#endif
	} else
		sfp = (struct sigframe *)(frame->tf_r[FRAME_SP] - rndfsize);

	(void)grow_stack(p, (u_long)sfp);
#ifdef DEBUG
	if ((sigdebug & SDB_KSTACK) && p->p_pid == sigpid)
		printf("sendsig(%d): sig %d ssp %p usp %p\n", p->p_pid,
		    sig, &sf, sfp);
#endif

	if (!useracc((caddr_t)sfp, sizeof(sf), VM_PROT_WRITE)) {
#ifdef DEBUG
		if ((sigdebug & SDB_KSTACK) && p->p_pid == sigpid)
			printf("sendsig(%d): useracc failed on sig %d\n",
			    p->p_pid, sig);
#endif
		/*
		 * Process has trashed its stack; give it an illegal
		 * instruction to halt it in its tracks.
		 */
		SIGACTION(p, SIGILL) = SIG_DFL;
		SIGDELSET(p->p_sigignore, SIGILL);
		SIGDELSET(p->p_sigcatch, SIGILL);
		SIGDELSET(p->p_sigmask, SIGILL);
		psignal(p, SIGILL);
		return;
	}
	/* save the floating-point state, if necessary, then copy it. */
	ia64_fpstate_save(td, 1);
	sf.sf_uc.uc_mcontext.mc_ownedfp = td->td_md.md_flags & MDP_FPUSED;
	bcopy(&td->td_pcb->pcb_fp,
	    (struct fpreg *)sf.sf_uc.uc_mcontext.mc_fpregs,
	    sizeof(struct fpreg));
	sf.sf_uc.uc_mcontext.mc_fp_control = td->td_pcb->pcb_fp_control;

	/*
	 * copy the frame out to userland.
	 */
	(void) copyout((caddr_t)&sf, (caddr_t)sfp, sizeof(sf));
#ifdef DEBUG
	if (sigdebug & SDB_FOLLOW)
		printf("sendsig(%d): sig %d sfp %p code %lx\n", p->p_pid, sig,
		    sfp, code);
#endif
	/*
	 * Set up the registers to return to sigcode.
	 */
	frame->tf_cr_ipsr &= ~IA64_PSR_RI;
	frame->tf_cr_iip = PS_STRINGS - (esigcode - sigcode);
	frame->tf_r[FRAME_R1] = sig;

	if (SIGISMEMBER(p->p_sigacts->ps_siginfo, sig)) {
		frame->tf_r[FRAME_R15] = (u_int64_t)&(sfp->sf_si);

		/* Fill in POSIX parts */
		sf.sf_si.si_signo = sig;
		sf.sf_si.si_code = code;
		sf.sf_si.si_addr = (void*)frame->tf_cr_ifa;
	} else
		frame->tf_r[FRAME_R15] = code;

	frame->tf_r[FRAME_SP] = (u_int64_t)sfp - 16;
	frame->tf_r[FRAME_R14] = sig;
	frame->tf_r[FRAME_R15] = (u_int64_t) &sfp->sf_si;
	frame->tf_r[FRAME_R16] = (u_int64_t) &sfp->sf_uc;
	frame->tf_r[FRAME_R17] = (u_int64_t)catcher;
	frame->tf_r[FRAME_R18] = sbs;
#ifdef DEBUG
	if (sigdebug & SDB_FOLLOW)
		printf("sendsig(%d): pc %lx, catcher %lx\n", p->p_pid,
		    frame->tf_cr_iip, frame->tf_r[FRAME_R17]);
	if ((sigdebug & SDB_KSTACK) && p->p_pid == sigpid)
		printf("sendsig(%d): sig %d returns\n",
		    p->p_pid, sig);
#endif
}
/*
 * System call to cleanup state after a signal
 * has been taken.  Reset signal mask and
 * stack state from context left by sendsig (above).
 * Return to previous pc and psl as specified by
 * context left by sendsig.  Check carefully to
 * make sure that the user has not modified the
 * state to gain improper privileges.
 */
int
osigreturn(struct thread *td,
	struct osigreturn_args /* {
		struct osigcontext *sigcntxp;
	} */ *uap)
{
	return (EOPNOTSUPP);
}
/*
 * System call to cleanup state after a signal
 * has been taken.  Reset signal mask and
 * stack state from context left by sendsig (above).
 * Return to previous pc and psl as specified by
 * context left by sendsig.  Check carefully to
 * make sure that the user has not modified the
 * state to gain improper privileges.
 */
int
sigreturn(struct thread *td,
	struct sigreturn_args /* {
		ucontext_t *sigcntxp;
	} */ *uap)
{
	ucontext_t uc, *ucp;
	struct proc *p = td->td_proc;
	struct trapframe *frame = td->td_frame;
	struct __mcontext *mcp;

	ucp = uap->sigcntxp;

#ifdef DEBUG
	if (sigdebug & SDB_FOLLOW)
		printf("sigreturn: pid %d, scp %p\n", p->p_pid, ucp);
#endif
	/*
	 * Fetch the entire context structure at once for speed.
	 * We don't use a normal argument to simplify RSE handling.
	 */
	if (copyin((caddr_t)frame->tf_r[FRAME_R4],
	    (caddr_t)&uc, sizeof(ucontext_t)))
		return (EFAULT);

	if (frame->tf_ndirty != 0) {
		printf("sigreturn: dirty user stacked registers\n");
	}

	/*
	 * Restore the user-supplied information
	 */
	mcp = &uc.uc_mcontext;
	bcopy(&mcp->mc_br[0], &frame->tf_b[0], 8*sizeof(u_int64_t));
	bcopy(&mcp->mc_gr[1], &frame->tf_r[0], 31*sizeof(u_int64_t));
	frame->tf_flags &= ~FRAME_SYSCALL;
	frame->tf_cr_iip = mcp->mc_ip & ~15;
	frame->tf_cr_ipsr &= ~IA64_PSR_RI;
	switch (mcp->mc_ip & 15) {
	case 1:
		frame->tf_cr_ipsr |= IA64_PSR_RI_1;
		break;
	case 2:
		frame->tf_cr_ipsr |= IA64_PSR_RI_2;
		break;
	}
	frame->tf_cr_ipsr = ((frame->tf_cr_ipsr & ~0x1fff)
	    | (mcp->mc_um & 0x1fff));
	frame->tf_pr = mcp->mc_pr;
	frame->tf_ar_rsc = (mcp->mc_ar_rsc & 3) | 12;	/* user, loadrs=0 */
	frame->tf_ar_pfs = mcp->mc_ar_pfs;
	frame->tf_cr_ifs = mcp->mc_cfm | (1UL<<63);
	frame->tf_ar_bspstore = mcp->mc_ar_bsp;
	frame->tf_ar_rnat = mcp->mc_ar_rnat;
	frame->tf_ndirty = 0;	/* assumes flushrs in sigcode */
	frame->tf_ar_unat = mcp->mc_ar_unat;
	frame->tf_ar_ccv = mcp->mc_ar_ccv;
	frame->tf_ar_fpsr = mcp->mc_ar_fpsr;

	frame->tf_r[FRAME_SP] = mcp->mc_sp;
	PROC_LOCK(p);
#if defined(COMPAT_43) || defined(COMPAT_SUNOS)
	if (uc.uc_mcontext.mc_onstack & 1)
		p->p_sigstk.ss_flags |= SS_ONSTACK;
	else
		p->p_sigstk.ss_flags &= ~SS_ONSTACK;
#endif

	p->p_sigmask = uc.uc_sigmask;
	SIG_CANTMASK(p->p_sigmask);
	PROC_UNLOCK(p);

	/* XXX ksc.sc_ownedfp ? */
	ia64_fpstate_drop(td);
	if (uc.uc_mcontext.mc_ownedfp) {
		bcopy((struct fpreg *)uc.uc_mcontext.mc_fpregs,
		    &td->td_pcb->pcb_fp, sizeof(struct fpreg));
		td->td_pcb->pcb_fp_control = uc.uc_mcontext.mc_fp_control;
	}

#ifdef DEBUG
	if (sigdebug & SDB_FOLLOW)
		printf("sigreturn(%d): returns\n", p->p_pid);
#endif
	return (EJUSTRETURN);
}
/*
 * Machine dependent boot() routine
 */
void
cpu_boot(int howto)
{
	ia64_efi_runtime->ResetSystem(EfiResetWarm, EFI_SUCCESS, 0, 0);
}

/*
 * Shutdown the CPU as much as possible
 */
void
cpu_halt(void)
{
	ia64_efi_runtime->ResetSystem(EfiResetWarm, EFI_SUCCESS, 0, 0);
}
/*
 * Clear registers on exec
 */
void
setregs(struct thread *td, u_long entry, u_long stack, u_long ps_strings)
{
	struct trapframe *frame;

	frame = td->td_frame;

	/*
	 * Make sure that we restore the entire trapframe after an
	 * exec.
	 */
	frame->tf_flags &= ~FRAME_SYSCALL;

	bzero(frame->tf_r, sizeof(frame->tf_r));
	bzero(frame->tf_f, sizeof(frame->tf_f));
	frame->tf_cr_iip = entry;
	frame->tf_cr_ipsr = (IA64_PSR_IC
	    | IA64_PSR_I
	    | IA64_PSR_IT
	    | IA64_PSR_DT
	    | IA64_PSR_RT
	    | IA64_PSR_DFH
	    | IA64_PSR_BN
	    | IA64_PSR_CPL_USER);

	/*
	 * Make sure that sp is aligned to a 16 byte boundary and
	 * reserve 16 bytes of scratch space for _start.
	 */
	frame->tf_r[FRAME_SP] = (stack & ~15) - 16;

	/*
	 * Write values for out0, out1 and out2 to the user's backing
	 * store and arrange for them to be restored into the user's
	 * initial register frame. Assumes that (bspstore & 0x1f8) <
	 * 0x1e0.
	 */
	frame->tf_ar_bspstore = td->td_md.md_bspstore + 24;
	suword((caddr_t) frame->tf_ar_bspstore - 24, stack);
	suword((caddr_t) frame->tf_ar_bspstore - 16, ps_strings);
	suword((caddr_t) frame->tf_ar_bspstore - 8, 0);
	frame->tf_ndirty = 0;
	frame->tf_cr_ifs = (1L<<63) | 3;	/* sof=3, v=1 */

	frame->tf_ar_rsc = 0xf;	/* user mode rsc */
	frame->tf_ar_fpsr = IA64_FPSR_DEFAULT;

	td->td_md.md_flags &= ~MDP_FPUSED;
	ia64_fpstate_drop(td);
}
int
ptrace_set_pc(struct thread *td, unsigned long addr)
{
	/* TODO set pc in trapframe */
	return 0;
}

int
ptrace_single_step(struct thread *td)
{
	/* TODO arrange for user process to single step */
	return 0;
}

int
ia64_pa_access(vm_offset_t pa)
{
	return VM_PROT_READ|VM_PROT_WRITE;
}

int
fill_regs(struct thread *td, struct reg *regs)
{
	/* TODO copy trapframe to regs */
	return (0);
}

int
set_regs(struct thread *td, struct reg *regs)
{
	/* TODO copy regs to trapframe */
	return (0);
}

int
fill_dbregs(struct thread *td, struct dbreg *dbregs)
{
	return (ENOSYS);
}

int
set_dbregs(struct thread *td, struct dbreg *dbregs)
{
	return (ENOSYS);
}
int
fill_fpregs(td, fpregs)
	struct thread *td;
	struct fpreg *fpregs;
{
	/* TODO copy fpu state to fpregs */
	ia64_fpstate_save(td, 0);

	bcopy(&td->td_pcb->pcb_fp, fpregs, sizeof *fpregs);
	return (0);
}

int
set_fpregs(td, fpregs)
	struct thread *td;
	struct fpreg *fpregs;
{
	/* TODO copy fpregs fpu state */
	ia64_fpstate_drop(td);

	bcopy(fpregs, &td->td_pcb->pcb_fp, sizeof *fpregs);
	return (0);
}
#ifndef DDB
void
Debugger(const char *msg)
{
	printf("Debugger(\"%s\") called.\n", msg);
}
#endif /* no DDB */
#include <sys/disklabel.h>

/*
 * Determine the size of the transfer, and make sure it is
 * within the boundaries of the partition. Adjust transfer
 * if needed, and signal errors or early completion.
 */
int
bounds_check_with_label(struct bio *bp, struct disklabel *lp, int wlabel)
{
	struct partition *p = lp->d_partitions + dkpart(bp->bio_dev);
	int labelsect = lp->d_partitions[0].p_offset;
	int maxsz = p->p_size,
	    sz = (bp->bio_bcount + DEV_BSIZE - 1) >> DEV_BSHIFT;

	/* overwriting disk label ? */
	/* XXX should also protect bootstrap in first 8K */
	if (bp->bio_blkno + p->p_offset <= LABELSECTOR + labelsect &&
#if LABELSECTOR != 0
	    bp->bio_blkno + p->p_offset + sz > LABELSECTOR + labelsect &&
#endif
	    (bp->bio_cmd == BIO_WRITE) && wlabel == 0) {
		bp->bio_error = EROFS;
		goto bad;
	}

#if defined(DOSBBSECTOR) && defined(notyet)
	/* overwriting master boot record? */
	if (bp->bio_blkno + p->p_offset <= DOSBBSECTOR &&
	    (bp->bio_cmd == BIO_WRITE) && wlabel == 0) {
		bp->bio_error = EROFS;
		goto bad;
	}
#endif

	/* beyond partition? */
	if (bp->bio_blkno < 0 || bp->bio_blkno + sz > maxsz) {
		/* if exactly at end of disk, return an EOF */
		if (bp->bio_blkno == maxsz) {
			bp->bio_resid = bp->bio_bcount;
			return(0);
		}
		/* or truncate if part of it fits */
		sz = maxsz - bp->bio_blkno;
		if (sz <= 0) {
			bp->bio_error = EINVAL;
			goto bad;
		}
		bp->bio_bcount = sz << DEV_BSHIFT;
	}

	bp->bio_pblkno = bp->bio_blkno + p->p_offset;
	return(1);

bad:
	bp->bio_flags |= BIO_ERROR;
	return(-1);
}
static int
sysctl_machdep_adjkerntz(SYSCTL_HANDLER_ARGS)
{
	int error;

	error = sysctl_handle_int(oidp, oidp->oid_arg1, oidp->oid_arg2,
	    req);
	if (!error && req->newptr)
		resettodr();
	return (error);
}

SYSCTL_PROC(_machdep, CPU_ADJKERNTZ, adjkerntz, CTLTYPE_INT|CTLFLAG_RW,
	&adjkerntz, 0, sysctl_machdep_adjkerntz, "I", "");

SYSCTL_INT(_machdep, CPU_DISRTCSET, disable_rtc_set,
	CTLFLAG_RW, &disable_rtc_set, 0, "");

SYSCTL_INT(_machdep, CPU_WALLCLOCK, wall_cmos_clock,
	CTLFLAG_RW, &wall_cmos_clock, 0, "");
void
ia64_fpstate_check(struct thread *td)
{
	if ((td->td_frame->tf_cr_ipsr & IA64_PSR_DFH) == 0)
		if (td != PCPU_GET(fpcurthread))
			panic("ia64_fpstate_check: bogus");
}
/*
 * Save the high floating point state in the pcb. Use this to get
 * read-only access to the floating point state. If write is true, the
 * current fp process is cleared so that fp state can safely be
 * modified. The process will automatically reload the changed state
 * by generating a disabled fp trap.
 */
void
ia64_fpstate_save(struct thread *td, int write)
{
	if (td == PCPU_GET(fpcurthread)) {
		/*
		 * Save the state in the pcb.
		 */
		savehighfp(td->td_pcb->pcb_highfp);

		if (write) {
			td->td_frame->tf_cr_ipsr |= IA64_PSR_DFH;
			PCPU_SET(fpcurthread, NULL);
		}
	}
}
/*
 * Relinquish ownership of the FP state. This is called instead of
 * ia64_fpstate_save() if the entire FP state is being changed
 * (e.g. on sigreturn).
 */
void
ia64_fpstate_drop(struct thread *td)
{
	if (td == PCPU_GET(fpcurthread)) {
		td->td_frame->tf_cr_ipsr |= IA64_PSR_DFH;
		PCPU_SET(fpcurthread, NULL);
	}
}
/*
 * Switch the current owner of the fp state to p, reloading the state
 * from the pcb.
 */
void
ia64_fpstate_switch(struct thread *td)
{
	if (PCPU_GET(fpcurthread)) {
		/*
		 * Dump the old fp state if it's valid.
		 */
		savehighfp(PCPU_GET(fpcurthread)->td_pcb->pcb_highfp);
		PCPU_GET(fpcurthread)->td_frame->tf_cr_ipsr |= IA64_PSR_DFH;
	}

	/*
	 * Remember the new FP owner and reload its state.
	 */
	PCPU_SET(fpcurthread, td);
	restorehighfp(td->td_pcb->pcb_highfp);
	td->td_frame->tf_cr_ipsr &= ~IA64_PSR_DFH;

	td->td_md.md_flags |= MDP_FPUSED;
}
/*
 * Initialise a struct globaldata.
 */
void
globaldata_init(struct globaldata *globaldata, int cpuid, size_t sz)
{
	bzero(globaldata, sz);
	globaldata->gd_cpuid = cpuid;
	globaldata_register(globaldata);
}
/*
 * Utility functions for manipulating instruction bundles.
 */
void
ia64_unpack_bundle(u_int64_t low, u_int64_t high, struct ia64_bundle *bp)
{
	bp->template = low & 0x1f;
	bp->slot[0] = (low >> 5) & ((1L<<41) - 1);
	bp->slot[1] = (low >> 46) | ((high & ((1L<<23) - 1)) << 18);
	bp->slot[2] = (high >> 23);
}

void
ia64_pack_bundle(u_int64_t *lowp, u_int64_t *highp,
    const struct ia64_bundle *bp)
{
	u_int64_t low, high;

	low = bp->template | (bp->slot[0] << 5) | (bp->slot[1] << 46);
	high = (bp->slot[1] >> 18) | (bp->slot[2] << 23);
	*lowp = low;
	*highp = high;
}
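/*
 * Layout notes: an ia64 bundle is 128 bits holding a 5-bit template
 * and three 41-bit instruction slots.  Slot 1 straddles the two words:
 * its low 18 bits live in bits 63:46 of 'low' and its high 23 bits in
 * bits 22:0 of 'high', which is why both pack and unpack shift by 46,
 * 18 and 23 above.
 */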
static int
rse_slot(u_int64_t *bsp)
{
	return ((u_int64_t) bsp >> 3) & 0x3f;
}

/*
 * Return the address of register regno (regno >= 32) given that bsp
 * points at the base of the register stack frame.
 */
u_int64_t *
ia64_rse_register_address(u_int64_t *bsp, int regno)
{
	int off = regno - 32;
	u_int64_t rnats = (rse_slot(bsp) + off) / 63;

	return bsp + off + rnats;
}
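/*
 * Example: if bsp sits in slot 62 of its 64-slot group and regno == 33
 * (off == 1), then rnats == (62 + 1) / 63 == 1 and the result skips
 * one extra word - every 64th word of the backing store is an RNAT
 * collection slot, not a stacked register.
 */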
/*
 * Calculate the base address of the previous frame given that the
 * current frame's locals area is 'size'.
 */
u_int64_t *
ia64_rse_previous_frame(u_int64_t *bsp, int size)
{
	int slot = rse_slot(bsp);
	int rnats = 0;
	int count = size;

	while (count > slot) {
		count -= 63;
		rnats++;
		slot = 63;
	}

	return bsp - size - rnats;
}