2 * Copyright (c) 1982, 1986 The Regents of the University of California.
3 * Copyright (c) 1989, 1990 William Jolitz
4 * Copyright (c) 1994 John Dyson
7 * This code is derived from software contributed to Berkeley by
8 * the Systems Programming Group of the University of Utah Computer
9 * Science Department, and William Jolitz.
11 * Redistribution and use in source and binary forms, with or without
12 * modification, are permitted provided that the following conditions
14 * 1. Redistributions of source code must retain the above copyright
15 * notice, this list of conditions and the following disclaimer.
16 * 2. Redistributions in binary form must reproduce the above copyright
17 * notice, this list of conditions and the following disclaimer in the
18 * documentation and/or other materials provided with the distribution.
19 * 4. Neither the name of the University nor the names of its contributors
20 * may be used to endorse or promote products derived from this software
21 * without specific prior written permission.
23 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
24 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
25 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
26 * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
27 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
28 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
29 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
30 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
31 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
32 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
35 * from: @(#)vm_machdep.c 7.3 (Berkeley) 5/13/91
36 * Utah $Hdr: vm_machdep.c 1.16.1.1 89/06/23$
37 * from: src/sys/i386/i386/vm_machdep.c,v 1.132.2.2 2000/08/26 04:19:26 yokota
38 * JNPR: vm_machdep.c,v 1.8.2.2 2007/08/16 15:59:17 girish
41 #include <sys/cdefs.h>
42 __FBSDID("$FreeBSD$");
44 #include "opt_cputype.h"
47 #include <sys/param.h>
48 #include <sys/systm.h>
49 #include <sys/malloc.h>
51 #include <sys/syscall.h>
53 #include <sys/vnode.h>
54 #include <sys/vmmeter.h>
55 #include <sys/kernel.h>
56 #include <sys/sysctl.h>
57 #include <sys/unistd.h>
59 #include <machine/asm.h>
60 #include <machine/cache.h>
61 #include <machine/clock.h>
62 #include <machine/cpu.h>
63 #include <machine/md_var.h>
64 #include <machine/pcb.h>
67 #include <vm/vm_extern.h>
69 #include <vm/vm_kern.h>
70 #include <vm/vm_map.h>
71 #include <vm/vm_page.h>
72 #include <vm/vm_pageout.h>
73 #include <vm/vm_param.h>
75 #include <vm/uma_int.h>
79 #include <sys/sf_buf.h>
82 #define NSFBUFS (512 + maxusers * 16)
86 static void sf_buf_init(void *arg);
87 SYSINIT(sock_sf, SI_SUB_MBUF, SI_ORDER_ANY, sf_buf_init, NULL);
90 * Expanded sf_freelist head. Really an SLIST_HEAD() in disguise, pairing the
91 * list head with the sf_lock mutex that sf_buf_alloc() and sf_buf_free() take.
94 SLIST_HEAD(, sf_buf) sf_head;
98 static u_int sf_buf_alloc_want;
102 * Finish a fork operation, with process p2 nearly set up.
103 * Copy and update the pcb, set up the stack so that the child is
104 * ready to run and return to user mode.
107 cpu_fork(register struct thread *td1,register struct proc *p2,
108 struct thread *td2,int flags)
110 register struct proc *p1;
114 if ((flags & RFPROC) == 0)
116 /* It is assumed that the vm_thread_alloc called
117 * cpu_thread_alloc() before cpu_fork is called.
120 /* Point the pcb to the top of the stack */
123 /* Copy p1's pcb, note that in this case
124 * our pcb also includes the td_frame being copied
125 * too. The older mips2 code did an additional copy
126 * of the td_frame, for us that's not needed any
127 * longer (this copy does them both)
129 bcopy(td1->td_pcb, pcb2, sizeof(*pcb2));
131 /* Point mdproc and then copy over td1's contents
132 * md_proc is empty for MIPS
134 td2->td_md.md_flags = td1->td_md.md_flags & MDTD_FPUSED;
137 * Set up return-value registers as fork() libc stub expects.
139 td2->td_frame->v0 = 0;
140 td2->td_frame->v1 = 1;
141 td2->td_frame->a3 = 0;
143 if (td1 == PCPU_GET(fpcurthread))
144 MipsSaveCurFPState(td1);
146 pcb2->pcb_context[PCB_REG_RA] = (register_t)(intptr_t)fork_trampoline;
147 /* Make sp 64-bit aligned */
148 pcb2->pcb_context[PCB_REG_SP] = (register_t)(((vm_offset_t)td2->td_pcb &
149 ~(sizeof(__int64_t) - 1)) - CALLFRAME_SIZ);
150 pcb2->pcb_context[PCB_REG_S0] = (register_t)(intptr_t)fork_return;
151 pcb2->pcb_context[PCB_REG_S1] = (register_t)(intptr_t)td2;
152 pcb2->pcb_context[PCB_REG_S2] = (register_t)(intptr_t)td2->td_frame;
153 pcb2->pcb_context[PCB_REG_SR] = mips_rd_status() &
154 (MIPS_SR_KX | MIPS_SR_UX | MIPS_SR_INT_MASK);
156 * FREEBSD_DEVELOPERS_FIXME:
157 * Setup any other CPU-Specific registers (Not MIPS Standard)
158 * and/or bits in other standard MIPS registers (if CPU-Specific)
162 td2->td_md.md_tls = td1->td_md.md_tls;
163 td2->td_md.md_saved_intr = MIPS_SR_INT_IE;
164 td2->td_md.md_spinlock_count = 1;
166 pcb2->pcb_context[PCB_REG_SR] |= MIPS_SR_COP_2_BIT | MIPS_SR_PX | MIPS_SR_UX | MIPS_SR_KX | MIPS_SR_SX;
171 * Intercept the return address from a freshly forked process that has NOT
172 * been scheduled yet.
174 * This is needed to make kernel threads stay in kernel mode.
177 cpu_set_fork_handler(struct thread *td, void (*func) __P((void *)), void *arg)
180 * Note that the trap frame follows the args, so the function
181 * is really called like this: func(arg, frame);
183 td->td_pcb->pcb_context[PCB_REG_S0] = (register_t)(intptr_t)func;
184 td->td_pcb->pcb_context[PCB_REG_S1] = (register_t)(intptr_t)arg;
188 cpu_exit(struct thread *td)
193 cpu_thread_exit(struct thread *td)
196 if (PCPU_GET(fpcurthread) == td)
197 PCPU_GET(fpcurthread) = (struct thread *)0;
201 cpu_thread_free(struct thread *td)
206 cpu_thread_clean(struct thread *td)
211 cpu_thread_swapin(struct thread *td)
217 * The kstack may be at a different physical address now.
218 * Cache the PTEs for the Kernel stack in the machine dependent
219 * part of the thread struct so cpu_switch() can quickly map in
220 * the pcb struct and kernel stack.
222 for (i = 0; i < KSTACK_PAGES; i++) {
223 pte = pmap_pte(kernel_pmap, td->td_kstack + i * PAGE_SIZE);
224 td->td_md.md_upte[i] = *pte & ~TLBLO_SWBITS_MASK;
229 cpu_thread_swapout(struct thread *td)
234 cpu_thread_alloc(struct thread *td)
239 KASSERT((td->td_kstack & (1 << PAGE_SHIFT)) == 0, ("kernel stack must be aligned."));
240 td->td_pcb = (struct pcb *)(td->td_kstack +
241 td->td_kstack_pages * PAGE_SIZE) - 1;
242 td->td_frame = &td->td_pcb->pcb_regs;
244 for (i = 0; i < KSTACK_PAGES; i++) {
245 pte = pmap_pte(kernel_pmap, td->td_kstack + i * PAGE_SIZE);
246 td->td_md.md_upte[i] = *pte & ~TLBLO_SWBITS_MASK;
251 cpu_set_syscall_retval(struct thread *td, int error)
253 struct trapframe *locr0 = td->td_frame;
259 #if defined(__mips_o32)
260 if (code == SYS___syscall)
264 if (code == SYS_syscall)
266 else if (code == SYS___syscall) {
268 code = _QUAD_LOWWORD ? locr0->a1 : locr0->a0;
275 if (quad_syscall && code != SYS_lseek) {
277 * System call invoked through the
278 * SYS___syscall interface but the
279 * return value is really just 32
282 locr0->v0 = td->td_retval[0];
284 locr0->v1 = td->td_retval[0];
287 locr0->v0 = td->td_retval[0];
288 locr0->v1 = td->td_retval[1];
294 locr0->pc = td->td_pcb->pcb_tpc;
298 break; /* nothing to do */
301 if (quad_syscall && code != SYS_lseek) {
314 * Initialize machine state (pcb and trap frame) for a new thread about to
315 * upcall. Put enough state in the new thread's PCB to get it to go back to
316 * userret(), where we can intercept it again to set the return (upcall)
317 * address and stack, along with those from upcalls that are from other sources
318 * such as those generated in thread_userret() itself.
321 cpu_set_upcall(struct thread *td, struct thread *td0)
325 /* Point the pcb to the top of the stack. */
329 * Copy the upcall pcb. This loads kernel regs.
330 * Those not loaded individually below get their default
333 * XXXKSE It might be a good idea to simply skip this as
334 * the values of the other registers may be unimportant.
335 * This would remove any requirement for knowing the KSE
336 * at this time (see the matching comment below for
337 * more analysis) (need a good safe default).
338 * In MIPS, the trapframe is the first element of the PCB
339 * and gets copied when we copy the PCB. No separate copy
342 bcopy(td0->td_pcb, pcb2, sizeof(*pcb2));
345 * Set registers for trampoline to user mode.
348 pcb2->pcb_context[PCB_REG_RA] = (register_t)(intptr_t)fork_trampoline;
349 /* Make sp 64-bit aligned */
350 pcb2->pcb_context[PCB_REG_SP] = (register_t)(((vm_offset_t)td->td_pcb &
351 ~(sizeof(__int64_t) - 1)) - CALLFRAME_SIZ);
352 pcb2->pcb_context[PCB_REG_S0] = (register_t)(intptr_t)fork_return;
353 pcb2->pcb_context[PCB_REG_S1] = (register_t)(intptr_t)td;
354 pcb2->pcb_context[PCB_REG_S2] = (register_t)(intptr_t)td->td_frame;
355 /* Don't set IE bit in SR. sched lock release will take care of it */
356 pcb2->pcb_context[PCB_REG_SR] = mips_rd_status() &
357 (MIPS_SR_KX | MIPS_SR_UX | MIPS_SR_INT_MASK);
360 pcb2->pcb_context[PCB_REG_SR] |= MIPS_SR_COP_2_BIT | MIPS_SR_COP_0_BIT |
361 MIPS_SR_PX | MIPS_SR_UX | MIPS_SR_KX | MIPS_SR_SX;
365 * FREEBSD_DEVELOPERS_FIXME:
366 * Setup any other CPU-Specific registers (Not MIPS Standard)
370 /* SMP Setup to release sched_lock in fork_exit(). */
371 td->td_md.md_spinlock_count = 1;
372 td->td_md.md_saved_intr = MIPS_SR_INT_IE;
374 /* Maybe we need to fix this? */
375 td->td_md.md_saved_sr = ( (MIPS_SR_COP_2_BIT | MIPS_SR_COP_0_BIT) |
376 (MIPS_SR_PX | MIPS_SR_UX | MIPS_SR_KX | MIPS_SR_SX) |
377 (MIPS_SR_INT_IE | MIPS_HARD_INT_MASK));
382 * Set the machine state for performing an upcall that has to
383 * be done in thread_userret() so that those upcalls generated
384 * in thread_userret() itself can be done as well.
387 cpu_set_upcall_kse(struct thread *td, void (*entry)(void *), void *arg,
390 struct trapframe *tf;
394 * At the point where a function is called, sp must be 8
395 * byte aligned[for compatibility with 64-bit CPUs]
396 * in ``See MIPS Run'' by D. Sweetman, p. 269
398 sp = ((register_t)(intptr_t)(stack->ss_sp + stack->ss_size) & ~0x7) -
402 * Set the trap frame to point at the beginning of the uts
406 bzero(tf, sizeof(struct trapframe));
408 tf->pc = (register_t)(intptr_t)entry;
410 * MIPS ABI requires T9 to be the same as PC
411 * in subroutine entry point
413 tf->t9 = (register_t)(intptr_t)entry;
414 tf->a0 = (register_t)(intptr_t)arg;
417 * Keep interrupt mask
419 td->td_frame->sr = MIPS_SR_KSU_USER | MIPS_SR_EXL | MIPS_SR_INT_IE |
420 (mips_rd_status() & MIPS_SR_INT_MASK);
421 #if defined(__mips_n32)
422 td->td_frame->sr |= MIPS_SR_PX;
423 #elif defined(__mips_n64)
424 td->td_frame->sr |= MIPS_SR_PX | MIPS_SR_UX | MIPS_SR_KX;
427 tf->sr |= MIPS_SR_INT_IE | MIPS_SR_COP_0_BIT | MIPS_SR_PX | MIPS_SR_UX |
430 /* tf->sr |= (ALL_INT_MASK & idle_mask) | SR_INT_ENAB; */
431 /**XXX the above may now be wrong -- mips2 implements this as panic */
433 * FREEBSD_DEVELOPERS_FIXME:
434 * Setup any other CPU-Specific registers (Not MIPS Standard)
439 * Convert kernel VA to physical address
446 va = pmap_kextract((vm_offset_t)addr);
448 panic("kvtop: zero page frame");
449 return((intptr_t)va);
453 * Implement the pre-zeroed page mechanism.
454 * This routine is called from the idle loop.
457 #define ZIDLE_LO(v) ((v) * 2 / 3)
458 #define ZIDLE_HI(v) ((v) * 4 / 5)
461 * Allocate a pool of sf_bufs (sendfile(2) or "super-fast" if you prefer. :-))
465 sf_buf_init(void *arg)
467 struct sf_buf *sf_bufs;
472 TUNABLE_INT_FETCH("kern.ipc.nsfbufs", &nsfbufs);
474 mtx_init(&sf_freelist.sf_lock, "sf_bufs list lock", NULL, MTX_DEF);
475 SLIST_INIT(&sf_freelist.sf_head);
476 sf_base = kmem_alloc_nofault(kernel_map, nsfbufs * PAGE_SIZE);
477 sf_bufs = malloc(nsfbufs * sizeof(struct sf_buf), M_TEMP,
479 for (i = 0; i < nsfbufs; i++) {
480 sf_bufs[i].kva = sf_base + i * PAGE_SIZE;
481 SLIST_INSERT_HEAD(&sf_freelist.sf_head, &sf_bufs[i], free_list);
483 sf_buf_alloc_want = 0;
488 * Get an sf_buf from the freelist. Will block if none are available.
491 sf_buf_alloc(struct vm_page *m, int flags)
497 mtx_lock(&sf_freelist.sf_lock);
498 while ((sf = SLIST_FIRST(&sf_freelist.sf_head)) == NULL) {
499 if (flags & SFB_NOWAIT)
502 mbstat.sf_allocwait++;
503 error = msleep(&sf_freelist, &sf_freelist.sf_lock,
504 (flags & SFB_CATCH) ? PCATCH | PVM : PVM, "sfbufa", 0);
508 * If we got a signal, don't risk going back to sleep.
514 SLIST_REMOVE_HEAD(&sf_freelist.sf_head, free_list);
517 nsfbufspeak = imax(nsfbufspeak, nsfbufsused);
518 pmap_qenter(sf->kva, &sf->m, 1);
520 mtx_unlock(&sf_freelist.sf_lock);
523 return ((struct sf_buf *)m);
528 * Release resources back to the system.
531 sf_buf_free(struct sf_buf *sf)
534 pmap_qremove(sf->kva, 1);
535 mtx_lock(&sf_freelist.sf_lock);
536 SLIST_INSERT_HEAD(&sf_freelist.sf_head, sf, free_list);
538 if (sf_buf_alloc_want > 0)
539 wakeup(&sf_freelist);
540 mtx_unlock(&sf_freelist.sf_lock);
545 * Software interrupt handler for queued VM system processing.
551 if (busdma_swi_pending)
556 cpu_set_user_tls(struct thread *td, void *tls_base)
559 td->td_md.md_tls = tls_base;
/*
 * Debugger print helpers.
 *
 * DB_PRINT_REG(ptr, regname) prints the stringified member name, left
 * justified in a 12-column field, followed by the value of (ptr)->regname
 * formatted with %p after a cast through intptr_t to void *.
 *
 * DB_PRINT_REG_ARRAY(ptr, arrname, regname) does the same for the element
 * (ptr)->arrname[regname], labelled with the stringified index name.
 */
566 #define DB_PRINT_REG(ptr, regname) \
567 db_printf(" %-12s %p\n", #regname, (void *)(intptr_t)((ptr)->regname))
569 #define DB_PRINT_REG_ARRAY(ptr, arrname, regname) \
570 db_printf(" %-12s %p\n", #regname, (void *)(intptr_t)((ptr)->arrname[regname]))
/*
 * Print a trapframe through db_printf: first the address of the frame,
 * then one DB_PRINT_REG line for each member listed below, in this order.
 */
573 dump_trapframe(struct trapframe *trapframe)
576 db_printf("Trapframe at %p\n", trapframe);
578 DB_PRINT_REG(trapframe, zero);
579 DB_PRINT_REG(trapframe, ast);
580 DB_PRINT_REG(trapframe, v0);
581 DB_PRINT_REG(trapframe, v1);
582 DB_PRINT_REG(trapframe, a0);
583 DB_PRINT_REG(trapframe, a1);
584 DB_PRINT_REG(trapframe, a2);
585 DB_PRINT_REG(trapframe, a3);
586 DB_PRINT_REG(trapframe, t0);
587 DB_PRINT_REG(trapframe, t1);
588 DB_PRINT_REG(trapframe, t2);
589 DB_PRINT_REG(trapframe, t3);
590 DB_PRINT_REG(trapframe, t4);
591 DB_PRINT_REG(trapframe, t5);
592 DB_PRINT_REG(trapframe, t6);
593 DB_PRINT_REG(trapframe, t7);
594 DB_PRINT_REG(trapframe, s0);
595 DB_PRINT_REG(trapframe, s1);
596 DB_PRINT_REG(trapframe, s2);
597 DB_PRINT_REG(trapframe, s3);
598 DB_PRINT_REG(trapframe, s4);
599 DB_PRINT_REG(trapframe, s5);
600 DB_PRINT_REG(trapframe, s6);
601 DB_PRINT_REG(trapframe, s7);
602 DB_PRINT_REG(trapframe, t8);
603 DB_PRINT_REG(trapframe, t9);
604 DB_PRINT_REG(trapframe, k0);
605 DB_PRINT_REG(trapframe, k1);
606 DB_PRINT_REG(trapframe, gp);
607 DB_PRINT_REG(trapframe, sp);
608 DB_PRINT_REG(trapframe, s8);
609 DB_PRINT_REG(trapframe, ra);
610 DB_PRINT_REG(trapframe, sr);
611 DB_PRINT_REG(trapframe, mullo);
612 DB_PRINT_REG(trapframe, mulhi);
613 DB_PRINT_REG(trapframe, badvaddr);
614 DB_PRINT_REG(trapframe, cause);
615 DB_PRINT_REG(trapframe, pc);
618 DB_SHOW_COMMAND(pcb, ddb_dump_pcb)
622 struct trapframe *trapframe;
624 /* Determine which thread to examine. */
626 td = db_lookup_thread(addr, TRUE);
632 db_printf("Thread %d at %p\n", td->td_tid, td);
634 db_printf("PCB at %p\n", pcb);
636 trapframe = &pcb->pcb_regs;
637 dump_trapframe(trapframe);
639 db_printf("PCB Context:\n");
640 DB_PRINT_REG_ARRAY(pcb, pcb_context, PCB_REG_S0);
641 DB_PRINT_REG_ARRAY(pcb, pcb_context, PCB_REG_S1);
642 DB_PRINT_REG_ARRAY(pcb, pcb_context, PCB_REG_S2);
643 DB_PRINT_REG_ARRAY(pcb, pcb_context, PCB_REG_S3);
644 DB_PRINT_REG_ARRAY(pcb, pcb_context, PCB_REG_S4);
645 DB_PRINT_REG_ARRAY(pcb, pcb_context, PCB_REG_S5);
646 DB_PRINT_REG_ARRAY(pcb, pcb_context, PCB_REG_S6);
647 DB_PRINT_REG_ARRAY(pcb, pcb_context, PCB_REG_S7);
648 DB_PRINT_REG_ARRAY(pcb, pcb_context, PCB_REG_SP);
649 DB_PRINT_REG_ARRAY(pcb, pcb_context, PCB_REG_S8);
650 DB_PRINT_REG_ARRAY(pcb, pcb_context, PCB_REG_RA);
651 DB_PRINT_REG_ARRAY(pcb, pcb_context, PCB_REG_SR);
652 DB_PRINT_REG_ARRAY(pcb, pcb_context, PCB_REG_GP);
653 DB_PRINT_REG_ARRAY(pcb, pcb_context, PCB_REG_PC);
655 db_printf("PCB onfault = %p\n", pcb->pcb_onfault);
656 db_printf("md_saved_intr = 0x%0lx\n", (long)td->td_md.md_saved_intr);
657 db_printf("md_spinlock_count = %d\n", td->td_md.md_spinlock_count);
659 if (td->td_frame != trapframe) {
660 db_printf("td->td_frame %p is not the same as pcb_regs %p\n",
661 td->td_frame, trapframe);
666 * Dump the trapframe beginning at address specified by first argument.
668 DB_SHOW_COMMAND(trapframe, ddb_dump_trapframe)
674 dump_trapframe((struct trapframe *)addr);