2 * Copyright (c) 1982, 1986 The Regents of the University of California.
3 * Copyright (c) 1989, 1990 William Jolitz
4 * Copyright (c) 1994 John Dyson
5 * Copyright (c) 2001 Jake Burkholder.
8 * This code is derived from software contributed to Berkeley by
9 * the Systems Programming Group of the University of Utah Computer
10 * Science Department, and William Jolitz.
12 * Redistribution and use in source and binary forms, with or without
13 * modification, are permitted provided that the following conditions
15 * 1. Redistributions of source code must retain the above copyright
16 * notice, this list of conditions and the following disclaimer.
17 * 2. Redistributions in binary form must reproduce the above copyright
18 * notice, this list of conditions and the following disclaimer in the
19 * documentation and/or other materials provided with the distribution.
20 * 4. Neither the name of the University nor the names of its contributors
21 * may be used to endorse or promote products derived from this software
22 * without specific prior written permission.
24 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
25 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
26 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
27 * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
28 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
29 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
30 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
31 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
32 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
33 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
36 * from: @(#)vm_machdep.c 7.3 (Berkeley) 5/13/91
37 * Utah $Hdr: vm_machdep.c 1.16.1.1 89/06/23$
38 * from: FreeBSD: src/sys/i386/i386/vm_machdep.c,v 1.167 2001/07/12
41 #include <sys/cdefs.h>
42 __FBSDID("$FreeBSD$");
46 #include <sys/param.h>
47 #include <sys/systm.h>
50 #include <sys/kernel.h>
51 #include <sys/malloc.h>
53 #include <sys/mutex.h>
55 #include <sys/sysent.h>
56 #include <sys/sf_buf.h>
57 #include <sys/sched.h>
58 #include <sys/sysctl.h>
59 #include <sys/unistd.h>
60 #include <sys/vmmeter.h>
62 #include <dev/ofw/openfirm.h>
65 #include <vm/vm_extern.h>
67 #include <vm/vm_kern.h>
68 #include <vm/vm_map.h>
69 #include <vm/vm_page.h>
70 #include <vm/vm_pageout.h>
71 #include <vm/vm_param.h>
73 #include <vm/uma_int.h>
75 #include <machine/cache.h>
76 #include <machine/cpu.h>
77 #include <machine/fp.h>
78 #include <machine/frame.h>
79 #include <machine/fsr.h>
80 #include <machine/md_var.h>
81 #include <machine/ofw_machdep.h>
82 #include <machine/ofw_mem.h>
83 #include <machine/pcb.h>
84 #include <machine/tlb.h>
85 #include <machine/tstate.h>
/*
 * Default size of the sf_buf pool: a base of 512 plus 16 per maxusers.
 * NOTE(review): the declaration of nsfbufs and the place where NSFBUFS is
 * applied are not visible in this listing; sf_buf_init() fetches the
 * "kern.ipc.nsfbufs" tunable into nsfbufs.
 */
88 #define NSFBUFS (512 + maxusers * 16)
/* sf_buf usage counters, exported read-only through the sysctls below. */
92 static int nsfbufspeak;
93 static int nsfbufsused;
/*
 * kern.ipc.nsfbufs is declared CTLFLAG_RDTUN; nsfbufspeak and nsfbufsused
 * are declared CTLFLAG_RD.
 */
95 SYSCTL_INT(_kern_ipc, OID_AUTO, nsfbufs, CTLFLAG_RDTUN, &nsfbufs, 0,
96 "Maximum number of sendfile(2) sf_bufs available");
97 SYSCTL_INT(_kern_ipc, OID_AUTO, nsfbufspeak, CTLFLAG_RD, &nsfbufspeak, 0,
98 "Number of sendfile(2) sf_bufs at peak usage");
99 SYSCTL_INT(_kern_ipc, OID_AUTO, nsfbufsused, CTLFLAG_RD, &nsfbufsused, 0,
100 "Number of sendfile(2) sf_bufs in use");
/* Register sf_buf_init() as a SYSINIT at SI_SUB_MBUF / SI_ORDER_ANY. */
102 static void sf_buf_init(void *arg);
103 SYSINIT(sock_sf, SI_SUB_MBUF, SI_ORDER_ANY, sf_buf_init, NULL);
106 * Expanded sf_freelist head. Really an SLIST_HEAD() in disguise, bundling the
107 * free-list head together with the sf_lock mutex.
110 SLIST_HEAD(, sf_buf) sf_head;
/*
 * Reset to 0 by sf_buf_init() and tested by sf_buf_free() before it calls
 * wakeup().  NOTE(review): the code that increments/decrements it is not
 * visible in this listing; presumably it counts sleepers in sf_buf_alloc().
 */
114 static u_int sf_buf_alloc_want;
/*
 * Statistics counters bumped with PMAP_STATS_INC() in uma_small_alloc()
 * (alloc, and alloc_oc when the page colour had to be changed) and in
 * uma_small_free() (free).
 */
116 PMAP_STATS_VAR(uma_nsmall_alloc);
117 PMAP_STATS_VAR(uma_nsmall_alloc_oc);
118 PMAP_STATS_VAR(uma_nsmall_free);
/*
 * cpu_exit: machine-dependent cleanup for an exiting thread's process.
 * Clears the per-process md_sigtramp pointer and, when a utrap table is
 * attached (md_utrap != NULL), hands it to utrap_free() and resets the
 * pointer to NULL.
 * NOTE(review): the declaration and assignment of `p` are not visible in
 * this listing; presumably p = td->td_proc -- confirm against the full file.
 */
121 cpu_exit(struct thread *td)
126 p->p_md.md_sigtramp = NULL;
127 if (p->p_md.md_utrap != NULL) {
128 utrap_free(p->p_md.md_utrap);
129 p->p_md.md_utrap = NULL;
134 cpu_thread_exit(struct thread *td)
140 cpu_thread_clean(struct thread *td)
/*
 * cpu_thread_alloc: lay out the machine-dependent structures at the top of
 * a thread's kernel stack.
 * The pcb is placed sizeof(struct pcb) below the end of the stack
 * (td_kstack + td_kstack_pages * PAGE_SIZE) and rounded down to a 64-byte
 * boundary by masking with ~0x3f.  The trapframe sits immediately below the
 * pcb and is recorded in td->td_frame.
 * NOTE(review): the line that stores pcb into the thread (presumably
 * td->td_pcb = pcb) is not visible in this listing.
 */
146 cpu_thread_alloc(struct thread *td)
150 pcb = (struct pcb *)((td->td_kstack + td->td_kstack_pages * PAGE_SIZE -
151 sizeof(struct pcb)) & ~0x3fUL);
153 td->td_frame = (struct trapframe *)pcb - 1;
158 cpu_thread_free(struct thread *td)
164 cpu_thread_swapin(struct thread *td)
170 cpu_thread_swapout(struct thread *td)
/*
 * cpu_set_syscall_retval: store the result of a system call into the
 * thread's trapframe.
 *
 * td    - thread whose trapframe (td->td_frame) is updated.
 * error - syscall result code selecting one of the paths below.
 *
 * NOTE(review): the switch statement and its case labels are not visible in
 * this listing; the three groups of statements below are presumably the
 * success, restart and default-error cases -- confirm against the full file.
 */
176 cpu_set_syscall_retval(struct thread *td, int error)
/*
 * First path: copy both return values into tf_out[0]/tf_out[1] and clear
 * TSTATE_XCC_C (the same bit cpu_fork() clears to mark success).
 */
181 td->td_frame->tf_out[0] = td->td_retval[0];
182 td->td_frame->tf_out[1] = td->td_retval[1];
183 td->td_frame->tf_tstate &= ~TSTATE_XCC_C;
188 * Undo the tpc advancement we have done on syscall
189 * enter, we want to reexecute the system call.
/* tpc is restored from the value saved in the pcb; tnpc steps back 4 bytes. */
191 td->td_frame->tf_tpc = td->td_pcb->pcb_tpc;
192 td->td_frame->tf_tnpc -= 4;
/*
 * Error path: if the process's sysent supplies an error table
 * (sv_errsize != 0), out-of-range codes become -1; the table lookup on the
 * line after that is presumably the else branch (the `else` is not visible).
 * The code is then returned in tf_out[0] with TSTATE_XCC_C set.
 */
199 if (td->td_proc->p_sysent->sv_errsize) {
200 if (error >= td->td_proc->p_sysent->sv_errsize)
201 error = -1; /* XXX */
203 error = td->td_proc->p_sysent->sv_errtbl[error];
205 td->td_frame->tf_out[0] = error;
206 td->td_frame->tf_tstate |= TSTATE_XCC_C;
/*
 * cpu_set_upcall: initialise thread td so that it starts out the same way a
 * freshly forked thread does, using td0 as the template.
 *
 * Visible steps: copy td0's trapframe into td's; build a struct frame just
 * below the trapframe whose locals carry fork_return, td and the trapframe
 * pointer; point the pcb's saved pc at fork_trampoline - 8 and its saved sp
 * at the new frame (biased by SPOFF).  This mirrors the frame set up at the
 * end of cpu_fork().
 *
 * NOTE(review): the assignments of `tf` and `pcb` and the declaration of
 * `fr` are not visible in this listing.  The "- 8" presumably compensates
 * for the resume path jumping to pcb_pc + 8 -- confirm.
 */
212 cpu_set_upcall(struct thread *td, struct thread *td0)
214 struct trapframe *tf;
218 bcopy(td0->td_frame, td->td_frame, sizeof(struct trapframe));
222 fr = (struct frame *)tf - 1;
223 fr->fr_local[0] = (u_long)fork_return;
224 fr->fr_local[1] = (u_long)td;
225 fr->fr_local[2] = (u_long)tf;
226 pcb->pcb_pc = (u_long)fork_trampoline - 8;
227 pcb->pcb_sp = (u_long)fr - SPOFF;
229 /* Setup to release the spin count in fork_exit(). */
230 td->td_md.md_spinlock_count = 1;
231 td->td_md.md_saved_pil = 0;
/*
 * cpu_set_upcall_kse: rewrite td's trapframe so that the thread runs
 * entry(arg) on the supplied stack.
 *
 * td    - thread being set up.
 * entry - function whose address becomes tf_tpc.
 * arg   - value placed in tf_out[0].
 *
 * sp is the top of the stack region (ss_sp + ss_size); tf_out[6] is set one
 * struct frame below it, biased by SPOFF.  tf_tnpc is the instruction
 * following entry (tpc + 4).  td_retval[] is loaded from tf_out[0..1] after
 * the update.
 *
 * NOTE(review): the rest of the parameter list (the `stack` argument) and
 * the assignment of `tf` are not visible in this listing.
 */
235 cpu_set_upcall_kse(struct thread *td, void (*entry)(void *), void *arg,
238 struct trapframe *tf;
244 sp = (uint64_t)stack->ss_sp + stack->ss_size;
245 tf->tf_out[0] = (uint64_t)arg;
246 tf->tf_out[6] = sp - SPOFF - sizeof(struct frame);
247 tf->tf_tpc = (uint64_t)entry;
248 tf->tf_tnpc = tf->tf_tpc + 4;
250 td->td_retval[0] = tf->tf_out[0];
251 td->td_retval[1] = tf->tf_out[1];
/*
 * cpu_set_user_tls: record the thread's TLS base by storing tls_base in
 * tf_global[7] of its trapframe.
 * NOTE(review): several lines between the signature and this store are not
 * visible in this listing; any precondition or alternate path is unknown.
 */
255 cpu_set_user_tls(struct thread *td, void *tls_base)
260 td->td_frame->tf_global[7] = (uint64_t)tls_base;
265 * Finish a fork operation, with process p2 nearly set up.
266 * Copy and update the pcb, set up the stack so that the child
267 * is ready to run and return to user mode.
/*
 * cpu_fork: machine-dependent part of fork.
 *
 * td1   - forking thread (asserted to be curthread or thread0).
 * p2    - new process; inherits md_sigtramp and the utrap table
 *         (via utrap_hold()) from td1's process.
 * td2   - initial thread of p2; its pcb, trapframe and first kernel frame
 *         are built here.
 * flags - RF* flags; RFPROC and RFMEM are examined.
 *
 * Visible steps, in order: place a 64-byte-aligned pcb at the top of td2's
 * kernel stack; save the parent's FP context if FPRS_FEF is set; copy the
 * parent's pcb; for RFMEM forks ensure the parent's topmost register window
 * is recorded in its pcb; copy the trapframe and make the child return 0
 * with TSTATE_XCC_C clear; build a frame below the trapframe whose locals
 * hold fork_return, td2 and the trapframe, and point the child's pcb at
 * fork_trampoline.
 *
 * NOTE(review): this listing omits lines, including local declarations,
 * the statement controlled by the RFPROC test, the assignment of pcb1, the
 * window flush after "Make sure the copied windows are spilled", and the
 * break / error handling in the RFMEM path.  Confirm details against the
 * full file before relying on this summary.
 */
270 cpu_fork(struct thread *td1, struct proc *p2, struct thread *td2, int flags)
272 struct trapframe *tf;
280 KASSERT(td1 == curthread || td1 == &thread0,
281 ("cpu_fork: p1 not curproc and not proc0"));
283 if ((flags & RFPROC) == 0)
286 p2->p_md.md_sigtramp = td1->td_proc->p_md.md_sigtramp;
287 p2->p_md.md_utrap = utrap_hold(td1->td_proc->p_md.md_utrap);
289 /* The pcb must be aligned on a 64-byte boundary. */
291 pcb2 = (struct pcb *)((td2->td_kstack + td2->td_kstack_pages *
292 PAGE_SIZE - sizeof(struct pcb)) & ~0x3fUL);
296 * Ensure that p1's pcb is up to date.
299 if ((td1->td_frame->tf_fprs & FPRS_FEF) != 0)
300 savefpctx(pcb1->pcb_ufp);
302 /* Make sure the copied windows are spilled. */
304 /* Copy the pcb (this will copy the windows saved in the pcb, too). */
305 bcopy(pcb1, pcb2, sizeof(*pcb1));
308 * If we're creating a new user process and we're sharing the address
309 * space, the parent's top most frame must be saved in the pcb. The
310 * child will pop the frame when it returns to user mode, and may
311 * overwrite it with its own data causing much suffering for the
312 * parent. We check if its already in the pcb, and if not copy it
313 * in. Its unlikely that the copyin will fail, but if so there's not
314 * much we can do. The parent will likely crash soon anyway in that
317 if ((flags & RFMEM) != 0 && td1 != &thread0) {
318 sp = td1->td_frame->tf_sp;
/* Search the windows already saved in the pcb for one keyed by this sp. */
319 for (i = 0; i < pcb1->pcb_nsaved; i++) {
320 if (pcb1->pcb_rwsp[i] == sp)
/* Not found: read the window from the user stack into slot i. */
323 if (i == pcb1->pcb_nsaved) {
324 error = copyin((caddr_t)sp + SPOFF, &pcb1->pcb_rw[i],
325 sizeof(struct rwindow));
327 pcb1->pcb_rwsp[i] = sp;
334 * Create a new fresh stack for the new process.
335 * Copy the trap frame for the return to user mode as if from a
336 * syscall. This copies most of the user mode register values.
338 tf = (struct trapframe *)pcb2 - 1;
339 bcopy(td1->td_frame, tf, sizeof(*tf));
341 tf->tf_out[0] = 0; /* Child returns zero */
343 tf->tf_tstate &= ~TSTATE_XCC_C; /* success */
/* Initial kernel frame for the child, directly below its trapframe. */
347 fp = (struct frame *)tf - 1;
348 fp->fr_local[0] = (u_long)fork_return;
349 fp->fr_local[1] = (u_long)td2;
350 fp->fr_local[2] = (u_long)tf;
351 /* Terminate stack traces at this frame. */
352 fp->fr_pc = fp->fr_fp = 0;
353 pcb2->pcb_sp = (u_long)fp - SPOFF;
354 pcb2->pcb_pc = (u_long)fork_trampoline - 8;
356 /* Setup to release the spin count in fork_exit(). */
357 td2->td_md.md_spinlock_count = 1;
358 td2->td_md.md_saved_pil = 0;
361 * Now, cpu_switch() can schedule the new process.
368 static char bspec[64] = "";
382 if ((chosen = OF_finddevice("/chosen")) != -1) {
383 if (OF_getprop(chosen, "bootpath", bspec, sizeof(bspec)) == -1)
385 bspec[sizeof(bspec) - 1] = '\0';
392 * Intercept the return address from a freshly forked process that has NOT
393 * been scheduled yet.
395 * This is needed to make kernel threads stay in kernel mode.
/*
 * cpu_set_fork_handler: replace the callout and argument stored in a new
 * thread's initial kernel frame.
 *
 * td   - thread whose initial frame is patched.
 * func - stored in fr_local[0], the slot cpu_fork() fills with fork_return.
 * arg  - stored in fr_local[1], the slot cpu_fork() fills with the thread.
 *
 * The frame is located from the pcb's saved stack pointer by adding SPOFF
 * back, the inverse of how cpu_fork() computes pcb_sp.
 * NOTE(review): the assignment of `pcb` (presumably td->td_pcb) is not
 * visible in this listing.
 */
398 cpu_set_fork_handler(struct thread *td, void (*func)(void *), void *arg)
404 fp = (struct frame *)(pcb->pcb_sp + SPOFF);
405 fp->fr_local[0] = (u_long)func;
406 fp->fr_local[1] = (u_long)arg;
/*
 * is_physical_memory: test whether physical address addr lies inside one of
 * the sparc64_nmemreg regions in the sparc64_memreg[] table.  A region
 * covers the half-open range [mr_start, mr_start + mr_size).
 * NOTE(review): the return statements are not visible in this listing;
 * presumably non-zero on a match and zero otherwise -- confirm.
 */
410 is_physical_memory(vm_paddr_t addr)
412 struct ofw_mem_region *mr;
414 for (mr = sparc64_memreg; mr < sparc64_memreg + sparc64_nmemreg; mr++)
415 if (addr >= mr->mr_start && addr < mr->mr_start + mr->mr_size)
421 * Allocate a pool of sf_bufs (sendfile(2) or "super-fast" if you prefer. :-))
/*
 * sf_buf_init: SYSINIT hook that builds the sf_buf free list.
 *
 * Fetches the "kern.ipc.nsfbufs" tunable into nsfbufs, initialises the
 * free-list mutex and list head, reserves nsfbufs pages of kernel virtual
 * address space with kva_alloc(), allocates an array of nsfbufs sf_buf
 * structures, and gives each one a page-sized KVA slot before pushing it on
 * the free list.  sf_buf_alloc_want is reset to 0 at the end.
 *
 * arg - unused in the visible lines.
 *
 * NOTE(review): the line that seeds nsfbufs before the tunable fetch and
 * the malloc() flags are not visible in this listing.
 */
424 sf_buf_init(void *arg)
426 struct sf_buf *sf_bufs;
431 TUNABLE_INT_FETCH("kern.ipc.nsfbufs", &nsfbufs);
433 mtx_init(&sf_freelist.sf_lock, "sf_bufs list lock", NULL, MTX_DEF);
434 SLIST_INIT(&sf_freelist.sf_head);
435 sf_base = kva_alloc(nsfbufs * PAGE_SIZE);
436 sf_bufs = malloc(nsfbufs * sizeof(struct sf_buf), M_TEMP,
/* Buffer i owns the i-th page of the reserved KVA range. */
438 for (i = 0; i < nsfbufs; i++) {
439 sf_bufs[i].kva = sf_base + i * PAGE_SIZE;
440 SLIST_INSERT_HEAD(&sf_freelist.sf_head, &sf_bufs[i], free_list);
442 sf_buf_alloc_want = 0;
446 * Get an sf_buf from the freelist. Will block if none are available.
/*
 * sf_buf_alloc: take an sf_buf from the free list and map a page at its KVA.
 *
 * m     - page to be mapped.
 * flags - SFB_NOWAIT and SFB_CATCH are examined.
 *
 * With the free-list mutex held, loops while the list is empty.  Each pass
 * checks SFB_NOWAIT, bumps the sf_allocwait statistic and sleeps on
 * &sf_freelist (the channel sf_buf_free() wakes) at priority PVM, adding
 * PCATCH when SFB_CATCH is set.  Once a buffer is available it is removed
 * from the list, nsfbufspeak is raised to at least nsfbufsused, and the
 * page is entered at sf->kva with pmap_qenter().
 *
 * NOTE(review): not visible in this listing: the action taken for
 * SFB_NOWAIT, the handling of msleep()'s return value, the updates of
 * sf_buf_alloc_want / nsfbufsused, the store of m into sf->m, and the
 * return statement.  Confirm against the full file.
 */
449 sf_buf_alloc(struct vm_page *m, int flags)
454 mtx_lock(&sf_freelist.sf_lock);
455 while ((sf = SLIST_FIRST(&sf_freelist.sf_head)) == NULL) {
456 if (flags & SFB_NOWAIT)
459 SFSTAT_INC(sf_allocwait);
460 error = msleep(&sf_freelist, &sf_freelist.sf_lock,
461 (flags & SFB_CATCH) ? PCATCH | PVM : PVM, "sfbufa", 0);
465 * If we got a signal, don't risk going back to sleep.
471 SLIST_REMOVE_HEAD(&sf_freelist.sf_head, free_list);
474 nsfbufspeak = imax(nsfbufspeak, nsfbufsused);
475 pmap_qenter(sf->kva, &sf->m, 1);
477 mtx_unlock(&sf_freelist.sf_lock);
482 * Release resources back to the system.
/*
 * sf_buf_free: return an sf_buf to the free list.
 *
 * sf - buffer to release.
 *
 * Removes the one-page mapping at sf->kva with pmap_qremove() before taking
 * the free-list mutex, then pushes the buffer back on the list.  If
 * sf_buf_alloc_want is non-zero, wakes threads sleeping on &sf_freelist
 * (the channel used by sf_buf_alloc()).
 * NOTE(review): one line between the list insert and the wakeup test is not
 * visible in this listing (presumably the nsfbufsused decrement).
 */
485 sf_buf_free(struct sf_buf *sf)
488 pmap_qremove(sf->kva, 1);
489 mtx_lock(&sf_freelist.sf_lock);
490 SLIST_INSERT_HEAD(&sf_freelist.sf_head, sf, free_list);
492 if (sf_buf_alloc_want > 0)
493 wakeup(&sf_freelist);
494 mtx_unlock(&sf_freelist.sf_lock);
501 /* Nothing to do here - busdma bounce buffers are not implemented. */
/*
 * uma_small_alloc: supply one page for a UMA allocation, addressed through
 * the direct map (TLB_PHYS_TO_DIRECT).
 *
 * zone  - not referenced in the visible lines.
 * bytes - not referenced in the visible lines.
 * flags - out parameter; set to UMA_SLAB_PRIV.
 * wait  - malloc(9)-style flags; converted with malloc2vm_flags() and also
 *         tested for M_ZERO.
 *
 * The page is requested with VM_ALLOC_WIRED | VM_ALLOC_NOOBJ.  If data-cache
 * colouring is in effect (dcache_color_ignore == 0) and the page's recorded
 * colour differs from the colour of its physical address, the page must
 * have no mappings left (asserted); its colour is then updated and the
 * data cache is invalidated for that page.  The page is zeroed only when
 * M_ZERO was requested and the page is not already marked PG_ZERO.
 *
 * NOTE(review): the handling of a failed vm_page_alloc() and the return
 * statement are not visible in this listing; presumably va is returned.
 */
505 uma_small_alloc(uma_zone_t zone, vm_size_t bytes, u_int8_t *flags, int wait)
512 PMAP_STATS_INC(uma_nsmall_alloc);
514 *flags = UMA_SLAB_PRIV;
515 pflags = malloc2vm_flags(wait) | VM_ALLOC_WIRED;
518 m = vm_page_alloc(NULL, 0, pflags | VM_ALLOC_NOOBJ);
528 pa = VM_PAGE_TO_PHYS(m);
529 if (dcache_color_ignore == 0 && m->md.color != DCACHE_COLOR(pa)) {
530 KASSERT(m->md.colors[0] == 0 && m->md.colors[1] == 0,
531 ("uma_small_alloc: free page %p still has mappings!", m));
532 PMAP_STATS_INC(uma_nsmall_alloc_oc);
533 m->md.color = DCACHE_COLOR(pa);
534 dcache_page_inval(pa);
536 va = (void *)TLB_PHYS_TO_DIRECT(pa);
537 if ((wait & M_ZERO) && (m->flags & PG_ZERO) == 0)
538 cpu_block_zero(va, PAGE_SIZE);
/*
 * uma_small_free: release a page obtained from uma_small_alloc().
 *
 * mem   - direct-mapped address of the page; converted back to a physical
 *         address and then to its vm_page.
 * size  - not referenced in the visible lines.
 * flags - not referenced in the visible lines.
 *
 * The global wired-page count (cnt.v_wire_count) is decremented atomically.
 * NOTE(review): two lines between the page lookup and the counter update
 * are not visible in this listing (presumably unwiring and freeing the
 * page) -- confirm against the full file.
 */
543 uma_small_free(void *mem, vm_size_t size, u_int8_t flags)
547 PMAP_STATS_INC(uma_nsmall_free);
548 m = PHYS_TO_VM_PAGE(TLB_DIRECT_TO_PHYS((vm_offset_t)mem));
551 atomic_subtract_int(&cnt.v_wire_count, 1);