2 * Copyright (c) 1982, 1986 The Regents of the University of California.
3 * Copyright (c) 1989, 1990 William Jolitz
4 * Copyright (c) 1994 John Dyson
7 * This code is derived from software contributed to Berkeley by
8 * the Systems Programming Group of the University of Utah Computer
9 * Science Department, and William Jolitz.
11 * Redistribution and use in source and binary forms, with or without
12 * modification, are permitted provided that the following conditions
14 * 1. Redistributions of source code must retain the above copyright
15 * notice, this list of conditions and the following disclaimer.
16 * 2. Redistributions in binary form must reproduce the above copyright
17 * notice, this list of conditions and the following disclaimer in the
18 * documentation and/or other materials provided with the distribution.
19 * 3. All advertising materials mentioning features or use of this software
20 * must display the following acknowledgement:
21 * This product includes software developed by the University of
22 * California, Berkeley and its contributors.
23 * 4. Neither the name of the University nor the names of its contributors
24 * may be used to endorse or promote products derived from this software
25 * without specific prior written permission.
27 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
28 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
29 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
30 * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
31 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
32 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
33 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
34 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
35 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
36 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
39 * from: @(#)vm_machdep.c 7.3 (Berkeley) 5/13/91
40 * Utah $Hdr: vm_machdep.c 1.16.1.1 89/06/23$
43 #include <sys/cdefs.h>
44 __FBSDID("$FreeBSD$");
46 #include <sys/param.h>
47 #include <sys/systm.h>
48 #include <sys/kernel.h>
49 #include <sys/malloc.h>
52 #include <sys/socketvar.h>
53 #include <sys/sf_buf.h>
54 #include <sys/syscall.h>
55 #include <sys/sysctl.h>
56 #include <sys/sysent.h>
57 #include <sys/unistd.h>
58 #include <machine/cpu.h>
59 #include <machine/frame.h>
60 #include <machine/pcb.h>
61 #include <machine/sysarch.h>
63 #include <sys/mutex.h>
67 #include <vm/vm_extern.h>
68 #include <vm/vm_kern.h>
69 #include <vm/vm_page.h>
70 #include <vm/vm_map.h>
71 #include <vm/vm_param.h>
72 #include <vm/vm_pageout.h>
74 #include <vm/uma_int.h>
76 #include <machine/md_var.h>
77 #include <machine/vfp.h>
80 * struct switchframe and trapframe must both be a multiple of 8
81 * for correct stack alignment.
/*
 * Build-time checks that pin struct switchframe to 48 bytes and
 * struct trapframe to 80 bytes (both multiples of 8).
 */
83 CTASSERT(sizeof(struct switchframe) == 48);
84 CTASSERT(sizeof(struct trapframe) == 80);
/*
 * sf_buf count expression that scales with maxusers.
 * NOTE(review): its use is not visible in this excerpt; presumably it is
 * the default for nsfbufs -- confirm.
 */
87 #define NSFBUFS (512 + maxusers * 16)
/* Usage counters; nsfbufspeak is updated with imax() in sf_buf_alloc(). */
91 static int nsfbufspeak;
92 static int nsfbufsused;
/*
 * Export the three sf_buf counters under kern.ipc.  nsfbufs uses
 * CTLFLAG_RDTUN and is fetched as a tunable in sf_buf_init(); the other
 * two use CTLFLAG_RD.
 */
94 SYSCTL_INT(_kern_ipc, OID_AUTO, nsfbufs, CTLFLAG_RDTUN, &nsfbufs, 0,
95 "Maximum number of sendfile(2) sf_bufs available");
96 SYSCTL_INT(_kern_ipc, OID_AUTO, nsfbufspeak, CTLFLAG_RD, &nsfbufspeak, 0,
97 "Number of sendfile(2) sf_bufs at peak usage");
98 SYSCTL_INT(_kern_ipc, OID_AUTO, nsfbufsused, CTLFLAG_RD, &nsfbufsused, 0,
99 "Number of sendfile(2) sf_bufs in use");
/*
 * Register sf_buf_init() as a SYSINIT callback at SI_SUB_MBUF /
 * SI_ORDER_ANY; it is invoked with a NULL argument.
 */
101 static void sf_buf_init(void *arg);
102 SYSINIT(sock_sf, SI_SUB_MBUF, SI_ORDER_ANY, sf_buf_init, NULL);
/* List head type used for each hash chain of sf_bufs. */
104 LIST_HEAD(sf_head, sf_buf);
107 * A hash table of active sendfile(2) buffers
/* Table and mask are both produced by hashinit() in sf_buf_init(). */
109 static struct sf_head *sf_buf_active;
110 static u_long sf_buf_hashmask;
/*
 * Hash a vm_page pointer: its index within vm_page_array, masked with
 * sf_buf_hashmask.
 */
112 #define SF_BUF_HASH(m) (((m) - vm_page_array) & sf_buf_hashmask)
/* Queue of sf_bufs available for reuse; also the msleep()/wakeup() channel. */
114 static TAILQ_HEAD(, sf_buf) sf_buf_freelist;
/*
 * sf_buf_free() only calls wakeup() when this is non-zero.
 * NOTE(review): the lines that modify it in sf_buf_alloc() are not
 * visible in this excerpt.
 */
115 static u_int sf_buf_alloc_want;
118 * A lock used to synchronize access to the hash table and free list
120 static struct mtx sf_buf_lock;
123 * Finish a fork operation, with process p2 nearly set up.
124 * Copy and update the pcb, set up the stack so that the child
125 * is ready to run and return to user mode.
/*
 * cpu_fork: build td2's pcb and trapframe from td1's.
 *
 *   td1   - thread whose pcb, trapframe and process md data are copied
 *   p2    - new process; its vmspace pmap is installed in the new pcb
 *   td2   - new thread whose kernel stack receives the pcb and trapframe
 *   flags - fork flags; only RFPROC is tested in the visible code
 *
 * NOTE(review): this excerpt omits several lines (some declarations and
 * assignments, braces, preprocessor directives), so statements guarded
 * by a condition may be missing from view.
 */
128 cpu_fork(register struct thread *td1, register struct proc *p2,
129 struct thread *td2, int flags)
132 struct trapframe *tf;
135 if ((flags & RFPROC) == 0)
138 /* Point the pcb to the top of the stack */
/*
 * The cast applies before the subtraction, so "- 1" steps back one
 * struct pcb from the end of the kernel stack.
 */
139 pcb2 = (struct pcb *)
140 (td2->td_kstack + td2->td_kstack_pages * PAGE_SIZE) - 1;
142 #ifndef CPU_XSCALE_CORE3
143 pmap_use_minicache(td2->td_kstack, td2->td_kstack_pages * PAGE_SIZE);
148 /* Clone td1's pcb */
149 bcopy(td1->td_pcb, pcb2, sizeof(*pcb2));
151 /* Point to mdproc and then copy over td1's contents */
153 bcopy(&td1->td_proc->p_md, mdp2, sizeof(*mdp2));
155 /* Point the frame to the stack in front of pcb and copy td1's frame */
156 td2->td_frame = (struct trapframe *)pcb2 - 1;
157 *td2->td_frame = *td1->td_frame;
160 * Create a new fresh stack for the new process.
161 * Copy the trap frame for the return to user mode as if from a
162 * syscall. This copies most of the user mode register values.
/*
 * Saved switch registers: r4 = fork_return, r5 = td2,
 * lr = fork_trampoline, sp = the aligned trapframe address.
 * cpu_set_fork_handler() labels r4/r5 as "function" and "first arg".
 */
164 pmap_set_pcb_pagedir(vmspace_pmap(p2->p_vmspace), pcb2);
165 pcb2->pcb_regs.sf_r4 = (register_t)fork_return;
166 pcb2->pcb_regs.sf_r5 = (register_t)td2;
167 pcb2->pcb_regs.sf_lr = (register_t)fork_trampoline;
168 pcb2->pcb_regs.sf_sp = STACKALIGN(td2->td_frame);
/*
 * Reset the new pcb's VFP fields.
 * NOTE(review): presumably -1 means "VFP state not live on any CPU"
 * -- confirm.
 */
170 pcb2->pcb_vfpcpu = -1;
171 pcb2->pcb_vfpstate.fpscr = VFPSCR_DN | VFPSCR_FZ;
/*
 * Clear the carry bit in the saved PSR; cpu_set_syscall_retval() sets
 * that bit to flag an error and clears it on success.
 */
174 tf->tf_spsr &= ~PSR_C;
179 /* Setup to release spin count in fork_exit(). */
180 td2->td_md.md_spinlock_count = 1;
/* NOTE(review): the doubled ';' below is a harmless empty statement. */
181 td2->td_md.md_saved_cspr = PSR_SVC32_MODE;;
/*
 * md_tp is read from ARM_TP_ADDRESS when that macro is defined.
 * NOTE(review): the second assignment is presumably the alternative
 * branch -- its #else/#endif are not visible here.
 */
182 #ifdef ARM_TP_ADDRESS
183 td2->td_md.md_tp = *(register_t *)ARM_TP_ADDRESS;
185 td2->td_md.md_tp = td1->td_md.md_tp;
190 cpu_thread_swapin(struct thread *td)
195 cpu_thread_swapout(struct thread *td)
200 * Detach mapped page and release resources back to the system.
/*
 * sf_buf_free: release a reference to sf, under sf_buf_lock.
 *
 * When sf->ref_count is 0 the buffer is appended to the free list, its
 * KVA mapping is removed with pmap_kremove(), it is unlinked from its
 * hash chain, and sleepers on &sf_buf_freelist are woken if
 * sf_buf_alloc_want is non-zero.
 *
 * NOTE(review): the statement that decrements ref_count is not visible
 * in this excerpt.
 */
203 sf_buf_free(struct sf_buf *sf)
206 mtx_lock(&sf_buf_lock);
208 if (sf->ref_count == 0) {
209 TAILQ_INSERT_TAIL(&sf_buf_freelist, sf, free_entry);
211 pmap_kremove(sf->kva);
213 LIST_REMOVE(sf, list_entry);
/* Same channel that sf_buf_alloc() passes to msleep(). */
214 if (sf_buf_alloc_want > 0)
215 wakeup(&sf_buf_freelist);
217 mtx_unlock(&sf_buf_lock);
221 * Allocate a pool of sf_bufs (sendfile(2) or "super-fast" if you prefer. :-))
/*
 * sf_buf_init: SYSINIT callback that sets up the sf_buf pool.
 *
 * Fetches the kern.ipc.nsfbufs tunable, creates the hash table,
 * allocates nsfbufs pages of kernel virtual address space and an array
 * of nsfbufs struct sf_buf, puts every buffer on the free list and
 * initializes sf_buf_lock.  arg is not used in the visible code.
 *
 * NOTE(review): the malloc() flags and any failure check are on lines
 * not visible in this excerpt.
 */
224 sf_buf_init(void *arg)
226 struct sf_buf *sf_bufs;
231 TUNABLE_INT_FETCH("kern.ipc.nsfbufs", &nsfbufs);
233 sf_buf_active = hashinit(nsfbufs, M_TEMP, &sf_buf_hashmask);
234 TAILQ_INIT(&sf_buf_freelist);
235 sf_base = kva_alloc(nsfbufs * PAGE_SIZE);
236 sf_bufs = malloc(nsfbufs * sizeof(struct sf_buf), M_TEMP,
/* Give buffer i the i-th page of the reserved KVA range. */
238 for (i = 0; i < nsfbufs; i++) {
239 sf_bufs[i].kva = sf_base + i * PAGE_SIZE;
240 TAILQ_INSERT_TAIL(&sf_buf_freelist, &sf_bufs[i], free_entry);
242 sf_buf_alloc_want = 0;
243 mtx_init(&sf_buf_lock, "sf_buf", NULL, MTX_DEF);
247 * Get an sf_buf from the freelist. Will block if none are available.
/*
 * sf_buf_alloc: obtain an sf_buf for page m.
 *
 *   m     - page to map; also selects the hash chain via SF_BUF_HASH()
 *   flags - SFB_NOWAIT and SFB_CATCH are tested in the visible code
 *
 * The hash chain for m is searched first; otherwise a buffer is taken
 * from the free list, sleeping on &sf_buf_freelist while it is empty.
 * A buffer taken from the free list is moved to m's hash chain and
 * mapped at sf->kva with pmap_kenter().
 *
 * NOTE(review): the return statements, the match test inside the hash
 * walk, the reference-count updates and the SFB_NOWAIT action are on
 * lines not visible in this excerpt.
 */
250 sf_buf_alloc(struct vm_page *m, int flags)
252 struct sf_head *hash_list;
256 hash_list = &sf_buf_active[SF_BUF_HASH(m)];
257 mtx_lock(&sf_buf_lock);
258 LIST_FOREACH(sf, hash_list, list_entry) {
/*
 * NOTE(review): presumably ref_count was just raised from 0, i.e. the
 * buffer was still on the free list -- the increment is not visible.
 */
261 if (sf->ref_count == 1) {
262 TAILQ_REMOVE(&sf_buf_freelist, sf, free_entry);
264 nsfbufspeak = imax(nsfbufspeak, nsfbufsused);
/* No match in the hash chain: wait until the free list is non-empty. */
269 while ((sf = TAILQ_FIRST(&sf_buf_freelist)) == NULL) {
270 if (flags & SFB_NOWAIT)
273 SFSTAT_INC(sf_allocwait);
/* PCATCH is added to the sleep priority only when SFB_CATCH is set. */
274 error = msleep(&sf_buf_freelist, &sf_buf_lock,
275 (flags & SFB_CATCH) ? PCATCH | PVM : PVM, "sfbufa", 0);
280 * If we got a signal, don't risk going back to sleep.
285 TAILQ_REMOVE(&sf_buf_freelist, sf, free_entry);
/*
 * NOTE(review): this LIST_REMOVE is presumably guarded by a check that
 * the buffer was previously hashed -- the guard is not visible.
 */
287 LIST_REMOVE(sf, list_entry);
288 LIST_INSERT_HEAD(hash_list, sf, list_entry);
292 nsfbufspeak = imax(nsfbufspeak, nsfbufsused);
293 pmap_kenter(sf->kva, VM_PAGE_TO_PHYS(sf->m));
295 mtx_unlock(&sf_buf_lock);
/*
 * cpu_set_syscall_retval: store a system call's result in td's trapframe.
 *
 *   td    - thread whose td_frame and td_retval[] are used
 *   error - result code selecting which of the visible actions runs
 *
 * Visible actions: copy td_retval[] into r0/r1 and clear the carry bit;
 * back tf_pc up by one instruction; or put error in r0 and set the
 * carry bit.
 *
 * NOTE(review): the switch/case labels selecting between these actions
 * are on lines not visible in this excerpt.
 */
300 cpu_set_syscall_retval(struct thread *td, int error)
302 struct trapframe *frame;
308 frame = td->td_frame;
/*
 * NOTE(review): "frerebsd6_lseek" in the text below is a typo for
 * freebsd6_lseek (compare SYS_freebsd6_lseek in the code).
 */
313 * __syscall returns an off_t while most other syscalls return an
314 * int. As an off_t is 64-bits and an int is 32-bits we need to
315 * place the returned data into r1. As the lseek and frerebsd6_lseek
316 * syscalls also return an off_t they do not need this fixup.
/*
 * Read the instruction word just before the saved pc and keep its low
 * 20 bits as the call number.
 */
321 call = *(u_int32_t *)(frame->tf_pc - INSN_SIZE) & 0x000fffff;
323 if (call == SYS___syscall) {
/*
 * The real syscall code is read from the argument registers starting
 * at r0, at index _QUAD_LOWWORD, and masked with sv_mask when set.
 */
324 register_t *ap = &frame->tf_r0;
325 register_t code = ap[_QUAD_LOWWORD];
326 if (td->td_proc->p_sysent->sv_mask)
327 code &= td->td_proc->p_sysent->sv_mask;
328 fixup = (code != SYS_freebsd6_lseek && code != SYS_lseek)
/*
 * NOTE(review): this store is presumably the fixup path (a 32-bit
 * result placed in r1) -- its guard is not visible.
 */
337 frame->tf_r1 = td->td_retval[0];
339 frame->tf_r0 = td->td_retval[0];
340 frame->tf_r1 = td->td_retval[1];
342 frame->tf_spsr &= ~PSR_C; /* carry bit */
346 * Reconstruct the pc to point at the swi.
348 frame->tf_pc -= INSN_SIZE;
354 frame->tf_r0 = error;
355 frame->tf_spsr |= PSR_C; /* carry bit */
361 * Initialize machine state (pcb and trap frame) for a new thread about to
362 * upcall. Put enough state in the new thread's PCB to get it to go back to
363 * userret(), where we can intercept it again to set the return (upcall)
364 * address and stack, along with those from upcalls that are from other sources
365 * such as those generated in thread_userret() itself.
/*
 * cpu_set_upcall: initialize td's machine state from td0.
 *
 *   td  - thread being set up
 *   td0 - thread whose trapframe and pcb are copied
 *
 * After the copy, the saved switch registers are pointed at
 * fork_return/fork_trampoline with td as the argument (the same setup
 * cpu_fork() performs), and the trapframe is adjusted so r0 is 0 and
 * the carry bit is clear.
 */
368 cpu_set_upcall(struct thread *td, struct thread *td0)
371 bcopy(td0->td_frame, td->td_frame, sizeof(struct trapframe));
372 bcopy(td0->td_pcb, td->td_pcb, sizeof(struct pcb));
374 td->td_pcb->pcb_regs.sf_r4 = (register_t)fork_return;
375 td->td_pcb->pcb_regs.sf_r5 = (register_t)td;
376 td->td_pcb->pcb_regs.sf_lr = (register_t)fork_trampoline;
377 td->td_pcb->pcb_regs.sf_sp = STACKALIGN(td->td_frame);
/* Carry clear and r0 == 0: the "no error" state used by syscall return. */
379 td->td_frame->tf_spsr &= ~PSR_C;
380 td->td_frame->tf_r0 = 0;
382 /* Setup to release spin count in fork_exit(). */
383 td->td_md.md_spinlock_count = 1;
384 td->td_md.md_saved_cspr = PSR_SVC32_MODE;
388 * Set the machine state for performing an upcall that has to
389 * be done in thread_userret() so that those upcalls generated
390 * in thread_userret() itself can be done as well.
/*
 * cpu_set_upcall_kse: point td's trapframe at a user-mode entry point.
 *
 *   td    - thread whose td_frame is rewritten
 *   entry - function address stored in tf_pc
 *   arg   - value stored in tf_r0
 *   stack - supplies ss_sp/ss_size; the user stack pointer becomes the
 *           aligned end of that region
 *
 * tf_spsr is set to PSR_USR32_MODE.
 *
 * NOTE(review): the declaration of the stack parameter is on a line not
 * visible in this excerpt.
 */
393 cpu_set_upcall_kse(struct thread *td, void (*entry)(void *), void *arg,
396 struct trapframe *tf = td->td_frame;
398 tf->tf_usr_sp = STACKALIGN((int)stack->ss_sp + stack->ss_size);
399 tf->tf_pc = (int)entry;
400 tf->tf_r0 = (int)arg;
401 tf->tf_spsr = PSR_USR32_MODE;
/*
 * cpu_set_user_tls: record tls_base as td's thread pointer (md_tp).
 *
 * When td is the current thread and ARM_TP_ADDRESS is defined, the value
 * is also written to that address.
 *
 * NOTE(review): the branch used when ARM_TP_ADDRESS is not defined and
 * the return statement are not visible in this excerpt.
 */
405 cpu_set_user_tls(struct thread *td, void *tls_base)
408 td->td_md.md_tp = (register_t)tls_base;
409 if (td == curthread) {
411 #ifdef ARM_TP_ADDRESS
412 *(register_t *)ARM_TP_ADDRESS = (register_t)tls_base;
422 cpu_thread_exit(struct thread *td)
/*
 * cpu_thread_alloc: derive td_pcb and td_frame from td's kernel stack.
 *
 * td_frame is placed one struct trapframe below td_pcb, the same layout
 * cpu_fork() builds.
 *
 * NOTE(review): the continuation of the td_pcb assignment is on a line
 * not visible in this excerpt.
 */
427 cpu_thread_alloc(struct thread *td)
429 td->td_pcb = (struct pcb *)(td->td_kstack + td->td_kstack_pages *
432 * Ensure td_frame is aligned to an 8 byte boundary as it will be
433 * placed into the stack pointer which must be 8 byte aligned in
/* The cast applies before "- 1", so this steps back one trapframe. */
436 td->td_frame = (struct trapframe *)((caddr_t)td->td_pcb) - 1;
439 #ifndef CPU_XSCALE_CORE3
440 pmap_use_minicache(td->td_kstack, td->td_kstack_pages * PAGE_SIZE);
446 cpu_thread_free(struct thread *td)
451 cpu_thread_clean(struct thread *td)
456 * Intercept the return address from a freshly forked process that has NOT
457 * been scheduled yet.
459 * This is needed to make kernel threads stay in kernel mode.
/*
 * cpu_set_fork_handler: replace the function/argument pair held in td's
 * saved switch registers.
 *
 *   td   - thread whose pcb is modified
 *   func - stored in sf_r4 (cpu_fork() initially stores fork_return there)
 *   arg  - stored in sf_r5 (cpu_fork() initially stores the thread there)
 */
462 cpu_set_fork_handler(struct thread *td, void (*func)(void *), void *arg)
464 td->td_pcb->pcb_regs.sf_r4 = (register_t)func; /* function */
465 td->td_pcb->pcb_regs.sf_r5 = (register_t)arg; /* first arg */
469 * Software interrupt handler for queued VM system processing.
475 if (busdma_swi_pending)
480 cpu_exit(struct thread *td)