2 * Copyright (c) 1982, 1986 The Regents of the University of California.
3 * Copyright (c) 1989, 1990 William Jolitz
4 * Copyright (c) 1994 John Dyson
7 * This code is derived from software contributed to Berkeley by
8 * the Systems Programming Group of the University of Utah Computer
9 * Science Department, and William Jolitz.
11 * Redistribution and use in source and binary forms, with or without
12 * modification, are permitted provided that the following conditions
14 * 1. Redistributions of source code must retain the above copyright
15 * notice, this list of conditions and the following disclaimer.
16 * 2. Redistributions in binary form must reproduce the above copyright
17 * notice, this list of conditions and the following disclaimer in the
18 * documentation and/or other materials provided with the distribution.
19 * 3. All advertising materials mentioning features or use of this software
20 * must display the following acknowledgement:
21 * This product includes software developed by the University of
22 * California, Berkeley and its contributors.
23 * 4. Neither the name of the University nor the names of its contributors
24 * may be used to endorse or promote products derived from this software
25 * without specific prior written permission.
27 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
28 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
29 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
30 * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
31 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
32 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
33 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
34 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
35 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
36 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
39 * from: @(#)vm_machdep.c 7.3 (Berkeley) 5/13/91
40 * Utah $Hdr: vm_machdep.c 1.16.1.1 89/06/23$
43 #include <sys/cdefs.h>
44 __FBSDID("$FreeBSD$");
46 #include <sys/param.h>
47 #include <sys/systm.h>
48 #include <sys/kernel.h>
49 #include <sys/malloc.h>
52 #include <sys/socketvar.h>
53 #include <sys/sf_buf.h>
54 #include <sys/syscall.h>
55 #include <sys/sysent.h>
56 #include <sys/unistd.h>
57 #include <machine/cpu.h>
58 #include <machine/pcb.h>
59 #include <machine/sysarch.h>
61 #include <sys/mutex.h>
65 #include <vm/vm_extern.h>
66 #include <vm/vm_kern.h>
67 #include <vm/vm_page.h>
68 #include <vm/vm_map.h>
69 #include <vm/vm_param.h>
70 #include <vm/vm_pageout.h>
72 #include <vm/uma_int.h>
74 #include <machine/md_var.h>
77 #define NSFBUFS (512 + maxusers * 16)
80 #ifndef ARM_USE_SMALL_ALLOC
81 static void sf_buf_init(void *arg);
82 SYSINIT(sock_sf, SI_SUB_MBUF, SI_ORDER_ANY, sf_buf_init, NULL);
84 LIST_HEAD(sf_head, sf_buf);
88 * A hash table of active sendfile(2) buffers
90 static struct sf_head *sf_buf_active;
91 static u_long sf_buf_hashmask;
/* Bucket index: the page's position in vm_page_array, masked by the hash mask. */
93 #define SF_BUF_HASH(m) (((m) - vm_page_array) & sf_buf_hashmask)
/* sf_bufs available for (re)use; sf_buf_alloc() takes from the head. */
95 static TAILQ_HEAD(, sf_buf) sf_buf_freelist;
/* sf_buf_free() only issues a wakeup on the free list when this is non-zero. */
96 static u_int sf_buf_alloc_want;
99 * A lock used to synchronize access to the hash table and free list
101 static struct mtx sf_buf_lock;
105 * Finish a fork operation, with process p2 nearly set up.
106 * Copy and update the pcb, set up the stack so that the child
107 * is ready to run and return to user mode.
/*
 * cpu_fork: derive the machine-dependent state of the child thread td2
 * (pcb, trap frame, switch frame) from the parent thread td1.
 * NOTE(review): the embedded line numbers have gaps, so the return type,
 * braces and some statements are not visible in this listing; the notes
 * below describe only the statements that are shown.
 */
110 cpu_fork(register struct thread *td1, register struct proc *p2,
111 struct thread *td2, int flags)
114 struct trapframe *tf;
115 struct switchframe *sf;
/* RFPROC guard; the statement it controls is not visible here. */
118 if ((flags & RFPROC) == 0)
/* The child's pcb is the last struct pcb-sized slot of its kernel stack. */
120 pcb2 = (struct pcb *)(td2->td_kstack + td2->td_kstack_pages * PAGE_SIZE) - 1;
122 #ifndef CPU_XSCALE_CORE3
123 pmap_use_minicache(td2->td_kstack, td2->td_kstack_pages * PAGE_SIZE);
/* Start the child from a copy of the parent's pcb and per-process MD data. */
127 bcopy(td1->td_pcb, pcb2, sizeof(*pcb2));
129 bcopy(&td1->td_proc->p_md, mdp2, sizeof(*mdp2));
/* Point the undefined-mode and SVC stack pointers into the child's own kstack. */
130 pcb2->un_32.pcb32_und_sp = td2->td_kstack + USPACE_UNDEF_STACK_TOP;
131 pcb2->un_32.pcb32_sp = td2->td_kstack +
132 USPACE_SVC_STACK_TOP - sizeof(*pcb2);
135 (struct trapframe *)pcb2->un_32.pcb32_sp - 1;
/* The child gets a copy of the parent's trap frame. */
136 *tf = *td1->td_frame;
/* The switch frame sits directly below the trap frame. */
137 sf = (struct switchframe *)tf - 1;
/* First switch-in lands in fork_trampoline with r4 = fork_return, r5 = td2. */
138 sf->sf_r4 = (u_int)fork_return;
139 sf->sf_r5 = (u_int)td2;
140 sf->sf_pc = (u_int)fork_trampoline;
/* Clear the carry bit in the child's saved status register. */
141 tf->tf_spsr &= ~PSR_C_bit;
/* The saved stack pointer now refers to the freshly built switch frame. */
144 pcb2->un_32.pcb32_sp = (u_int)sf;
146 /* Setup to release spin count in fork_exit(). */
147 td2->td_md.md_spinlock_count = 1;
148 td2->td_md.md_saved_cspr = 0;
/* Seed the child's thread pointer from the value currently at ARM_TP_ADDRESS. */
149 td2->td_md.md_tp = *(register_t *)ARM_TP_ADDRESS;
153 cpu_thread_swapin(struct thread *td)
158 cpu_thread_swapout(struct thread *td)
163 * Detach mapped page and release resources back to the system.
/*
 * sf_buf_free: release an sf_buf.  All visible work is compiled only when
 * ARM_USE_SMALL_ALLOC is not defined and runs under sf_buf_lock.
 * NOTE(review): the statement that adjusts ref_count before the test below
 * is not visible in this listing (embedded line 170 is missing).
 */
166 sf_buf_free(struct sf_buf *sf)
168 #ifndef ARM_USE_SMALL_ALLOC
169 mtx_lock(&sf_buf_lock);
/* No references left: recycle the buffer. */
171 if (sf->ref_count == 0) {
172 TAILQ_INSERT_TAIL(&sf_buf_freelist, sf, free_entry);
/* Drop the kernel mapping at the buffer's KVA. */
174 pmap_kremove(sf->kva);
/* Take it off its hash chain so lookups no longer find it. */
176 LIST_REMOVE(sf, list_entry);
/* Wake sleepers on the free list (sf_buf_alloc() msleeps on this address). */
177 if (sf_buf_alloc_want > 0)
178 wakeup(&sf_buf_freelist);
180 mtx_unlock(&sf_buf_lock);
184 #ifndef ARM_USE_SMALL_ALLOC
186 * Allocate a pool of sf_bufs (sendfile(2) or "super-fast" if you prefer. :-))
/*
 * sf_buf_init: boot-time setup of the sf_buf pool (registered through the
 * SYSINIT near the top of the file).  Builds the hash table, reserves one
 * page of KVA per buffer and places every buffer on the free list.
 * NOTE(review): several declarations and the default sizing of nsfbufs are
 * on lines missing from this listing.
 */
189 sf_buf_init(void *arg)
191 struct sf_buf *sf_bufs;
/* Allow the pool size to be overridden by the kern.ipc.nsfbufs tunable. */
196 TUNABLE_INT_FETCH("kern.ipc.nsfbufs", &nsfbufs);
/* Hash table sized from nsfbufs; hashinit() also returns the bucket mask. */
198 sf_buf_active = hashinit(nsfbufs, M_TEMP, &sf_buf_hashmask);
199 TAILQ_INIT(&sf_buf_freelist);
/* One page of kernel virtual address space per buffer. */
200 sf_base = kmem_alloc_nofault(kernel_map, nsfbufs * PAGE_SIZE);
/* Array of descriptors; the malloc flags argument is on a line not shown. */
201 sf_bufs = malloc(nsfbufs * sizeof(struct sf_buf), M_TEMP,
/* Give each descriptor its own page-sized KVA slot and mark it free. */
203 for (i = 0; i < nsfbufs; i++) {
204 sf_bufs[i].kva = sf_base + i * PAGE_SIZE;
205 TAILQ_INSERT_TAIL(&sf_buf_freelist, &sf_bufs[i], free_entry);
207 sf_buf_alloc_want = 0;
208 mtx_init(&sf_buf_lock, "sf_buf", NULL, MTX_DEF);
213 * Get an sf_buf from the freelist. Will block if none are available.
/*
 * sf_buf_alloc: obtain an sf_buf for page m.
 * With ARM_USE_SMALL_ALLOC the page pointer itself is returned, cast to
 * struct sf_buf *.  Otherwise the hash chain for m is searched first and,
 * failing that, a buffer is taken from the free list and mapped.
 * flags: SFB_NOWAIT and SFB_CATCH are the bits tested below.
 * NOTE(review): the match test inside the hash walk, the ref_count updates,
 * the error exits and the final return are on lines missing from this
 * listing.
 */
216 sf_buf_alloc(struct vm_page *m, int flags)
218 #ifdef ARM_USE_SMALL_ALLOC
219 return ((struct sf_buf *)m);
221 struct sf_head *hash_list;
/* Select the bucket for this page, then search it under the lock. */
225 hash_list = &sf_buf_active[SF_BUF_HASH(m)];
226 mtx_lock(&sf_buf_lock);
227 LIST_FOREACH(sf, hash_list, list_entry) {
/* ref_count of 1 here: the buffer was on the free list, so reclaim it. */
230 if (sf->ref_count == 1) {
231 TAILQ_REMOVE(&sf_buf_freelist, sf, free_entry);
233 nsfbufspeak = imax(nsfbufspeak, nsfbufsused);
/* Not cached: wait until the free list has an entry. */
238 while ((sf = TAILQ_FIRST(&sf_buf_freelist)) == NULL) {
/* SFB_NOWAIT test; the statement it controls is not visible here. */
239 if (flags & SFB_NOWAIT)
242 mbstat.sf_allocwait++;
/* Sleep on the free list; sf_buf_lock is handed to msleep().  PCATCH only with SFB_CATCH. */
243 error = msleep(&sf_buf_freelist, &sf_buf_lock,
244 (flags & SFB_CATCH) ? PCATCH | PVM : PVM, "sfbufa", 0);
249 * If we got a signal, don't risk going back to sleep.
/* Claim the buffer and move it onto the hash chain for m. */
254 TAILQ_REMOVE(&sf_buf_freelist, sf, free_entry);
256 LIST_REMOVE(sf, list_entry);
257 LIST_INSERT_HEAD(hash_list, sf, list_entry);
261 nsfbufspeak = imax(nsfbufspeak, nsfbufsused);
/* Map the physical page of sf->m at the buffer's KVA. */
262 pmap_kenter(sf->kva, VM_PAGE_TO_PHYS(sf->m));
264 mtx_unlock(&sf_buf_lock);
/*
 * cpu_set_syscall_retval: write the outcome of a system call into the
 * thread's trap frame (r0/r1, the carry bit in spsr and, for one case, pc).
 * NOTE(review): the switch/case labels selecting between the branches
 * below are on lines missing from this listing; which value of `error`
 * reaches which statement cannot be confirmed from here.
 */
270 cpu_set_syscall_retval(struct thread *td, int error)
278 frame = td->td_frame;
/* Fetch the instruction just before the saved pc and inspect its low 20 bits. */
282 insn = *(u_int32_t *)(frame->tf_pc - INSN_SIZE);
283 if ((insn & 0x000fffff) == SYS___syscall) {
/* For __syscall the real syscall code is taken from the saved argument registers, starting at r0. */
284 register_t *ap = &frame->tf_r0;
285 register_t code = ap[_QUAD_LOWWORD];
286 if (td->td_proc->p_sysent->sv_mask)
287 code &= td->td_proc->p_sysent->sv_mask;
/* The lseek variants are excluded from the fixup. */
288 fixup = (code != SYS_freebsd6_lseek && code != SYS_lseek)
/* presumably the fixup path: the first return value goes in r1 -- confirm */
297 frame->tf_r1 = td->td_retval[0];
/* Return values go in r0/r1. */
299 frame->tf_r0 = td->td_retval[0];
300 frame->tf_r1 = td->td_retval[1];
302 frame->tf_spsr &= ~PSR_C_bit; /* carry bit */
306 * Reconstruct the pc to point at the swi.
308 frame->tf_pc -= INSN_SIZE;
/* The error code goes in r0. */
314 frame->tf_r0 = error;
315 frame->tf_spsr |= PSR_C_bit; /* carry bit */
321 * Initialize machine state (pcb and trap frame) for a new thread about to
322 * upcall. Put enough state in the new thread's PCB to get it to go back to
323 * userret(), where we can intercept it again to set the return (upcall)
324 * address and stack, along with those from upcalls that are from other sources
325 * such as those generated in thread_userret() itself.
/*
 * cpu_set_upcall: initialise td's trap frame and pcb as copies of td0's,
 * then build a switch frame so td starts in fork_trampoline.
 * NOTE(review): the assignment of `tf` is on a line missing from this
 * listing.
 */
328 cpu_set_upcall(struct thread *td, struct thread *td0)
330 struct trapframe *tf;
331 struct switchframe *sf;
/* Clone the trap frame and pcb of the template thread. */
333 bcopy(td0->td_frame, td->td_frame, sizeof(struct trapframe));
334 bcopy(td0->td_pcb, td->td_pcb, sizeof(struct pcb));
/* The switch frame sits directly below the trap frame. */
336 sf = (struct switchframe *)tf - 1;
/* First switch-in lands in fork_trampoline with r4 = fork_return, r5 = td. */
337 sf->sf_r4 = (u_int)fork_return;
338 sf->sf_r5 = (u_int)td;
339 sf->sf_pc = (u_int)fork_trampoline;
/* Clear the carry bit in the saved status register. */
340 tf->tf_spsr &= ~PSR_C_bit;
/* Saved stack pointers refer to td's own kernel stack, not td0's. */
342 td->td_pcb->un_32.pcb32_sp = (u_int)sf;
343 td->td_pcb->un_32.pcb32_und_sp = td->td_kstack + USPACE_UNDEF_STACK_TOP;
345 /* Setup to release spin count in fork_exit(). */
346 td->td_md.md_spinlock_count = 1;
347 td->td_md.md_saved_cspr = 0;
351 * Set the machine state for performing an upcall that has to
352 * be done in thread_userret() so that those upcalls generated
353 * in thread_userret() itself can be done as well.
/*
 * cpu_set_upcall_kse: fill td's trap frame so that it resumes at `entry`
 * with `arg` in r0, running on the stack described by `stack`.
 * NOTE(review): the rest of the parameter list is on a line missing from
 * this listing; `stack` is used below through ss_sp and ss_size.
 */
356 cpu_set_upcall_kse(struct thread *td, void (*entry)(void *), void *arg,
359 struct trapframe *tf = td->td_frame;
/* User sp: top of the supplied stack minus one trapframe, rounded down to 8 bytes. */
361 tf->tf_usr_sp = ((int)stack->ss_sp + stack->ss_size
362 - sizeof(struct trapframe)) & ~7;
363 tf->tf_pc = (int)entry;
364 tf->tf_r0 = (int)arg;
/* Saved status register is set to PSR_USR32_MODE. */
365 tf->tf_spsr = PSR_USR32_MODE;
/*
 * cpu_set_user_tls: record tls_base as the thread pointer of td.
 * NOTE(review): any condition guarding the second store is on lines
 * missing from this listing.
 */
369 cpu_set_user_tls(struct thread *td, void *tls_base)
/* Per-thread copy of the thread pointer. */
373 td->td_md.md_tp = (register_t)tls_base;
/* Also stored at the fixed ARM_TP_ADDRESS location. */
376 *(register_t *)ARM_TP_ADDRESS = (register_t)tls_base;
383 cpu_thread_exit(struct thread *td)
/*
 * cpu_thread_alloc: set td's pcb and trap frame pointers to locations
 * inside its kernel stack.
 * NOTE(review): the end of the td_pcb expression is on a line missing from
 * this listing.
 */
388 cpu_thread_alloc(struct thread *td)
390 td->td_pcb = (struct pcb *)(td->td_kstack + td->td_kstack_pages *
/* Trap frame: one struct trapframe below (SVC stack top - sizeof(struct pcb)). */
392 td->td_frame = (struct trapframe *)
393 ((u_int)td->td_kstack + USPACE_SVC_STACK_TOP - sizeof(struct pcb)) - 1;
395 #ifndef CPU_XSCALE_CORE3
396 pmap_use_minicache(td->td_kstack, td->td_kstack_pages * PAGE_SIZE);
402 cpu_thread_free(struct thread *td)
407 cpu_thread_clean(struct thread *td)
412 * Intercept the return address from a freshly forked process that has NOT
413 * been scheduled yet.
415 * This is needed to make kernel threads stay in kernel mode.
/*
 * cpu_set_fork_handler: store func and arg in the r4/r5 slots of td's
 * switch frame (the slots cpu_fork() fills with fork_return and the thread
 * pointer).
 * NOTE(review): the assignment of `tf` is on a line missing from this
 * listing.
 */
418 cpu_set_fork_handler(struct thread *td, void (*func)(void *), void *arg)
420 struct switchframe *sf;
421 struct trapframe *tf;
/* The switch frame sits directly below the trap frame. */
424 sf = (struct switchframe *)tf - 1;
425 sf->sf_r4 = (u_int)func;
426 sf->sf_r5 = (u_int)arg;
427 td->td_pcb->un_32.pcb32_sp = (u_int)sf;
431 * Software interrupt handler for queued VM system processing.
437 if (busdma_swi_pending)
442 cpu_exit(struct thread *td)
446 #define BITS_PER_INT (8 * sizeof(int))
447 vm_offset_t arm_nocache_startaddr;
448 static int arm_nocache_allocated[ARM_NOCACHE_KVA_SIZE / (PAGE_SIZE *
452 * Functions to map and unmap memory non-cached into KVA the kernel won't try
453 * to allocate. The goal is to provide uncached memory to busdma, to honor
455 * We can allocate at most ARM_NOCACHE_KVA_SIZE bytes.
456 * The allocator is rather simple: each page is represented by a bit in
457 * a bitfield, 0 meaning the page is not allocated, 1 meaning it is.
458 * As soon as it finds enough contiguous pages to satisfy the request,
459 * it returns the address.
/*
 * arm_remap_nocache: find a run of free pages in the no-cache KVA window
 * and map the memory behind `addr` there uncached.
 * addr: kernel virtual address of the memory to remap.
 * size: length in bytes; rounded up to whole pages.
 * NOTE(review): the break/return statements and closing braces are on
 * lines missing from this listing.  physaddr is translated once and then
 * advanced by PAGE_SIZE per page, so the source range is treated as
 * physically contiguous.
 */
462 arm_remap_nocache(void *addr, vm_size_t size)
466 size = round_page(size);
/* First-fit scan of the allocation bitmap: i is the candidate start page. */
467 for (i = 0; i < ARM_NOCACHE_KVA_SIZE / PAGE_SIZE; i++) {
468 if (!(arm_nocache_allocated[i / BITS_PER_INT] & (1 << (i %
/* Check whether the following size / PAGE_SIZE pages are free as well. */
470 for (j = i; j < i + (size / (PAGE_SIZE)); j++)
471 if (arm_nocache_allocated[j / BITS_PER_INT] &
472 (1 << (j % BITS_PER_INT)))
/* j reached the end of the run: every page in it was free. */
474 if (j == i + (size / (PAGE_SIZE)))
/* i still inside the window means a run was found. */
478 if (i < ARM_NOCACHE_KVA_SIZE / PAGE_SIZE) {
479 vm_offset_t tomap = arm_nocache_startaddr + i * PAGE_SIZE;
480 void *ret = (void *)tomap;
481 vm_paddr_t physaddr = vtophys((vm_offset_t)addr);
482 vm_offset_t vaddr = (vm_offset_t) addr;
/* Work on whole pages of the original mapping. */
484 vaddr = vaddr & ~PAGE_MASK;
485 for (; tomap < (vm_offset_t)ret + size; tomap += PAGE_SIZE,
486 vaddr += PAGE_SIZE, physaddr += PAGE_SIZE, i++) {
/* Write back and invalidate the old cached alias before creating the uncached one. */
487 cpu_idcache_wbinv_range(vaddr, PAGE_SIZE);
488 cpu_l2cache_wbinv_range(vaddr, PAGE_SIZE);
489 pmap_kenter_nocache(tomap, physaddr);
490 cpu_tlb_flushID_SE(vaddr);
/* Mark the page as allocated in the bitmap. */
491 arm_nocache_allocated[i / BITS_PER_INT] |= 1 << (i %
/*
 * arm_unmap_nocache: mark the pages covering [addr, addr + size) as free
 * in the no-cache allocation bitmap.
 * NOTE(review): the remainder of the loop body is on lines missing from
 * this listing.
 */
501 arm_unmap_nocache(void *addr, vm_size_t size)
503 vm_offset_t raddr = (vm_offset_t)addr;
506 size = round_page(size);
/* Page index of addr within the no-cache window. */
507 i = (raddr - arm_nocache_startaddr) / (PAGE_SIZE);
508 for (; size > 0; size -= PAGE_SIZE, i++) {
/* Clear the page's bit. */
509 arm_nocache_allocated[i / BITS_PER_INT] &= ~(1 << (i %
516 #ifdef ARM_USE_SMALL_ALLOC
518 static TAILQ_HEAD(,arm_small_page) pages_normal =
519 TAILQ_HEAD_INITIALIZER(pages_normal);
520 static TAILQ_HEAD(,arm_small_page) pages_wt =
521 TAILQ_HEAD_INITIALIZER(pages_wt);
522 static TAILQ_HEAD(,arm_small_page) free_pgdesc =
523 TAILQ_HEAD_INITIALIZER(free_pgdesc);
525 extern uma_zone_t l2zone;
527 struct mtx smallalloc_mtx;
529 static MALLOC_DEFINE(M_VMSMALLALLOC, "vm_small_alloc", "VM Small alloc data");
531 vm_offset_t alloc_firstaddr;
533 #ifdef ARM_HAVE_SUPERSECTIONS
534 #define S_FRAME L1_SUP_FRAME
535 #define S_SIZE L1_SUP_SIZE
537 #define S_FRAME L1_S_FRAME
538 #define S_SIZE L1_S_SIZE
/*
 * arm_ptovirt: translate physical address pa into its virtual address in
 * the linear mapping that starts at alloc_firstaddr.
 * NOTE(review): the statement taken when pa falls inside a range is on a
 * line missing from this listing (presumably a break -- confirm).
 */
542 arm_ptovirt(vm_paddr_t pa)
545 vm_offset_t addr = alloc_firstaddr;
547 KASSERT(alloc_firstaddr != 0, ("arm_ptovirt called too early ?"));
/* dump_avail holds (start, end) pairs; a zero end terminates the list. */
548 for (i = 0; dump_avail[i + 1]; i += 2) {
549 if (pa >= dump_avail[i] && pa < dump_avail[i + 1])
/* Skip this range: advance by its size rounded out to section boundaries. */
551 addr += (dump_avail[i + 1] & S_FRAME) + S_SIZE -
552 (dump_avail[i] & S_FRAME);
554 KASSERT(dump_avail[i + 1] != 0, ("Trying to access invalid physical address"));
/* Offset of pa from the section-aligned start of its range. */
555 return (addr + (pa - (dump_avail[i] & S_FRAME)));
/*
 * arm_init_smallalloc: map every dump_avail range into kernel virtual
 * space ending at KERNBASE, using section (or supersection) mappings, and
 * record the start of that window in alloc_firstaddr.
 * NOTE(review): the inner mapping loop header and the remaining arguments
 * of the supersection call are on lines missing from this listing.
 */
559 arm_init_smallalloc(void)
561 vm_offset_t to_map = 0, mapaddr;
565 * We need to use dump_avail and not phys_avail, since we want to
566 * map the whole memory and not just the memory available to the VM
567 * to be able to do a pa => va association for any address.
/* Pass 1: total size of all ranges, each rounded out to section boundaries. */
570 for (i = 0; dump_avail[i + 1]; i+= 2) {
571 to_map += (dump_avail[i + 1] & S_FRAME) + S_SIZE -
572 (dump_avail[i] & S_FRAME);
/* The window is placed immediately below KERNBASE. */
574 alloc_firstaddr = mapaddr = KERNBASE - to_map;
/* Pass 2: enter the mappings range by range. */
575 for (i = 0; dump_avail[i + 1]; i+= 2) {
576 vm_offset_t size = (dump_avail[i + 1] & S_FRAME) +
577 S_SIZE - (dump_avail[i] & S_FRAME);
580 #ifdef ARM_HAVE_SUPERSECTIONS
581 pmap_kenter_supersection(mapaddr,
582 (dump_avail[i] & L1_SUP_FRAME) + did,
585 pmap_kenter_section(mapaddr,
586 (dump_avail[i] & L1_S_FRAME) + did, SECTION_CACHE);
/*
 * arm_add_smallalloc_pages: insert page descriptors taken from `list` onto
 * the pages_wt or pages_normal queue.
 * NOTE(review): the loop header, the descriptor field assignments and the
 * test that chooses between the two queues are on lines missing from this
 * listing (presumably keyed on `pagetable` -- confirm).
 */
596 arm_add_smallalloc_pages(void *list, void *mem, int bytes, int pagetable)
598 struct arm_small_page *pg;
/* `list` is used as storage for the descriptors themselves. */
602 pg = (struct arm_small_page *)list;
605 TAILQ_INSERT_HEAD(&pages_wt, pg, pg_list);
607 TAILQ_INSERT_HEAD(&pages_normal, pg, pg_list);
/* Advance to the next descriptor slot and the next page of memory. */
608 list = (char *)list + sizeof(*pg);
609 mem = (char *)mem + PAGE_SIZE;
/*
 * uma_small_alloc: page allocator for UMA.  Looks for a page descriptor on
 * the pool matching the zone first; the other visible paths use
 * kmem_malloc() or vm_page_alloc().
 * flags: out parameter, set to UMA_SLAB_PRIV or UMA_SLAB_KMEM.
 * wait:  M_* flags; M_NOWAIT, M_USE_RESERVE and M_ZERO are tested below.
 * NOTE(review): the else keywords, the test on `sp`, the retry/failure
 * handling after vm_page_alloc() and the final return are on lines missing
 * from this listing.
 */
615 uma_small_alloc(uma_zone_t zone, int bytes, u_int8_t *flags, int wait)
618 struct arm_small_page *sp;
619 TAILQ_HEAD(,arm_small_page) *head;
620 static vm_pindex_t color;
623 *flags = UMA_SLAB_PRIV;
625 * For CPUs where we setup page tables as write back, there's no
626 * need to maintain two separate pools.
/* l2zone uses the pages_wt pool when page tables have their own cache mode. */
628 if (zone == l2zone && pte_l1_s_cache_mode != pte_l1_s_cache_mode_pt)
629 head = (void *)&pages_wt;
631 head = (void *)&pages_normal;
/* Look at the first descriptor of the chosen pool under the lock. */
633 mtx_lock(&smallalloc_mtx);
634 sp = TAILQ_FIRST(head);
639 mtx_unlock(&smallalloc_mtx);
/* l2zone with a distinct page-table cache mode: allocate from kmem_map. */
640 if (zone == l2zone &&
641 pte_l1_s_cache_mode != pte_l1_s_cache_mode_pt) {
642 *flags = UMA_SLAB_KMEM;
643 ret = ((void *)kmem_malloc(kmem_map, bytes, M_NOWAIT));
/* M_NOWAIT without M_USE_RESERVE selects VM_ALLOC_INTERRUPT instead of VM_ALLOC_SYSTEM. */
646 if ((wait & (M_NOWAIT|M_USE_RESERVE)) == M_NOWAIT)
647 pflags = VM_ALLOC_INTERRUPT | VM_ALLOC_WIRED;
649 pflags = VM_ALLOC_SYSTEM | VM_ALLOC_WIRED;
651 pflags |= VM_ALLOC_ZERO;
/* `color` is a static counter, advanced on every call that reaches this point. */
653 m = vm_page_alloc(NULL, color++,
654 pflags | VM_ALLOC_NOOBJ);
/* The page is reached through the linear map set up by arm_init_smallalloc(). */
662 ret = (void *)arm_ptovirt(VM_PAGE_TO_PHYS(m));
/* Zero by hand only if requested and the page is not already marked zeroed. */
663 if ((wait & M_ZERO) && (m->flags & PG_ZERO) == 0)
664 bzero(ret, PAGE_SIZE);
/* The descriptor moves from its pool to free_pgdesc. */
667 TAILQ_REMOVE(head, sp, pg_list);
668 TAILQ_INSERT_HEAD(&free_pgdesc, sp, pg_list);
670 mtx_unlock(&smallalloc_mtx);
/*
 * uma_small_free: release memory obtained from uma_small_alloc().
 * flags: UMA_SLAB_KMEM selects kmem_free(); the other paths are below.
 * NOTE(review): the else keywords, the descriptor field assignments and
 * the page free call are on lines missing from this listing.
 */
677 uma_small_free(void *mem, int size, u_int8_t flags)
/* Memory that came from kmem_map goes straight back to it. */
682 if (flags & UMA_SLAB_KMEM)
683 kmem_free(kmem_map, (vm_offset_t)mem, size);
685 struct arm_small_page *sp;
/* Address at or above KERNBASE: return it to a descriptor pool. */
687 if ((vm_offset_t)mem >= KERNBASE) {
688 mtx_lock(&smallalloc_mtx);
/* Reuse a spare descriptor from free_pgdesc. */
689 sp = TAILQ_FIRST(&free_pgdesc);
690 KASSERT(sp != NULL, ("No more free page descriptor ?"));
691 TAILQ_REMOVE(&free_pgdesc, sp, pg_list);
/* Choose the pool from the cache mode bits of the page's L1 entry. */
693 pmap_get_pde_pte(kernel_pmap, (vm_offset_t)mem, &pd,
695 if ((*pd & pte_l1_s_cache_mask) ==
696 pte_l1_s_cache_mode_pt &&
697 pte_l1_s_cache_mode_pt != pte_l1_s_cache_mode)
698 TAILQ_INSERT_HEAD(&pages_wt, sp, pg_list);
700 TAILQ_INSERT_HEAD(&pages_normal, sp, pg_list);
701 mtx_unlock(&smallalloc_mtx);
/* Otherwise look up the vm_page behind the address. */
704 vm_paddr_t pa = vtophys((vm_offset_t)mem);
706 m = PHYS_TO_VM_PAGE(pa);
/* One fewer wired page in the global counter. */
709 atomic_subtract_int(&cnt.v_wire_count, 1);