/*-
 * SPDX-License-Identifier: BSD-2-Clause-FreeBSD
 *
 * Copyright (C) 2007-2009 Semihalf, Rafal Jaworowski <raj@semihalf.com>
 * Copyright (C) 2006 Semihalf, Marian Balakowicz <m8@semihalf.com>
 * All rights reserved.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 *
 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
 * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
 * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN
 * NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
 * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED
 * TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
 * PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
 * LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
 * NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
 * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
 *
 * Some hw specific parts of this pmap were derived or influenced
 * by NetBSD's ibm4xx pmap module. More generic code is shared with
 * a few other pmap modules from the FreeBSD tree.
 */

/*
 * Kernel and user threads run within one common virtual address space.
 *
 * Virtual address space layout:
 * -----------------------------
 * 0x0000_0000_0000_0000 - 0x3fff_ffff_ffff_ffff : user process
 * 0x4000_0000_0000_0000 - 0x7fff_ffff_ffff_ffff : unused
 * 0x8000_0000_0000_0000 - 0xbfff_ffff_ffff_ffff : mmio region
 * 0xc000_0000_0000_0000 - 0xdfff_ffff_ffff_ffff : direct map
 * 0xe000_0000_0000_0000 - 0xffff_ffff_ffff_ffff : KVA
 */
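
/*
 * The direct map means a physical address pa in managed RAM is reachable
 * at PHYS_TO_DMAP(pa) inside the 0xc000_0000_0000_0000 region, so the
 * page table code below can dereference page table pages without
 * creating temporary mappings, for example:
 *
 *	pdir = (pte_t **)PHYS_TO_DMAP(VM_PAGE_TO_PHYS(m));
 */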
#include <sys/cdefs.h>
__FBSDID("$FreeBSD$");

#include "opt_kstack_pages.h"

#include <sys/param.h>
#include <sys/malloc.h>
#include <sys/queue.h>
#include <sys/systm.h>
#include <sys/kernel.h>
#include <sys/kerneldump.h>
#include <sys/linker.h>
#include <sys/msgbuf.h>
#include <sys/mutex.h>
#include <sys/rwlock.h>
#include <sys/sched.h>
#include <sys/vmmeter.h>

#include <vm/vm_page.h>
#include <vm/vm_kern.h>
#include <vm/vm_pageout.h>
#include <vm/vm_extern.h>
#include <vm/vm_object.h>
#include <vm/vm_param.h>
#include <vm/vm_map.h>
#include <vm/vm_pager.h>
#include <vm/vm_phys.h>
#include <vm/vm_pagequeue.h>

#include <machine/_inttypes.h>
#include <machine/cpu.h>
#include <machine/pcb.h>
#include <machine/platform.h>
#include <machine/tlb.h>
#include <machine/spr.h>
#include <machine/md_var.h>
#include <machine/mmuvar.h>
#include <machine/pmap.h>
#include <machine/pte.h>
#ifdef DEBUG
#define	debugf(fmt, args...) printf(fmt, ##args)
#else
#define	debugf(fmt, args...)
#endif

#define	PRI0ptrX	"016lx"
/**************************************************************************/
/* PMAP */
/**************************************************************************/

unsigned int kernel_pdirs;
static uma_zone_t ptbl_root_zone;
/*
 * Base of the pmap_mapdev() region. On 32-bit it immediately follows the
 * userspace address range. On 64-bit it's far above, at (1 << 63), and
 * ranges up to the DMAP, giving 62 bits of PA allowed. This is far larger
 * than the widest Book-E address bus; the e6500 has a 40-bit PA space.
 * This allows us to map akin to the DMAP, with addresses identical to the
 * PA, offset by the base.
 */
#define	VM_MAPDEV_BASE		0x8000000000000000
#define	VM_MAPDEV_PA_MAX	0x4000000000000000 /* Don't encroach on DMAP */
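
/*
 * Illustrative example (values assumed, not from the original source):
 * under this scheme a device block at PA 0x0000_0001_f000_0000 is mapped
 * at VM_MAPDEV_BASE + 0x0000_0001_f000_0000 = 0x8000_0001_f000_0000, so
 * VA/PA conversion for the mmio region is a single add or subtract of
 * VM_MAPDEV_BASE, and VM_MAPDEV_PA_MAX keeps it from overlapping the DMAP.
 */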
static void tid_flush(tlbtid_t tid);
static unsigned long ilog2(unsigned long);
/**************************************************************************/
/* Page table management */
/**************************************************************************/

static struct rwlock_padalign pvh_global_lock;

#define	PMAP_ROOT_SIZE	(sizeof(pte_t ***) * PP2D_NENTRIES)
static pte_t *ptbl_alloc(mmu_t, pmap_t, pte_t **,
    unsigned int, boolean_t);
static void ptbl_free(mmu_t, pmap_t, pte_t **, unsigned int, vm_page_t);
static void ptbl_hold(mmu_t, pmap_t, pte_t **, unsigned int);
static int ptbl_unhold(mmu_t, pmap_t, vm_offset_t);

static vm_paddr_t pte_vatopa(mmu_t, pmap_t, vm_offset_t);
static int pte_enter(mmu_t, pmap_t, vm_page_t, vm_offset_t, uint32_t,
    boolean_t);
static int pte_remove(mmu_t, pmap_t, vm_offset_t, uint8_t);
static pte_t *pte_find(mmu_t, pmap_t, vm_offset_t);
static void kernel_pte_alloc(vm_offset_t, vm_offset_t, vm_offset_t);

/**************************************************************************/
/* Page table related */
/**************************************************************************/

/* Initialize pool of kva ptbl buffers. */
/* Get a pointer to a PTE in a page table. */
static __inline pte_t *
pte_find(mmu_t mmu, pmap_t pmap, vm_offset_t va)
{
	pte_t **pdir;
	pte_t *ptbl;

	KASSERT((pmap != NULL), ("pte_find: invalid pmap"));

	pdir = pmap->pm_pp2d[PP2D_IDX(va)];
	if (pdir == NULL)
		return (NULL);
	ptbl = pdir[PDIR_IDX(va)];

	return ((ptbl != NULL) ? &ptbl[PTBL_IDX(va)] : NULL);
}
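
/*
 * pm_pp2d is the root page of pdir pointers; PP2D_IDX, PDIR_IDX and
 * PTBL_IDX extract the successive index fields of the VA, and the leaf
 * ptbl holds the pte_t entries themselves. The functions below all
 * repeat some prefix of this three-level walk.
 */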
/*
 * Allocate a page of pointers to page directories, do not preallocate the
 * page tables.
 */
static pte_t **
pdir_alloc(mmu_t mmu, pmap_t pmap, unsigned int pp2d_idx, bool nosleep)
{
	vm_page_t m;
	pte_t **pdir;
	int req;

	req = VM_ALLOC_NOOBJ | VM_ALLOC_WIRED;
	while ((m = vm_page_alloc(NULL, pp2d_idx, req)) == NULL) {
		if (nosleep)
			return (NULL);
		vm_wait(NULL);
	}

	/* Zero whole pdir page. */
	pdir = (pte_t **)PHYS_TO_DMAP(VM_PAGE_TO_PHYS(m));
	mmu_booke_zero_page(mmu, m);

	return (pdir);
}
/* Free pdir pages and invalidate pdir entry. */
static void
pdir_free(mmu_t mmu, pmap_t pmap, unsigned int pp2d_idx, vm_page_t m)
{
	pte_t **pdir;

	pdir = pmap->pm_pp2d[pp2d_idx];

	KASSERT((pdir != NULL), ("pdir_free: null pdir"));

	pmap->pm_pp2d[pp2d_idx] = NULL;

	vm_page_free_zero(m);
}
/*
 * Decrement pdir pages hold count and attempt to free pdir pages. Called
 * when removing a directory entry from the pdir.
 *
 * Return 1 if pdir pages were freed.
 */
static int
pdir_unhold(mmu_t mmu, pmap_t pmap, u_int pp2d_idx)
{
	pte_t **pdir;
	vm_paddr_t pa;
	vm_page_t m;

	KASSERT((pmap != kernel_pmap),
	    ("pdir_unhold: unholding kernel pdir!"));

	pdir = pmap->pm_pp2d[pp2d_idx];

	/* Decrement hold count. */
	pa = DMAP_TO_PHYS((vm_offset_t) pdir);
	m = PHYS_TO_VM_PAGE(pa);
	m->ref_count--;

	/*
	 * Free pdir page if there are no dir entries in this pdir.
	 */
	if (m->ref_count == 0) {
		pdir_free(mmu, pmap, pp2d_idx, m);
		return (1);
	}
	return (0);
}
/*
 * Increment hold count for pdir pages. This routine is used when a new ptbl
 * entry is being inserted into the pdir.
 */
static void
pdir_hold(mmu_t mmu, pmap_t pmap, pte_t **pdir)
{
	vm_page_t m;

	KASSERT((pmap != kernel_pmap),
	    ("pdir_hold: holding kernel pdir!"));

	KASSERT((pdir != NULL), ("pdir_hold: null pdir"));

	m = PHYS_TO_VM_PAGE(DMAP_TO_PHYS((vm_offset_t)pdir));
	m->ref_count++;
}
/* Allocate page table. */
static pte_t *
ptbl_alloc(mmu_t mmu, pmap_t pmap, pte_t **pdir, unsigned int pdir_idx,
    boolean_t nosleep)
{
	vm_page_t m;
	pte_t *ptbl;
	int req;

	KASSERT((pdir[pdir_idx] == NULL),
	    ("%s: valid ptbl entry exists!", __func__));

	req = VM_ALLOC_NOOBJ | VM_ALLOC_WIRED;
	while ((m = vm_page_alloc(NULL, pdir_idx, req)) == NULL) {
		if (nosleep)
			return (NULL);
		PMAP_UNLOCK(pmap);
		rw_wunlock(&pvh_global_lock);
		vm_wait(NULL);
		rw_wlock(&pvh_global_lock);
		PMAP_LOCK(pmap);
	}

	/* Zero whole ptbl. */
	ptbl = (pte_t *)PHYS_TO_DMAP(VM_PAGE_TO_PHYS(m));
	mmu_booke_zero_page(mmu, m);

	return (ptbl);
}
/* Free ptbl pages and invalidate pdir entry. */
static void
ptbl_free(mmu_t mmu, pmap_t pmap, pte_t **pdir, unsigned int pdir_idx,
    vm_page_t m)
{
	pte_t *ptbl;

	ptbl = pdir[pdir_idx];

	KASSERT((ptbl != NULL), ("ptbl_free: null ptbl"));

	pdir[pdir_idx] = NULL;

	vm_page_free_zero(m);
}
/*
 * Decrement ptbl pages hold count and attempt to free ptbl pages. Called
 * when removing a pte entry from the ptbl.
 *
 * Return 1 if ptbl pages were freed.
 */
static int
ptbl_unhold(mmu_t mmu, pmap_t pmap, vm_offset_t va)
{
	pte_t *ptbl;
	pte_t **pdir;
	vm_page_t m;
	u_int pp2d_idx;
	u_int pdir_idx;

	pp2d_idx = PP2D_IDX(va);
	pdir_idx = PDIR_IDX(va);

	KASSERT((pmap != kernel_pmap),
	    ("ptbl_unhold: unholding kernel ptbl!"));

	pdir = pmap->pm_pp2d[pp2d_idx];
	ptbl = pdir[pdir_idx];

	/* Decrement hold count. */
	m = PHYS_TO_VM_PAGE(DMAP_TO_PHYS((vm_offset_t) ptbl));
	m->ref_count--;

	/*
	 * Free ptbl pages if there are no pte entries in this ptbl.
	 * ref_count has the same value for all ptbl pages, so check the
	 * last page.
	 */
	if (m->ref_count == 0) {
		ptbl_free(mmu, pmap, pdir, pdir_idx, m);
		pdir_unhold(mmu, pmap, pp2d_idx);
		return (1);
	}
	return (0);
}
/*
 * Increment hold count for ptbl pages. This routine is used when a new pte
 * entry is being inserted into the ptbl.
 */
static void
ptbl_hold(mmu_t mmu, pmap_t pmap, pte_t **pdir, unsigned int pdir_idx)
{
	pte_t *ptbl;
	vm_page_t m;

	KASSERT((pmap != kernel_pmap),
	    ("ptbl_hold: holding kernel ptbl!"));

	ptbl = pdir[pdir_idx];

	KASSERT((ptbl != NULL), ("ptbl_hold: null ptbl"));

	m = PHYS_TO_VM_PAGE(DMAP_TO_PHYS((vm_offset_t) ptbl));
	m->ref_count++;
}
/*
 * Clean pte entry, try to free page table page if requested.
 *
 * Return 1 if ptbl pages were freed, otherwise return 0.
 */
static int
pte_remove(mmu_t mmu, pmap_t pmap, vm_offset_t va, u_int8_t flags)
{
	vm_page_t m;
	pte_t *pte;

	pte = pte_find(mmu, pmap, va);
	KASSERT(pte != NULL, ("%s: NULL pte", __func__));

	if (!PTE_ISVALID(pte))
		return (0);

	/* Get vm_page_t for mapped pte. */
	m = PHYS_TO_VM_PAGE(PTE_PA(pte));

	if (PTE_ISWIRED(pte))
		pmap->pm_stats.wired_count--;

	/* Handle managed entry. */
	if (PTE_ISMANAGED(pte)) {
		/* Handle modified pages. */
		if (PTE_ISMODIFIED(pte))
			vm_page_dirty(m);

		/* Referenced pages. */
		if (PTE_ISREFERENCED(pte))
			vm_page_aflag_set(m, PGA_REFERENCED);

		/* Remove pv_entry from pv_list. */
		pv_remove(pmap, va, m);
	} else if (pmap == kernel_pmap && m && m->md.pv_tracked) {
		pv_remove(pmap, va, m);
		if (TAILQ_EMPTY(&m->md.pv_list))
			m->md.pv_tracked = false;
	}

	mtx_lock_spin(&tlbivax_mutex);
	tlb_miss_lock();

	tlb0_flush_entry(va);
	*pte = 0;

	tlb_miss_unlock();
	mtx_unlock_spin(&tlbivax_mutex);

	pmap->pm_stats.resident_count--;

	if (flags & PTBL_UNHOLD) {
		return (ptbl_unhold(mmu, pmap, va));
	}
	return (0);
}
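
/*
 * PTBL_UNHOLD asks pte_remove() to also drop the ptbl reference taken
 * when the pte was entered; once a user ptbl's ref_count reaches zero,
 * ptbl_unhold() frees the page table page and unholds its pdir in turn.
 */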
/*
 * Insert PTE for a given page and virtual address.
 */
static int
pte_enter(mmu_t mmu, pmap_t pmap, vm_page_t m, vm_offset_t va, uint32_t flags,
    boolean_t nosleep)
{
	unsigned int pp2d_idx = PP2D_IDX(va);
	unsigned int pdir_idx = PDIR_IDX(va);
	unsigned int ptbl_idx = PTBL_IDX(va);
	pte_t *ptbl, *pte, pte_tmp;
	pte_t **pdir;

	/* Get the page directory pointer. */
	pdir = pmap->pm_pp2d[pp2d_idx];
	if (pdir == NULL)
		pdir = pdir_alloc(mmu, pmap, pp2d_idx, nosleep);

	/* Get the page table pointer. */
	ptbl = pdir[pdir_idx];

	if (ptbl == NULL) {
		/* Allocate page table pages. */
		ptbl = ptbl_alloc(mmu, pmap, pdir, pdir_idx, nosleep);
		if (ptbl == NULL) {
			KASSERT(nosleep, ("nosleep and NULL ptbl"));
			return (ENOMEM);
		}
		pte = &ptbl[ptbl_idx];
	} else {
		/*
		 * Check if there is a valid mapping for the requested va;
		 * if there is, remove it.
		 */
		pte = &ptbl[ptbl_idx];
		if (PTE_ISVALID(pte)) {
			pte_remove(mmu, pmap, va, PTBL_HOLD);
		} else {
			/*
			 * pte is not used, increment hold count for ptbl
			 * pages.
			 */
			if (pmap != kernel_pmap)
				ptbl_hold(mmu, pmap, pdir, pdir_idx);
		}
	}

	if (pdir[pdir_idx] == NULL) {
		if (pmap != kernel_pmap && pmap->pm_pp2d[pp2d_idx] != NULL)
			pdir_hold(mmu, pmap, pdir);
		pdir[pdir_idx] = ptbl;
	}
	if (pmap->pm_pp2d[pp2d_idx] == NULL)
		pmap->pm_pp2d[pp2d_idx] = pdir;

	/*
	 * Insert pv_entry into pv_list for mapped page if part of managed
	 * memory.
	 */
	if ((m->oflags & VPO_UNMANAGED) == 0) {
		flags |= PTE_MANAGED;

		/* Create and insert pv entry. */
		pv_insert(pmap, va, m);
	}

	pmap->pm_stats.resident_count++;

	pte_tmp = PTE_RPN_FROM_PA(VM_PAGE_TO_PHYS(m));
	pte_tmp |= (PTE_VALID | flags);

	mtx_lock_spin(&tlbivax_mutex);
	tlb_miss_lock();

	tlb0_flush_entry(va);
	*pte = pte_tmp;

	tlb_miss_unlock();
	mtx_unlock_spin(&tlbivax_mutex);

	return (0);
}
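
/*
 * As in pte_remove() above, the update of *pte is bracketed by
 * tlbivax_mutex and tlb_miss_lock() so that the stale TLB0 entry is
 * flushed and another CPU cannot fault the old translation back in
 * mid-update.
 */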
/* Return the pa for the given pmap/va. */
static vm_paddr_t
pte_vatopa(mmu_t mmu, pmap_t pmap, vm_offset_t va)
{
	vm_paddr_t pa = 0;
	pte_t *pte;

	pte = pte_find(mmu, pmap, va);
	if ((pte != NULL) && PTE_ISVALID(pte))
		pa = (PTE_PA(pte) | (va & PTE_PA_MASK));
	return (pa);
}
/* Set up the kernel page tables: allocate PTEs covering kernel code and data. */
static void
kernel_pte_alloc(vm_offset_t data_end, vm_offset_t addr, vm_offset_t pdir)
{
	unsigned int i, j;
	vm_offset_t va;
	pte_t *pte;

	va = addr;
	/* Initialize kernel pdir */
	for (i = 0; i < kernel_pdirs; i++) {
		kernel_pmap->pm_pp2d[i + PP2D_IDX(va)] =
		    (pte_t **)(pdir + (i * PAGE_SIZE * PDIR_PAGES));
		for (j = PDIR_IDX(va + (i * PAGE_SIZE * PDIR_NENTRIES * PTBL_NENTRIES));
		    j < PDIR_NENTRIES; j++) {
			kernel_pmap->pm_pp2d[i + PP2D_IDX(va)][j] =
			    (pte_t *)(pdir + (kernel_pdirs * PAGE_SIZE) +
			    (((i * PDIR_NENTRIES) + j) * PAGE_SIZE));
		}
	}

	/*
	 * Fill in PTEs covering kernel code and data. They are not required
	 * for address translation, as this area is covered by static TLB1
	 * entries, but are needed for pte_vatopa() to work correctly with
	 * kernel area addresses.
	 */
	for (va = addr; va < data_end; va += PAGE_SIZE) {
		pte = &(kernel_pmap->pm_pp2d[PP2D_IDX(va)][PDIR_IDX(va)][PTBL_IDX(va)]);
		*pte = PTE_RPN_FROM_PA(kernload + (va - kernstart));
		*pte |= PTE_M | PTE_SR | PTE_SW | PTE_SX | PTE_WIRED |
		    PTE_VALID | PTE_PS_4KB;
	}
}
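
/*
 * A note on the PTE bits used above: PTE_M requests memory coherence,
 * PTE_SR/PTE_SW/PTE_SX grant supervisor read/write/execute, PTE_WIRED
 * marks the mapping wired so it is never evicted, and PTE_PS_4KB
 * selects the 4KB page size.
 */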
/*
 * Initialize a preallocated and zeroed pmap structure,
 * such as one in a vmspace structure.
 */
static void
mmu_booke_pinit(mmu_t mmu, pmap_t pmap)
{
	int i;

	CTR4(KTR_PMAP, "%s: pmap = %p, proc %d '%s'", __func__, pmap,
	    curthread->td_proc->p_pid, curthread->td_proc->p_comm);

	KASSERT((pmap != kernel_pmap), ("pmap_pinit: initializing kernel_pmap"));

	for (i = 0; i < MAXCPU; i++)
		pmap->pm_tid[i] = TID_NONE;
	CPU_ZERO(&pmap->pm_active);
	bzero(&pmap->pm_stats, sizeof(pmap->pm_stats));
	pmap->pm_pp2d = uma_zalloc(ptbl_root_zone, M_WAITOK);
	bzero(pmap->pm_pp2d, sizeof(pte_t **) * PP2D_NENTRIES);
}
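
/*
 * TID_NONE leaves the pmap without a TLB PID on every CPU; a real TID is
 * assigned lazily the first time the pmap is activated on a given CPU.
 */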
/*
 * Release any resources held by the given physical map.
 * Called when a pmap initialized by mmu_booke_pinit is being released.
 * Should only be called if the map contains no valid mappings.
 */
static void
mmu_booke_release(mmu_t mmu, pmap_t pmap)
{

	KASSERT(pmap->pm_stats.resident_count == 0,
	    ("pmap_release: pmap resident count %ld != 0",
	    pmap->pm_stats.resident_count));
	uma_zfree(ptbl_root_zone, pmap->pm_pp2d);
}
static void
mmu_booke_sync_icache(mmu_t mmu, pmap_t pm, vm_offset_t va, vm_size_t sz)
{
	pte_t *pte;
	vm_paddr_t pa = 0;
	int sync_sz, valid;

	while (sz > 0) {
		PMAP_LOCK(pm);
		pte = pte_find(mmu, pm, va);
		valid = (pte != NULL && PTE_ISVALID(pte)) ? 1 : 0;
		if (valid)
			pa = PTE_PA(pte);
		PMAP_UNLOCK(pm);
		sync_sz = PAGE_SIZE - (va & PAGE_MASK);
		sync_sz = min(sync_sz, sz);
		if (valid) {
			pa += (va & PAGE_MASK);
			__syncicache((void *)PHYS_TO_DMAP(pa), sync_sz);
		}
		va += sync_sz;
		sz -= sync_sz;
	}
}
/*
 * mmu_booke_zero_page_area zeros the specified hardware page by
 * mapping it into virtual memory and using bzero to clear
 * its contents.
 *
 * off and size must reside within a single page.
 */
static void
mmu_booke_zero_page_area(mmu_t mmu, vm_page_t m, int off, int size)
{
	vm_offset_t va;

	/* XXX KASSERT off and size are within a single page? */

	va = PHYS_TO_DMAP(VM_PAGE_TO_PHYS(m));
	bzero((caddr_t)va + off, size);
}
/*
 * mmu_booke_zero_page zeros the specified hardware page.
 */
static void
mmu_booke_zero_page(mmu_t mmu, vm_page_t m)
{
	vm_offset_t off, va;

	va = PHYS_TO_DMAP(VM_PAGE_TO_PHYS(m));

	for (off = 0; off < PAGE_SIZE; off += cacheline_size)
		__asm __volatile("dcbz 0,%0" :: "r"(va + off));
}
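
/*
 * dcbz establishes and zeroes a full data cache block without reading
 * the old contents from memory, so stepping through the page at
 * cacheline_size strides zeroes it with no load traffic, cheaper than
 * a bzero() of the whole page.
 */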
/*
 * mmu_booke_copy_page copies the specified (machine independent) page by
 * mapping the page into virtual memory and using memcpy to copy the page,
 * one machine dependent page at a time.
 */
static void
mmu_booke_copy_page(mmu_t mmu, vm_page_t sm, vm_page_t dm)
{
	vm_offset_t sva, dva;

	sva = PHYS_TO_DMAP(VM_PAGE_TO_PHYS(sm));
	dva = PHYS_TO_DMAP(VM_PAGE_TO_PHYS(dm));
	memcpy((caddr_t)dva, (caddr_t)sva, PAGE_SIZE);
}
static inline void
mmu_booke_copy_pages(mmu_t mmu, vm_page_t *ma, vm_offset_t a_offset,
    vm_page_t *mb, vm_offset_t b_offset, int xfersize)
{
	caddr_t a_cp, b_cp;
	vm_offset_t a_pg_offset, b_pg_offset;
	vm_page_t pa, pb;
	int cnt;

	while (xfersize > 0) {
		a_pg_offset = a_offset & PAGE_MASK;
		pa = ma[a_offset >> PAGE_SHIFT];
		b_pg_offset = b_offset & PAGE_MASK;
		pb = mb[b_offset >> PAGE_SHIFT];
		cnt = min(xfersize, PAGE_SIZE - a_pg_offset);
		cnt = min(cnt, PAGE_SIZE - b_pg_offset);
		a_cp = (caddr_t)((uintptr_t)PHYS_TO_DMAP(VM_PAGE_TO_PHYS(pa)) +
		    a_pg_offset);
		b_cp = (caddr_t)((uintptr_t)PHYS_TO_DMAP(VM_PAGE_TO_PHYS(pb)) +
		    b_pg_offset);
		bcopy(a_cp, b_cp, cnt);
		a_offset += cnt;
		b_offset += cnt;
		xfersize -= cnt;
	}
}
static vm_offset_t
mmu_booke_quick_enter_page(mmu_t mmu, vm_page_t m)
{
	return (PHYS_TO_DMAP(VM_PAGE_TO_PHYS(m)));
}

static void
mmu_booke_quick_remove_page(mmu_t mmu, vm_offset_t addr)
{
}
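
/*
 * With a direct map every page already has a permanent kernel address,
 * so a quick mapping costs nothing to create and there is nothing to
 * tear down; hence the empty remove routine.
 */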
/**************************************************************************/
/* TID handling */
/**************************************************************************/

/*
 * Return the largest uint value log such that 2^log <= num.
 */
static unsigned long
ilog2(unsigned long num)
{
	long lz;

	__asm ("cntlzd %0, %1" : "=r" (lz) : "r" (num));
	return (63 - lz);
}
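
/*
 * cntlzd counts the leading zero bits of its 64-bit operand, so for
 * example ilog2(4096) yields 63 - 51 = 12. The argument must be nonzero;
 * cntlzd returns 64 for a zero operand, which would give -1 here.
 */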
/*
 * Invalidate all TLB0 entries which match the given TID. Note this is
 * dedicated to cases where invalidations should NOT be propagated to
 * other CPUs.
 */
static void
tid_flush(tlbtid_t tid)
{
	register_t msr;

	/* Don't evict kernel translations. */
	if (tid == TID_KERNEL)
		return;

	msr = mfmsr();
	__asm __volatile("wrteei 0");

	/*
	 * Newer cores (e500mc and later) have tlbilx, which doesn't
	 * broadcast, so use it for PID invalidation.
	 */
	mtspr(SPR_MAS6, tid << MAS6_SPID0_SHIFT);
	/* tlbilxpid (tlbilx invalidate-by-PID), emitted as a raw opcode. */
	__asm __volatile("isync; .long 0x7c200024; isync; msync");

	__asm __volatile("wrtee %0" :: "r"(msr));
}