2 * Copyright (c) 1991 Regents of the University of California.
4 * Copyright (c) 1994 John S. Dyson
6 * Copyright (c) 1994 David Greenman
8 * Copyright (c) 2003 Peter Wemm
10 * Copyright (c) 2005-2010 Alan L. Cox <alc@cs.rice.edu>
11 * All rights reserved.
12 * Copyright (c) 2014 Andrew Turner
13 * All rights reserved.
14 * Copyright (c) 2014-2016 The FreeBSD Foundation
15 * All rights reserved.
17 * This code is derived from software contributed to Berkeley by
18 * the Systems Programming Group of the University of Utah Computer
19 * Science Department and William Jolitz of UUNET Technologies Inc.
21 * This software was developed by Andrew Turner under sponsorship from
22 * the FreeBSD Foundation.
24 * Redistribution and use in source and binary forms, with or without
25 * modification, are permitted provided that the following conditions
27 * 1. Redistributions of source code must retain the above copyright
28 * notice, this list of conditions and the following disclaimer.
29 * 2. Redistributions in binary form must reproduce the above copyright
30 * notice, this list of conditions and the following disclaimer in the
31 * documentation and/or other materials provided with the distribution.
32 * 3. All advertising materials mentioning features or use of this software
33 * must display the following acknowledgement:
34 * This product includes software developed by the University of
35 * California, Berkeley and its contributors.
36 * 4. Neither the name of the University nor the names of its contributors
37 * may be used to endorse or promote products derived from this software
38 * without specific prior written permission.
40 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
41 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
42 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
43 * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
44 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
45 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
46 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
47 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
48 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
49 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
52 * from: @(#)pmap.c 7.7 (Berkeley) 5/12/91
55 * Copyright (c) 2003 Networks Associates Technology, Inc.
56 * All rights reserved.
58 * This software was developed for the FreeBSD Project by Jake Burkholder,
59 * Safeport Network Services, and Network Associates Laboratories, the
60 * Security Research Division of Network Associates, Inc. under
61 * DARPA/SPAWAR contract N66001-01-C-8035 ("CBOSS"), as part of the DARPA
62 * CHATS research program.
64 * Redistribution and use in source and binary forms, with or without
65 * modification, are permitted provided that the following conditions
67 * 1. Redistributions of source code must retain the above copyright
68 * notice, this list of conditions and the following disclaimer.
69 * 2. Redistributions in binary form must reproduce the above copyright
70 * notice, this list of conditions and the following disclaimer in the
71 * documentation and/or other materials provided with the distribution.
73 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
74 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
75 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
76 * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
77 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
78 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
79 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
80 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
81 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
82 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
86 #include <sys/cdefs.h>
87 __FBSDID("$FreeBSD$");
90 * Manages physical address maps.
92 * Since the information managed by this module is
93 * also stored by the logical address mapping module,
94 * this module may throw away valid virtual-to-physical
95 * mappings at almost any time. However, invalidations
96 * of virtual-to-physical mappings must be done as requested.
99 * In order to cope with hardware architectures which
100 * make virtual-to-physical map invalidates expensive,
101 * this module may delay invalidation or reduced-protection
102 * operations until such time as they are actually
103 * necessary. This module is given full information as
104 * to which processors are currently using which maps,
105 * and to when physical maps must be made correct.
108 #include <sys/param.h>
110 #include <sys/systm.h>
111 #include <sys/kernel.h>
113 #include <sys/lock.h>
114 #include <sys/malloc.h>
115 #include <sys/mman.h>
116 #include <sys/msgbuf.h>
117 #include <sys/mutex.h>
118 #include <sys/proc.h>
119 #include <sys/rwlock.h>
121 #include <sys/vmem.h>
122 #include <sys/vmmeter.h>
123 #include <sys/sched.h>
124 #include <sys/sysctl.h>
125 #include <sys/_unrhdr.h>
129 #include <vm/vm_param.h>
130 #include <vm/vm_kern.h>
131 #include <vm/vm_page.h>
132 #include <vm/vm_map.h>
133 #include <vm/vm_object.h>
134 #include <vm/vm_extern.h>
135 #include <vm/vm_pageout.h>
136 #include <vm/vm_pager.h>
137 #include <vm/vm_radix.h>
138 #include <vm/vm_reserv.h>
141 #include <machine/machdep.h>
142 #include <machine/md_var.h>
143 #include <machine/pcb.h>
145 #define NL0PG (PAGE_SIZE/(sizeof (pd_entry_t)))
146 #define NL1PG (PAGE_SIZE/(sizeof (pd_entry_t)))
147 #define NL2PG (PAGE_SIZE/(sizeof (pd_entry_t)))
148 #define NL3PG (PAGE_SIZE/(sizeof (pt_entry_t)))
150 #define NUL0E L0_ENTRIES
151 #define NUL1E (NUL0E * NL1PG)
152 #define NUL2E (NUL1E * NL2PG)
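/*
 * Illustrative arithmetic (assuming the 4 KB granule with 8-byte table
 * entries and a 512-entry L0 table): each page table page then holds
 * PAGE_SIZE / sizeof(pd_entry_t) = 4096 / 8 = 512 entries, so
 * NUL1E = 512 * 512 = 262144 L1 entries and NUL2E = 262144 * 512 L2
 * entries are reachable beneath a single L0 table.
 */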
154 #if !defined(DIAGNOSTIC)
155 #ifdef __GNUC_GNU_INLINE__
156 #define PMAP_INLINE __attribute__((__gnu_inline__)) inline
158 #define PMAP_INLINE extern inline
165 * These are configured by the mair_el1 register. This is set up in locore.S
167 #define DEVICE_MEMORY 0
168 #define UNCACHED_MEMORY 1
169 #define CACHED_MEMORY 2
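/*
 * Note (illustrative): these values are indices into the attribute bytes of
 * mair_el1, not memory attribute encodings themselves.  A leaf PTE selects
 * one of them through its AttrIndx field, e.g.
 *
 *	pte |= ATTR_IDX(CACHED_MEMORY);		(normal write-back memory)
 *	pte |= ATTR_IDX(DEVICE_MEMORY);		(device register mappings)
 *
 * The matching mair_el1 attribute bytes are programmed once, in locore.S.
 */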
173 #define PV_STAT(x) do { x ; } while (0)
175 #define PV_STAT(x) do { } while (0)
178 #define pmap_l2_pindex(v) ((v) >> L2_SHIFT)
180 #define NPV_LIST_LOCKS MAXCPU
182 #define PHYS_TO_PV_LIST_LOCK(pa) \
183 (&pv_list_locks[pa_index(pa) % NPV_LIST_LOCKS])
185 #define CHANGE_PV_LIST_LOCK_TO_PHYS(lockp, pa) do { \
186 struct rwlock **_lockp = (lockp); \
187 struct rwlock *_new_lock; \
189 _new_lock = PHYS_TO_PV_LIST_LOCK(pa); \
190 if (_new_lock != *_lockp) { \
191 if (*_lockp != NULL) \
192 rw_wunlock(*_lockp); \
193 *_lockp = _new_lock; \
198 #define CHANGE_PV_LIST_LOCK_TO_VM_PAGE(lockp, m) \
199 CHANGE_PV_LIST_LOCK_TO_PHYS(lockp, VM_PAGE_TO_PHYS(m))
201 #define RELEASE_PV_LIST_LOCK(lockp) do { \
202 struct rwlock **_lockp = (lockp); \
204 if (*_lockp != NULL) { \
205 rw_wunlock(*_lockp); \
210 #define VM_PAGE_TO_PV_LIST_LOCK(m) \
211 PHYS_TO_PV_LIST_LOCK(VM_PAGE_TO_PHYS(m))
213 struct pmap kernel_pmap_store;
215 vm_offset_t virtual_avail; /* VA of first avail page (after kernel bss) */
216 vm_offset_t virtual_end; /* VA of last avail page (end of kernel AS) */
217 vm_offset_t kernel_vm_end = 0;
219 struct msgbuf *msgbufp = NULL;
221 vm_paddr_t dmap_phys_base; /* The start of the dmap region */
222 vm_paddr_t dmap_phys_max; /* The limit of the dmap region */
223 vm_offset_t dmap_max_addr; /* The virtual address limit of the dmap */
225 /* This code assumes all L1 DMAP entries will be used */
226 CTASSERT((DMAP_MIN_ADDRESS & ~L0_OFFSET) == DMAP_MIN_ADDRESS);
227 CTASSERT((DMAP_MAX_ADDRESS & ~L0_OFFSET) == DMAP_MAX_ADDRESS);
229 #define DMAP_TABLES ((DMAP_MAX_ADDRESS - DMAP_MIN_ADDRESS) >> L0_SHIFT)
230 extern pt_entry_t pagetable_dmap[];
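/*
 * Illustrative sketch of the direct map layout (PHYS_TO_DMAP() and
 * DMAP_TO_PHYS() themselves are defined in the machine headers): physical
 * memory from dmap_phys_base up is aliased linearly starting at
 * DMAP_MIN_ADDRESS, so translation is plain offset arithmetic, roughly
 *
 *	va = DMAP_MIN_ADDRESS + (pa - dmap_phys_base);
 *	pa = dmap_phys_base + (va - DMAP_MIN_ADDRESS);
 *
 * which lets the table-walk helpers below turn a table entry's physical
 * address into a usable pointer without creating any mapping.
 */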
232 static SYSCTL_NODE(_vm, OID_AUTO, pmap, CTLFLAG_RD, 0, "VM/pmap parameters");
234 static int superpages_enabled = 1;
235 SYSCTL_INT(_vm_pmap, OID_AUTO, superpages_enabled,
236 CTLFLAG_RDTUN | CTLFLAG_NOFETCH, &superpages_enabled, 0,
237 "Are large page mappings enabled?");
240 * Data for the pv entry allocation mechanism
242 static TAILQ_HEAD(pch, pv_chunk) pv_chunks = TAILQ_HEAD_INITIALIZER(pv_chunks);
243 static struct mtx pv_chunks_mutex;
244 static struct rwlock pv_list_locks[NPV_LIST_LOCKS];
246 static void free_pv_chunk(struct pv_chunk *pc);
247 static void free_pv_entry(pmap_t pmap, pv_entry_t pv);
248 static pv_entry_t get_pv_entry(pmap_t pmap, struct rwlock **lockp);
249 static vm_page_t reclaim_pv_chunk(pmap_t locked_pmap, struct rwlock **lockp);
250 static void pmap_pvh_free(struct md_page *pvh, pmap_t pmap, vm_offset_t va);
251 static pv_entry_t pmap_pvh_remove(struct md_page *pvh, pmap_t pmap,
254 static int pmap_change_attr(vm_offset_t va, vm_size_t size, int mode);
255 static int pmap_change_attr_locked(vm_offset_t va, vm_size_t size, int mode);
256 static pt_entry_t *pmap_demote_l1(pmap_t pmap, pt_entry_t *l1, vm_offset_t va);
257 static pt_entry_t *pmap_demote_l2_locked(pmap_t pmap, pt_entry_t *l2,
258 vm_offset_t va, struct rwlock **lockp);
259 static pt_entry_t *pmap_demote_l2(pmap_t pmap, pt_entry_t *l2, vm_offset_t va);
260 static vm_page_t pmap_enter_quick_locked(pmap_t pmap, vm_offset_t va,
261 vm_page_t m, vm_prot_t prot, vm_page_t mpte, struct rwlock **lockp);
262 static int pmap_remove_l3(pmap_t pmap, pt_entry_t *l3, vm_offset_t sva,
263 pd_entry_t ptepde, struct spglist *free, struct rwlock **lockp);
264 static boolean_t pmap_try_insert_pv_entry(pmap_t pmap, vm_offset_t va,
265 vm_page_t m, struct rwlock **lockp);
267 static vm_page_t _pmap_alloc_l3(pmap_t pmap, vm_pindex_t ptepindex,
268 struct rwlock **lockp);
270 static void _pmap_unwire_l3(pmap_t pmap, vm_offset_t va, vm_page_t m,
271 struct spglist *free);
272 static int pmap_unuse_l3(pmap_t, vm_offset_t, pd_entry_t, struct spglist *);
275 * These load the old table data and store the new value.
276 * They need to be atomic as the System MMU may write to the table at
277 * the same time as the CPU.
279 #define pmap_load_store(table, entry) atomic_swap_64(table, entry)
280 #define pmap_set(table, mask) atomic_set_64(table, mask)
281 #define pmap_load_clear(table) atomic_swap_64(table, 0)
282 #define pmap_load(table) (*table)
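/*
 * Illustrative usage of the helpers above (a sketch, not a separate API):
 *
 *	old_l3 = pmap_load_clear(l3);		atomically remove a mapping
 *	pmap_load_store(l3, new_l3);		atomically install a new one
 *	pmap_set(l3, ATTR_AP(ATTR_AP_RO));	atomically OR in attribute bits
 *
 * pmap_load_store() and pmap_load_clear() return the previous entry, which
 * callers use to examine the old attributes after the swap.
 */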
284 /********************/
285 /* Inline functions */
286 /********************/
289 pagecopy(void *s, void *d)
292 memcpy(d, s, PAGE_SIZE);
295 #define pmap_l0_index(va) (((va) >> L0_SHIFT) & L0_ADDR_MASK)
296 #define pmap_l1_index(va) (((va) >> L1_SHIFT) & Ln_ADDR_MASK)
297 #define pmap_l2_index(va) (((va) >> L2_SHIFT) & Ln_ADDR_MASK)
298 #define pmap_l3_index(va) (((va) >> L3_SHIFT) & Ln_ADDR_MASK)
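/*
 * Worked example (assuming the 4 KB granule, with L0_SHIFT = 39,
 * L1_SHIFT = 30, L2_SHIFT = 21 and L3_SHIFT = 12): a virtual address breaks
 * into four 9-bit table indices plus a 12-bit page offset,
 *
 *	va[47:39] -> pmap_l0_index(va)
 *	va[38:30] -> pmap_l1_index(va)
 *	va[29:21] -> pmap_l2_index(va)
 *	va[20:12] -> pmap_l3_index(va)
 *	va[11:0]  -> offset within the 4 KB page
 *
 * so an L2 block mapping covers 2 MB and an L1 block mapping covers 1 GB.
 */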
300 static __inline pd_entry_t *
301 pmap_l0(pmap_t pmap, vm_offset_t va)
304 return (&pmap->pm_l0[pmap_l0_index(va)]);
307 static __inline pd_entry_t *
308 pmap_l0_to_l1(pd_entry_t *l0, vm_offset_t va)
312 l1 = (pd_entry_t *)PHYS_TO_DMAP(pmap_load(l0) & ~ATTR_MASK);
313 return (&l1[pmap_l1_index(va)]);
316 static __inline pd_entry_t *
317 pmap_l1(pmap_t pmap, vm_offset_t va)
321 l0 = pmap_l0(pmap, va);
322 if ((pmap_load(l0) & ATTR_DESCR_MASK) != L0_TABLE)
325 return (pmap_l0_to_l1(l0, va));
328 static __inline pd_entry_t *
329 pmap_l1_to_l2(pd_entry_t *l1, vm_offset_t va)
333 l2 = (pd_entry_t *)PHYS_TO_DMAP(pmap_load(l1) & ~ATTR_MASK);
334 return (&l2[pmap_l2_index(va)]);
337 static __inline pd_entry_t *
338 pmap_l2(pmap_t pmap, vm_offset_t va)
342 l1 = pmap_l1(pmap, va);
343 if ((pmap_load(l1) & ATTR_DESCR_MASK) != L1_TABLE)
346 return (pmap_l1_to_l2(l1, va));
349 static __inline pt_entry_t *
350 pmap_l2_to_l3(pd_entry_t *l2, vm_offset_t va)
354 l3 = (pd_entry_t *)PHYS_TO_DMAP(pmap_load(l2) & ~ATTR_MASK);
355 return (&l3[pmap_l3_index(va)]);
359 * Returns the lowest valid pde for a given virtual address.
360 * The next level may or may not point to a valid page or block.
362 static __inline pd_entry_t *
363 pmap_pde(pmap_t pmap, vm_offset_t va, int *level)
365 pd_entry_t *l0, *l1, *l2, desc;
367 l0 = pmap_l0(pmap, va);
368 desc = pmap_load(l0) & ATTR_DESCR_MASK;
369 if (desc != L0_TABLE) {
374 l1 = pmap_l0_to_l1(l0, va);
375 desc = pmap_load(l1) & ATTR_DESCR_MASK;
376 if (desc != L1_TABLE) {
381 l2 = pmap_l1_to_l2(l1, va);
382 desc = pmap_load(l2) & ATTR_DESCR_MASK;
383 if (desc != L2_TABLE) {
393 * Returns the lowest valid pte block or table entry for a given virtual
394 * address. If there are no valid entries return NULL and set the level to
395 * the first invalid level.
397 static __inline pt_entry_t *
398 pmap_pte(pmap_t pmap, vm_offset_t va, int *level)
400 pd_entry_t *l1, *l2, desc;
403 l1 = pmap_l1(pmap, va);
408 desc = pmap_load(l1) & ATTR_DESCR_MASK;
409 if (desc == L1_BLOCK) {
414 if (desc != L1_TABLE) {
419 l2 = pmap_l1_to_l2(l1, va);
420 desc = pmap_load(l2) & ATTR_DESCR_MASK;
421 if (desc == L2_BLOCK) {
426 if (desc != L2_TABLE) {
432 l3 = pmap_l2_to_l3(l2, va);
433 if ((pmap_load(l3) & ATTR_DESCR_MASK) != L3_PAGE)
440 pmap_superpages_enabled(void)
443 return (superpages_enabled != 0);
447 pmap_get_tables(pmap_t pmap, vm_offset_t va, pd_entry_t **l0, pd_entry_t **l1,
448 pd_entry_t **l2, pt_entry_t **l3)
450 pd_entry_t *l0p, *l1p, *l2p;
452 if (pmap->pm_l0 == NULL)
455 l0p = pmap_l0(pmap, va);
458 if ((pmap_load(l0p) & ATTR_DESCR_MASK) != L0_TABLE)
461 l1p = pmap_l0_to_l1(l0p, va);
464 if ((pmap_load(l1p) & ATTR_DESCR_MASK) == L1_BLOCK) {
470 if ((pmap_load(l1p) & ATTR_DESCR_MASK) != L1_TABLE)
473 l2p = pmap_l1_to_l2(l1p, va);
476 if ((pmap_load(l2p) & ATTR_DESCR_MASK) == L2_BLOCK) {
481 *l3 = pmap_l2_to_l3(l2p, va);
487 pmap_is_current(pmap_t pmap)
490 return ((pmap == pmap_kernel()) ||
491 (pmap == curthread->td_proc->p_vmspace->vm_map.pmap));
495 pmap_l3_valid(pt_entry_t l3)
498 return ((l3 & ATTR_DESCR_MASK) == L3_PAGE);
502 pmap_l3_valid_cacheable(pt_entry_t l3)
505 return (((l3 & ATTR_DESCR_MASK) == L3_PAGE) &&
506 ((l3 & ATTR_IDX_MASK) == ATTR_IDX(CACHED_MEMORY)));
509 #define PTE_SYNC(pte) cpu_dcache_wb_range((vm_offset_t)pte, sizeof(*pte))
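/*
 * Note (an inference about intent, not stated elsewhere in this file):
 * PTE_SYNC pushes the cache line holding a just-written table entry out to
 * memory, for table walkers that do not snoop the data cache.  Typical use
 * is immediately after storing an entry:
 *
 *	pmap_load_store(pte, new_pte);
 *	PTE_SYNC(pte);
 */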
512 * Checks if the page is dirty. We currently lack proper tracking of this on
513 * arm64, so for now assume that if a page mapped read/write has been accessed, it is dirty.
516 pmap_page_dirty(pt_entry_t pte)
519 return ((pte & (ATTR_AF | ATTR_AP_RW_BIT)) ==
520 (ATTR_AF | ATTR_AP(ATTR_AP_RW)));
524 pmap_resident_count_inc(pmap_t pmap, int count)
527 PMAP_LOCK_ASSERT(pmap, MA_OWNED);
528 pmap->pm_stats.resident_count += count;
532 pmap_resident_count_dec(pmap_t pmap, int count)
535 PMAP_LOCK_ASSERT(pmap, MA_OWNED);
536 KASSERT(pmap->pm_stats.resident_count >= count,
537 ("pmap %p resident count underflow %ld %d", pmap,
538 pmap->pm_stats.resident_count, count));
539 pmap->pm_stats.resident_count -= count;
543 pmap_early_page_idx(vm_offset_t l1pt, vm_offset_t va, u_int *l1_slot,
549 l1 = (pd_entry_t *)l1pt;
550 *l1_slot = (va >> L1_SHIFT) & Ln_ADDR_MASK;
552 /* Check that locore used an L1 table mapping */
553 KASSERT((l1[*l1_slot] & ATTR_DESCR_MASK) == L1_TABLE,
554 ("Invalid bootstrap L1 table"));
555 /* Find the address of the L2 table */
556 l2 = (pt_entry_t *)init_pt_va;
557 *l2_slot = pmap_l2_index(va);
563 pmap_early_vtophys(vm_offset_t l1pt, vm_offset_t va)
565 u_int l1_slot, l2_slot;
568 l2 = pmap_early_page_idx(l1pt, va, &l1_slot, &l2_slot);
570 return ((l2[l2_slot] & ~ATTR_MASK) + (va & L2_OFFSET));
574 pmap_bootstrap_dmap(vm_offset_t kern_l1, vm_paddr_t min_pa, vm_paddr_t max_pa)
580 pa = dmap_phys_base = min_pa & ~L1_OFFSET;
581 va = DMAP_MIN_ADDRESS;
582 for (; va < DMAP_MAX_ADDRESS && pa < max_pa;
583 pa += L1_SIZE, va += L1_SIZE, l1_slot++) {
584 l1_slot = ((va - DMAP_MIN_ADDRESS) >> L1_SHIFT);
586 pmap_load_store(&pagetable_dmap[l1_slot],
587 (pa & ~L1_OFFSET) | ATTR_DEFAULT |
588 ATTR_IDX(CACHED_MEMORY) | L1_BLOCK);
591 /* Set the upper limit of the DMAP region */
595 cpu_dcache_wb_range((vm_offset_t)pagetable_dmap,
596 PAGE_SIZE * DMAP_TABLES);
601 pmap_bootstrap_l2(vm_offset_t l1pt, vm_offset_t va, vm_offset_t l2_start)
608 KASSERT((va & L1_OFFSET) == 0, ("Invalid virtual address"));
610 l1 = (pd_entry_t *)l1pt;
611 l1_slot = pmap_l1_index(va);
614 for (; va < VM_MAX_KERNEL_ADDRESS; l1_slot++, va += L1_SIZE) {
615 KASSERT(l1_slot < Ln_ENTRIES, ("Invalid L1 index"));
617 pa = pmap_early_vtophys(l1pt, l2pt);
618 pmap_load_store(&l1[l1_slot],
619 (pa & ~Ln_TABLE_MASK) | L1_TABLE);
623 /* Clean the L2 page table */
624 memset((void *)l2_start, 0, l2pt - l2_start);
625 cpu_dcache_wb_range(l2_start, l2pt - l2_start);
627 /* Flush the L1 table to RAM */
628 cpu_dcache_wb_range((vm_offset_t)l1, PAGE_SIZE);
634 pmap_bootstrap_l3(vm_offset_t l1pt, vm_offset_t va, vm_offset_t l3_start)
636 vm_offset_t l2pt, l3pt;
641 KASSERT((va & L2_OFFSET) == 0, ("Invalid virtual address"));
643 l2 = pmap_l2(kernel_pmap, va);
644 l2 = (pd_entry_t *)rounddown2((uintptr_t)l2, PAGE_SIZE);
645 l2pt = (vm_offset_t)l2;
646 l2_slot = pmap_l2_index(va);
649 for (; va < VM_MAX_KERNEL_ADDRESS; l2_slot++, va += L2_SIZE) {
650 KASSERT(l2_slot < Ln_ENTRIES, ("Invalid L2 index"));
652 pa = pmap_early_vtophys(l1pt, l3pt);
653 pmap_load_store(&l2[l2_slot],
654 (pa & ~Ln_TABLE_MASK) | L2_TABLE);
658 /* Clean the L3 page table */
659 memset((void *)l3_start, 0, l3pt - l3_start);
660 cpu_dcache_wb_range(l3_start, l3pt - l3_start);
662 cpu_dcache_wb_range((vm_offset_t)l2, PAGE_SIZE);
668 * Bootstrap the system enough to run with virtual memory.
671 pmap_bootstrap(vm_offset_t l0pt, vm_offset_t l1pt, vm_paddr_t kernstart,
674 u_int l1_slot, l2_slot, avail_slot, map_slot, used_map_slot;
677 vm_offset_t va, freemempos;
678 vm_offset_t dpcpu, msgbufpv;
679 vm_paddr_t pa, max_pa, min_pa;
682 kern_delta = KERNBASE - kernstart;
685 printf("pmap_bootstrap %lx %lx %lx\n", l1pt, kernstart, kernlen);
686 printf("%lx\n", l1pt);
687 printf("%lx\n", (KERNBASE >> L1_SHIFT) & Ln_ADDR_MASK);
689 /* Set this early so we can use the pagetable walking functions */
690 kernel_pmap_store.pm_l0 = (pd_entry_t *)l0pt;
691 PMAP_LOCK_INIT(kernel_pmap);
693 /* Assume the address we were loaded to is a valid physical address */
694 min_pa = max_pa = KERNBASE - kern_delta;
697 * Find the minimum physical address. physmap is sorted,
698 * but may contain empty ranges.
700 for (i = 0; i < (physmap_idx * 2); i += 2) {
701 if (physmap[i] == physmap[i + 1])
703 if (physmap[i] <= min_pa)
705 if (physmap[i + 1] > max_pa)
706 max_pa = physmap[i + 1];
709 /* Create a direct map region early so we can use it for pa -> va */
710 pmap_bootstrap_dmap(l1pt, min_pa, max_pa);
713 pa = KERNBASE - kern_delta;
716 * Start to initialise phys_avail by copying from physmap
717 * up to the physical address KERNBASE points at.
719 map_slot = avail_slot = 0;
720 for (; map_slot < (physmap_idx * 2) &&
721 avail_slot < (PHYS_AVAIL_SIZE - 2); map_slot += 2) {
722 if (physmap[map_slot] == physmap[map_slot + 1])
725 if (physmap[map_slot] <= pa &&
726 physmap[map_slot + 1] > pa)
729 phys_avail[avail_slot] = physmap[map_slot];
730 phys_avail[avail_slot + 1] = physmap[map_slot + 1];
731 physmem += (phys_avail[avail_slot + 1] -
732 phys_avail[avail_slot]) >> PAGE_SHIFT;
736 /* Add the memory before the kernel */
737 if (physmap[avail_slot] < pa && avail_slot < (PHYS_AVAIL_SIZE - 2)) {
738 phys_avail[avail_slot] = physmap[map_slot];
739 phys_avail[avail_slot + 1] = pa;
740 physmem += (phys_avail[avail_slot + 1] -
741 phys_avail[avail_slot]) >> PAGE_SHIFT;
744 used_map_slot = map_slot;
747 * Read the page table to find out what is already mapped.
748 * This assumes we have mapped a block of memory from KERNBASE
749 * using a single L1 entry.
751 l2 = pmap_early_page_idx(l1pt, KERNBASE, &l1_slot, &l2_slot);
753 /* Sanity check the index, KERNBASE should be the first VA */
754 KASSERT(l2_slot == 0, ("The L2 index is non-zero"));
756 /* Find how many pages we have mapped */
757 for (; l2_slot < Ln_ENTRIES; l2_slot++) {
758 if ((l2[l2_slot] & ATTR_DESCR_MASK) == 0)
761 /* Check locore used L2 blocks */
762 KASSERT((l2[l2_slot] & ATTR_DESCR_MASK) == L2_BLOCK,
763 ("Invalid bootstrap L2 table"));
764 KASSERT((l2[l2_slot] & ~ATTR_MASK) == pa,
765 ("Incorrect PA in L2 table"));
771 va = roundup2(va, L1_SIZE);
773 freemempos = KERNBASE + kernlen;
774 freemempos = roundup2(freemempos, PAGE_SIZE);
775 /* Create the l2 tables up to VM_MAX_KERNEL_ADDRESS */
776 freemempos = pmap_bootstrap_l2(l1pt, va, freemempos);
777 /* And the l3 tables for the early devmap */
778 freemempos = pmap_bootstrap_l3(l1pt,
779 VM_MAX_KERNEL_ADDRESS - L2_SIZE, freemempos);
783 #define alloc_pages(var, np) \
784 (var) = freemempos; \
785 freemempos += (np * PAGE_SIZE); \
786 memset((char *)(var), 0, ((np) * PAGE_SIZE));
788 /* Allocate dynamic per-cpu area. */
789 alloc_pages(dpcpu, DPCPU_SIZE / PAGE_SIZE);
790 dpcpu_init((void *)dpcpu, 0);
792 /* Allocate memory for the msgbuf, e.g. for /sbin/dmesg */
793 alloc_pages(msgbufpv, round_page(msgbufsize) / PAGE_SIZE);
794 msgbufp = (void *)msgbufpv;
796 virtual_avail = roundup2(freemempos, L1_SIZE);
797 virtual_end = VM_MAX_KERNEL_ADDRESS - L2_SIZE;
798 kernel_vm_end = virtual_avail;
800 pa = pmap_early_vtophys(l1pt, freemempos);
802 /* Finish initialising physmap */
803 map_slot = used_map_slot;
804 for (; avail_slot < (PHYS_AVAIL_SIZE - 2) &&
805 map_slot < (physmap_idx * 2); map_slot += 2) {
806 if (physmap[map_slot] == physmap[map_slot + 1])
809 /* Have we used the current range? */
810 if (physmap[map_slot + 1] <= pa)
813 /* Do we need to split the entry? */
814 if (physmap[map_slot] < pa) {
815 phys_avail[avail_slot] = pa;
816 phys_avail[avail_slot + 1] = physmap[map_slot + 1];
818 phys_avail[avail_slot] = physmap[map_slot];
819 phys_avail[avail_slot + 1] = physmap[map_slot + 1];
821 physmem += (phys_avail[avail_slot + 1] -
822 phys_avail[avail_slot]) >> PAGE_SHIFT;
826 phys_avail[avail_slot] = 0;
827 phys_avail[avail_slot + 1] = 0;
830 * Maxmem isn't the "maximum memory", it's one larger than the
831 * highest page of the physical address space. It should be
832 * called something like "Maxphyspage".
834 Maxmem = atop(phys_avail[avail_slot - 1]);
840 * Initialize a vm_page's machine-dependent fields.
843 pmap_page_init(vm_page_t m)
846 TAILQ_INIT(&m->md.pv_list);
847 m->md.pv_memattr = VM_MEMATTR_WRITE_BACK;
851 * Initialize the pmap module.
852 * Called by vm_init, to initialize any structures that the pmap
853 * system needs to map virtual memory.
861 * Are large page mappings enabled?
863 TUNABLE_INT_FETCH("vm.pmap.superpages_enabled", &superpages_enabled);
866 * Initialize the pv chunk list mutex.
868 mtx_init(&pv_chunks_mutex, "pmap pv chunk list", NULL, MTX_DEF);
871 * Initialize the pool of pv list locks.
873 for (i = 0; i < NPV_LIST_LOCKS; i++)
874 rw_init(&pv_list_locks[i], "pmap pv list");
878 * Invalidate a single TLB entry.
881 pmap_invalidate_page(pmap_t pmap, vm_offset_t va)
887 "tlbi vaae1is, %0 \n"
890 : : "r"(va >> PAGE_SHIFT));
895 pmap_invalidate_range(pmap_t pmap, vm_offset_t sva, vm_offset_t eva)
901 for (addr = sva; addr < eva; addr += PAGE_SIZE) {
903 "tlbi vaae1is, %0" : : "r"(addr >> PAGE_SHIFT));
912 pmap_invalidate_all(pmap_t pmap)
925 * Routine: pmap_extract
927 * Extract the physical page address associated
928 * with the given map/virtual_address pair.
931 pmap_extract(pmap_t pmap, vm_offset_t va)
933 pt_entry_t *pte, tpte;
940 * Find the block or page map for this virtual address. pmap_pte
941 * will return either a valid block/page entry, or NULL.
943 pte = pmap_pte(pmap, va, &lvl);
945 tpte = pmap_load(pte);
946 pa = tpte & ~ATTR_MASK;
949 KASSERT((tpte & ATTR_DESCR_MASK) == L1_BLOCK,
950 ("pmap_extract: Invalid L1 pte found: %lx",
951 tpte & ATTR_DESCR_MASK));
952 pa |= (va & L1_OFFSET);
955 KASSERT((tpte & ATTR_DESCR_MASK) == L2_BLOCK,
956 ("pmap_extract: Invalid L2 pte found: %lx",
957 tpte & ATTR_DESCR_MASK));
958 pa |= (va & L2_OFFSET);
961 KASSERT((tpte & ATTR_DESCR_MASK) == L3_PAGE,
962 ("pmap_extract: Invalid L3 pte found: %lx",
963 tpte & ATTR_DESCR_MASK));
964 pa |= (va & L3_OFFSET);
973 * Routine: pmap_extract_and_hold
975 * Atomically extract and hold the physical page
976 * with the given pmap and virtual address pair
977 * if that mapping permits the given protection.
980 pmap_extract_and_hold(pmap_t pmap, vm_offset_t va, vm_prot_t prot)
982 pt_entry_t *pte, tpte;
991 pte = pmap_pte(pmap, va, &lvl);
993 tpte = pmap_load(pte);
995 KASSERT(lvl > 0 && lvl <= 3,
996 ("pmap_extract_and_hold: Invalid level %d", lvl));
997 CTASSERT(L1_BLOCK == L2_BLOCK);
998 KASSERT((lvl == 3 && (tpte & ATTR_DESCR_MASK) == L3_PAGE) ||
999 (lvl < 3 && (tpte & ATTR_DESCR_MASK) == L1_BLOCK),
1000 ("pmap_extract_and_hold: Invalid pte at L%d: %lx", lvl,
1001 tpte & ATTR_DESCR_MASK));
1002 if (((tpte & ATTR_AP_RW_BIT) == ATTR_AP(ATTR_AP_RW)) ||
1003 ((prot & VM_PROT_WRITE) == 0)) {
1004 if (vm_page_pa_tryrelock(pmap, tpte & ~ATTR_MASK, &pa))
1006 m = PHYS_TO_VM_PAGE(tpte & ~ATTR_MASK);
1016 pmap_kextract(vm_offset_t va)
1018 pt_entry_t *pte, tpte;
1022 if (va >= DMAP_MIN_ADDRESS && va < DMAP_MAX_ADDRESS) {
1023 pa = DMAP_TO_PHYS(va);
1026 pte = pmap_pte(kernel_pmap, va, &lvl);
1028 tpte = pmap_load(pte);
1029 pa = tpte & ~ATTR_MASK;
1032 KASSERT((tpte & ATTR_DESCR_MASK) == L1_BLOCK,
1033 ("pmap_kextract: Invalid L1 pte found: %lx",
1034 tpte & ATTR_DESCR_MASK));
1035 pa |= (va & L1_OFFSET);
1038 KASSERT((tpte & ATTR_DESCR_MASK) == L2_BLOCK,
1039 ("pmap_kextract: Invalid L2 pte found: %lx",
1040 tpte & ATTR_DESCR_MASK));
1041 pa |= (va & L2_OFFSET);
1044 KASSERT((tpte & ATTR_DESCR_MASK) == L3_PAGE,
1045 ("pmap_kextract: Invalid L3 pte found: %lx",
1046 tpte & ATTR_DESCR_MASK));
1047 pa |= (va & L3_OFFSET);
1055 /***************************************************
1056 * Low level mapping routines.....
1057 ***************************************************/
1060 pmap_kenter(vm_offset_t sva, vm_size_t size, vm_paddr_t pa, int mode)
1067 KASSERT((pa & L3_OFFSET) == 0,
1068 ("pmap_kenter: Invalid physical address"));
1069 KASSERT((sva & L3_OFFSET) == 0,
1070 ("pmap_kenter: Invalid virtual address"));
1071 KASSERT((size & PAGE_MASK) == 0,
1072 ("pmap_kenter: Mapping is not page-sized"));
1076 pde = pmap_pde(kernel_pmap, va, &lvl);
1077 KASSERT(pde != NULL,
1078 ("pmap_kenter: Invalid page entry, va: 0x%lx", va));
1079 KASSERT(lvl == 2, ("pmap_kenter: Invalid level %d", lvl));
1081 pte = pmap_l2_to_l3(pde, va);
1082 pmap_load_store(pte, (pa & ~L3_OFFSET) | ATTR_DEFAULT |
1083 ATTR_IDX(mode) | L3_PAGE);
1090 pmap_invalidate_range(kernel_pmap, sva, va);
1094 pmap_kenter_device(vm_offset_t sva, vm_size_t size, vm_paddr_t pa)
1097 pmap_kenter(sva, size, pa, DEVICE_MEMORY);
1101 * Remove a page from the kernel pagetables.
1104 pmap_kremove(vm_offset_t va)
1109 pte = pmap_pte(kernel_pmap, va, &lvl);
1110 KASSERT(pte != NULL, ("pmap_kremove: Invalid address"));
1111 KASSERT(lvl == 3, ("pmap_kremove: Invalid pte level %d", lvl));
1113 if (pmap_l3_valid_cacheable(pmap_load(pte)))
1114 cpu_dcache_wb_range(va, L3_SIZE);
1115 pmap_load_clear(pte);
1117 pmap_invalidate_page(kernel_pmap, va);
1121 pmap_kremove_device(vm_offset_t sva, vm_size_t size)
1127 KASSERT((sva & L3_OFFSET) == 0,
1128 ("pmap_kremove_device: Invalid virtual address"));
1129 KASSERT((size & PAGE_MASK) == 0,
1130 ("pmap_kremove_device: Mapping is not page-sized"));
1134 pte = pmap_pte(kernel_pmap, va, &lvl);
1135 KASSERT(pte != NULL, ("Invalid page table, va: 0x%lx", va));
1137 ("Invalid device pagetable level: %d != 3", lvl));
1138 pmap_load_clear(pte);
1144 pmap_invalidate_range(kernel_pmap, sva, va);
1148 * Used to map a range of physical addresses into kernel
1149 * virtual address space.
1151 * The value passed in '*virt' is a suggested virtual address for
1152 * the mapping. Architectures which can support a direct-mapped
1153 * physical to virtual region can return the appropriate address
1154 * within that region, leaving '*virt' unchanged. Other
1155 * architectures should map the pages starting at '*virt' and
1156 * update '*virt' with the first usable address after the mapped
1160 pmap_map(vm_offset_t *virt, vm_paddr_t start, vm_paddr_t end, int prot)
1162 return PHYS_TO_DMAP(start);
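/*
 * (Illustrative note, inferred from the code above): the arm64 direct map
 * covers all of physical memory, so pmap_map() creates no new mapping; it
 * simply returns the DMAP alias of 'start' and leaves '*virt' untouched,
 * as permitted for direct-mapped architectures.
 */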
1167 * Add a list of wired pages to the kva.
1168 * This routine is only used for temporary
1169 * kernel mappings that do not need to have
1170 * page modification or references recorded.
1171 * Note that old mappings are simply written
1172 * over. The page *must* be wired.
1173 * Note: SMP coherent. Uses a ranged shootdown IPI.
1176 pmap_qenter(vm_offset_t sva, vm_page_t *ma, int count)
1179 pt_entry_t *pte, pa;
1185 for (i = 0; i < count; i++) {
1186 pde = pmap_pde(kernel_pmap, va, &lvl);
1187 KASSERT(pde != NULL,
1188 ("pmap_qenter: Invalid page entry, va: 0x%lx", va));
1190 ("pmap_qenter: Invalid level %d", lvl));
1193 pa = VM_PAGE_TO_PHYS(m) | ATTR_DEFAULT | ATTR_AP(ATTR_AP_RW) |
1194 ATTR_IDX(m->md.pv_memattr) | L3_PAGE;
1195 pte = pmap_l2_to_l3(pde, va);
1196 pmap_load_store(pte, pa);
1201 pmap_invalidate_range(kernel_pmap, sva, va);
1205 * This routine tears out page mappings from the
1206 * kernel -- it is meant only for temporary mappings.
1209 pmap_qremove(vm_offset_t sva, int count)
1215 KASSERT(sva >= VM_MIN_KERNEL_ADDRESS, ("usermode va %lx", sva));
1218 while (count-- > 0) {
1219 pte = pmap_pte(kernel_pmap, va, &lvl);
1221 ("Invalid device pagetable level: %d != 3", lvl));
1223 if (pmap_l3_valid_cacheable(pmap_load(pte)))
1224 cpu_dcache_wb_range(va, L3_SIZE);
1225 pmap_load_clear(pte);
1231 pmap_invalidate_range(kernel_pmap, sva, va);
1234 /***************************************************
1235 * Page table page management routines.....
1236 ***************************************************/
1237 static __inline void
1238 pmap_free_zero_pages(struct spglist *free)
1242 while ((m = SLIST_FIRST(free)) != NULL) {
1243 SLIST_REMOVE_HEAD(free, plinks.s.ss);
1244 /* Preserve the page's PG_ZERO setting. */
1245 vm_page_free_toq(m);
1250 * Schedule the specified unused page table page to be freed. Specifically,
1251 * add the page to the specified list of pages that will be released to the
1252 * physical memory manager after the TLB has been updated.
1254 static __inline void
1255 pmap_add_delayed_free_list(vm_page_t m, struct spglist *free,
1256 boolean_t set_PG_ZERO)
1260 m->flags |= PG_ZERO;
1262 m->flags &= ~PG_ZERO;
1263 SLIST_INSERT_HEAD(free, m, plinks.s.ss);
1267 * Decrements a page table page's wire count, which is used to record the
1268 * number of valid page table entries within the page. If the wire count
1269 * drops to zero, then the page table page is unmapped. Returns TRUE if the
1270 * page table page was unmapped and FALSE otherwise.
1272 static inline boolean_t
1273 pmap_unwire_l3(pmap_t pmap, vm_offset_t va, vm_page_t m, struct spglist *free)
1277 if (m->wire_count == 0) {
1278 _pmap_unwire_l3(pmap, va, m, free);
1285 _pmap_unwire_l3(pmap_t pmap, vm_offset_t va, vm_page_t m, struct spglist *free)
1288 PMAP_LOCK_ASSERT(pmap, MA_OWNED);
1290 * unmap the page table page
1292 if (m->pindex >= (NUL2E + NUL1E)) {
1296 l0 = pmap_l0(pmap, va);
1297 pmap_load_clear(l0);
1299 } else if (m->pindex >= NUL2E) {
1303 l1 = pmap_l1(pmap, va);
1304 pmap_load_clear(l1);
1310 l2 = pmap_l2(pmap, va);
1311 pmap_load_clear(l2);
1314 pmap_resident_count_dec(pmap, 1);
1315 if (m->pindex < NUL2E) {
1316 /* We just released an l3, unhold the matching l2 */
1317 pd_entry_t *l1, tl1;
1320 l1 = pmap_l1(pmap, va);
1321 tl1 = pmap_load(l1);
1322 l2pg = PHYS_TO_VM_PAGE(tl1 & ~ATTR_MASK);
1323 pmap_unwire_l3(pmap, va, l2pg, free);
1324 } else if (m->pindex < (NUL2E + NUL1E)) {
1325 /* We just released an l2, unhold the matching l1 */
1326 pd_entry_t *l0, tl0;
1329 l0 = pmap_l0(pmap, va);
1330 tl0 = pmap_load(l0);
1331 l1pg = PHYS_TO_VM_PAGE(tl0 & ~ATTR_MASK);
1332 pmap_unwire_l3(pmap, va, l1pg, free);
1334 pmap_invalidate_page(pmap, va);
1337 * This is a release store so that the ordinary store unmapping
1338 * the page table page is globally performed before TLB shoot-down is begun.
1341 atomic_subtract_rel_int(&vm_cnt.v_wire_count, 1);
1344 * Put page on a list so that it is released after
1345 * *ALL* TLB shootdown is done
1347 pmap_add_delayed_free_list(m, free, TRUE);
1351 * After removing an l3 entry, this routine is used to
1352 * conditionally free the page, and manage the hold/wire counts.
1355 pmap_unuse_l3(pmap_t pmap, vm_offset_t va, pd_entry_t ptepde,
1356 struct spglist *free)
1360 if (va >= VM_MAXUSER_ADDRESS)
1362 KASSERT(ptepde != 0, ("pmap_unuse_pt: ptepde != 0"));
1363 mpte = PHYS_TO_VM_PAGE(ptepde & ~ATTR_MASK);
1364 return (pmap_unwire_l3(pmap, va, mpte, free));
1368 pmap_pinit0(pmap_t pmap)
1371 PMAP_LOCK_INIT(pmap);
1372 bzero(&pmap->pm_stats, sizeof(pmap->pm_stats));
1373 pmap->pm_l0 = kernel_pmap->pm_l0;
1377 pmap_pinit(pmap_t pmap)
1383 * allocate the l0 page
1385 while ((l0pt = vm_page_alloc(NULL, 0, VM_ALLOC_NORMAL |
1386 VM_ALLOC_NOOBJ | VM_ALLOC_WIRED | VM_ALLOC_ZERO)) == NULL)
1389 l0phys = VM_PAGE_TO_PHYS(l0pt);
1390 pmap->pm_l0 = (pd_entry_t *)PHYS_TO_DMAP(l0phys);
1392 if ((l0pt->flags & PG_ZERO) == 0)
1393 pagezero(pmap->pm_l0);
1395 bzero(&pmap->pm_stats, sizeof(pmap->pm_stats));
1401 * This routine is called if the desired page table page does not exist.
1403 * If page table page allocation fails, this routine may sleep before
1404 * returning NULL. It sleeps only if a lock pointer was given.
1406 * Note: If a page allocation fails at page table level two or three,
1407 * one or two pages may be held during the wait, only to be released
1408 * afterwards. This conservative approach is easily argued to avoid race conditions.
1412 _pmap_alloc_l3(pmap_t pmap, vm_pindex_t ptepindex, struct rwlock **lockp)
1414 vm_page_t m, l1pg, l2pg;
1416 PMAP_LOCK_ASSERT(pmap, MA_OWNED);
1419 * Allocate a page table page.
1421 if ((m = vm_page_alloc(NULL, ptepindex, VM_ALLOC_NOOBJ |
1422 VM_ALLOC_WIRED | VM_ALLOC_ZERO)) == NULL) {
1423 if (lockp != NULL) {
1424 RELEASE_PV_LIST_LOCK(lockp);
1431 * Indicate the need to retry. While waiting, the page table
1432 * page may have been allocated.
1436 if ((m->flags & PG_ZERO) == 0)
1440 * Map the pagetable page into the process address space, if
1441 * it isn't already there.
1444 if (ptepindex >= (NUL2E + NUL1E)) {
1446 vm_pindex_t l0index;
1448 l0index = ptepindex - (NUL2E + NUL1E);
1449 l0 = &pmap->pm_l0[l0index];
1450 pmap_load_store(l0, VM_PAGE_TO_PHYS(m) | L0_TABLE);
1452 } else if (ptepindex >= NUL2E) {
1453 vm_pindex_t l0index, l1index;
1454 pd_entry_t *l0, *l1;
1457 l1index = ptepindex - NUL2E;
1458 l0index = l1index >> L0_ENTRIES_SHIFT;
1460 l0 = &pmap->pm_l0[l0index];
1461 tl0 = pmap_load(l0);
1463 /* recurse for allocating page dir */
1464 if (_pmap_alloc_l3(pmap, NUL2E + NUL1E + l0index,
1467 /* XXX: release mem barrier? */
1468 atomic_subtract_int(&vm_cnt.v_wire_count, 1);
1469 vm_page_free_zero(m);
1473 l1pg = PHYS_TO_VM_PAGE(tl0 & ~ATTR_MASK);
1477 l1 = (pd_entry_t *)PHYS_TO_DMAP(pmap_load(l0) & ~ATTR_MASK);
1478 l1 = &l1[ptepindex & Ln_ADDR_MASK];
1479 pmap_load_store(l1, VM_PAGE_TO_PHYS(m) | L1_TABLE);
1482 vm_pindex_t l0index, l1index;
1483 pd_entry_t *l0, *l1, *l2;
1484 pd_entry_t tl0, tl1;
1486 l1index = ptepindex >> Ln_ENTRIES_SHIFT;
1487 l0index = l1index >> L0_ENTRIES_SHIFT;
1489 l0 = &pmap->pm_l0[l0index];
1490 tl0 = pmap_load(l0);
1492 /* recurse for allocating page dir */
1493 if (_pmap_alloc_l3(pmap, NUL2E + l1index,
1496 atomic_subtract_int(&vm_cnt.v_wire_count, 1);
1497 vm_page_free_zero(m);
1500 tl0 = pmap_load(l0);
1501 l1 = (pd_entry_t *)PHYS_TO_DMAP(tl0 & ~ATTR_MASK);
1502 l1 = &l1[l1index & Ln_ADDR_MASK];
1504 l1 = (pd_entry_t *)PHYS_TO_DMAP(tl0 & ~ATTR_MASK);
1505 l1 = &l1[l1index & Ln_ADDR_MASK];
1506 tl1 = pmap_load(l1);
1508 /* recurse for allocating page dir */
1509 if (_pmap_alloc_l3(pmap, NUL2E + l1index,
1512 /* XXX: release mem barrier? */
1513 atomic_subtract_int(
1514 &vm_cnt.v_wire_count, 1);
1515 vm_page_free_zero(m);
1519 l2pg = PHYS_TO_VM_PAGE(tl1 & ~ATTR_MASK);
1524 l2 = (pd_entry_t *)PHYS_TO_DMAP(pmap_load(l1) & ~ATTR_MASK);
1525 l2 = &l2[ptepindex & Ln_ADDR_MASK];
1526 pmap_load_store(l2, VM_PAGE_TO_PHYS(m) | L2_TABLE);
1530 pmap_resident_count_inc(pmap, 1);
1536 pmap_alloc_l3(pmap_t pmap, vm_offset_t va, struct rwlock **lockp)
1538 vm_pindex_t ptepindex;
1539 pd_entry_t *pde, tpde;
1544 * Calculate pagetable page index
1546 ptepindex = pmap_l2_pindex(va);
1549 * Get the page directory entry
1551 pde = pmap_pde(pmap, va, &lvl);
1554 * If the page table page is mapped, we just increment the hold count,
1555 * and activate it. If we get a level 2 pde it will point to a level 3 table.
1559 tpde = pmap_load(pde);
1561 m = PHYS_TO_VM_PAGE(tpde & ~ATTR_MASK);
1568 * Here if the pte page isn't mapped, or if it has been deallocated.
1570 m = _pmap_alloc_l3(pmap, ptepindex, lockp);
1571 if (m == NULL && lockp != NULL)
1578 /***************************************************
1579 * Pmap allocation/deallocation routines.
1580 ***************************************************/
1583 * Release any resources held by the given physical map.
1584 * Called when a pmap initialized by pmap_pinit is being released.
1585 * Should only be called if the map contains no valid mappings.
1588 pmap_release(pmap_t pmap)
1592 KASSERT(pmap->pm_stats.resident_count == 0,
1593 ("pmap_release: pmap resident count %ld != 0",
1594 pmap->pm_stats.resident_count));
1596 m = PHYS_TO_VM_PAGE(DMAP_TO_PHYS((vm_offset_t)pmap->pm_l0));
1599 atomic_subtract_int(&vm_cnt.v_wire_count, 1);
1600 vm_page_free_zero(m);
1604 kvm_size(SYSCTL_HANDLER_ARGS)
1606 unsigned long ksize = VM_MAX_KERNEL_ADDRESS - VM_MIN_KERNEL_ADDRESS;
1608 return sysctl_handle_long(oidp, &ksize, 0, req);
1610 SYSCTL_PROC(_vm, OID_AUTO, kvm_size, CTLTYPE_LONG|CTLFLAG_RD,
1611 0, 0, kvm_size, "LU", "Size of KVM");
1614 kvm_free(SYSCTL_HANDLER_ARGS)
1616 unsigned long kfree = VM_MAX_KERNEL_ADDRESS - kernel_vm_end;
1618 return sysctl_handle_long(oidp, &kfree, 0, req);
1620 SYSCTL_PROC(_vm, OID_AUTO, kvm_free, CTLTYPE_LONG|CTLFLAG_RD,
1621 0, 0, kvm_free, "LU", "Amount of KVM free");
1624 * grow the number of kernel page table entries, if needed
1627 pmap_growkernel(vm_offset_t addr)
1631 pd_entry_t *l0, *l1, *l2;
1633 mtx_assert(&kernel_map->system_mtx, MA_OWNED);
1635 addr = roundup2(addr, L2_SIZE);
1636 if (addr - 1 >= kernel_map->max_offset)
1637 addr = kernel_map->max_offset;
1638 while (kernel_vm_end < addr) {
1639 l0 = pmap_l0(kernel_pmap, kernel_vm_end);
1640 KASSERT(pmap_load(l0) != 0,
1641 ("pmap_growkernel: No level 0 kernel entry"));
1643 l1 = pmap_l0_to_l1(l0, kernel_vm_end);
1644 if (pmap_load(l1) == 0) {
1645 /* The L1 entry is empty; allocate a new L2 page table */
1646 nkpg = vm_page_alloc(NULL, kernel_vm_end >> L1_SHIFT,
1647 VM_ALLOC_INTERRUPT | VM_ALLOC_NOOBJ |
1648 VM_ALLOC_WIRED | VM_ALLOC_ZERO);
1650 panic("pmap_growkernel: no memory to grow kernel");
1651 if ((nkpg->flags & PG_ZERO) == 0)
1652 pmap_zero_page(nkpg);
1653 paddr = VM_PAGE_TO_PHYS(nkpg);
1654 pmap_load_store(l1, paddr | L1_TABLE);
1656 continue; /* try again */
1658 l2 = pmap_l1_to_l2(l1, kernel_vm_end);
1659 if ((pmap_load(l2) & ATTR_AF) != 0) {
1660 kernel_vm_end = (kernel_vm_end + L2_SIZE) & ~L2_OFFSET;
1661 if (kernel_vm_end - 1 >= kernel_map->max_offset) {
1662 kernel_vm_end = kernel_map->max_offset;
1668 nkpg = vm_page_alloc(NULL, kernel_vm_end >> L2_SHIFT,
1669 VM_ALLOC_INTERRUPT | VM_ALLOC_NOOBJ | VM_ALLOC_WIRED |
1672 panic("pmap_growkernel: no memory to grow kernel");
1673 if ((nkpg->flags & PG_ZERO) == 0)
1674 pmap_zero_page(nkpg);
1675 paddr = VM_PAGE_TO_PHYS(nkpg);
1676 pmap_load_store(l2, paddr | L2_TABLE);
1678 pmap_invalidate_page(kernel_pmap, kernel_vm_end);
1680 kernel_vm_end = (kernel_vm_end + L2_SIZE) & ~L2_OFFSET;
1681 if (kernel_vm_end - 1 >= kernel_map->max_offset) {
1682 kernel_vm_end = kernel_map->max_offset;
1689 /***************************************************
1690 * page management routines.
1691 ***************************************************/
1693 CTASSERT(sizeof(struct pv_chunk) == PAGE_SIZE);
1694 CTASSERT(_NPCM == 3);
1695 CTASSERT(_NPCPV == 168);
1697 static __inline struct pv_chunk *
1698 pv_to_chunk(pv_entry_t pv)
1701 return ((struct pv_chunk *)((uintptr_t)pv & ~(uintptr_t)PAGE_MASK));
1704 #define PV_PMAP(pv) (pv_to_chunk(pv)->pc_pmap)
1706 #define PC_FREE0 0xfffffffffffffffful
1707 #define PC_FREE1 0xfffffffffffffffful
1708 #define PC_FREE2 0x000000fffffffffful
1710 static const uint64_t pc_freemask[_NPCM] = { PC_FREE0, PC_FREE1, PC_FREE2 };
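/*
 * Worked example: a pv chunk occupies one page and carries _NPCPV = 168
 * pv entries, tracked by _NPCM = 3 64-bit free bitmaps.  Since
 * 168 = 64 + 64 + 40, PC_FREE0 and PC_FREE1 have all 64 bits set while
 * PC_FREE2 sets only the low 40 bits; the unused high bits of pc_map[2]
 * therefore always read as "allocated".
 */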
1714 static int pc_chunk_count, pc_chunk_allocs, pc_chunk_frees, pc_chunk_tryfail;
1716 SYSCTL_INT(_vm_pmap, OID_AUTO, pc_chunk_count, CTLFLAG_RD, &pc_chunk_count, 0,
1717 "Current number of pv entry chunks");
1718 SYSCTL_INT(_vm_pmap, OID_AUTO, pc_chunk_allocs, CTLFLAG_RD, &pc_chunk_allocs, 0,
1719 "Current number of pv entry chunks allocated");
1720 SYSCTL_INT(_vm_pmap, OID_AUTO, pc_chunk_frees, CTLFLAG_RD, &pc_chunk_frees, 0,
1721 "Current number of pv entry chunks frees");
1722 SYSCTL_INT(_vm_pmap, OID_AUTO, pc_chunk_tryfail, CTLFLAG_RD, &pc_chunk_tryfail, 0,
1723 "Number of times tried to get a chunk page but failed.");
1725 static long pv_entry_frees, pv_entry_allocs, pv_entry_count;
1726 static int pv_entry_spare;
1728 SYSCTL_LONG(_vm_pmap, OID_AUTO, pv_entry_frees, CTLFLAG_RD, &pv_entry_frees, 0,
1729 "Current number of pv entry frees");
1730 SYSCTL_LONG(_vm_pmap, OID_AUTO, pv_entry_allocs, CTLFLAG_RD, &pv_entry_allocs, 0,
1731 "Current number of pv entry allocs");
1732 SYSCTL_LONG(_vm_pmap, OID_AUTO, pv_entry_count, CTLFLAG_RD, &pv_entry_count, 0,
1733 "Current number of pv entries");
1734 SYSCTL_INT(_vm_pmap, OID_AUTO, pv_entry_spare, CTLFLAG_RD, &pv_entry_spare, 0,
1735 "Current number of spare pv entries");
1740 * We are in a serious low memory condition. Resort to
1741 * drastic measures to free some pages so we can allocate
1742 * another pv entry chunk.
1744 * Returns NULL if PV entries were reclaimed from the specified pmap.
1746 * We do not, however, unmap 2mpages because subsequent accesses will
1747 * allocate per-page pv entries until repromotion occurs, thereby
1748 * exacerbating the shortage of free pv entries.
1751 reclaim_pv_chunk(pmap_t locked_pmap, struct rwlock **lockp)
1754 panic("ARM64TODO: reclaim_pv_chunk");
1758 * free the pv_entry back to the free list
1761 free_pv_entry(pmap_t pmap, pv_entry_t pv)
1763 struct pv_chunk *pc;
1764 int idx, field, bit;
1766 PMAP_LOCK_ASSERT(pmap, MA_OWNED);
1767 PV_STAT(atomic_add_long(&pv_entry_frees, 1));
1768 PV_STAT(atomic_add_int(&pv_entry_spare, 1));
1769 PV_STAT(atomic_subtract_long(&pv_entry_count, 1));
1770 pc = pv_to_chunk(pv);
1771 idx = pv - &pc->pc_pventry[0];
1774 pc->pc_map[field] |= 1ul << bit;
1775 if (pc->pc_map[0] != PC_FREE0 || pc->pc_map[1] != PC_FREE1 ||
1776 pc->pc_map[2] != PC_FREE2) {
1777 /* 98% of the time, pc is already at the head of the list. */
1778 if (__predict_false(pc != TAILQ_FIRST(&pmap->pm_pvchunk))) {
1779 TAILQ_REMOVE(&pmap->pm_pvchunk, pc, pc_list);
1780 TAILQ_INSERT_HEAD(&pmap->pm_pvchunk, pc, pc_list);
1784 TAILQ_REMOVE(&pmap->pm_pvchunk, pc, pc_list);
1789 free_pv_chunk(struct pv_chunk *pc)
1793 mtx_lock(&pv_chunks_mutex);
1794 TAILQ_REMOVE(&pv_chunks, pc, pc_lru);
1795 mtx_unlock(&pv_chunks_mutex);
1796 PV_STAT(atomic_subtract_int(&pv_entry_spare, _NPCPV));
1797 PV_STAT(atomic_subtract_int(&pc_chunk_count, 1));
1798 PV_STAT(atomic_add_int(&pc_chunk_frees, 1));
1799 /* entire chunk is free, return it */
1800 m = PHYS_TO_VM_PAGE(DMAP_TO_PHYS((vm_offset_t)pc));
1801 dump_drop_page(m->phys_addr);
1802 vm_page_unwire(m, PQ_NONE);
1807 * Returns a new PV entry, allocating a new PV chunk from the system when
1808 * needed. If this PV chunk allocation fails and a PV list lock pointer was
1809 * given, a PV chunk is reclaimed from an arbitrary pmap. Otherwise, NULL is
1812 * The given PV list lock may be released.
1815 get_pv_entry(pmap_t pmap, struct rwlock **lockp)
1819 struct pv_chunk *pc;
1822 PMAP_LOCK_ASSERT(pmap, MA_OWNED);
1823 PV_STAT(atomic_add_long(&pv_entry_allocs, 1));
1825 pc = TAILQ_FIRST(&pmap->pm_pvchunk);
1827 for (field = 0; field < _NPCM; field++) {
1828 if (pc->pc_map[field]) {
1829 bit = ffsl(pc->pc_map[field]) - 1;
1833 if (field < _NPCM) {
1834 pv = &pc->pc_pventry[field * 64 + bit];
1835 pc->pc_map[field] &= ~(1ul << bit);
1836 /* If this was the last item, move it to tail */
1837 if (pc->pc_map[0] == 0 && pc->pc_map[1] == 0 &&
1838 pc->pc_map[2] == 0) {
1839 TAILQ_REMOVE(&pmap->pm_pvchunk, pc, pc_list);
1840 TAILQ_INSERT_TAIL(&pmap->pm_pvchunk, pc,
1843 PV_STAT(atomic_add_long(&pv_entry_count, 1));
1844 PV_STAT(atomic_subtract_int(&pv_entry_spare, 1));
1848 /* No free items, allocate another chunk */
1849 m = vm_page_alloc(NULL, 0, VM_ALLOC_NORMAL | VM_ALLOC_NOOBJ |
1852 if (lockp == NULL) {
1853 PV_STAT(pc_chunk_tryfail++);
1856 m = reclaim_pv_chunk(pmap, lockp);
1860 PV_STAT(atomic_add_int(&pc_chunk_count, 1));
1861 PV_STAT(atomic_add_int(&pc_chunk_allocs, 1));
1862 dump_add_page(m->phys_addr);
1863 pc = (void *)PHYS_TO_DMAP(m->phys_addr);
1865 pc->pc_map[0] = PC_FREE0 & ~1ul; /* preallocated bit 0 */
1866 pc->pc_map[1] = PC_FREE1;
1867 pc->pc_map[2] = PC_FREE2;
1868 mtx_lock(&pv_chunks_mutex);
1869 TAILQ_INSERT_TAIL(&pv_chunks, pc, pc_lru);
1870 mtx_unlock(&pv_chunks_mutex);
1871 pv = &pc->pc_pventry[0];
1872 TAILQ_INSERT_HEAD(&pmap->pm_pvchunk, pc, pc_list);
1873 PV_STAT(atomic_add_long(&pv_entry_count, 1));
1874 PV_STAT(atomic_add_int(&pv_entry_spare, _NPCPV - 1));
1879 * First find and then remove the pv entry for the specified pmap and virtual
1880 * address from the specified pv list. Returns the pv entry if found and NULL
1881 * otherwise. This operation can be performed on pv lists for either 4KB or
1882 * 2MB page mappings.
1884 static __inline pv_entry_t
1885 pmap_pvh_remove(struct md_page *pvh, pmap_t pmap, vm_offset_t va)
1889 TAILQ_FOREACH(pv, &pvh->pv_list, pv_next) {
1890 if (pmap == PV_PMAP(pv) && va == pv->pv_va) {
1891 TAILQ_REMOVE(&pvh->pv_list, pv, pv_next);
1900 * First find and then destroy the pv entry for the specified pmap and virtual
1901 * address. This operation can be performed on pv lists for either 4KB or 2MB
1905 pmap_pvh_free(struct md_page *pvh, pmap_t pmap, vm_offset_t va)
1909 pv = pmap_pvh_remove(pvh, pmap, va);
1910 KASSERT(pv != NULL, ("pmap_pvh_free: pv not found"));
1911 free_pv_entry(pmap, pv);
1915 * Conditionally create the PV entry for a 4KB page mapping if the required
1916 * memory can be allocated without resorting to reclamation.
1919 pmap_try_insert_pv_entry(pmap_t pmap, vm_offset_t va, vm_page_t m,
1920 struct rwlock **lockp)
1924 PMAP_LOCK_ASSERT(pmap, MA_OWNED);
1925 /* Pass NULL instead of the lock pointer to disable reclamation. */
1926 if ((pv = get_pv_entry(pmap, NULL)) != NULL) {
1928 CHANGE_PV_LIST_LOCK_TO_VM_PAGE(lockp, m);
1929 TAILQ_INSERT_TAIL(&m->md.pv_list, pv, pv_next);
1937 * pmap_remove_l3: unmap a single page from a process address space
1940 pmap_remove_l3(pmap_t pmap, pt_entry_t *l3, vm_offset_t va,
1941 pd_entry_t l2e, struct spglist *free, struct rwlock **lockp)
1946 PMAP_LOCK_ASSERT(pmap, MA_OWNED);
1947 if (pmap_is_current(pmap) && pmap_l3_valid_cacheable(pmap_load(l3)))
1948 cpu_dcache_wb_range(va, L3_SIZE);
1949 old_l3 = pmap_load_clear(l3);
1951 pmap_invalidate_page(pmap, va);
1952 if (old_l3 & ATTR_SW_WIRED)
1953 pmap->pm_stats.wired_count -= 1;
1954 pmap_resident_count_dec(pmap, 1);
1955 if (old_l3 & ATTR_SW_MANAGED) {
1956 m = PHYS_TO_VM_PAGE(old_l3 & ~ATTR_MASK);
1957 if (pmap_page_dirty(old_l3))
1959 if (old_l3 & ATTR_AF)
1960 vm_page_aflag_set(m, PGA_REFERENCED);
1961 CHANGE_PV_LIST_LOCK_TO_VM_PAGE(lockp, m);
1962 pmap_pvh_free(&m->md, pmap, va);
1964 return (pmap_unuse_l3(pmap, va, l2e, free));
1968 * Remove the given range of addresses from the specified map.
1970 * It is assumed that the start and end are properly
1971 * rounded to the page size.
1974 pmap_remove(pmap_t pmap, vm_offset_t sva, vm_offset_t eva)
1976 struct rwlock *lock;
1977 vm_offset_t va, va_next;
1978 pd_entry_t *l0, *l1, *l2;
1979 pt_entry_t l3_paddr, *l3;
1980 struct spglist free;
1984 * Perform an unsynchronized read. This is, however, safe.
1986 if (pmap->pm_stats.resident_count == 0)
1995 for (; sva < eva; sva = va_next) {
1997 if (pmap->pm_stats.resident_count == 0)
2000 l0 = pmap_l0(pmap, sva);
2001 if (pmap_load(l0) == 0) {
2002 va_next = (sva + L0_SIZE) & ~L0_OFFSET;
2008 l1 = pmap_l0_to_l1(l0, sva);
2009 if (pmap_load(l1) == 0) {
2010 va_next = (sva + L1_SIZE) & ~L1_OFFSET;
2017 * Calculate index for next page table.
2019 va_next = (sva + L2_SIZE) & ~L2_OFFSET;
2023 l2 = pmap_l1_to_l2(l1, sva);
2027 l3_paddr = pmap_load(l2);
2029 if ((l3_paddr & ATTR_DESCR_MASK) == L2_BLOCK) {
2030 KASSERT((l3_paddr & ATTR_SW_MANAGED) == 0,
2031 ("%s: TODO: Demote managed pages", __func__));
2032 if (pmap_demote_l2_locked(pmap, l2, sva & ~L2_OFFSET,
2035 l3_paddr = pmap_load(l2);
2039 * Weed out invalid mappings.
2041 if ((l3_paddr & ATTR_DESCR_MASK) != L2_TABLE)
2045 * Limit our scan to either the end of the va represented
2046 * by the current page table page, or to the end of the
2047 * range being removed.
2053 for (l3 = pmap_l2_to_l3(l2, sva); sva != va_next; l3++,
2056 panic("l3 == NULL");
2057 if (pmap_load(l3) == 0) {
2058 if (va != va_next) {
2059 pmap_invalidate_range(pmap, va, sva);
2066 if (pmap_remove_l3(pmap, l3, sva, l3_paddr, &free,
2073 pmap_invalidate_range(pmap, va, sva);
2078 pmap_invalidate_all(pmap);
2080 pmap_free_zero_pages(&free);
2084 * Routine: pmap_remove_all
2086 * Removes this physical page from
2087 * all physical maps in which it resides.
2088 * Reflects back modify bits to the pager.
2091 * Original versions of this routine were very
2092 * inefficient because they iteratively called
2093 * pmap_remove (slow...)
2097 pmap_remove_all(vm_page_t m)
2101 struct rwlock *lock;
2102 pd_entry_t *pde, tpde;
2103 pt_entry_t *pte, tpte;
2104 struct spglist free;
2107 KASSERT((m->oflags & VPO_UNMANAGED) == 0,
2108 ("pmap_remove_all: page %p is not managed", m));
2110 lock = VM_PAGE_TO_PV_LIST_LOCK(m);
2113 while ((pv = TAILQ_FIRST(&m->md.pv_list)) != NULL) {
2115 if (!PMAP_TRYLOCK(pmap)) {
2116 md_gen = m->md.pv_gen;
2120 if (md_gen != m->md.pv_gen) {
2126 pmap_resident_count_dec(pmap, 1);
2128 pde = pmap_pde(pmap, pv->pv_va, &lvl);
2129 KASSERT(pde != NULL,
2130 ("pmap_remove_all: no page directory entry found"));
2132 ("pmap_remove_all: invalid pde level %d", lvl));
2133 tpde = pmap_load(pde);
2135 pte = pmap_l2_to_l3(pde, pv->pv_va);
2136 tpte = pmap_load(pte);
2137 if (pmap_is_current(pmap) &&
2138 pmap_l3_valid_cacheable(tpte))
2139 cpu_dcache_wb_range(pv->pv_va, L3_SIZE);
2140 pmap_load_clear(pte);
2142 pmap_invalidate_page(pmap, pv->pv_va);
2143 if (tpte & ATTR_SW_WIRED)
2144 pmap->pm_stats.wired_count--;
2145 if ((tpte & ATTR_AF) != 0)
2146 vm_page_aflag_set(m, PGA_REFERENCED);
2149 * Update the vm_page_t clean and reference bits.
2151 if (pmap_page_dirty(tpte))
2153 pmap_unuse_l3(pmap, pv->pv_va, tpde, &free);
2154 TAILQ_REMOVE(&m->md.pv_list, pv, pv_next);
2156 free_pv_entry(pmap, pv);
2159 vm_page_aflag_clear(m, PGA_WRITEABLE);
2161 pmap_free_zero_pages(&free);
2165 * Set the physical protection on the
2166 * specified range of this map as requested.
2169 pmap_protect(pmap_t pmap, vm_offset_t sva, vm_offset_t eva, vm_prot_t prot)
2171 vm_offset_t va, va_next;
2172 pd_entry_t *l0, *l1, *l2;
2173 pt_entry_t *l3p, l3;
2175 if ((prot & VM_PROT_READ) == VM_PROT_NONE) {
2176 pmap_remove(pmap, sva, eva);
2180 if ((prot & VM_PROT_WRITE) == VM_PROT_WRITE)
2184 for (; sva < eva; sva = va_next) {
2186 l0 = pmap_l0(pmap, sva);
2187 if (pmap_load(l0) == 0) {
2188 va_next = (sva + L0_SIZE) & ~L0_OFFSET;
2194 l1 = pmap_l0_to_l1(l0, sva);
2195 if (pmap_load(l1) == 0) {
2196 va_next = (sva + L1_SIZE) & ~L1_OFFSET;
2202 va_next = (sva + L2_SIZE) & ~L2_OFFSET;
2206 l2 = pmap_l1_to_l2(l1, sva);
2207 if (l2 == NULL || (pmap_load(l2) & ATTR_DESCR_MASK) != L2_TABLE)
2214 for (l3p = pmap_l2_to_l3(l2, sva); sva != va_next; l3p++,
2216 l3 = pmap_load(l3p);
2217 if (pmap_l3_valid(l3)) {
2218 pmap_set(l3p, ATTR_AP(ATTR_AP_RO));
2220 /* XXX: Use pmap_invalidate_range */
2221 pmap_invalidate_page(pmap, va);
2227 /* TODO: Only invalidate entries we are touching */
2228 pmap_invalidate_all(pmap);
2232 * Performs a break-before-make update of a pmap entry. This is needed when
2233 * either promoting or demoting pages to ensure the TLB doesn't get into an
2234 * inconsistent state.
2237 pmap_update_entry(pmap_t pmap, pd_entry_t *pte, pd_entry_t newpte,
2242 PMAP_LOCK_ASSERT(pmap, MA_OWNED);
2245 * Ensure we don't get switched out with the page table in an
2246 * inconsistent state. We also need to ensure no interrupts fire
2247 * as they may make use of an address we are about to invalidate.
2249 intr = intr_disable();
2252 /* Clear the old mapping */
2253 pmap_load_clear(pte);
2255 pmap_invalidate_page(pmap, va);
2257 /* Create the new mapping */
2258 pmap_load_store(pte, newpte);
2266 * Tries to promote the 512, contiguous 4KB page mappings that are within a
2267 * single level 2 table entry to a single 2MB page mapping. For promotion
2268 * to occur, two conditions must be met: (1) the 4KB page mappings must map
2269 * aligned, contiguous physical memory and (2) the 4KB page mappings must have
2270 * identical characteristics.
2273 pmap_promote_l2(pmap_t pmap, pd_entry_t *l2, vm_offset_t va,
2274 struct rwlock **lockp)
2276 pt_entry_t *firstl3, *l3, newl2, oldl3, pa;
2279 PMAP_LOCK_ASSERT(pmap, MA_OWNED);
2281 firstl3 = (pt_entry_t *)PHYS_TO_DMAP(pmap_load(l2) & ~ATTR_MASK);
2282 newl2 = pmap_load(firstl3);
2283 /* Ignore managed pages for now */
2284 if ((newl2 & ATTR_SW_MANAGED) != 0)
2287 * Check that the alignment is valid
2288 if (((newl2 & ~ATTR_MASK) & L2_OFFSET) != 0)
2291 pa = newl2 + L2_SIZE - PAGE_SIZE;
2292 for (l3 = firstl3 + NL3PG - 1; l3 > firstl3; l3--) {
2293 oldl3 = pmap_load(l3);
2299 newl2 &= ~ATTR_DESCR_MASK;
2303 * Ensure we don't get switched out with the page table in an
2304 * inconsistent state. We also need to ensure no interrupts fire
2305 * as they may make use of an address we are about to invalidate.
2307 intr = intr_disable();
2310 /* Clear the old mapping */
2311 pmap_load_clear(l2);
2313 pmap_invalidate_range(pmap, rounddown2(va, L2_SIZE),
2314 roundup2(va, L2_SIZE));
2316 /* Create the new mapping */
2317 pmap_load_store(l2, newl2);
2325 * Insert the given physical page (p) at
2326 * the specified virtual address (v) in the
2327 * target physical map with the protection requested.
2329 * If specified, the page will be wired down, meaning
2330 * that the related pte can not be reclaimed.
2332 * NB: This is the only routine which MAY NOT lazy-evaluate
2333 * or lose information. That is, this routine must actually
2334 * insert this page into the given map NOW.
2337 pmap_enter(pmap_t pmap, vm_offset_t va, vm_page_t m, vm_prot_t prot,
2338 u_int flags, int8_t psind __unused)
2340 struct rwlock *lock;
2342 pt_entry_t new_l3, orig_l3;
2343 pt_entry_t *l2, *l3;
2345 vm_paddr_t opa, pa, l1_pa, l2_pa, l3_pa;
2346 vm_page_t mpte, om, l1_m, l2_m, l3_m;
2350 va = trunc_page(va);
2351 if ((m->oflags & VPO_UNMANAGED) == 0 && !vm_page_xbusied(m))
2352 VM_OBJECT_ASSERT_LOCKED(m->object);
2353 pa = VM_PAGE_TO_PHYS(m);
2354 new_l3 = (pt_entry_t)(pa | ATTR_DEFAULT | ATTR_IDX(m->md.pv_memattr) |
2356 if ((prot & VM_PROT_WRITE) == 0)
2357 new_l3 |= ATTR_AP(ATTR_AP_RO);
2358 if ((flags & PMAP_ENTER_WIRED) != 0)
2359 new_l3 |= ATTR_SW_WIRED;
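/*
 * Virtual addresses with bit 63 clear fall in the lower (user, TTBR0)
 * half of the address space, so such mappings must also be accessible
 * from EL0.
 */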
2360 if ((va >> 63) == 0)
2361 new_l3 |= ATTR_AP(ATTR_AP_USER);
2363 CTR2(KTR_PMAP, "pmap_enter: %.16lx -> %.16lx", va, pa);
2370 pde = pmap_pde(pmap, va, &lvl);
2371 if (pde != NULL && lvl == 1) {
2372 l2 = pmap_l1_to_l2(pde, va);
2373 if ((pmap_load(l2) & ATTR_DESCR_MASK) == L2_BLOCK &&
2374 (l3 = pmap_demote_l2_locked(pmap, l2, va, &lock)) != NULL) {
2375 if (va < VM_MAXUSER_ADDRESS) {
2376 mpte = PHYS_TO_VM_PAGE(
2377 pmap_load(l2) & ~ATTR_MASK);
2384 if (va < VM_MAXUSER_ADDRESS) {
2385 nosleep = (flags & PMAP_ENTER_NOSLEEP) != 0;
2386 mpte = pmap_alloc_l3(pmap, va, nosleep ? NULL : &lock);
2387 if (mpte == NULL && nosleep) {
2388 CTR0(KTR_PMAP, "pmap_enter: mpte == NULL");
2392 return (KERN_RESOURCE_SHORTAGE);
2394 pde = pmap_pde(pmap, va, &lvl);
2395 KASSERT(pde != NULL,
2396 ("pmap_enter: Invalid page entry, va: 0x%lx", va));
2398 ("pmap_enter: Invalid level %d", lvl));
2400 l3 = pmap_l2_to_l3(pde, va);
2402 pde = pmap_pde(pmap, va, &lvl);
2404 * If we get a level 2 pde it must point to a level 3 entry
2405 * otherwise we will need to create the intermediate tables
2411 /* Get the l0 pde to update */
2412 pde = pmap_l0(pmap, va);
2413 KASSERT(pde != NULL, ("..."));
2415 l1_m = vm_page_alloc(NULL, 0, VM_ALLOC_NORMAL |
2416 VM_ALLOC_NOOBJ | VM_ALLOC_WIRED |
2419 panic("pmap_enter: l1 pte_m == NULL");
2420 if ((l1_m->flags & PG_ZERO) == 0)
2421 pmap_zero_page(l1_m);
2423 l1_pa = VM_PAGE_TO_PHYS(l1_m);
2424 pmap_load_store(pde, l1_pa | L0_TABLE);
2428 /* Get the l1 pde to update */
2429 pde = pmap_l1_to_l2(pde, va);
2430 KASSERT(pde != NULL, ("..."));
2432 l2_m = vm_page_alloc(NULL, 0, VM_ALLOC_NORMAL |
2433 VM_ALLOC_NOOBJ | VM_ALLOC_WIRED |
2436 panic("pmap_enter: l2 pte_m == NULL");
2437 if ((l2_m->flags & PG_ZERO) == 0)
2438 pmap_zero_page(l2_m);
2440 l2_pa = VM_PAGE_TO_PHYS(l2_m);
2441 pmap_load_store(pde, l2_pa | L1_TABLE);
2445 /* Get the l2 pde to update */
2446 pde = pmap_l1_to_l2(pde, va);
2448 l3_m = vm_page_alloc(NULL, 0, VM_ALLOC_NORMAL |
2449 VM_ALLOC_NOOBJ | VM_ALLOC_WIRED |
2452 panic("pmap_enter: l3 pte_m == NULL");
2453 if ((l3_m->flags & PG_ZERO) == 0)
2454 pmap_zero_page(l3_m);
2456 l3_pa = VM_PAGE_TO_PHYS(l3_m);
2457 pmap_load_store(pde, l3_pa | L2_TABLE);
2462 l3 = pmap_l2_to_l3(pde, va);
2463 pmap_invalidate_page(pmap, va);
2468 orig_l3 = pmap_load(l3);
2469 opa = orig_l3 & ~ATTR_MASK;
2472 * Is the specified virtual address already mapped?
2474 if (pmap_l3_valid(orig_l3)) {
2476 * Wiring change, just update stats. We don't worry about
2477 * wiring PT pages as they remain resident as long as there
2478 * are valid mappings in them. Hence, if a user page is wired,
2479 * the PT page will be also.
2481 if ((flags & PMAP_ENTER_WIRED) != 0 &&
2482 (orig_l3 & ATTR_SW_WIRED) == 0)
2483 pmap->pm_stats.wired_count++;
2484 else if ((flags & PMAP_ENTER_WIRED) == 0 &&
2485 (orig_l3 & ATTR_SW_WIRED) != 0)
2486 pmap->pm_stats.wired_count--;
2489 * Remove the extra PT page reference.
2493 KASSERT(mpte->wire_count > 0,
2494 ("pmap_enter: missing reference to page table page,"
2499 * Has the physical page changed?
2503 * No, might be a protection or wiring change.
2505 if ((orig_l3 & ATTR_SW_MANAGED) != 0) {
2506 new_l3 |= ATTR_SW_MANAGED;
2507 if ((new_l3 & ATTR_AP(ATTR_AP_RW)) ==
2508 ATTR_AP(ATTR_AP_RW)) {
2509 vm_page_aflag_set(m, PGA_WRITEABLE);
2515 /* Flush the cache; there might be uncommitted data in it */
2516 if (pmap_is_current(pmap) && pmap_l3_valid_cacheable(orig_l3))
2517 cpu_dcache_wb_range(va, L3_SIZE);
2520 * Increment the counters.
2522 if ((new_l3 & ATTR_SW_WIRED) != 0)
2523 pmap->pm_stats.wired_count++;
2524 pmap_resident_count_inc(pmap, 1);
2527 * Enter on the PV list if part of our managed memory.
2529 if ((m->oflags & VPO_UNMANAGED) == 0) {
2530 new_l3 |= ATTR_SW_MANAGED;
2531 pv = get_pv_entry(pmap, &lock);
2533 CHANGE_PV_LIST_LOCK_TO_PHYS(&lock, pa);
2534 TAILQ_INSERT_TAIL(&m->md.pv_list, pv, pv_next);
2536 if ((new_l3 & ATTR_AP_RW_BIT) == ATTR_AP(ATTR_AP_RW))
2537 vm_page_aflag_set(m, PGA_WRITEABLE);
2541 * Update the L3 entry.
2545 orig_l3 = pmap_load_store(l3, new_l3);
2546 opa = orig_l3 & ~ATTR_MASK;
2549 if ((orig_l3 & ATTR_SW_MANAGED) != 0) {
2550 om = PHYS_TO_VM_PAGE(opa);
2551 if (pmap_page_dirty(orig_l3))
2553 if ((orig_l3 & ATTR_AF) != 0)
2554 vm_page_aflag_set(om, PGA_REFERENCED);
2555 CHANGE_PV_LIST_LOCK_TO_PHYS(&lock, opa);
2556 pmap_pvh_free(&om->md, pmap, va);
2558 } else if (pmap_page_dirty(orig_l3)) {
2559 if ((orig_l3 & ATTR_SW_MANAGED) != 0)
2563 pmap_load_store(l3, new_l3);
2567 pmap_invalidate_page(pmap, va);
2569 if ((pmap != pmap_kernel()) && (pmap == &curproc->p_vmspace->vm_pmap))
2570 cpu_icache_sync_range(va, PAGE_SIZE);
2572 /* XXX: Not yet, not all demotions are handled */
2574 if ((mpte == NULL || mpte->wire_count == NL3PG) &&
2575 pmap_superpages_enabled() && (m->flags & PG_FICTITIOUS) == 0 &&
2576 vm_reserv_level_iffullpop(m) == 0) {
2577 KASSERT(lvl == 2, ("Invalid pde level %d", lvl));
2578 pmap_promote_l2(pmap, pde, va, &lock);
2585 return (KERN_SUCCESS);
2589 * Maps a sequence of resident pages belonging to the same object.
2590 * The sequence begins with the given page m_start. This page is
2591 * mapped at the given virtual address start. Each subsequent page is
2592 * mapped at a virtual address that is offset from start by the same
2593 * amount as the page is offset from m_start within the object. The
2594 * last page in the sequence is the page with the largest offset from
2595 * m_start that can be mapped at a virtual address less than the given
2596 * virtual address end. Not every virtual page between start and end
2597 * is mapped; only those for which a resident page exists with the
2598 * corresponding offset from m_start are mapped.
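/*
 * Editor's sketch of the address calculation described above (not original
 * text): for each resident page "m" in the run starting at "m_start",
 *
 *	diff = m->pindex - m_start->pindex;
 *	va = start + ptoa(diff);
 *
 * and the page is entered only while va remains below "end".
 */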
2601 pmap_enter_object(pmap_t pmap, vm_offset_t start, vm_offset_t end,
2602 vm_page_t m_start, vm_prot_t prot)
2604 struct rwlock *lock;
2607 vm_pindex_t diff, psize;
2609 VM_OBJECT_ASSERT_LOCKED(m_start->object);
2611 psize = atop(end - start);
2616 while (m != NULL && (diff = m->pindex - m_start->pindex) < psize) {
2617 va = start + ptoa(diff);
2618 mpte = pmap_enter_quick_locked(pmap, va, m, prot, mpte, &lock);
2619 m = TAILQ_NEXT(m, listq);
2627 * this code makes some *MAJOR* assumptions:
2628 * 1. Current pmap & pmap exists.
2629 * 2. Not wired.
2630 * 3. Read access.
2631 * 4. No page table pages.
2632 * but is *MUCH* faster than pmap_enter...
2636 pmap_enter_quick(pmap_t pmap, vm_offset_t va, vm_page_t m, vm_prot_t prot)
2638 struct rwlock *lock;
2642 (void)pmap_enter_quick_locked(pmap, va, m, prot, NULL, &lock);
2649 pmap_enter_quick_locked(pmap_t pmap, vm_offset_t va, vm_page_t m,
2650 vm_prot_t prot, vm_page_t mpte, struct rwlock **lockp)
2652 struct spglist free;
2658 KASSERT(va < kmi.clean_sva || va >= kmi.clean_eva ||
2659 (m->oflags & VPO_UNMANAGED) != 0,
2660 ("pmap_enter_quick_locked: managed mapping within the clean submap"));
2661 PMAP_LOCK_ASSERT(pmap, MA_OWNED);
2663 CTR2(KTR_PMAP, "pmap_enter_quick_locked: %p %lx", pmap, va);
2665 * In the case that a page table page is not
2666 * resident, we are creating it here.
2668 if (va < VM_MAXUSER_ADDRESS) {
2669 vm_pindex_t l2pindex;
2672 * Calculate pagetable page index
2674 l2pindex = pmap_l2_pindex(va);
2675 if (mpte && (mpte->pindex == l2pindex)) {
2681 pde = pmap_pde(pmap, va, &lvl);
2684 * If the page table page is mapped, we just increment
2685 * the hold count, and activate it. Otherwise, we
2686 * attempt to allocate a page table page. If this
2687 * attempt fails, we don't retry. Instead, we give up.
2689 if (lvl == 2 && pmap_load(pde) != 0) {
2691 PHYS_TO_VM_PAGE(pmap_load(pde) & ~ATTR_MASK);
2695 * Pass NULL instead of the PV list lock
2696 * pointer, because we don't intend to sleep.
2698 mpte = _pmap_alloc_l3(pmap, l2pindex, NULL);
2703 l3 = (pt_entry_t *)PHYS_TO_DMAP(VM_PAGE_TO_PHYS(mpte));
2704 l3 = &l3[pmap_l3_index(va)];
2707 pde = pmap_pde(kernel_pmap, va, &lvl);
2708 KASSERT(pde != NULL,
2709 ("pmap_enter_quick_locked: Invalid page entry, va: 0x%lx",
2712 ("pmap_enter_quick_locked: Invalid level %d", lvl));
2713 l3 = pmap_l2_to_l3(pde, va);
2716 if (pmap_load(l3) != 0) {
2725 * Enter on the PV list if part of our managed memory.
2727 if ((m->oflags & VPO_UNMANAGED) == 0 &&
2728 !pmap_try_insert_pv_entry(pmap, va, m, lockp)) {
2731 if (pmap_unwire_l3(pmap, va, mpte, &free)) {
2732 pmap_invalidate_page(pmap, va);
2733 pmap_free_zero_pages(&free);
2741 * Increment counters
2743 pmap_resident_count_inc(pmap, 1);
2745 pa = VM_PAGE_TO_PHYS(m) | ATTR_DEFAULT | ATTR_IDX(m->md.pv_memattr) |
2746 ATTR_AP(ATTR_AP_RO) | L3_PAGE;
2749 * Now validate mapping with RO protection
2751 if ((m->oflags & VPO_UNMANAGED) == 0)
2752 pa |= ATTR_SW_MANAGED;
2753 pmap_load_store(l3, pa);
2755 pmap_invalidate_page(pmap, va);
2760 * This code maps large physical mmap regions into the
2761 * processor address space. Note that some shortcuts
2762 * are taken, but the code works.
2765 pmap_object_init_pt(pmap_t pmap, vm_offset_t addr, vm_object_t object,
2766 vm_pindex_t pindex, vm_size_t size)
2769 VM_OBJECT_ASSERT_WLOCKED(object);
2770 KASSERT(object->type == OBJT_DEVICE || object->type == OBJT_SG,
2771 ("pmap_object_init_pt: non-device object"));
2775 * Clear the wired attribute from the mappings for the specified range of
2776 * addresses in the given pmap. Every valid mapping within that range
2777 * must have the wired attribute set. In contrast, invalid mappings
2778 * cannot have the wired attribute set, so they are ignored.
2780 * The wired attribute of the page table entry is not a hardware feature,
2781 * so there is no need to invalidate any TLB entries.
2784 pmap_unwire(pmap_t pmap, vm_offset_t sva, vm_offset_t eva)
2786 vm_offset_t va_next;
2787 pd_entry_t *l0, *l1, *l2;
2791 for (; sva < eva; sva = va_next) {
2792 l0 = pmap_l0(pmap, sva);
2793 if (pmap_load(l0) == 0) {
2794 va_next = (sva + L0_SIZE) & ~L0_OFFSET;
2800 l1 = pmap_l0_to_l1(l0, sva);
2801 if (pmap_load(l1) == 0) {
2802 va_next = (sva + L1_SIZE) & ~L1_OFFSET;
2808 va_next = (sva + L2_SIZE) & ~L2_OFFSET;
2812 l2 = pmap_l1_to_l2(l1, sva);
2813 if (pmap_load(l2) == 0)
2818 for (l3 = pmap_l2_to_l3(l2, sva); sva != va_next; l3++,
2820 if (pmap_load(l3) == 0)
2822 if ((pmap_load(l3) & ATTR_SW_WIRED) == 0)
2823 panic("pmap_unwire: l3 %#jx is missing "
2824 "ATTR_SW_WIRED", (uintmax_t)pmap_load(l3));
2827 * ATTR_SW_WIRED must be cleared atomically. Although the pmap
2828 * lock synchronizes access to it, another processor could be
2829 * updating other attributes of the same entry concurrently.
2831 atomic_clear_long(l3, ATTR_SW_WIRED);
2832 pmap->pm_stats.wired_count--;
2839 * Copy the range specified by src_addr/len
2840 * from the source map to the range dst_addr/len
2841 * in the destination map.
2843 * This routine is only advisory and need not do anything.
2847 pmap_copy(pmap_t dst_pmap, pmap_t src_pmap, vm_offset_t dst_addr, vm_size_t len,
2848 vm_offset_t src_addr)
2853 * pmap_zero_page zeros the specified hardware page by mapping
2854 * the page into KVM and using bzero to clear its contents.
2857 pmap_zero_page(vm_page_t m)
2859 vm_offset_t va = PHYS_TO_DMAP(VM_PAGE_TO_PHYS(m));
2861 pagezero((void *)va);
2865 * pmap_zero_page_area zeros the specified hardware page by mapping
2866 * the page into KVM and using bzero to clear its contents.
2868 * off and size may not cover an area beyond a single hardware page.
2871 pmap_zero_page_area(vm_page_t m, int off, int size)
2873 vm_offset_t va = PHYS_TO_DMAP(VM_PAGE_TO_PHYS(m));
2875 if (off == 0 && size == PAGE_SIZE)
2876 pagezero((void *)va);
2878 bzero((char *)va + off, size);
2882 * pmap_zero_page_idle zeros the specified hardware page by mapping
2883 * the page into KVM and using bzero to clear its contents. This
2884 * is intended to be called from the vm_pagezero process only and
2888 pmap_zero_page_idle(vm_page_t m)
2890 vm_offset_t va = PHYS_TO_DMAP(VM_PAGE_TO_PHYS(m));
2892 pagezero((void *)va);
2896 * pmap_copy_page copies the specified (machine independent)
2897 * page by mapping the page into virtual memory and using
2898 * bcopy to copy the page, one machine dependent page at a
2902 pmap_copy_page(vm_page_t msrc, vm_page_t mdst)
2904 vm_offset_t src = PHYS_TO_DMAP(VM_PAGE_TO_PHYS(msrc));
2905 vm_offset_t dst = PHYS_TO_DMAP(VM_PAGE_TO_PHYS(mdst));
2907 pagecopy((void *)src, (void *)dst);
2910 int unmapped_buf_allowed = 1;
2913 pmap_copy_pages(vm_page_t ma[], vm_offset_t a_offset, vm_page_t mb[],
2914 vm_offset_t b_offset, int xfersize)
2918 vm_paddr_t p_a, p_b;
2919 vm_offset_t a_pg_offset, b_pg_offset;
2922 while (xfersize > 0) {
2923 a_pg_offset = a_offset & PAGE_MASK;
2924 m_a = ma[a_offset >> PAGE_SHIFT];
2925 p_a = m_a->phys_addr;
2926 b_pg_offset = b_offset & PAGE_MASK;
2927 m_b = mb[b_offset >> PAGE_SHIFT];
2928 p_b = m_b->phys_addr;
2929 cnt = min(xfersize, PAGE_SIZE - a_pg_offset);
2930 cnt = min(cnt, PAGE_SIZE - b_pg_offset);
2931 if (__predict_false(!PHYS_IN_DMAP(p_a))) {
2932 panic("!DMAP a %lx", p_a);
2934 a_cp = (char *)PHYS_TO_DMAP(p_a) + a_pg_offset;
2936 if (__predict_false(!PHYS_IN_DMAP(p_b))) {
2937 panic("!DMAP b %lx", p_b);
2939 b_cp = (char *)PHYS_TO_DMAP(p_b) + b_pg_offset;
2941 bcopy(a_cp, b_cp, cnt);
2949 pmap_quick_enter_page(vm_page_t m)
2952 return (PHYS_TO_DMAP(VM_PAGE_TO_PHYS(m)));
2956 pmap_quick_remove_page(vm_offset_t addr)
2961 * Returns true if the pmap's pv is one of the first
2962 * 16 pvs linked to from this page. This count may
2963 * be changed upwards or downwards in the future; it
2964 * is only necessary that true be returned for a small
2965 * subset of pmaps for proper page aging.
2968 pmap_page_exists_quick(pmap_t pmap, vm_page_t m)
2970 struct rwlock *lock;
2975 KASSERT((m->oflags & VPO_UNMANAGED) == 0,
2976 ("pmap_page_exists_quick: page %p is not managed", m));
2978 lock = VM_PAGE_TO_PV_LIST_LOCK(m);
2980 TAILQ_FOREACH(pv, &m->md.pv_list, pv_next) {
2981 if (PV_PMAP(pv) == pmap) {
2994 * pmap_page_wired_mappings:
2996 * Return the number of managed mappings to the given physical page
2997 * that are wired.
3000 pmap_page_wired_mappings(vm_page_t m)
3002 struct rwlock *lock;
3006 int count, lvl, md_gen;
3008 if ((m->oflags & VPO_UNMANAGED) != 0)
3010 lock = VM_PAGE_TO_PV_LIST_LOCK(m);
3014 TAILQ_FOREACH(pv, &m->md.pv_list, pv_next) {
3016 if (!PMAP_TRYLOCK(pmap)) {
3017 md_gen = m->md.pv_gen;
3021 if (md_gen != m->md.pv_gen) {
3026 pte = pmap_pte(pmap, pv->pv_va, &lvl);
3027 if (pte != NULL && (pmap_load(pte) & ATTR_SW_WIRED) != 0)
3036 * Destroy all managed, non-wired mappings in the given user-space
3037 * pmap.  This pmap cannot be active on any processor besides the
3038 * current one.
3040 * This function cannot be applied to the kernel pmap. Moreover, it
3041 * is not intended for general use. It is only to be used during
3042 * process termination. Consequently, it can be implemented in ways
3043 * that make it faster than pmap_remove(). First, it can more quickly
3044 * destroy mappings by iterating over the pmap's collection of PV
3045 * entries, rather than searching the page table. Second, it doesn't
3046 * have to test and clear the page table entries atomically, because
3047 * no processor is currently accessing the user address space. In
3048 * particular, a page table entry's dirty bit won't change state once
3049 * this function starts.
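/*
 * Editor's note (sketch, not original text): the loop below iterates over
 * the pmap's pv chunks rather than walking the page tables.  Each chunk
 * carries a bitmap of in-use pv entries, so the next mapping is found with:
 *
 *	inuse = ~pc->pc_map[field] & pc_freemask[field];
 *	bit = ffsl(inuse) - 1;
 *	pv = &pc->pc_pventry[field * 64 + bit];
 */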
3052 pmap_remove_pages(pmap_t pmap)
3055 pt_entry_t *pte, tpte;
3056 struct spglist free;
3059 struct pv_chunk *pc, *npc;
3060 struct rwlock *lock;
3062 uint64_t inuse, bitmask;
3063 int allfree, field, freed, idx, lvl;
3070 TAILQ_FOREACH_SAFE(pc, &pmap->pm_pvchunk, pc_list, npc) {
3073 for (field = 0; field < _NPCM; field++) {
3074 inuse = ~pc->pc_map[field] & pc_freemask[field];
3075 while (inuse != 0) {
3076 bit = ffsl(inuse) - 1;
3077 bitmask = 1UL << bit;
3078 idx = field * 64 + bit;
3079 pv = &pc->pc_pventry[idx];
3082 pde = pmap_pde(pmap, pv->pv_va, &lvl);
3083 KASSERT(pde != NULL,
3084 ("Attempting to remove an unmapped page"));
3086 ("Invalid page directory level: %d", lvl));
3088 pte = pmap_l2_to_l3(pde, pv->pv_va);
3089 KASSERT(pte != NULL,
3090 ("Attempting to remove an unmapped page"));
3092 tpte = pmap_load(pte);
3095 * We cannot remove wired pages from a process' mapping at this time
3097 if (tpte & ATTR_SW_WIRED) {
3102 pa = tpte & ~ATTR_MASK;
3104 m = PHYS_TO_VM_PAGE(pa);
3105 KASSERT(m->phys_addr == pa,
3106 ("vm_page_t %p phys_addr mismatch %016jx %016jx",
3107 m, (uintmax_t)m->phys_addr,
3110 KASSERT((m->flags & PG_FICTITIOUS) != 0 ||
3111 m < &vm_page_array[vm_page_array_size],
3112 ("pmap_remove_pages: bad pte %#jx",
3115 /* XXX: assumes tpte is level 3 */
3116 if (pmap_is_current(pmap) &&
3117 pmap_l3_valid_cacheable(tpte))
3118 cpu_dcache_wb_range(pv->pv_va, L3_SIZE);
3119 pmap_load_clear(pte);
3121 pmap_invalidate_page(pmap, pv->pv_va);
3124 * Update the vm_page_t clean/reference bits.
3126 if ((tpte & ATTR_AP_RW_BIT) == ATTR_AP(ATTR_AP_RW))
3129 CHANGE_PV_LIST_LOCK_TO_VM_PAGE(&lock, m);
3132 pc->pc_map[field] |= bitmask;
3134 pmap_resident_count_dec(pmap, 1);
3135 TAILQ_REMOVE(&m->md.pv_list, pv, pv_next);
3138 pmap_unuse_l3(pmap, pv->pv_va, pmap_load(pde),
3143 PV_STAT(atomic_add_long(&pv_entry_frees, freed));
3144 PV_STAT(atomic_add_int(&pv_entry_spare, freed));
3145 PV_STAT(atomic_subtract_long(&pv_entry_count, freed));
3147 TAILQ_REMOVE(&pmap->pm_pvchunk, pc, pc_list);
3151 pmap_invalidate_all(pmap);
3155 pmap_free_zero_pages(&free);
3159 * This is used to check if a page has been accessed or modified. As we
3160 * don't have a bit to see if it has been modified, we have to assume it
3161 * has been if the page is read/write.
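/*
 * Editor's sketch (not original text) of the check performed below: a
 * mask/value pair is built and each mapping of the page is compared
 * against it.  For a "modified" query, for example:
 *
 *	mask = ATTR_AP_RW_BIT;
 *	value = ATTR_AP(ATTR_AP_RW);
 *	rv = (pmap_load(pte) & mask) == value;
 *
 * i.e. a read/write mapping is assumed to have been modified.
 */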
3164 pmap_page_test_mappings(vm_page_t m, boolean_t accessed, boolean_t modified)
3166 struct rwlock *lock;
3168 pt_entry_t *pte, mask, value;
3174 lock = VM_PAGE_TO_PV_LIST_LOCK(m);
3177 TAILQ_FOREACH(pv, &m->md.pv_list, pv_next) {
3179 if (!PMAP_TRYLOCK(pmap)) {
3180 md_gen = m->md.pv_gen;
3184 if (md_gen != m->md.pv_gen) {
3189 pte = pmap_pte(pmap, pv->pv_va, &lvl);
3191 ("pmap_page_test_mappings: Invalid level %d", lvl));
3195 mask |= ATTR_AP_RW_BIT;
3196 value |= ATTR_AP(ATTR_AP_RW);
3199 mask |= ATTR_AF | ATTR_DESCR_MASK;
3200 value |= ATTR_AF | L3_PAGE;
3202 rv = (pmap_load(pte) & mask) == value;
3215 * Return whether or not the specified physical page was modified
3216 * in any physical maps.
3219 pmap_is_modified(vm_page_t m)
3222 KASSERT((m->oflags & VPO_UNMANAGED) == 0,
3223 ("pmap_is_modified: page %p is not managed", m));
3226 * If the page is not exclusive busied, then PGA_WRITEABLE cannot be
3227 * concurrently set while the object is locked. Thus, if PGA_WRITEABLE
3228 * is clear, no PTEs can have PG_M set.
3230 VM_OBJECT_ASSERT_WLOCKED(m->object);
3231 if (!vm_page_xbusied(m) && (m->aflags & PGA_WRITEABLE) == 0)
3233 return (pmap_page_test_mappings(m, FALSE, TRUE));
3237 * pmap_is_prefaultable:
3239 * Return whether or not the specified virtual address is eligible
3243 pmap_is_prefaultable(pmap_t pmap, vm_offset_t addr)
3251 pte = pmap_pte(pmap, addr, &lvl);
3252 if (pte != NULL && pmap_load(pte) != 0) {
3260 * pmap_is_referenced:
3262 * Return whether or not the specified physical page was referenced
3263 * in any physical maps.
3266 pmap_is_referenced(vm_page_t m)
3269 KASSERT((m->oflags & VPO_UNMANAGED) == 0,
3270 ("pmap_is_referenced: page %p is not managed", m));
3271 return (pmap_page_test_mappings(m, TRUE, FALSE));
3275 * Clear the write and modified bits in each of the given page's mappings.
3278 pmap_remove_write(vm_page_t m)
3281 struct rwlock *lock;
3283 pt_entry_t oldpte, *pte;
3286 KASSERT((m->oflags & VPO_UNMANAGED) == 0,
3287 ("pmap_remove_write: page %p is not managed", m));
3290 * If the page is not exclusive busied, then PGA_WRITEABLE cannot be
3291 * set by another thread while the object is locked. Thus,
3292 * if PGA_WRITEABLE is clear, no page table entries need updating.
3294 VM_OBJECT_ASSERT_WLOCKED(m->object);
3295 if (!vm_page_xbusied(m) && (m->aflags & PGA_WRITEABLE) == 0)
3297 lock = VM_PAGE_TO_PV_LIST_LOCK(m);
3300 TAILQ_FOREACH(pv, &m->md.pv_list, pv_next) {
3302 if (!PMAP_TRYLOCK(pmap)) {
3303 md_gen = m->md.pv_gen;
3307 if (md_gen != m->md.pv_gen) {
3313 pte = pmap_pte(pmap, pv->pv_va, &lvl);
3315 oldpte = pmap_load(pte);
3316 if ((oldpte & ATTR_AP_RW_BIT) == ATTR_AP(ATTR_AP_RW)) {
3317 if (!atomic_cmpset_long(pte, oldpte,
3318 oldpte | ATTR_AP(ATTR_AP_RO)))
3320 if ((oldpte & ATTR_AF) != 0)
3322 pmap_invalidate_page(pmap, pv->pv_va);
3327 vm_page_aflag_clear(m, PGA_WRITEABLE);
3330 static __inline boolean_t
3331 safe_to_clear_referenced(pmap_t pmap, pt_entry_t pte)
3337 #define PMAP_TS_REFERENCED_MAX 5
3340 * pmap_ts_referenced:
3342 * Return a count of reference bits for a page, clearing those bits.
3343 * It is not necessary for every reference bit to be cleared, but it
3344 * is necessary that 0 only be returned when there are truly no
3345 * reference bits set.
3347 * XXX: The exact number of bits to check and clear is a matter that
3348 * should be tested and standardized at some point in the future for
3349 * optimal aging of shared pages.
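/*
 * Editor's note (not original text): the loop below stops early once
 * "cleared + not_cleared" reaches PMAP_TS_REFERENCED_MAX and rotates the
 * examined pv entries to the tail of the list, so repeated calls sample
 * different mappings of a widely shared page instead of the same few.
 */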
3352 pmap_ts_referenced(vm_page_t m)
3356 struct rwlock *lock;
3357 pd_entry_t *pde, tpde;
3358 pt_entry_t *pte, tpte;
3360 int cleared, md_gen, not_cleared, lvl;
3361 struct spglist free;
3363 KASSERT((m->oflags & VPO_UNMANAGED) == 0,
3364 ("pmap_ts_referenced: page %p is not managed", m));
3367 pa = VM_PAGE_TO_PHYS(m);
3368 lock = PHYS_TO_PV_LIST_LOCK(pa);
3372 if ((pvf = TAILQ_FIRST(&m->md.pv_list)) == NULL)
3379 if (!PMAP_TRYLOCK(pmap)) {
3380 md_gen = m->md.pv_gen;
3384 if (md_gen != m->md.pv_gen) {
3389 pde = pmap_pde(pmap, pv->pv_va, &lvl);
3390 KASSERT(pde != NULL, ("pmap_ts_referenced: no l2 table found"));
3392 ("pmap_ts_referenced: invalid pde level %d", lvl));
3393 tpde = pmap_load(pde);
3394 KASSERT((tpde & ATTR_DESCR_MASK) == L2_TABLE,
3395 ("pmap_ts_referenced: found an invalid l2 table"));
3396 pte = pmap_l2_to_l3(pde, pv->pv_va);
3397 tpte = pmap_load(pte);
3398 if ((tpte & ATTR_AF) != 0) {
3399 if (safe_to_clear_referenced(pmap, tpte)) {
3401 * TODO: We don't handle the access flag
3402 * at all. We need to be able to set it in
3403 * the exception handler.
3405 panic("ARM64TODO: safe_to_clear_referenced\n");
3406 } else if ((tpte & ATTR_SW_WIRED) == 0) {
3408 * Wired pages cannot be paged out so
3409 * doing accessed bit emulation for
3410 * them is wasted effort. We do the
3411 * hard work for unwired pages only.
3413 pmap_remove_l3(pmap, pte, pv->pv_va, tpde,
3415 pmap_invalidate_page(pmap, pv->pv_va);
3420 KASSERT(lock == VM_PAGE_TO_PV_LIST_LOCK(m),
3421 ("inconsistent pv lock %p %p for page %p",
3422 lock, VM_PAGE_TO_PV_LIST_LOCK(m), m));
3427 /* Rotate the PV list if it has more than one entry. */
3428 if (pv != NULL && TAILQ_NEXT(pv, pv_next) != NULL) {
3429 TAILQ_REMOVE(&m->md.pv_list, pv, pv_next);
3430 TAILQ_INSERT_TAIL(&m->md.pv_list, pv, pv_next);
3433 } while ((pv = TAILQ_FIRST(&m->md.pv_list)) != pvf && cleared +
3434 not_cleared < PMAP_TS_REFERENCED_MAX);
3437 pmap_free_zero_pages(&free);
3438 return (cleared + not_cleared);
3442 * Apply the given advice to the specified range of addresses within the
3443 * given pmap. Depending on the advice, clear the referenced and/or
3444 * modified flags in each mapping and set the mapped page's dirty field.
3447 pmap_advise(pmap_t pmap, vm_offset_t sva, vm_offset_t eva, int advice)
3452 * Clear the modify bits on the specified physical page.
3455 pmap_clear_modify(vm_page_t m)
3458 KASSERT((m->oflags & VPO_UNMANAGED) == 0,
3459 ("pmap_clear_modify: page %p is not managed", m));
3460 VM_OBJECT_ASSERT_WLOCKED(m->object);
3461 KASSERT(!vm_page_xbusied(m),
3462 ("pmap_clear_modify: page %p is exclusive busied", m));
3465 * If the page is not PGA_WRITEABLE, then no PTEs can have PG_M set.
3466 * If the object containing the page is locked and the page is not
3467 * exclusive busied, then PGA_WRITEABLE cannot be concurrently set.
3469 if ((m->aflags & PGA_WRITEABLE) == 0)
3472 /* ARM64TODO: We lack support for tracking if a page is modified */
3476 pmap_mapbios(vm_paddr_t pa, vm_size_t size)
3479 return ((void *)PHYS_TO_DMAP(pa));
3483 pmap_unmapbios(vm_paddr_t pa, vm_size_t size)
3488 * Sets the memory attribute for the specified page.
3491 pmap_page_set_memattr(vm_page_t m, vm_memattr_t ma)
3494 m->md.pv_memattr = ma;
3497 * If "m" is a normal page, update its direct mapping. This update
3498 * can be relied upon to perform any cache operations that are
3499 * required for data coherence.
3501 if ((m->flags & PG_FICTITIOUS) == 0 &&
3502 pmap_change_attr(PHYS_TO_DMAP(VM_PAGE_TO_PHYS(m)), PAGE_SIZE,
3503 m->md.pv_memattr) != 0)
3504 panic("memory attribute change on the direct map failed");
3508 * Changes the specified virtual address range's memory type to that given by
3509 * the parameter "mode". The specified virtual address range must be
3510 * completely contained within either the direct map or the kernel map. If
3511 * the virtual address range is contained within the kernel map, then the
3512 * memory type for each of the corresponding ranges of the direct map is also
3513 * changed. (The corresponding ranges of the direct map are those ranges that
3514 * map the same physical pages as the specified virtual address range.) These
3515 * changes to the direct map are necessary because the architecture leaves
3516 * the behavior undefined when two or more mappings to the
3517 * same physical page have different memory types.
3519 * Returns zero if the change completed successfully, and either EINVAL or
3520 * ENOMEM if the change failed. Specifically, EINVAL is returned if some part
3521 * of the virtual address range was not mapped, and ENOMEM is returned if
3522 * there was insufficient memory available to complete the change. In the
3523 * latter case, the memory type may have been changed on some part of the
3524 * virtual address range or the direct map.
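/*
 * Illustrative example (editor's sketch, not part of the original source):
 * marking a kernel-mapped buffer uncacheable before handing it to a
 * non-coherent device, assuming "va" and "size" describe a range within
 * the direct map or the kernel map:
 *
 *	error = pmap_change_attr(va, size, VM_MEMATTR_UNCACHEABLE);
 *	if (error == EINVAL)
 *		part of the range was not mapped;
 *	else if (error == ENOMEM)
 *		a required demotion could not allocate a page table page;
 */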
3527 pmap_change_attr(vm_offset_t va, vm_size_t size, int mode)
3531 PMAP_LOCK(kernel_pmap);
3532 error = pmap_change_attr_locked(va, size, mode);
3533 PMAP_UNLOCK(kernel_pmap);
3538 pmap_change_attr_locked(vm_offset_t va, vm_size_t size, int mode)
3540 vm_offset_t base, offset, tmpva;
3541 pt_entry_t l3, *pte, *newpte;
3544 PMAP_LOCK_ASSERT(kernel_pmap, MA_OWNED);
3545 base = trunc_page(va);
3546 offset = va & PAGE_MASK;
3547 size = round_page(offset + size);
3549 if (!VIRT_IN_DMAP(base))
3552 for (tmpva = base; tmpva < base + size; ) {
3553 pte = pmap_pte(kernel_pmap, tmpva, &lvl);
3557 if ((pmap_load(pte) & ATTR_IDX_MASK) == ATTR_IDX(mode)) {
3559 * We already have the correct attribute,
3560 * ignore this entry.
3564 panic("Invalid DMAP table level: %d\n", lvl);
3566 tmpva = (tmpva & ~L1_OFFSET) + L1_SIZE;
3569 tmpva = (tmpva & ~L2_OFFSET) + L2_SIZE;
3577 * Split the entry to a level 3 table, then
3578 * set the new attribute.
3582 panic("Invalid DMAP table level: %d\n", lvl);
3584 newpte = pmap_demote_l1(kernel_pmap, pte,
3585 tmpva & ~L1_OFFSET);
3588 pte = pmap_l1_to_l2(pte, tmpva);
3590 newpte = pmap_demote_l2(kernel_pmap, pte,
3591 tmpva & ~L2_OFFSET);
3594 pte = pmap_l2_to_l3(pte, tmpva);
3596 /* Update the entry */
3597 l3 = pmap_load(pte);
3598 l3 &= ~ATTR_IDX_MASK;
3599 l3 |= ATTR_IDX(mode);
3601 pmap_update_entry(kernel_pmap, pte, l3, tmpva);
3604 * If moving to a non-cacheable entry flush
3605 * the data cache.
3607 if (mode == VM_MEMATTR_UNCACHEABLE)
3608 cpu_dcache_wbinv_range(tmpva, L3_SIZE);
3620 * Create an L2 table to map all addresses within an L1 mapping.
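/*
 * Editor's sketch (not original text): the demotion replaces a single 1GB
 * L1 block entry with an L2 table whose Ln_ENTRIES (512) entries map the
 * same physical range with the same attributes:
 *
 *	newl2 = oldl1 & ATTR_MASK;
 *	for (i = 0; i < Ln_ENTRIES; i++, phys += L2_SIZE)
 *		l2[i] = newl2 | phys;
 */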
3623 pmap_demote_l1(pmap_t pmap, pt_entry_t *l1, vm_offset_t va)
3625 pt_entry_t *l2, newl2, oldl1;
3627 vm_paddr_t l2phys, phys;
3631 PMAP_LOCK_ASSERT(pmap, MA_OWNED);
3632 oldl1 = pmap_load(l1);
3633 KASSERT((oldl1 & ATTR_DESCR_MASK) == L1_BLOCK,
3634 ("pmap_demote_l1: Demoting a non-block entry"));
3635 KASSERT((va & L1_OFFSET) == 0,
3636 ("pmap_demote_l1: Invalid virtual address %#lx", va));
3639 if (va <= (vm_offset_t)l1 && va + L1_SIZE > (vm_offset_t)l1) {
3640 tmpl1 = kva_alloc(PAGE_SIZE);
3645 if ((ml2 = vm_page_alloc(NULL, 0, VM_ALLOC_INTERRUPT |
3646 VM_ALLOC_NOOBJ | VM_ALLOC_WIRED)) == NULL) {
3647 CTR2(KTR_PMAP, "pmap_demote_l1: failure for va %#lx"
3648 " in pmap %p", va, pmap);
3652 l2phys = VM_PAGE_TO_PHYS(ml2);
3653 l2 = (pt_entry_t *)PHYS_TO_DMAP(l2phys);
3655 /* Address the range points at */
3656 phys = oldl1 & ~ATTR_MASK;
3657 /* The attributes from the old l1 entry to be copied */
3658 newl2 = oldl1 & ATTR_MASK;
3660 /* Create the new entries */
3661 for (i = 0; i < Ln_ENTRIES; i++) {
3662 l2[i] = newl2 | phys;
3663 phys += L2_SIZE;
3664 }
3665 cpu_dcache_wb_range((vm_offset_t)l2, PAGE_SIZE);
3668 pmap_kenter(tmpl1, PAGE_SIZE,
3669 DMAP_TO_PHYS((vm_offset_t)l1) & ~L3_OFFSET, CACHED_MEMORY);
3670 l1 = (pt_entry_t *)(tmpl1 + ((vm_offset_t)l1 & PAGE_MASK));
3673 pmap_update_entry(pmap, l1, l2phys | L1_TABLE, va);
3676 pmap_kremove(tmpl1);
3677 kva_free(tmpl1, PAGE_SIZE);
3684 * Create an L3 table to map all addresses within an L2 mapping.
3687 pmap_demote_l2_locked(pmap_t pmap, pt_entry_t *l2, vm_offset_t va,
3688 struct rwlock **lockp)
3690 pt_entry_t *l3, newl3, oldl2;
3692 vm_paddr_t l3phys, phys;
3696 PMAP_LOCK_ASSERT(pmap, MA_OWNED);
3697 oldl2 = pmap_load(l2);
3698 KASSERT((oldl2 & ATTR_DESCR_MASK) == L2_BLOCK,
3699 ("pmap_demote_l2: Demoting a non-block entry"));
3700 KASSERT((va & L2_OFFSET) == 0,
3701 ("pmap_demote_l2: Invalid virtual address %#lx", va));
3702 KASSERT((oldl2 & ATTR_SW_MANAGED) == 0,
3703 ("pmap_demote_l2: TODO: Demote managed pages"));
3706 if (va <= (vm_offset_t)l2 && va + L2_SIZE > (vm_offset_t)l2) {
3707 tmpl2 = kva_alloc(PAGE_SIZE);
3712 if ((ml3 = vm_page_alloc(NULL, 0, VM_ALLOC_INTERRUPT |
3713 VM_ALLOC_NOOBJ | VM_ALLOC_WIRED)) == NULL) {
3714 CTR2(KTR_PMAP, "pmap_demote_l2: failure for va %#lx"
3715 " in pmap %p", va, pmap);
3719 l3phys = VM_PAGE_TO_PHYS(ml3);
3720 l3 = (pt_entry_t *)PHYS_TO_DMAP(l3phys);
3722 /* Address the range points at */
3723 phys = oldl2 & ~ATTR_MASK;
3724 /* The attributes from the old l2 entry to be copied */
3725 newl3 = (oldl2 & (ATTR_MASK & ~ATTR_DESCR_MASK)) | L3_PAGE;
3727 /* Create the new entries */
3728 for (i = 0; i < Ln_ENTRIES; i++) {
3729 l3[i] = newl3 | phys;
3730 phys += L3_SIZE;
3731 }
3732 cpu_dcache_wb_range((vm_offset_t)l3, PAGE_SIZE);
3735 pmap_kenter(tmpl2, PAGE_SIZE,
3736 DMAP_TO_PHYS((vm_offset_t)l2) & ~L3_OFFSET, CACHED_MEMORY);
3737 l2 = (pt_entry_t *)(tmpl2 + ((vm_offset_t)l2 & PAGE_MASK));
3740 pmap_update_entry(pmap, l2, l3phys | L2_TABLE, va);
3743 pmap_kremove(tmpl2);
3744 kva_free(tmpl2, PAGE_SIZE);
3752 pmap_demote_l2(pmap_t pmap, pt_entry_t *l2, vm_offset_t va)
3754 struct rwlock *lock;
3758 l3 = pmap_demote_l2_locked(pmap, l2, va, &lock);
3765 * perform the pmap work for mincore
3768 pmap_mincore(pmap_t pmap, vm_offset_t addr, vm_paddr_t *locked_pa)
3770 pd_entry_t *l1p, l1;
3771 pd_entry_t *l2p, l2;
3772 pt_entry_t *l3p, l3;
3783 l1p = pmap_l1(pmap, addr);
3784 if (l1p == NULL) /* No l1 */
3787 l1 = pmap_load(l1p);
3788 if ((l1 & ATTR_DESCR_MASK) == L1_INVAL)
3791 if ((l1 & ATTR_DESCR_MASK) == L1_BLOCK) {
3792 pa = (l1 & ~ATTR_MASK) | (addr & L1_OFFSET);
3793 managed = (l1 & ATTR_SW_MANAGED) == ATTR_SW_MANAGED;
3794 val = MINCORE_SUPER | MINCORE_INCORE;
3795 if (pmap_page_dirty(l1))
3796 val |= MINCORE_MODIFIED | MINCORE_MODIFIED_OTHER;
3797 if ((l1 & ATTR_AF) == ATTR_AF)
3798 val |= MINCORE_REFERENCED | MINCORE_REFERENCED_OTHER;
3802 l2p = pmap_l1_to_l2(l1p, addr);
3803 if (l2p == NULL) /* No l2 */
3806 l2 = pmap_load(l2p);
3807 if ((l2 & ATTR_DESCR_MASK) == L2_INVAL)
3810 if ((l2 & ATTR_DESCR_MASK) == L2_BLOCK) {
3811 pa = (l2 & ~ATTR_MASK) | (addr & L2_OFFSET);
3812 managed = (l2 & ATTR_SW_MANAGED) == ATTR_SW_MANAGED;
3813 val = MINCORE_SUPER | MINCORE_INCORE;
3814 if (pmap_page_dirty(l2))
3815 val |= MINCORE_MODIFIED | MINCORE_MODIFIED_OTHER;
3816 if ((l2 & ATTR_AF) == ATTR_AF)
3817 val |= MINCORE_REFERENCED | MINCORE_REFERENCED_OTHER;
3821 l3p = pmap_l2_to_l3(l2p, addr);
3822 if (l3p == NULL) /* No l3 */
3825 l3 = pmap_load(l3p);
3826 if ((l3 & ATTR_DESCR_MASK) == L3_INVAL)
3829 if ((l3 & ATTR_DESCR_MASK) == L3_PAGE) {
3830 pa = (l3 & ~ATTR_MASK) | (addr & L3_OFFSET);
3831 managed = (l3 & ATTR_SW_MANAGED) == ATTR_SW_MANAGED;
3832 val = MINCORE_INCORE;
3833 if (pmap_page_dirty(l3))
3834 val |= MINCORE_MODIFIED | MINCORE_MODIFIED_OTHER;
3835 if ((l3 & ATTR_AF) == ATTR_AF)
3836 val |= MINCORE_REFERENCED | MINCORE_REFERENCED_OTHER;
3840 if ((val & (MINCORE_MODIFIED_OTHER | MINCORE_REFERENCED_OTHER)) !=
3841 (MINCORE_MODIFIED_OTHER | MINCORE_REFERENCED_OTHER) && managed) {
3842 /* Ensure that "PHYS_TO_VM_PAGE(pa)->object" doesn't change. */
3843 if (vm_page_pa_tryrelock(pmap, pa, locked_pa))
3846 PA_UNLOCK_COND(*locked_pa);
3853 pmap_activate(struct thread *td)
3858 pmap = vmspace_pmap(td->td_proc->p_vmspace);
3859 td->td_pcb->pcb_l0addr = vtophys(pmap->pm_l0);
3860 __asm __volatile("msr ttbr0_el1, %0" : : "r"(td->td_pcb->pcb_l0addr));
3861 pmap_invalidate_all(pmap);
3866 pmap_sync_icache(pmap_t pmap, vm_offset_t va, vm_size_t sz)
3869 if (va >= VM_MIN_KERNEL_ADDRESS) {
3870 cpu_icache_sync_range(va, sz);
3875 /* Find the length of data in this page to flush */
3876 offset = va & PAGE_MASK;
3877 len = imin(PAGE_SIZE - offset, sz);
3880 /* Extract the physical address & find it in the DMAP */
3881 pa = pmap_extract(pmap, va);
3883 cpu_icache_sync_range(PHYS_TO_DMAP(pa), len);
3885 /* Move to the next page */
3888 /* Set the length for the next iteration */
3889 len = imin(PAGE_SIZE, sz);
3895 pmap_fault(pmap_t pmap, uint64_t esr, uint64_t far)
3901 switch (ESR_ELx_EXCEPTION(esr)) {
3902 case EXCP_DATA_ABORT_L:
3903 case EXCP_DATA_ABORT:
3906 return (KERN_FAILURE);
3911 switch (esr & ISS_DATA_DFSC_MASK) {
3912 case ISS_DATA_DFSC_TF_L0:
3913 case ISS_DATA_DFSC_TF_L1:
3914 case ISS_DATA_DFSC_TF_L2:
3915 case ISS_DATA_DFSC_TF_L3:
3916 /* Ask the MMU to check the address */
3917 if (pmap == kernel_pmap)
3918 par = arm64_address_translate_s1e1r(far);
3920 par = arm64_address_translate_s1e0r(far);
3923 * If the translation was successful the address was invalid
3924 * due to a break-before-make sequence. We can unlock and
3925 * return success to the trap handler.
3927 if (PAR_SUCCESS(par)) {
3929 return (KERN_SUCCESS);
3938 return (KERN_FAILURE);
3942 * Increase the starting virtual address of the given mapping if a
3943 * different alignment might result in more superpage mappings.
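/*
 * Editor's worked example (not original text), with L2_SIZE = 2MB: if the
 * object offset lies 0x1000 bytes past a 2MB boundary, superpage_offset is
 * 0x1000, and a request at *addr = 0x40200000 is adjusted to 0x40201000 so
 * that (*addr & L2_OFFSET) == superpage_offset and the resulting mappings
 * can line up with 2MB physical runs.
 */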
3946 pmap_align_superpage(vm_object_t object, vm_ooffset_t offset,
3947 vm_offset_t *addr, vm_size_t size)
3949 vm_offset_t superpage_offset;
3953 if (object != NULL && (object->flags & OBJ_COLORED) != 0)
3954 offset += ptoa(object->pg_color);
3955 superpage_offset = offset & L2_OFFSET;
3956 if (size - ((L2_SIZE - superpage_offset) & L2_OFFSET) < L2_SIZE ||
3957 (*addr & L2_OFFSET) == superpage_offset)
3959 if ((*addr & L2_OFFSET) < superpage_offset)
3960 *addr = (*addr & ~L2_OFFSET) + superpage_offset;
3962 *addr = ((*addr + L2_OFFSET) & ~L2_OFFSET) + superpage_offset;
3966 * Get the kernel virtual address of a set of physical pages. If there are
3967 * physical addresses not covered by the DMAP perform a transient mapping
3968 * that will be removed when calling pmap_unmap_io_transient.
3970 * \param page The pages for which the caller wishes to obtain
3971 * kernel virtual addresses.
3972 * \param vaddr On return contains the kernel virtual memory address
3973 * of the pages passed in the page parameter.
3974 * \param count Number of pages passed in.
3975 * \param can_fault TRUE if the thread using the mapped pages can take
3976 * page faults, FALSE otherwise.
3978 * \returns TRUE if the caller must call pmap_unmap_io_transient when
3979 * finished or FALSE otherwise.
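/*
 * Illustrative caller pattern (editor's sketch, not part of the original
 * source), assuming "pages", "vaddr" and "npages" are supplied by the
 * caller:
 *
 *	boolean_t mapped;
 *
 *	mapped = pmap_map_io_transient(pages, vaddr, npages, FALSE);
 *	access the pages through vaddr[0..npages-1];
 *	if (mapped)
 *		pmap_unmap_io_transient(pages, vaddr, npages, FALSE);
 */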
3983 pmap_map_io_transient(vm_page_t page[], vm_offset_t vaddr[], int count,
3984 boolean_t can_fault)
3987 boolean_t needs_mapping;
3991 * Allocate any KVA space that we need; this is done in a separate
3992 * loop to prevent calling vmem_alloc while pinned.
3994 needs_mapping = FALSE;
3995 for (i = 0; i < count; i++) {
3996 paddr = VM_PAGE_TO_PHYS(page[i]);
3997 if (__predict_false(!PHYS_IN_DMAP(paddr))) {
3998 error = vmem_alloc(kernel_arena, PAGE_SIZE,
3999 M_BESTFIT | M_WAITOK, &vaddr[i]);
4000 KASSERT(error == 0, ("vmem_alloc failed: %d", error));
4001 needs_mapping = TRUE;
4003 vaddr[i] = PHYS_TO_DMAP(paddr);
4007 /* Exit early if everything is covered by the DMAP */
4013 for (i = 0; i < count; i++) {
4014 paddr = VM_PAGE_TO_PHYS(page[i]);
4015 if (!PHYS_IN_DMAP(paddr)) {
4017 "pmap_map_io_transient: TODO: Map out of DMAP data");
4021 return (needs_mapping);
4025 pmap_unmap_io_transient(vm_page_t page[], vm_offset_t vaddr[], int count,
4026 boolean_t can_fault)
4033 for (i = 0; i < count; i++) {
4034 paddr = VM_PAGE_TO_PHYS(page[i]);
4035 if (!PHYS_IN_DMAP(paddr)) {
4036 panic("ARM64TODO: pmap_unmap_io_transient: Unmap data");