From f8e64afc1d2a61db3c8a5861d3f5664fdec47138 Mon Sep 17 00:00:00 2001 From: Alan Cox Date: Mon, 16 Sep 2019 02:31:58 +0000 Subject: [PATCH] MFC r349003, r349031, r349042, r349129, r349290, r349618, r349798 Change pmap_demote_l2_locked() so that it removes the superpage mapping on a demotion failure. Otherwise, some callers to pmap_demote_l2_locked(), such as pmap_protect(), may leave an incorrect mapping in place on a demotion failure. Change pmap_demote_l2_locked() so that it handles addresses that are not superpage aligned. Some callers to pmap_demote_l2_locked(), such as pmap_protect(), may not pass a superpage aligned address. Optimize TLB invalidation in pmap_remove_l2(). Change the arm64 pmap so that updates to the global count of wired pages are not performed directly by the pmap. Instead, they are performed by vm_page_free_pages_toq(). Batch the TLB invalidations that are performed by pmap_protect() rather than performing them one at a time. Eliminate a redundant call to pmap_invalidate_page() from pmap_ts_referenced(). Introduce pmap_remove_l3_range() and use it in two places: (1) pmap_remove(), where it eliminates redundant TLB invalidations by pmap_remove() and pmap_remove_l3(), and (2) pmap_enter_l2(), where it may optimize the TLB invalidations by batching them. Implement pmap_copy(). Three changes to pmap_enter(): 1. Use _pmap_alloc_l3() instead of pmap_alloc_l3() in order to handle the possibility that a superpage mapping for "va" was created while we slept. 2. Eliminate code for allocating kernel page table pages. Kernel page table pages are preallocated by pmap_growkernel(). 3. Eliminate duplicated unlock operations when KERN_RESOURCE_SHORTAGE is returned. --- sys/arm64/arm64/pmap.c | 483 +++++++++++++++++++++++++++++------------ 1 file changed, 346 insertions(+), 137 deletions(-) diff --git a/sys/arm64/arm64/pmap.c b/sys/arm64/arm64/pmap.c index cafe8d14ca7..9fed30d7482 100644 --- a/sys/arm64/arm64/pmap.c +++ b/sys/arm64/arm64/pmap.c @@ -1410,8 +1410,6 @@ _pmap_unwire_l3(pmap_t pmap, vm_offset_t va, vm_page_t m, struct spglist *free) } pmap_invalidate_page(pmap, va); - vm_wire_sub(1); - /* * Put page on a list so that it is released after * *ALL* TLB shootdown is done @@ -2030,9 +2028,8 @@ reclaim_pv_chunk(pmap_t locked_pmap, struct rwlock **lockp) SLIST_REMOVE_HEAD(&free, plinks.s.ss); /* Recycle a freed page table page. */ m_pc->wire_count = 1; - vm_wire_add(1); } - vm_page_free_pages_toq(&free, false); + vm_page_free_pages_toq(&free, true); return (m_pc); } @@ -2267,6 +2264,8 @@ pmap_pv_demote_l2(pmap_t pmap, vm_offset_t va, vm_paddr_t pa, int bit, field; PMAP_LOCK_ASSERT(pmap, MA_OWNED); + KASSERT((va & L2_OFFSET) == 0, + ("pmap_pv_demote_l2: va is not 2mpage aligned")); KASSERT((pa & L2_OFFSET) == 0, ("pmap_pv_demote_l2: pa is not 2mpage aligned")); CHANGE_PV_LIST_LOCK_TO_PHYS(lockp, pa); @@ -2277,7 +2276,6 @@ pmap_pv_demote_l2(pmap_t pmap, vm_offset_t va, vm_paddr_t pa, * must not be released until the last pv entry is reinstantiated. 
*/ pvh = pa_to_pvh(pa); - va = va & ~L2_OFFSET; pv = pmap_pvh_remove(pvh, pmap, va); KASSERT(pv != NULL, ("pmap_pv_demote_l2: pv not found")); m = PHYS_TO_VM_PAGE(pa); @@ -2433,7 +2431,13 @@ pmap_remove_l2(pmap_t pmap, pt_entry_t *l2, vm_offset_t sva, old_l2 = pmap_load_clear(l2); KASSERT((old_l2 & ATTR_DESCR_MASK) == L2_BLOCK, ("pmap_remove_l2: L2e %lx is not a block mapping", old_l2)); - pmap_invalidate_range(pmap, sva, sva + L2_SIZE); + + /* + * Since a promotion must break the 4KB page mappings before making + * the 2MB page mapping, a pmap_invalidate_page() suffices. + */ + pmap_invalidate_page(pmap, sva); + if (old_l2 & ATTR_SW_WIRED) pmap->pm_stats.wired_count -= L2_SIZE / PAGE_SIZE; pmap_resident_count_dec(pmap, L2_SIZE / PAGE_SIZE); @@ -2463,8 +2467,7 @@ pmap_remove_l2(pmap_t pmap, pt_entry_t *l2, vm_offset_t sva, pmap_resident_count_dec(pmap, 1); KASSERT(ml3->wire_count == NL3PG, ("pmap_remove_l2: l3 page wire count error")); - ml3->wire_count = 1; - vm_page_unwire_noq(ml3); + ml3->wire_count = 0; pmap_add_delayed_free_list(ml3, free, FALSE); } } @@ -2506,6 +2509,82 @@ pmap_remove_l3(pmap_t pmap, pt_entry_t *l3, vm_offset_t va, return (pmap_unuse_pt(pmap, va, l2e, free)); } +/* + * Remove the specified range of addresses from the L3 page table that is + * identified by the given L2 entry. + */ +static void +pmap_remove_l3_range(pmap_t pmap, pd_entry_t l2e, vm_offset_t sva, + vm_offset_t eva, struct spglist *free, struct rwlock **lockp) +{ + struct md_page *pvh; + struct rwlock *new_lock; + pt_entry_t *l3, old_l3; + vm_offset_t va; + vm_page_t m; + + PMAP_LOCK_ASSERT(pmap, MA_OWNED); + KASSERT(rounddown2(sva, L2_SIZE) + L2_SIZE == roundup2(eva, L2_SIZE), + ("pmap_remove_l3_range: range crosses an L3 page table boundary")); + va = eva; + for (l3 = pmap_l2_to_l3(&l2e, sva); sva != eva; l3++, sva += L3_SIZE) { + if (!pmap_l3_valid(pmap_load(l3))) { + if (va != eva) { + pmap_invalidate_range(pmap, va, sva); + va = eva; + } + continue; + } + old_l3 = pmap_load_clear(l3); + if ((old_l3 & ATTR_SW_WIRED) != 0) + pmap->pm_stats.wired_count--; + pmap_resident_count_dec(pmap, 1); + if ((old_l3 & ATTR_SW_MANAGED) != 0) { + m = PHYS_TO_VM_PAGE(old_l3 & ~ATTR_MASK); + if (pmap_page_dirty(old_l3)) + vm_page_dirty(m); + if ((old_l3 & ATTR_AF) != 0) + vm_page_aflag_set(m, PGA_REFERENCED); + new_lock = PHYS_TO_PV_LIST_LOCK(VM_PAGE_TO_PHYS(m)); + if (new_lock != *lockp) { + if (*lockp != NULL) { + /* + * Pending TLB invalidations must be + * performed before the PV list lock is + * released. Otherwise, a concurrent + * pmap_remove_all() on a physical page + * could return while a stale TLB entry + * still provides access to that page. + */ + if (va != eva) { + pmap_invalidate_range(pmap, va, + sva); + va = eva; + } + rw_wunlock(*lockp); + } + *lockp = new_lock; + rw_wlock(*lockp); + } + pmap_pvh_free(&m->md, pmap, sva); + if (TAILQ_EMPTY(&m->md.pv_list) && + (m->flags & PG_FICTITIOUS) == 0) { + pvh = pa_to_pvh(VM_PAGE_TO_PHYS(m)); + if (TAILQ_EMPTY(&pvh->pv_list)) + vm_page_aflag_clear(m, PGA_WRITEABLE); + } + } + if (va == eva) + va = sva; + if (pmap_unuse_pt(pmap, sva, l2e, free)) { + sva += L3_SIZE; + break; + } + } + if (va != eva) + pmap_invalidate_range(pmap, va, sva); +} + /* * Remove the given range of addresses from the specified map. 
* @@ -2516,9 +2595,9 @@ void pmap_remove(pmap_t pmap, vm_offset_t sva, vm_offset_t eva) { struct rwlock *lock; - vm_offset_t va, va_next; + vm_offset_t va_next; pd_entry_t *l0, *l1, *l2; - pt_entry_t l3_paddr, *l3; + pt_entry_t l3_paddr; struct spglist free; /* @@ -2571,8 +2650,8 @@ pmap_remove(pmap_t pmap, vm_offset_t sva, vm_offset_t eva) pmap_remove_l2(pmap, l2, sva, pmap_load(l1), &free, &lock); continue; - } else if (pmap_demote_l2_locked(pmap, l2, - sva &~L2_OFFSET, &lock) == NULL) + } else if (pmap_demote_l2_locked(pmap, l2, sva, + &lock) == NULL) continue; l3_paddr = pmap_load(l2); } @@ -2591,33 +2670,13 @@ pmap_remove(pmap_t pmap, vm_offset_t sva, vm_offset_t eva) if (va_next > eva) va_next = eva; - va = va_next; - for (l3 = pmap_l2_to_l3(l2, sva); sva != va_next; l3++, - sva += L3_SIZE) { - if (l3 == NULL) - panic("l3 == NULL"); - if (pmap_load(l3) == 0) { - if (va != va_next) { - pmap_invalidate_range(pmap, va, sva); - va = va_next; - } - continue; - } - if (va == va_next) - va = sva; - if (pmap_remove_l3(pmap, l3, sva, l3_paddr, &free, - &lock)) { - sva += L3_SIZE; - break; - } - } - if (va != va_next) - pmap_invalidate_range(pmap, va, sva); + pmap_remove_l3_range(pmap, l3_paddr, sva, va_next, &free, + &lock); } if (lock != NULL) rw_wunlock(lock); PMAP_UNLOCK(pmap); - vm_page_free_pages_toq(&free, false); + vm_page_free_pages_toq(&free, true); } /* @@ -2722,7 +2781,7 @@ pmap_remove_all(vm_page_t m) } vm_page_aflag_clear(m, PGA_WRITEABLE); rw_wunlock(lock); - vm_page_free_pages_toq(&free, false); + vm_page_free_pages_toq(&free, true); } /* @@ -2788,8 +2847,15 @@ pmap_protect(pmap_t pmap, vm_offset_t sva, vm_offset_t eva, vm_prot_t prot) for (l3p = pmap_l2_to_l3(l2, sva); sva != va_next; l3p++, sva += L3_SIZE) { l3 = pmap_load(l3p); - if (!pmap_l3_valid(l3)) + if (!pmap_l3_valid(l3)) { + if (va != va_next) { + pmap_invalidate_range(pmap, va, sva); + va = va_next; + } continue; + } + if (va == va_next) + va = sva; nbits = 0; if ((prot & VM_PROT_WRITE) == 0) { @@ -2804,9 +2870,9 @@ pmap_protect(pmap_t pmap, vm_offset_t sva, vm_offset_t eva, vm_prot_t prot) nbits |= ATTR_XN; pmap_set(l3p, nbits); - /* XXX: Use pmap_invalidate_range */ - pmap_invalidate_page(pmap, sva); } + if (va != va_next) + pmap_invalidate_range(pmap, va, sva); } PMAP_UNLOCK(pmap); } @@ -3011,8 +3077,8 @@ pmap_enter(pmap_t pmap, vm_offset_t va, vm_page_t m, vm_prot_t prot, pt_entry_t new_l3, orig_l3; pt_entry_t *l2, *l3; pv_entry_t pv; - vm_paddr_t opa, pa, l1_pa, l2_pa, l3_pa; - vm_page_t mpte, om, l1_m, l2_m, l3_m; + vm_paddr_t opa, pa; + vm_page_t mpte, om; boolean_t nosleep; int lvl, rv; @@ -3036,7 +3102,6 @@ pmap_enter(pmap_t pmap, vm_offset_t va, vm_page_t m, vm_prot_t prot, CTR2(KTR_PMAP, "pmap_enter: %.16lx -> %.16lx", va, pa); lock = NULL; - mpte = NULL; PMAP_LOCK(pmap); if (psind == 1) { /* Assert the required virtual and physical alignment. */ @@ -3046,13 +3111,25 @@ pmap_enter(pmap_t pmap, vm_offset_t va, vm_page_t m, vm_prot_t prot, flags, m, &lock); goto out; } + mpte = NULL; + /* + * In the case that a page table page is not + * resident, we are creating it here. 
+ */ +retry: pde = pmap_pde(pmap, va, &lvl); - if (pde != NULL && lvl == 1) { + if (pde != NULL && lvl == 2) { + l3 = pmap_l2_to_l3(pde, va); + if (va < VM_MAXUSER_ADDRESS && mpte == NULL) { + mpte = PHYS_TO_VM_PAGE(pmap_load(pde) & ~ATTR_MASK); + mpte->wire_count++; + } + goto havel3; + } else if (pde != NULL && lvl == 1) { l2 = pmap_l1_to_l2(pde, va); if ((pmap_load(l2) & ATTR_DESCR_MASK) == L2_BLOCK && - (l3 = pmap_demote_l2_locked(pmap, l2, va & ~L2_OFFSET, - &lock)) != NULL) { + (l3 = pmap_demote_l2_locked(pmap, l2, va, &lock)) != NULL) { l3 = &l3[pmap_l3_index(va)]; if (va < VM_MAXUSER_ADDRESS) { mpte = PHYS_TO_VM_PAGE( @@ -3061,83 +3138,26 @@ pmap_enter(pmap_t pmap, vm_offset_t va, vm_page_t m, vm_prot_t prot, } goto havel3; } + /* We need to allocate an L3 table. */ } - if (va < VM_MAXUSER_ADDRESS) { nosleep = (flags & PMAP_ENTER_NOSLEEP) != 0; - mpte = pmap_alloc_l3(pmap, va, nosleep ? NULL : &lock); - if (mpte == NULL && nosleep) { - CTR0(KTR_PMAP, "pmap_enter: mpte == NULL"); - if (lock != NULL) - rw_wunlock(lock); - PMAP_UNLOCK(pmap); - return (KERN_RESOURCE_SHORTAGE); - } - pde = pmap_pde(pmap, va, &lvl); - KASSERT(pde != NULL, - ("pmap_enter: Invalid page entry, va: 0x%lx", va)); - KASSERT(lvl == 2, - ("pmap_enter: Invalid level %d", lvl)); - } else { + /* - * If we get a level 2 pde it must point to a level 3 entry - * otherwise we will need to create the intermediate tables + * We use _pmap_alloc_l3() instead of pmap_alloc_l3() in order + * to handle the possibility that a superpage mapping for "va" + * was created while we slept. */ - if (lvl < 2) { - switch (lvl) { - default: - case -1: - /* Get the l0 pde to update */ - pde = pmap_l0(pmap, va); - KASSERT(pde != NULL, ("...")); - - l1_m = vm_page_alloc(NULL, 0, VM_ALLOC_NORMAL | - VM_ALLOC_NOOBJ | VM_ALLOC_WIRED | - VM_ALLOC_ZERO); - if (l1_m == NULL) - panic("pmap_enter: l1 pte_m == NULL"); - if ((l1_m->flags & PG_ZERO) == 0) - pmap_zero_page(l1_m); - - l1_pa = VM_PAGE_TO_PHYS(l1_m); - pmap_load_store(pde, l1_pa | L0_TABLE); - /* FALLTHROUGH */ - case 0: - /* Get the l1 pde to update */ - pde = pmap_l1_to_l2(pde, va); - KASSERT(pde != NULL, ("...")); - - l2_m = vm_page_alloc(NULL, 0, VM_ALLOC_NORMAL | - VM_ALLOC_NOOBJ | VM_ALLOC_WIRED | - VM_ALLOC_ZERO); - if (l2_m == NULL) - panic("pmap_enter: l2 pte_m == NULL"); - if ((l2_m->flags & PG_ZERO) == 0) - pmap_zero_page(l2_m); - - l2_pa = VM_PAGE_TO_PHYS(l2_m); - pmap_load_store(pde, l2_pa | L1_TABLE); - /* FALLTHROUGH */ - case 1: - /* Get the l2 pde to update */ - pde = pmap_l1_to_l2(pde, va); - KASSERT(pde != NULL, ("...")); - - l3_m = vm_page_alloc(NULL, 0, VM_ALLOC_NORMAL | - VM_ALLOC_NOOBJ | VM_ALLOC_WIRED | - VM_ALLOC_ZERO); - if (l3_m == NULL) - panic("pmap_enter: l3 pte_m == NULL"); - if ((l3_m->flags & PG_ZERO) == 0) - pmap_zero_page(l3_m); - - l3_pa = VM_PAGE_TO_PHYS(l3_m); - pmap_load_store(pde, l3_pa | L2_TABLE); - break; - } + mpte = _pmap_alloc_l3(pmap, pmap_l2_pindex(va), + nosleep ? 
NULL : &lock); + if (mpte == NULL && nosleep) { + CTR0(KTR_PMAP, "pmap_enter: mpte == NULL"); + rv = KERN_RESOURCE_SHORTAGE; + goto out; } - } - l3 = pmap_l2_to_l3(pde, va); + goto retry; + } else + panic("pmap_enter: missing L3 table for kernel va %#lx", va); havel3: orig_l3 = pmap_load(l3); @@ -3352,8 +3372,7 @@ pmap_enter_l2(pmap_t pmap, vm_offset_t va, pd_entry_t new_l2, u_int flags, vm_page_t m, struct rwlock **lockp) { struct spglist free; - pd_entry_t *l2, *l3, old_l2; - vm_offset_t sva; + pd_entry_t *l2, old_l2; vm_page_t l2pg, mt; PMAP_LOCK_ASSERT(pmap, MA_OWNED); @@ -3382,13 +3401,8 @@ pmap_enter_l2(pmap_t pmap, vm_offset_t va, pd_entry_t new_l2, u_int flags, (void)pmap_remove_l2(pmap, l2, va, pmap_load(pmap_l1(pmap, va)), &free, lockp); else - for (sva = va; sva < va + L2_SIZE; sva += PAGE_SIZE) { - l3 = pmap_l2_to_l3(l2, sva); - if (pmap_l3_valid(pmap_load(l3)) && - pmap_remove_l3(pmap, l3, sva, old_l2, &free, - lockp) != 0) - break; - } + pmap_remove_l3_range(pmap, old_l2, va, va + L2_SIZE, + &free, lockp); vm_page_free_pages_toq(&free, true); if (va >= VM_MAXUSER_ADDRESS) { /* @@ -3605,7 +3619,7 @@ pmap_enter_quick_locked(pmap_t pmap, vm_offset_t va, vm_page_t m, SLIST_INIT(&free); if (pmap_unwire_l3(pmap, va, mpte, &free)) { pmap_invalidate_page(pmap, va); - vm_page_free_pages_toq(&free, false); + vm_page_free_pages_toq(&free, true); } mpte = NULL; } @@ -3734,12 +3748,153 @@ pmap_unwire(pmap_t pmap, vm_offset_t sva, vm_offset_t eva) * in the destination map. * * This routine is only advisory and need not do anything. + * + * Because the executable mappings created by this routine are copied, + * it should not have to flush the instruction cache. */ - void pmap_copy(pmap_t dst_pmap, pmap_t src_pmap, vm_offset_t dst_addr, vm_size_t len, vm_offset_t src_addr) { + struct rwlock *lock; + struct spglist free; + pd_entry_t *l0, *l1, *l2, srcptepaddr; + pt_entry_t *dst_pte, ptetemp, *src_pte; + vm_offset_t addr, end_addr, va_next; + vm_page_t dst_l2pg, dstmpte, srcmpte; + + if (dst_addr != src_addr) + return; + end_addr = src_addr + len; + lock = NULL; + if (dst_pmap < src_pmap) { + PMAP_LOCK(dst_pmap); + PMAP_LOCK(src_pmap); + } else { + PMAP_LOCK(src_pmap); + PMAP_LOCK(dst_pmap); + } + for (addr = src_addr; addr < end_addr; addr = va_next) { + l0 = pmap_l0(src_pmap, addr); + if (pmap_load(l0) == 0) { + va_next = (addr + L0_SIZE) & ~L0_OFFSET; + if (va_next < addr) + va_next = end_addr; + continue; + } + l1 = pmap_l0_to_l1(l0, addr); + if (pmap_load(l1) == 0) { + va_next = (addr + L1_SIZE) & ~L1_OFFSET; + if (va_next < addr) + va_next = end_addr; + continue; + } + va_next = (addr + L2_SIZE) & ~L2_OFFSET; + if (va_next < addr) + va_next = end_addr; + l2 = pmap_l1_to_l2(l1, addr); + srcptepaddr = pmap_load(l2); + if (srcptepaddr == 0) + continue; + if ((srcptepaddr & ATTR_DESCR_MASK) == L2_BLOCK) { + if ((addr & L2_OFFSET) != 0 || + addr + L2_SIZE > end_addr) + continue; + dst_l2pg = pmap_alloc_l2(dst_pmap, addr, NULL); + if (dst_l2pg == NULL) + break; + l2 = (pd_entry_t *) + PHYS_TO_DMAP(VM_PAGE_TO_PHYS(dst_l2pg)); + l2 = &l2[pmap_l2_index(addr)]; + if (pmap_load(l2) == 0 && + ((srcptepaddr & ATTR_SW_MANAGED) == 0 || + pmap_pv_insert_l2(dst_pmap, addr, srcptepaddr, + PMAP_ENTER_NORECLAIM, &lock))) { + (void)pmap_load_store(l2, srcptepaddr & + ~ATTR_SW_WIRED); + pmap_resident_count_inc(dst_pmap, L2_SIZE / + PAGE_SIZE); + atomic_add_long(&pmap_l2_mappings, 1); + } else + dst_l2pg->wire_count--; + continue; + } + KASSERT((srcptepaddr & ATTR_DESCR_MASK) == L2_TABLE, + ("pmap_copy: 
invalid L2 entry")); + srcptepaddr &= ~ATTR_MASK; + srcmpte = PHYS_TO_VM_PAGE(srcptepaddr); + KASSERT(srcmpte->wire_count > 0, + ("pmap_copy: source page table page is unused")); + if (va_next > end_addr) + va_next = end_addr; + src_pte = (pt_entry_t *)PHYS_TO_DMAP(srcptepaddr); + src_pte = &src_pte[pmap_l3_index(addr)]; + dstmpte = NULL; + for (; addr < va_next; addr += PAGE_SIZE, src_pte++) { + ptetemp = pmap_load(src_pte); + + /* + * We only virtual copy managed pages. + */ + if ((ptetemp & ATTR_SW_MANAGED) == 0) + continue; + + if (dstmpte != NULL) { + KASSERT(dstmpte->pindex == pmap_l2_pindex(addr), + ("dstmpte pindex/addr mismatch")); + dstmpte->wire_count++; + } else if ((dstmpte = pmap_alloc_l3(dst_pmap, addr, + NULL)) == NULL) + goto out; + dst_pte = (pt_entry_t *) + PHYS_TO_DMAP(VM_PAGE_TO_PHYS(dstmpte)); + dst_pte = &dst_pte[pmap_l3_index(addr)]; + if (pmap_load(dst_pte) == 0 && + pmap_try_insert_pv_entry(dst_pmap, addr, + PHYS_TO_VM_PAGE(ptetemp & ~ATTR_MASK), &lock)) { + /* + * Clear the wired, modified, and accessed + * (referenced) bits during the copy. + * + * XXX not yet + */ + (void)pmap_load_store(dst_pte, ptetemp & + ~ATTR_SW_WIRED); + pmap_resident_count_inc(dst_pmap, 1); + } else { + SLIST_INIT(&free); + if (pmap_unwire_l3(dst_pmap, addr, dstmpte, + &free)) { + /* + * Although "addr" is not mapped, + * paging-structure caches could + * nonetheless have entries that refer + * to the freed page table pages. + * Invalidate those entries. + * + * XXX redundant invalidation + */ + pmap_invalidate_page(dst_pmap, addr); + vm_page_free_pages_toq(&free, true); + } + goto out; + } + /* Have we copied all of the valid mappings? */ + if (dstmpte->wire_count >= srcmpte->wire_count) + break; + } + } +out: + /* + * XXX This barrier may not be needed because the destination pmap is + * not active. 
+ */ + dsb(ishst); + + if (lock != NULL) + rw_wunlock(lock); + PMAP_UNLOCK(src_pmap); + PMAP_UNLOCK(dst_pmap); } /* @@ -4088,8 +4243,7 @@ pmap_remove_pages(pmap_t pmap) pmap_resident_count_dec(pmap,1); KASSERT(ml3->wire_count == NL3PG, ("pmap_remove_pages: l3 page wire count error")); - ml3->wire_count = 1; - vm_page_unwire_noq(ml3); + ml3->wire_count = 0; pmap_add_delayed_free_list(ml3, &free, FALSE); } @@ -4127,7 +4281,7 @@ pmap_remove_pages(pmap_t pmap) if (lock != NULL) rw_wunlock(lock); PMAP_UNLOCK(pmap); - vm_page_free_pages_toq(&free, false); + vm_page_free_pages_toq(&free, true); } /* @@ -4328,8 +4482,7 @@ pmap_remove_write(vm_page_t m) va = pv->pv_va; pte = pmap_pte(pmap, pv->pv_va, &lvl); if ((pmap_load(pte) & ATTR_AP_RW_BIT) == ATTR_AP(ATTR_AP_RW)) - pmap_demote_l2_locked(pmap, pte, va & ~L2_OFFSET, - &lock); + (void)pmap_demote_l2_locked(pmap, pte, va, &lock); KASSERT(lock == VM_PAGE_TO_PV_LIST_LOCK(m), ("inconsistent pv lock %p %p for page %p", lock, VM_PAGE_TO_PV_LIST_LOCK(m), m)); @@ -4566,7 +4719,6 @@ pmap_ts_referenced(vm_page_t m) */ pmap_remove_l3(pmap, pte, pv->pv_va, tpde, &free, &lock); - pmap_invalidate_page(pmap, pv->pv_va); cleared++; if (pvf == pv) pvf = NULL; @@ -4588,7 +4740,7 @@ pmap_ts_referenced(vm_page_t m) not_cleared < PMAP_TS_REFERENCED_MAX); out: rw_wunlock(lock); - vm_page_free_pages_toq(&free, false); + vm_page_free_pages_toq(&free, true); return (cleared + not_cleared); } @@ -4891,7 +5043,7 @@ pmap_change_attr_locked(vm_offset_t va, vm_size_t size, int mode) pte = pmap_l1_to_l2(pte, tmpva); case 2: newpte = pmap_demote_l2(kernel_pmap, pte, - tmpva & ~L2_OFFSET); + tmpva); if (newpte == NULL) return (EINVAL); pte = pmap_l2_to_l3(pte, tmpva); @@ -4990,6 +5142,18 @@ pmap_demote_l1(pmap_t pmap, pt_entry_t *l1, vm_offset_t va) return (l2); } +static void +pmap_demote_l2_abort(pmap_t pmap, vm_offset_t va, pt_entry_t *l2, + struct rwlock **lockp) +{ + struct spglist free; + + SLIST_INIT(&free); + (void)pmap_remove_l2(pmap, l2, va, pmap_load(pmap_l1(pmap, va)), &free, + lockp); + vm_page_free_pages_toq(&free, true); +} + /* * Create an L3 table to map all addresses within an L2 mapping. */ @@ -5008,8 +5172,7 @@ pmap_demote_l2_locked(pmap_t pmap, pt_entry_t *l2, vm_offset_t va, oldl2 = pmap_load(l2); KASSERT((oldl2 & ATTR_DESCR_MASK) == L2_BLOCK, ("pmap_demote_l2: Demoting a non-block entry")); - KASSERT((va & L2_OFFSET) == 0, - ("pmap_demote_l2: Invalid virtual address %#lx", va)); + va &= ~L2_OFFSET; tmpl2 = 0; if (va <= (vm_offset_t)l2 && va + L2_SIZE > (vm_offset_t)l2) { @@ -5018,15 +5181,57 @@ pmap_demote_l2_locked(pmap_t pmap, pt_entry_t *l2, vm_offset_t va, return (NULL); } + /* + * Invalidate the 2MB page mapping and return "failure" if the + * mapping was never accessed. + */ + if ((oldl2 & ATTR_AF) == 0) { + KASSERT((oldl2 & ATTR_SW_WIRED) == 0, + ("pmap_demote_l2: a wired mapping is missing ATTR_AF")); + pmap_demote_l2_abort(pmap, va, l2, lockp); + CTR2(KTR_PMAP, "pmap_demote_l2: failure for va %#lx in pmap %p", + va, pmap); + goto fail; + } + if ((ml3 = pmap_remove_pt_page(pmap, va)) == NULL) { + KASSERT((oldl2 & ATTR_SW_WIRED) == 0, + ("pmap_demote_l2: page table page for a wired mapping" + " is missing")); + + /* + * If the page table page is missing and the mapping + * is for a kernel address, the mapping must belong to + * the direct map. Page table pages are preallocated + * for every other part of the kernel address space, + * so the direct map region is the only part of the + * kernel address space that must be handled here. 
+ */ + KASSERT(va < VM_MAXUSER_ADDRESS || VIRT_IN_DMAP(va), + ("pmap_demote_l2: No saved mpte for va %#lx", va)); + + /* + * If the 2MB page mapping belongs to the direct map + * region of the kernel's address space, then the page + * allocation request specifies the highest possible + * priority (VM_ALLOC_INTERRUPT). Otherwise, the + * priority is normal. + */ ml3 = vm_page_alloc(NULL, pmap_l2_pindex(va), (VIRT_IN_DMAP(va) ? VM_ALLOC_INTERRUPT : VM_ALLOC_NORMAL) | VM_ALLOC_NOOBJ | VM_ALLOC_WIRED); + + /* + * If the allocation of the new page table page fails, + * invalidate the 2MB page mapping and return "failure". + */ if (ml3 == NULL) { + pmap_demote_l2_abort(pmap, va, l2, lockp); CTR2(KTR_PMAP, "pmap_demote_l2: failure for va %#lx" " in pmap %p", va, pmap); goto fail; } + if (va < VM_MAXUSER_ADDRESS) { ml3->wire_count = NL3PG; pmap_resident_count_inc(pmap, 1); @@ -5075,6 +5280,10 @@ pmap_demote_l2_locked(pmap_t pmap, pt_entry_t *l2, vm_offset_t va, if ((oldl2 & ATTR_SW_MANAGED) != 0) reserve_pv_entries(pmap, Ln_ENTRIES - 1, lockp); + /* + * Pass PAGE_SIZE so that a single TLB invalidation is performed on + * the 2MB page mapping. + */ pmap_update_entry(pmap, l2, l3phys | L2_TABLE, va, PAGE_SIZE); /* -- 2.45.0