From 3e9228f4fcc86ec2353761e2cbee49f71fc36b7a Mon Sep 17 00:00:00 2001
From: alc
Date: Mon, 2 Jul 2012 05:35:55 +0000
Subject: [PATCH] MFC r233290, r235598, r235973, r236045, r236240, r236291,
 r236378

Change pv_entry_count to a long on amd64.

Rename pmap_collect() to pmap_pv_reclaim() and rewrite it such that it
no longer uses the active and inactive paging queues.

Eliminate some purely stylistic differences among the amd64, i386
native, and i386 xen PV entry allocators.

Eliminate code duplication in free_pv_entry() and pmap_remove_pages()
by introducing free_pv_chunk().

git-svn-id: svn://svn.freebsd.org/base/stable/9@237950 ccf9f872-aa2e-dd11-9fc8-001c23d0bc1f
---
 sys/amd64/amd64/pmap.c   | 209 ++++++++++++++++++++++++-------------
 sys/amd64/include/pmap.h |   2 +-
 sys/i386/i386/pmap.c     | 220 +++++++++++++++++++++++++--------------
 sys/i386/include/pmap.h  |   2 +-
 sys/i386/xen/pmap.c      | 206 +++++++++++++++++++++++-------------
 5 files changed, 414 insertions(+), 225 deletions(-)

diff --git a/sys/amd64/amd64/pmap.c b/sys/amd64/amd64/pmap.c
index caeff1b69..60aa8f621 100644
--- a/sys/amd64/amd64/pmap.c
+++ b/sys/amd64/amd64/pmap.c
@@ -201,7 +201,8 @@ static u_int64_t DMPDPphys;	/* phys addr of direct mapped level 3 */
 /*
  * Data for the pv entry allocation mechanism
  */
-static int pv_entry_count;
+static TAILQ_HEAD(pch, pv_chunk) pv_chunks = TAILQ_HEAD_INITIALIZER(pv_chunks);
+static long pv_entry_count;
 static struct md_page *pv_table;
 
 /*
@@ -215,8 +216,9 @@ caddr_t CADDR1 = 0;
  */
 static caddr_t crashdumpmap;
 
+static void	free_pv_chunk(struct pv_chunk *pc);
 static void	free_pv_entry(pmap_t pmap, pv_entry_t pv);
-static pv_entry_t get_pv_entry(pmap_t locked_pmap, boolean_t try);
+static pv_entry_t get_pv_entry(pmap_t pmap, boolean_t try);
 static void	pmap_pv_demote_pde(pmap_t pmap, vm_offset_t va, vm_paddr_t pa);
 static boolean_t pmap_pv_insert_pde(pmap_t pmap, vm_offset_t va, vm_paddr_t pa);
 static void	pmap_pv_promote_pde(pmap_t pmap, vm_offset_t va, vm_paddr_t pa);
@@ -1993,7 +1995,7 @@ static __inline struct pv_chunk *
 pv_to_chunk(pv_entry_t pv)
 {
 
-	return (struct pv_chunk *)((uintptr_t)pv & ~(uintptr_t)PAGE_MASK);
+	return ((struct pv_chunk *)((uintptr_t)pv & ~(uintptr_t)PAGE_MASK));
 }
 
 #define PV_PMAP(pv) (pv_to_chunk(pv)->pc_pmap)
@@ -2004,7 +2006,7 @@ pv_to_chunk(pv_entry_t pv)
 
 static uint64_t pc_freemask[_NPCM] = { PC_FREE0, PC_FREE1, PC_FREE2 };
 
-SYSCTL_INT(_vm_pmap, OID_AUTO, pv_entry_count, CTLFLAG_RD, &pv_entry_count, 0,
+SYSCTL_LONG(_vm_pmap, OID_AUTO, pv_entry_count, CTLFLAG_RD, &pv_entry_count, 0,
	"Current number of pv entries");
 
 #ifdef PV_STATS
@@ -2028,71 +2030,136 @@ SYSCTL_LONG(_vm_pmap, OID_AUTO, pv_entry_allocs, CTLFLAG_RD, &pv_entry_allocs, 0
	"Current number of pv entry allocs");
 SYSCTL_INT(_vm_pmap, OID_AUTO, pv_entry_spare, CTLFLAG_RD, &pv_entry_spare, 0,
	"Current number of spare pv entries");
-
-static int pmap_collect_inactive, pmap_collect_active;
-
-SYSCTL_INT(_vm_pmap, OID_AUTO, pmap_collect_inactive, CTLFLAG_RD, &pmap_collect_inactive, 0,
-	"Current number times pmap_collect called on inactive queue");
-SYSCTL_INT(_vm_pmap, OID_AUTO, pmap_collect_active, CTLFLAG_RD, &pmap_collect_active, 0,
-	"Current number times pmap_collect called on active queue");
 #endif
 
 /*
  * We are in a serious low memory condition.  Resort to
  * drastic measures to free some pages so we can allocate
- * another pv entry chunk.  This is normally called to
- * unmap inactive pages, and if necessary, active pages.
+ * another pv entry chunk.
+ *
+ * We do not, however, unmap 2mpages because subsequent accesses will
+ * allocate per-page pv entries until repromotion occurs, thereby
+ * exacerbating the shortage of free pv entries.
  */
-static void
-pmap_collect(pmap_t locked_pmap, struct vpgqueues *vpq)
+static vm_page_t
+pmap_pv_reclaim(pmap_t locked_pmap)
 {
+	struct pch newtail;
+	struct pv_chunk *pc;
+	struct md_page *pvh;
 	pd_entry_t *pde;
 	pmap_t pmap;
 	pt_entry_t *pte, tpte;
-	pv_entry_t next_pv, pv;
+	pv_entry_t pv;
 	vm_offset_t va;
-	vm_page_t m, free;
-
-	TAILQ_FOREACH(m, &vpq->pl, pageq) {
-		if ((m->flags & PG_MARKER) != 0 || m->hold_count || m->busy)
-			continue;
-		TAILQ_FOREACH_SAFE(pv, &m->md.pv_list, pv_list, next_pv) {
-			va = pv->pv_va;
-			pmap = PV_PMAP(pv);
+	vm_page_t free, m, m_pc;
+	uint64_t inuse, freemask;
+	int bit, field, freed;
+
+	mtx_assert(&vm_page_queue_mtx, MA_OWNED);
+	PMAP_LOCK_ASSERT(locked_pmap, MA_OWNED);
+	pmap = NULL;
+	free = m_pc = NULL;
+	TAILQ_INIT(&newtail);
+	while ((pc = TAILQ_FIRST(&pv_chunks)) != NULL && free == NULL) {
+		TAILQ_REMOVE(&pv_chunks, pc, pc_lru);
+		if (pmap != pc->pc_pmap) {
+			if (pmap != NULL) {
+				pmap_invalidate_all(pmap);
+				if (pmap != locked_pmap)
+					PMAP_UNLOCK(pmap);
+			}
+			pmap = pc->pc_pmap;
 			/* Avoid deadlock and lock recursion. */
 			if (pmap > locked_pmap)
 				PMAP_LOCK(pmap);
-			else if (pmap != locked_pmap && !PMAP_TRYLOCK(pmap))
+			else if (pmap != locked_pmap && !PMAP_TRYLOCK(pmap)) {
+				pmap = NULL;
+				TAILQ_INSERT_TAIL(&newtail, pc, pc_lru);
 				continue;
-			pmap_resident_count_dec(pmap, 1);
-			pde = pmap_pde(pmap, va);
-			KASSERT((*pde & PG_PS) == 0, ("pmap_collect: found"
-			    " a 2mpage in page %p's pv list", m));
-			pte = pmap_pde_to_pte(pde, va);
-			tpte = pte_load_clear(pte);
-			KASSERT((tpte & PG_W) == 0,
-			    ("pmap_collect: wired pte %#lx", tpte));
-			if (tpte & PG_A)
-				vm_page_aflag_set(m, PGA_REFERENCED);
-			if ((tpte & (PG_M | PG_RW)) == (PG_M | PG_RW))
-				vm_page_dirty(m);
-			free = NULL;
-			pmap_unuse_pt(pmap, va, *pde, &free);
-			pmap_invalidate_page(pmap, va);
-			pmap_free_zero_pages(free);
-			TAILQ_REMOVE(&m->md.pv_list, pv, pv_list);
-			free_pv_entry(pmap, pv);
-			if (pmap != locked_pmap)
-				PMAP_UNLOCK(pmap);
+			}
 		}
-		if (TAILQ_EMPTY(&m->md.pv_list) &&
-		    TAILQ_EMPTY(&pa_to_pvh(VM_PAGE_TO_PHYS(m))->pv_list))
-			vm_page_aflag_clear(m, PGA_WRITEABLE);
+
+		/*
+		 * Destroy every non-wired, 4 KB page mapping in the chunk.
+		 */
+		freed = 0;
+		for (field = 0; field < _NPCM; field++) {
+			freemask = 0;
+			for (inuse = ~pc->pc_map[field] & pc_freemask[field];
+			    inuse != 0; inuse &= ~(1UL << bit)) {
+				bit = bsfq(inuse);
+				pv = &pc->pc_pventry[field * 64 + bit];
+				va = pv->pv_va;
+				pde = pmap_pde(pmap, va);
+				if ((*pde & PG_PS) != 0)
+					continue;
+				pte = pmap_pde_to_pte(pde, va);
+				if ((*pte & PG_W) != 0)
+					continue;
+				tpte = pte_load_clear(pte);
+				if ((tpte & PG_G) != 0)
+					pmap_invalidate_page(pmap, va);
+				m = PHYS_TO_VM_PAGE(tpte & PG_FRAME);
+				if ((tpte & (PG_M | PG_RW)) == (PG_M | PG_RW))
+					vm_page_dirty(m);
+				if ((tpte & PG_A) != 0)
+					vm_page_aflag_set(m, PGA_REFERENCED);
+				TAILQ_REMOVE(&m->md.pv_list, pv, pv_list);
+				if (TAILQ_EMPTY(&m->md.pv_list) &&
+				    (m->flags & PG_FICTITIOUS) == 0) {
+					pvh = pa_to_pvh(VM_PAGE_TO_PHYS(m));
+					if (TAILQ_EMPTY(&pvh->pv_list)) {
+						vm_page_aflag_clear(m,
+						    PGA_WRITEABLE);
+					}
+				}
+				pmap_unuse_pt(pmap, va, *pde, &free);
+				freemask |= 1UL << bit;
+				freed++;
+			}
+			pc->pc_map[field] |= freemask;
+		}
+		if (freed == 0) {
+			TAILQ_INSERT_TAIL(&newtail, pc, pc_lru);
+			continue;
+		}
+		pmap_resident_count_dec(pmap, freed);
+		PV_STAT(pv_entry_frees += freed);
+		PV_STAT(pv_entry_spare += freed);
+		pv_entry_count -= freed;
+		TAILQ_REMOVE(&pmap->pm_pvchunk, pc, pc_list);
+		if (pc->pc_map[0] == PC_FREE0 && pc->pc_map[1] == PC_FREE1 &&
+		    pc->pc_map[2] == PC_FREE2) {
+			PV_STAT(pv_entry_spare -= _NPCPV);
+			PV_STAT(pc_chunk_count--);
+			PV_STAT(pc_chunk_frees++);
+			/* Entire chunk is free; return it. */
+			m_pc = PHYS_TO_VM_PAGE(DMAP_TO_PHYS((vm_offset_t)pc));
+			dump_drop_page(m_pc->phys_addr);
+			break;
+		}
+		TAILQ_INSERT_HEAD(&pmap->pm_pvchunk, pc, pc_list);
+		TAILQ_INSERT_TAIL(&newtail, pc, pc_lru);
+		/* One freed pv entry in locked_pmap is sufficient. */
+		if (pmap == locked_pmap)
+			break;
+	}
+	TAILQ_CONCAT(&pv_chunks, &newtail, pc_lru);
+	if (pmap != NULL) {
+		pmap_invalidate_all(pmap);
+		if (pmap != locked_pmap)
+			PMAP_UNLOCK(pmap);
+	}
+	if (m_pc == NULL && free != NULL) {
+		m_pc = free;
+		free = m_pc->right;
+		/* Recycle a freed page table page. */
+		m_pc->wire_count = 1;
+		atomic_add_int(&cnt.v_wire_count, 1);
 	}
+	pmap_free_zero_pages(free);
+	return (m_pc);
 }
 
 /*
@@ -2101,7 +2168,6 @@ pmap_collect(pmap_t locked_pmap, struct vpgqueues *vpq)
 static void
 free_pv_entry(pmap_t pmap, pv_entry_t pv)
 {
-	vm_page_t m;
 	struct pv_chunk *pc;
 	int idx, field, bit;
 
@@ -2125,6 +2191,15 @@ free_pv_entry(pmap_t pmap, pv_entry_t pv)
 		return;
 	}
 	TAILQ_REMOVE(&pmap->pm_pvchunk, pc, pc_list);
+	free_pv_chunk(pc);
+}
+
+static void
+free_pv_chunk(struct pv_chunk *pc)
+{
+	vm_page_t m;
+
+	TAILQ_REMOVE(&pv_chunks, pc, pc_lru);
 	PV_STAT(pv_entry_spare -= _NPCPV);
 	PV_STAT(pc_chunk_count--);
 	PV_STAT(pc_chunk_frees++);
@@ -2142,7 +2217,6 @@ free_pv_entry(pmap_t pmap, pv_entry_t pv)
 static pv_entry_t
 get_pv_entry(pmap_t pmap, boolean_t try)
 {
-	struct vpgqueues *pq;
 	int bit, field;
 	pv_entry_t pv;
 	struct pv_chunk *pc;
@@ -2151,7 +2225,6 @@ get_pv_entry(pmap_t pmap, boolean_t try)
 	PMAP_LOCK_ASSERT(pmap, MA_OWNED);
 	mtx_assert(&vm_page_queue_mtx, MA_OWNED);
 	PV_STAT(pv_entry_allocs++);
-	pq = NULL;
 retry:
 	pc = TAILQ_FIRST(&pmap->pm_pvchunk);
 	if (pc != NULL) {
@@ -2171,35 +2244,26 @@ retry:
 				TAILQ_INSERT_TAIL(&pmap->pm_pvchunk, pc, pc_list);
 			}
+			if (pc != TAILQ_LAST(&pv_chunks, pch)) {
+				TAILQ_REMOVE(&pv_chunks, pc, pc_lru);
+				TAILQ_INSERT_TAIL(&pv_chunks, pc, pc_lru);
+			}
 			pv_entry_count++;
 			PV_STAT(pv_entry_spare--);
 			return (pv);
 		}
 	}
 	/* No free items, allocate another chunk */
-	m = vm_page_alloc(NULL, 0, (pq == &vm_page_queues[PQ_ACTIVE] ?
-	    VM_ALLOC_SYSTEM : VM_ALLOC_NORMAL) | VM_ALLOC_NOOBJ |
+	m = vm_page_alloc(NULL, 0, VM_ALLOC_NORMAL | VM_ALLOC_NOOBJ |
 	    VM_ALLOC_WIRED);
 	if (m == NULL) {
 		if (try) {
 			PV_STAT(pc_chunk_tryfail++);
 			return (NULL);
 		}
-		/*
-		 * Reclaim pv entries: At first, destroy mappings to inactive
-		 * pages.  After that, if a pv chunk entry is still needed,
-		 * destroy mappings to active pages.
-		 */
-		if (pq == NULL) {
-			PV_STAT(pmap_collect_inactive++);
-			pq = &vm_page_queues[PQ_INACTIVE];
-		} else if (pq == &vm_page_queues[PQ_INACTIVE]) {
-			PV_STAT(pmap_collect_active++);
-			pq = &vm_page_queues[PQ_ACTIVE];
-		} else
-			panic("get_pv_entry: allocation failed");
-		pmap_collect(pmap, pq);
-		goto retry;
+		m = pmap_pv_reclaim(pmap);
+		if (m == NULL)
+			goto retry;
 	}
 	PV_STAT(pc_chunk_count++);
 	PV_STAT(pc_chunk_allocs++);
@@ -2209,6 +2273,7 @@ retry:
 	pc->pc_map[0] = PC_FREE0 & ~1ul;	/* preallocated bit 0 */
 	pc->pc_map[1] = PC_FREE1;
 	pc->pc_map[2] = PC_FREE2;
+	TAILQ_INSERT_TAIL(&pv_chunks, pc, pc_lru);
 	pv = &pc->pc_pventry[0];
 	TAILQ_INSERT_HEAD(&pmap->pm_pvchunk, pc, pc_list);
 	pv_entry_count++;
@@ -4150,14 +4215,8 @@ pmap_remove_pages(pmap_t pmap)
 			}
 		}
 		if (allfree) {
-			PV_STAT(pv_entry_spare -= _NPCPV);
-			PV_STAT(pc_chunk_count--);
-			PV_STAT(pc_chunk_frees++);
 			TAILQ_REMOVE(&pmap->pm_pvchunk, pc, pc_list);
-			m = PHYS_TO_VM_PAGE(DMAP_TO_PHYS((vm_offset_t)pc));
-			dump_drop_page(m->phys_addr);
-			vm_page_unwire(m, 0);
-			vm_page_free(m);
+			free_pv_chunk(pc);
 		}
 	}
 	pmap_invalidate_all(pmap);
diff --git a/sys/amd64/include/pmap.h b/sys/amd64/include/pmap.h
index 1b8108a3e..48758ef8d 100644
--- a/sys/amd64/include/pmap.h
+++ b/sys/amd64/include/pmap.h
@@ -295,7 +295,7 @@ struct pv_chunk {
 	pmap_t			pc_pmap;
 	TAILQ_ENTRY(pv_chunk)	pc_list;
 	uint64_t		pc_map[_NPCM];	/* bitmap; 1 = free */
-	uint64_t		pc_spare[2];
+	TAILQ_ENTRY(pv_chunk)	pc_lru;
 	struct pv_entry		pc_pventry[_NPCPV];
 };
 
diff --git a/sys/i386/i386/pmap.c b/sys/i386/i386/pmap.c
index 5947cc787..bb751645e 100644
--- a/sys/i386/i386/pmap.c
+++ b/sys/i386/i386/pmap.c
@@ -233,6 +233,7 @@ static int pat_index[PAT_INDEX_SIZE];	/* cache mode to PAT index conversion */
 /*
  * Data for the pv entry allocation mechanism
  */
+static TAILQ_HEAD(pch, pv_chunk) pv_chunks = TAILQ_HEAD_INITIALIZER(pv_chunks);
 static int pv_entry_count = 0, pv_entry_max = 0, pv_entry_high_water = 0;
 static struct md_page *pv_table;
 static int shpgperproc = PMAP_SHPGPERPROC;
@@ -283,8 +284,9 @@ SYSCTL_INT(_debug, OID_AUTO, PMAP1unchanged, CTLFLAG_RD,
	   "Number of times pmap_pte_quick didn't change PMAP1");
 static struct mtx PMAP2mutex;
 
+static void	free_pv_chunk(struct pv_chunk *pc);
 static void	free_pv_entry(pmap_t pmap, pv_entry_t pv);
-static pv_entry_t get_pv_entry(pmap_t locked_pmap, int try);
+static pv_entry_t get_pv_entry(pmap_t pmap, boolean_t try);
 static void	pmap_pv_demote_pde(pmap_t pmap, vm_offset_t va, vm_paddr_t pa);
 static boolean_t pmap_pv_insert_pde(pmap_t pmap, vm_offset_t va, vm_paddr_t pa);
 static void	pmap_pv_promote_pde(pmap_t pmap, vm_offset_t va, vm_paddr_t pa);
@@ -2143,6 +2145,7 @@ pmap_growkernel(vm_offset_t addr)
 
 CTASSERT(sizeof(struct pv_chunk) == PAGE_SIZE);
 CTASSERT(_NPCM == 11);
+CTASSERT(_NPCPV == 336);
 
 static __inline struct pv_chunk *
 pv_to_chunk(pv_entry_t pv)
@@ -2156,7 +2159,7 @@ pv_to_chunk(pv_entry_t pv)
 #define	PC_FREE0_9	0xfffffffful	/* Free values for index 0 through 9 */
 #define	PC_FREE10	0x0000fffful	/* Free values for index 10 */
 
-static uint32_t pc_freemask[11] = {
+static uint32_t pc_freemask[_NPCM] = {
 	PC_FREE0_9, PC_FREE0_9, PC_FREE0_9,
 	PC_FREE0_9, PC_FREE0_9, PC_FREE0_9,
 	PC_FREE0_9, PC_FREE0_9, PC_FREE0_9,
@@ -2187,79 +2190,152 @@ SYSCTL_LONG(_vm_pmap, OID_AUTO, pv_entry_allocs, CTLFLAG_RD, &pv_entry_allocs, 0
	"Current number of pv entry allocs");
 SYSCTL_INT(_vm_pmap, OID_AUTO, pv_entry_spare, CTLFLAG_RD, &pv_entry_spare, 0,
	"Current number of spare pv entries");
-
-static int pmap_collect_inactive, pmap_collect_active;
-
-SYSCTL_INT(_vm_pmap, OID_AUTO, pmap_collect_inactive, CTLFLAG_RD, &pmap_collect_inactive, 0,
-	"Current number times pmap_collect called on inactive queue");
-SYSCTL_INT(_vm_pmap, OID_AUTO, pmap_collect_active, CTLFLAG_RD, &pmap_collect_active, 0,
-	"Current number times pmap_collect called on active queue");
 #endif
 
 /*
  * We are in a serious low memory condition.  Resort to
  * drastic measures to free some pages so we can allocate
- * another pv entry chunk.  This is normally called to
- * unmap inactive pages, and if necessary, active pages.
+ * another pv entry chunk.
  */
-static void
-pmap_collect(pmap_t locked_pmap, struct vpgqueues *vpq)
+static vm_page_t
+pmap_pv_reclaim(pmap_t locked_pmap)
 {
+	struct pch newtail;
+	struct pv_chunk *pc;
+	struct md_page *pvh;
 	pd_entry_t *pde;
 	pmap_t pmap;
 	pt_entry_t *pte, tpte;
-	pv_entry_t next_pv, pv;
+	pv_entry_t pv;
 	vm_offset_t va;
-	vm_page_t m, free;
-
+	vm_page_t free, m, m_pc;
+	uint32_t inuse, freemask;
+	int bit, field, freed;
+
+	PMAP_LOCK_ASSERT(locked_pmap, MA_OWNED);
+	pmap = NULL;
+	free = m_pc = NULL;
+	TAILQ_INIT(&newtail);
 	sched_pin();
-	TAILQ_FOREACH(m, &vpq->pl, pageq) {
-		if ((m->flags & PG_MARKER) != 0 || m->hold_count || m->busy)
-			continue;
-		TAILQ_FOREACH_SAFE(pv, &m->md.pv_list, pv_list, next_pv) {
-			va = pv->pv_va;
-			pmap = PV_PMAP(pv);
+	while ((pc = TAILQ_FIRST(&pv_chunks)) != NULL && (pv_vafree == 0 ||
+	    free == NULL)) {
+		TAILQ_REMOVE(&pv_chunks, pc, pc_lru);
+		if (pmap != pc->pc_pmap) {
+			if (pmap != NULL) {
+				pmap_invalidate_all(pmap);
+				if (pmap != locked_pmap)
+					PMAP_UNLOCK(pmap);
+			}
+			pmap = pc->pc_pmap;
 			/* Avoid deadlock and lock recursion. */
 			if (pmap > locked_pmap)
 				PMAP_LOCK(pmap);
-			else if (pmap != locked_pmap && !PMAP_TRYLOCK(pmap))
+			else if (pmap != locked_pmap && !PMAP_TRYLOCK(pmap)) {
+				pmap = NULL;
+				TAILQ_INSERT_TAIL(&newtail, pc, pc_lru);
 				continue;
-			pmap->pm_stats.resident_count--;
-			pde = pmap_pde(pmap, va);
-			KASSERT((*pde & PG_PS) == 0, ("pmap_collect: found"
-			    " a 4mpage in page %p's pv list", m));
-			pte = pmap_pte_quick(pmap, va);
-			tpte = pte_load_clear(pte);
-			KASSERT((tpte & PG_W) == 0,
-			    ("pmap_collect: wired pte %#jx", (uintmax_t)tpte));
-			if (tpte & PG_A)
-				vm_page_aflag_set(m, PGA_REFERENCED);
-			if ((tpte & (PG_M | PG_RW)) == (PG_M | PG_RW))
-				vm_page_dirty(m);
-			free = NULL;
-			pmap_unuse_pt(pmap, va, &free);
-			pmap_invalidate_page(pmap, va);
-			pmap_free_zero_pages(free);
-			TAILQ_REMOVE(&m->md.pv_list, pv, pv_list);
-			free_pv_entry(pmap, pv);
-			if (pmap != locked_pmap)
-				PMAP_UNLOCK(pmap);
+			}
 		}
+
+		/*
+		 * Destroy every non-wired, 4 KB page mapping in the chunk.
+		 */
+		freed = 0;
+		for (field = 0; field < _NPCM; field++) {
+			freemask = 0;
+			for (inuse = ~pc->pc_map[field] & pc_freemask[field];
+			    inuse != 0; inuse &= ~(1UL << bit)) {
+				bit = bsfl(inuse);
+				pv = &pc->pc_pventry[field * 32 + bit];
+				va = pv->pv_va;
+				pde = pmap_pde(pmap, va);
+				if ((*pde & PG_PS) != 0)
+					continue;
+				pte = pmap_pte_quick(pmap, va);
+				if ((*pte & PG_W) != 0)
+					continue;
+				tpte = pte_load_clear(pte);
+				if ((tpte & PG_G) != 0)
+					pmap_invalidate_page(pmap, va);
+				m = PHYS_TO_VM_PAGE(tpte & PG_FRAME);
+				if ((tpte & (PG_M | PG_RW)) == (PG_M | PG_RW))
+					vm_page_dirty(m);
+				if ((tpte & PG_A) != 0)
+					vm_page_aflag_set(m, PGA_REFERENCED);
+				TAILQ_REMOVE(&m->md.pv_list, pv, pv_list);
+				if (TAILQ_EMPTY(&m->md.pv_list) &&
+				    (m->flags & PG_FICTITIOUS) == 0) {
+					pvh = pa_to_pvh(VM_PAGE_TO_PHYS(m));
+					if (TAILQ_EMPTY(&pvh->pv_list)) {
+						vm_page_aflag_clear(m,
+						    PGA_WRITEABLE);
+					}
+				}
+				pmap_unuse_pt(pmap, va, &free);
+				freemask |= 1UL << bit;
+				freed++;
+			}
+			pc->pc_map[field] |= freemask;
+		}
+		if (freed == 0) {
+			TAILQ_INSERT_TAIL(&newtail, pc, pc_lru);
+			continue;
+		}
+		pmap->pm_stats.resident_count -= freed;
+		PV_STAT(pv_entry_frees += freed);
+		PV_STAT(pv_entry_spare += freed);
+		pv_entry_count -= freed;
+		TAILQ_REMOVE(&pmap->pm_pvchunk, pc, pc_list);
+		for (field = 0; field < _NPCM; field++)
+			if (pc->pc_map[field] != pc_freemask[field]) {
+				TAILQ_INSERT_HEAD(&pmap->pm_pvchunk, pc,
+				    pc_list);
+				TAILQ_INSERT_TAIL(&newtail, pc, pc_lru);
+
+				/*
+				 * One freed pv entry in locked_pmap is
+				 * sufficient.
+				 */
+				if (pmap == locked_pmap)
+					goto out;
+				break;
+			}
+		if (field == _NPCM) {
+			PV_STAT(pv_entry_spare -= _NPCPV);
+			PV_STAT(pc_chunk_count--);
+			PV_STAT(pc_chunk_frees++);
+			/* Entire chunk is free; return it. */
+			m_pc = PHYS_TO_VM_PAGE(pmap_kextract((vm_offset_t)pc));
+			pmap_qremove((vm_offset_t)pc, 1);
+			pmap_ptelist_free(&pv_vafree, (vm_offset_t)pc);
+			break;
 		}
-		if (TAILQ_EMPTY(&m->md.pv_list) &&
-		    TAILQ_EMPTY(&pa_to_pvh(VM_PAGE_TO_PHYS(m))->pv_list))
-			vm_page_aflag_clear(m, PGA_WRITEABLE);
 	}
+out:
 	sched_unpin();
+	TAILQ_CONCAT(&pv_chunks, &newtail, pc_lru);
+	if (pmap != NULL) {
+		pmap_invalidate_all(pmap);
+		if (pmap != locked_pmap)
+			PMAP_UNLOCK(pmap);
+	}
+	if (m_pc == NULL && pv_vafree != 0 && free != NULL) {
+		m_pc = free;
+		free = m_pc->right;
+		/* Recycle a freed page table page. */
+		m_pc->wire_count = 1;
+		atomic_add_int(&cnt.v_wire_count, 1);
+	}
+	pmap_free_zero_pages(free);
+	return (m_pc);
 }
-
 /*
  * free the pv_entry back to the free list
  */
 static void
 free_pv_entry(pmap_t pmap, pv_entry_t pv)
 {
-	vm_page_t m;
 	struct pv_chunk *pc;
 	int idx, field, bit;
 
@@ -2280,6 +2356,15 @@ free_pv_entry(pmap_t pmap, pv_entry_t pv)
 		TAILQ_INSERT_HEAD(&pmap->pm_pvchunk, pc, pc_list);
 		return;
 	}
+	free_pv_chunk(pc);
+}
+
+static void
+free_pv_chunk(struct pv_chunk *pc)
+{
+	vm_page_t m;
+
+	TAILQ_REMOVE(&pv_chunks, pc, pc_lru);
 	PV_STAT(pv_entry_spare -= _NPCPV);
 	PV_STAT(pc_chunk_count--);
 	PV_STAT(pc_chunk_frees++);
@@ -2296,11 +2381,10 @@ free_pv_entry(pmap_t pmap, pv_entry_t pv)
  * when needed.
  */
 static pv_entry_t
-get_pv_entry(pmap_t pmap, int try)
+get_pv_entry(pmap_t pmap, boolean_t try)
 {
 	static const struct timeval printinterval = { 60, 0 };
 	static struct timeval lastprint;
-	struct vpgqueues *pq;
 	int bit, field;
 	pv_entry_t pv;
 	struct pv_chunk *pc;
@@ -2315,7 +2399,6 @@ get_pv_entry(pmap_t pmap, int try)
 		printf("Approaching the limit on PV entries, consider "
 		    "increasing either the vm.pmap.shpgperproc or the "
 		    "vm.pmap.pv_entry_max tunable.\n");
-	pq = NULL;
 retry:
 	pc = TAILQ_FIRST(&pmap->pm_pvchunk);
 	if (pc != NULL) {
@@ -2336,6 +2419,10 @@ retry:
 		}
 		TAILQ_REMOVE(&pmap->pm_pvchunk, pc, pc_list);
 		TAILQ_INSERT_TAIL(&pmap->pm_pvchunk, pc, pc_list);
+		if (pc != TAILQ_LAST(&pv_chunks, pch)) {
+			TAILQ_REMOVE(&pv_chunks, pc, pc_lru);
+			TAILQ_INSERT_TAIL(&pv_chunks, pc, pc_lru);
+		}
 		PV_STAT(pv_entry_spare--);
 		return (pv);
 	}
@@ -2345,29 +2432,16 @@ retry:
	 * queues lock.  If "pv_vafree" is currently non-empty, it will
	 * remain non-empty until pmap_ptelist_alloc() completes.
	 */
-	if (pv_vafree == 0 || (m = vm_page_alloc(NULL, 0, (pq ==
-	    &vm_page_queues[PQ_ACTIVE] ? VM_ALLOC_SYSTEM : VM_ALLOC_NORMAL) |
+	if (pv_vafree == 0 || (m = vm_page_alloc(NULL, 0, VM_ALLOC_NORMAL |
 	    VM_ALLOC_NOOBJ | VM_ALLOC_WIRED)) == NULL) {
 		if (try) {
 			pv_entry_count--;
 			PV_STAT(pc_chunk_tryfail++);
 			return (NULL);
 		}
-		/*
-		 * Reclaim pv entries: At first, destroy mappings to
-		 * inactive pages.  After that, if a pv chunk entry
-		 * is still needed, destroy mappings to active pages.
-		 */
-		if (pq == NULL) {
-			PV_STAT(pmap_collect_inactive++);
-			pq = &vm_page_queues[PQ_INACTIVE];
-		} else if (pq == &vm_page_queues[PQ_INACTIVE]) {
-			PV_STAT(pmap_collect_active++);
-			pq = &vm_page_queues[PQ_ACTIVE];
-		} else
-			panic("get_pv_entry: increase vm.pmap.shpgperproc");
-		pmap_collect(pmap, pq);
-		goto retry;
+		m = pmap_pv_reclaim(pmap);
+		if (m == NULL)
+			goto retry;
 	}
 	PV_STAT(pc_chunk_count++);
 	PV_STAT(pc_chunk_allocs++);
@@ -2377,6 +2451,7 @@ retry:
 	pc->pc_map[0] = pc_freemask[0] & ~1ul;	/* preallocated bit 0 */
 	for (field = 1; field < _NPCM; field++)
 		pc->pc_map[field] = pc_freemask[field];
+	TAILQ_INSERT_TAIL(&pv_chunks, pc, pc_lru);
 	pv = &pc->pc_pventry[0];
 	TAILQ_INSERT_HEAD(&pmap->pm_pvchunk, pc, pc_list);
 	PV_STAT(pv_entry_spare += _NPCPV - 1);
@@ -4347,15 +4422,8 @@ pmap_remove_pages(pmap_t pmap)
 			}
 		}
 		if (allfree) {
-			PV_STAT(pv_entry_spare -= _NPCPV);
-			PV_STAT(pc_chunk_count--);
-			PV_STAT(pc_chunk_frees++);
 			TAILQ_REMOVE(&pmap->pm_pvchunk, pc, pc_list);
-			m = PHYS_TO_VM_PAGE(pmap_kextract((vm_offset_t)pc));
-			pmap_qremove((vm_offset_t)pc, 1);
-			vm_page_unwire(m, 0);
-			vm_page_free(m);
-			pmap_ptelist_free(&pv_vafree, (vm_offset_t)pc);
+			free_pv_chunk(pc);
 		}
 	}
 	sched_unpin();
diff --git a/sys/i386/include/pmap.h b/sys/i386/include/pmap.h
index 3012a000a..3e0ba6f99 100644
--- a/sys/i386/include/pmap.h
+++ b/sys/i386/include/pmap.h
@@ -481,7 +481,7 @@ struct pv_chunk {
 	pmap_t			pc_pmap;
 	TAILQ_ENTRY(pv_chunk)	pc_list;
 	uint32_t		pc_map[_NPCM];	/* bitmap; 1 = free */
-	uint32_t		pc_spare[2];
+	TAILQ_ENTRY(pv_chunk)	pc_lru;
 	struct pv_entry		pc_pventry[_NPCPV];
 };
 
diff --git a/sys/i386/xen/pmap.c b/sys/i386/xen/pmap.c
index 189f311da..38e6097e9 100644
--- a/sys/i386/xen/pmap.c
+++ b/sys/i386/xen/pmap.c
@@ -230,6 +231,7 @@ static int pat_works;			/* Is page attribute table sane? */
 /*
  * Data for the pv entry allocation mechanism
  */
+static TAILQ_HEAD(pch, pv_chunk) pv_chunks = TAILQ_HEAD_INITIALIZER(pv_chunks);
 static int pv_entry_count = 0, pv_entry_max = 0, pv_entry_high_water = 0;
 static int shpgperproc = PMAP_SHPGPERPROC;
 
@@ -277,8 +278,9 @@ SYSCTL_INT(_debug, OID_AUTO, PMAP1unchanged, CTLFLAG_RD,
	   "Number of times pmap_pte_quick didn't change PMAP1");
 static struct mtx PMAP2mutex;
 
+static void	free_pv_chunk(struct pv_chunk *pc);
 static void	free_pv_entry(pmap_t pmap, pv_entry_t pv);
-static pv_entry_t get_pv_entry(pmap_t locked_pmap, int try);
+static pv_entry_t get_pv_entry(pmap_t pmap, boolean_t try);
 static void	pmap_pvh_free(struct md_page *pvh, pmap_t pmap, vm_offset_t va);
 static pv_entry_t pmap_pvh_remove(struct md_page *pvh, pmap_t pmap,
		    vm_offset_t va);
@@ -1914,6 +1916,7 @@ pmap_growkernel(vm_offset_t addr)
 
 CTASSERT(sizeof(struct pv_chunk) == PAGE_SIZE);
 CTASSERT(_NPCM == 11);
+CTASSERT(_NPCPV == 336);
 
 static __inline struct pv_chunk *
 pv_to_chunk(pv_entry_t pv)
@@ -1927,7 +1930,7 @@ pv_to_chunk(pv_entry_t pv)
 #define	PC_FREE0_9	0xfffffffful	/* Free values for index 0 through 9 */
 #define	PC_FREE10	0x0000fffful	/* Free values for index 10 */
 
-static uint32_t pc_freemask[11] = {
+static uint32_t pc_freemask[_NPCM] = {
 	PC_FREE0_9, PC_FREE0_9, PC_FREE0_9,
 	PC_FREE0_9, PC_FREE0_9, PC_FREE0_9,
 	PC_FREE0_9, PC_FREE0_9, PC_FREE0_9,
@@ -1958,74 +1961,141 @@ SYSCTL_LONG(_vm_pmap, OID_AUTO, pv_entry_allocs, CTLFLAG_RD, &pv_entry_allocs, 0
	"Current number of pv entry allocs");
 SYSCTL_INT(_vm_pmap, OID_AUTO, pv_entry_spare, CTLFLAG_RD, &pv_entry_spare, 0,
	"Current number of spare pv entries");
-
-static int pmap_collect_inactive, pmap_collect_active;
-
-SYSCTL_INT(_vm_pmap, OID_AUTO, pmap_collect_inactive, CTLFLAG_RD, &pmap_collect_inactive, 0,
-	"Current number times pmap_collect called on inactive queue");
-SYSCTL_INT(_vm_pmap, OID_AUTO, pmap_collect_active, CTLFLAG_RD, &pmap_collect_active, 0,
-	"Current number times pmap_collect called on active queue");
 #endif
 
 /*
  * We are in a serious low memory condition.  Resort to
  * drastic measures to free some pages so we can allocate
- * another pv entry chunk.  This is normally called to
- * unmap inactive pages, and if necessary, active pages.
+ * another pv entry chunk.
  */
-static void
-pmap_collect(pmap_t locked_pmap, struct vpgqueues *vpq)
+static vm_page_t
+pmap_pv_reclaim(pmap_t locked_pmap)
 {
+	struct pch newtail;
+	struct pv_chunk *pc;
 	pmap_t pmap;
 	pt_entry_t *pte, tpte;
-	pv_entry_t next_pv, pv;
+	pv_entry_t pv;
 	vm_offset_t va;
-	vm_page_t m, free;
-
+	vm_page_t free, m, m_pc;
+	uint32_t inuse, freemask;
+	int bit, field, freed;
+
+	PMAP_LOCK_ASSERT(locked_pmap, MA_OWNED);
+	pmap = NULL;
+	free = m_pc = NULL;
+	TAILQ_INIT(&newtail);
 	sched_pin();
-	TAILQ_FOREACH(m, &vpq->pl, pageq) {
-		if ((m->flags & PG_MARKER) != 0 || m->hold_count || m->busy)
-			continue;
-		TAILQ_FOREACH_SAFE(pv, &m->md.pv_list, pv_list, next_pv) {
-			va = pv->pv_va;
-			pmap = PV_PMAP(pv);
+	while ((pc = TAILQ_FIRST(&pv_chunks)) != NULL && (pv_vafree == 0 ||
+	    free == NULL)) {
+		TAILQ_REMOVE(&pv_chunks, pc, pc_lru);
+		if (pmap != pc->pc_pmap) {
+			if (pmap != NULL) {
+				pmap_invalidate_all(pmap);
+				if (pmap != locked_pmap)
+					PMAP_UNLOCK(pmap);
+			}
+			pmap = pc->pc_pmap;
 			/* Avoid deadlock and lock recursion. */
 			if (pmap > locked_pmap)
 				PMAP_LOCK(pmap);
-			else if (pmap != locked_pmap && !PMAP_TRYLOCK(pmap))
+			else if (pmap != locked_pmap && !PMAP_TRYLOCK(pmap)) {
+				pmap = NULL;
+				TAILQ_INSERT_TAIL(&newtail, pc, pc_lru);
 				continue;
-			pmap->pm_stats.resident_count--;
-			pte = pmap_pte_quick(pmap, va);
-			tpte = pte_load_clear(pte);
-			KASSERT((tpte & PG_W) == 0,
-			    ("pmap_collect: wired pte %#jx", (uintmax_t)tpte));
-			if (tpte & PG_A)
-				vm_page_aflag_set(m, PGA_REFERENCED);
-			if ((tpte & (PG_M | PG_RW)) == (PG_M | PG_RW))
-				vm_page_dirty(m);
-			free = NULL;
-			pmap_unuse_pt(pmap, va, &free);
-			pmap_invalidate_page(pmap, va);
-			pmap_free_zero_pages(free);
-			TAILQ_REMOVE(&m->md.pv_list, pv, pv_list);
-			free_pv_entry(pmap, pv);
-			if (pmap != locked_pmap)
-				PMAP_UNLOCK(pmap);
+			}
 		}
+
+		/*
+		 * Destroy every non-wired, 4 KB page mapping in the chunk.
+		 */
+		freed = 0;
+		for (field = 0; field < _NPCM; field++) {
+			freemask = 0;
+			for (inuse = ~pc->pc_map[field] & pc_freemask[field];
+			    inuse != 0; inuse &= ~(1UL << bit)) {
+				bit = bsfl(inuse);
+				pv = &pc->pc_pventry[field * 32 + bit];
+				va = pv->pv_va;
+				pte = pmap_pte_quick(pmap, va);
+				if ((*pte & PG_W) != 0)
+					continue;
+				tpte = pte_load_clear(pte);
+				if ((tpte & PG_G) != 0)
+					pmap_invalidate_page(pmap, va);
+				m = PHYS_TO_VM_PAGE(tpte & PG_FRAME);
+				if ((tpte & (PG_M | PG_RW)) == (PG_M | PG_RW))
+					vm_page_dirty(m);
+				if ((tpte & PG_A) != 0)
+					vm_page_aflag_set(m, PGA_REFERENCED);
+				TAILQ_REMOVE(&m->md.pv_list, pv, pv_list);
+				if (TAILQ_EMPTY(&m->md.pv_list))
+					vm_page_aflag_clear(m, PGA_WRITEABLE);
+				pmap_unuse_pt(pmap, va, &free);
+				freemask |= 1UL << bit;
+				freed++;
+			}
+			pc->pc_map[field] |= freemask;
+		}
+		if (freed == 0) {
+			TAILQ_INSERT_TAIL(&newtail, pc, pc_lru);
+			continue;
+		}
+		pmap->pm_stats.resident_count -= freed;
+		PV_STAT(pv_entry_frees += freed);
+		PV_STAT(pv_entry_spare += freed);
+		pv_entry_count -= freed;
+		TAILQ_REMOVE(&pmap->pm_pvchunk, pc, pc_list);
+		for (field = 0; field < _NPCM; field++)
+			if (pc->pc_map[field] != pc_freemask[field]) {
+				TAILQ_INSERT_HEAD(&pmap->pm_pvchunk, pc,
+				    pc_list);
+				TAILQ_INSERT_TAIL(&newtail, pc, pc_lru);
+
+				/*
+				 * One freed pv entry in locked_pmap is
+				 * sufficient.
+				 */
+				if (pmap == locked_pmap)
+					goto out;
+				break;
+			}
+		if (field == _NPCM) {
+			PV_STAT(pv_entry_spare -= _NPCPV);
+			PV_STAT(pc_chunk_count--);
+			PV_STAT(pc_chunk_frees++);
+			/* Entire chunk is free; return it. */
+			m_pc = PHYS_TO_VM_PAGE(pmap_kextract((vm_offset_t)pc));
+			pmap_qremove((vm_offset_t)pc, 1);
+			pmap_ptelist_free(&pv_vafree, (vm_offset_t)pc);
+			break;
 		}
-		if (TAILQ_EMPTY(&m->md.pv_list))
-			vm_page_aflag_clear(m, PGA_WRITEABLE);
 	}
+out:
 	sched_unpin();
+	TAILQ_CONCAT(&pv_chunks, &newtail, pc_lru);
+	if (pmap != NULL) {
+		pmap_invalidate_all(pmap);
+		if (pmap != locked_pmap)
+			PMAP_UNLOCK(pmap);
+	}
+	if (m_pc == NULL && pv_vafree != 0 && free != NULL) {
+		m_pc = free;
+		free = m_pc->right;
+		/* Recycle a freed page table page. */
+		m_pc->wire_count = 1;
+		atomic_add_int(&cnt.v_wire_count, 1);
+	}
+	pmap_free_zero_pages(free);
+	return (m_pc);
 }
-
 /*
  * free the pv_entry back to the free list
  */
 static void
 free_pv_entry(pmap_t pmap, pv_entry_t pv)
 {
-	vm_page_t m;
 	struct pv_chunk *pc;
 	int idx, field, bit;
 
@@ -2046,6 +2116,15 @@ free_pv_entry(pmap_t pmap, pv_entry_t pv)
 		TAILQ_INSERT_HEAD(&pmap->pm_pvchunk, pc, pc_list);
 		return;
 	}
+	free_pv_chunk(pc);
+}
+
+static void
+free_pv_chunk(struct pv_chunk *pc)
+{
+	vm_page_t m;
+
+	TAILQ_REMOVE(&pv_chunks, pc, pc_lru);
 	PV_STAT(pv_entry_spare -= _NPCPV);
 	PV_STAT(pc_chunk_count--);
 	PV_STAT(pc_chunk_frees++);
@@ -2062,11 +2141,10 @@ free_pv_entry(pmap_t pmap, pv_entry_t pv)
  * when needed.
  */
 static pv_entry_t
-get_pv_entry(pmap_t pmap, int try)
+get_pv_entry(pmap_t pmap, boolean_t try)
 {
 	static const struct timeval printinterval = { 60, 0 };
 	static struct timeval lastprint;
-	struct vpgqueues *pq;
 	int bit, field;
 	pv_entry_t pv;
 	struct pv_chunk *pc;
@@ -2081,7 +2159,6 @@ get_pv_entry(pmap_t pmap, int try)
 		printf("Approaching the limit on PV entries, consider "
 		    "increasing either the vm.pmap.shpgperproc or the "
 		    "vm.pmap.pv_entry_max tunable.\n");
-	pq = NULL;
 retry:
 	pc = TAILQ_FIRST(&pmap->pm_pvchunk);
 	if (pc != NULL) {
@@ -2102,6 +2179,10 @@ retry:
 		}
 		TAILQ_REMOVE(&pmap->pm_pvchunk, pc, pc_list);
 		TAILQ_INSERT_TAIL(&pmap->pm_pvchunk, pc, pc_list);
+		if (pc != TAILQ_LAST(&pv_chunks, pch)) {
+			TAILQ_REMOVE(&pv_chunks, pc, pc_lru);
+			TAILQ_INSERT_TAIL(&pv_chunks, pc, pc_lru);
+		}
 		PV_STAT(pv_entry_spare--);
 		return (pv);
 	}
@@ -2111,29 +2192,16 @@ retry:
	 * queues lock.  If "pv_vafree" is currently non-empty, it will
	 * remain non-empty until pmap_ptelist_alloc() completes.
	 */
-	if (pv_vafree == 0 || (m = vm_page_alloc(NULL, 0, (pq ==
-	    &vm_page_queues[PQ_ACTIVE] ? VM_ALLOC_SYSTEM : VM_ALLOC_NORMAL) |
+	if (pv_vafree == 0 || (m = vm_page_alloc(NULL, 0, VM_ALLOC_NORMAL |
 	    VM_ALLOC_NOOBJ | VM_ALLOC_WIRED)) == NULL) {
 		if (try) {
 			pv_entry_count--;
 			PV_STAT(pc_chunk_tryfail++);
 			return (NULL);
 		}
-		/*
-		 * Reclaim pv entries: At first, destroy mappings to
-		 * inactive pages.  After that, if a pv chunk entry
-		 * is still needed, destroy mappings to active pages.
-		 */
-		if (pq == NULL) {
-			PV_STAT(pmap_collect_inactive++);
-			pq = &vm_page_queues[PQ_INACTIVE];
-		} else if (pq == &vm_page_queues[PQ_INACTIVE]) {
-			PV_STAT(pmap_collect_active++);
-			pq = &vm_page_queues[PQ_ACTIVE];
-		} else
-			panic("get_pv_entry: increase vm.pmap.shpgperproc");
-		pmap_collect(pmap, pq);
-		goto retry;
+		m = pmap_pv_reclaim(pmap);
+		if (m == NULL)
+			goto retry;
 	}
 	PV_STAT(pc_chunk_count++);
 	PV_STAT(pc_chunk_allocs++);
@@ -2145,6 +2213,7 @@ retry:
 	pc->pc_map[0] = pc_freemask[0] & ~1ul;	/* preallocated bit 0 */
 	for (field = 1; field < _NPCM; field++)
 		pc->pc_map[field] = pc_freemask[field];
+	TAILQ_INSERT_TAIL(&pv_chunks, pc, pc_lru);
 	pv = &pc->pc_pventry[0];
 	TAILQ_INSERT_HEAD(&pmap->pm_pvchunk, pc, pc_list);
 	PV_STAT(pv_entry_spare += _NPCPV - 1);
@@ -3531,15 +3600,8 @@ pmap_remove_pages(pmap_t pmap)
 		}
 		PT_UPDATES_FLUSH();
 		if (allfree) {
-			PV_STAT(pv_entry_spare -= _NPCPV);
-			PV_STAT(pc_chunk_count--);
-			PV_STAT(pc_chunk_frees++);
 			TAILQ_REMOVE(&pmap->pm_pvchunk, pc, pc_list);
-			m = PHYS_TO_VM_PAGE(pmap_kextract((vm_offset_t)pc));
-			pmap_qremove((vm_offset_t)pc, 1);
-			vm_page_unwire(m, 0);
-			vm_page_free(m);
-			pmap_ptelist_free(&pv_vafree, (vm_offset_t)pc);
+			free_pv_chunk(pc);
 		}
 	}
 	PT_UPDATES_FLUSH();
-- 
2.45.0
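
The data-structure change is the same on all three pmaps: each page-sized pv_chunk trades its pc_spare padding for a pc_lru linkage, every chunk is kept on a global pv_chunks TAILQ ordered from least to most recently allocated-from, and pmap_pv_reclaim() walks that list from the head instead of scanning the active and inactive paging queues. The listing below is a minimal userspace sketch of the allocation side of this scheme, not kernel code: pv_alloc() and the main() driver are invented names for illustration, the amd64 sizes from the patch are assumed (_NPCM == 3, _NPCPV == 168; i386 uses 11 and 336), __builtin_ffsll() stands in for the kernel's bsfq(), and all locking, pmap state, and page-table teardown are omitted.

/* pv_chunk_sketch.c: toy model of the pv-chunk allocator (illustrative only). */
#include <sys/queue.h>
#include <stdint.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>

#define	NPCM	3	/* bitmap words per chunk (amd64 value) */
#define	NPCPV	168	/* pv entries per chunk (amd64 value) */

struct pv_entry {
	uintptr_t pv_va;		/* mapped virtual address */
};

struct pv_chunk {
	TAILQ_ENTRY(pv_chunk) pc_lru;	/* global LRU; head = coldest */
	uint64_t pc_map[NPCM];		/* bitmap; 1 = entry is free */
	struct pv_entry pc_pventry[NPCPV];
};

static TAILQ_HEAD(pch, pv_chunk) pv_chunks = TAILQ_HEAD_INITIALIZER(pv_chunks);

/* All-free masks; the last word is partial (168 - 128 = 40 bits). */
static const uint64_t pc_freemask[NPCM] = {
	~0ULL, ~0ULL, (1ULL << (NPCPV - 128)) - 1
};

/* Take one free pv entry out of "pc"; NULL when the chunk is full. */
static struct pv_entry *
pv_alloc(struct pv_chunk *pc)
{
	int bit, field;

	for (field = 0; field < NPCM; field++) {
		if (pc->pc_map[field] == 0)
			continue;
		bit = __builtin_ffsll(pc->pc_map[field]) - 1; /* cf. bsfq() */
		pc->pc_map[field] &= ~(1ULL << bit);	/* mark allocated */
		/*
		 * Keep the LRU ordered: a chunk we just allocated from is
		 * hot, so move it to the tail; reclaim scans from the head.
		 */
		if (pc != TAILQ_LAST(&pv_chunks, pch)) {
			TAILQ_REMOVE(&pv_chunks, pc, pc_lru);
			TAILQ_INSERT_TAIL(&pv_chunks, pc, pc_lru);
		}
		return (&pc->pc_pventry[field * 64 + bit]);
	}
	return (NULL);
}

int
main(void)
{
	struct pv_chunk *pc;
	struct pv_entry *pv;

	pc = calloc(1, sizeof(*pc));
	memcpy(pc->pc_map, pc_freemask, sizeof(pc_freemask)); /* all free */
	TAILQ_INSERT_TAIL(&pv_chunks, pc, pc_lru);
	pv = pv_alloc(pc);
	printf("allocated pv entry index %td\n", pv - pc->pc_pventry);
	TAILQ_REMOVE(&pv_chunks, pc, pc_lru);
	free(pc);
	return (0);
}

Reclaim is the mirror image: pmap_pv_reclaim() pops chunks from the head of pv_chunks (the coldest end), scans ~pc_map[field] & pc_freemask[field] for in-use entries, destroys those non-wired 4 KB mappings and sets the corresponding bits free again, and returns the chunk's backing page to the VM system once every entry in it is free.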