2 * Copyright (c) 1991 Regents of the University of California.
4 * Copyright (c) 1994 John S. Dyson
6 * Copyright (c) 1994 David Greenman
8 * Copyright (c) 1998,2000 Doug Rabson
11 * This code is derived from software contributed to Berkeley by
12 * the Systems Programming Group of the University of Utah Computer
13 * Science Department and William Jolitz of UUNET Technologies Inc.
15 * Redistribution and use in source and binary forms, with or without
16 * modification, are permitted provided that the following conditions
18 * 1. Redistributions of source code must retain the above copyright
19 * notice, this list of conditions and the following disclaimer.
20 * 2. Redistributions in binary form must reproduce the above copyright
21 * notice, this list of conditions and the following disclaimer in the
22 * documentation and/or other materials provided with the distribution.
23 * 3. All advertising materials mentioning features or use of this software
24 * must display the following acknowledgement:
25 * This product includes software developed by the University of
26 * California, Berkeley and its contributors.
27 * 4. Neither the name of the University nor the names of its contributors
28 * may be used to endorse or promote products derived from this software
29 * without specific prior written permission.
31 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
32 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
33 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
34 * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
35 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
36 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
37 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
38 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
39 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
40 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
43 * from: @(#)pmap.c 7.7 (Berkeley) 5/12/91
44 * from: i386 Id: pmap.c,v 1.193 1998/04/19 15:22:48 bde Exp
45 * with some ideas from NetBSD's alpha pmap
48 #include <sys/cdefs.h>
49 __FBSDID("$FreeBSD$");
53 #include <sys/param.h>
55 #include <sys/kernel.h>
59 #include <sys/mutex.h>
61 #include <sys/rwlock.h>
63 #include <sys/sysctl.h>
64 #include <sys/systm.h>
67 #include <vm/vm_param.h>
68 #include <vm/vm_page.h>
69 #include <vm/vm_map.h>
70 #include <vm/vm_object.h>
71 #include <vm/vm_pageout.h>
74 #include <machine/bootinfo.h>
75 #include <machine/md_var.h>
76 #include <machine/pal.h>
79 * Manages physical address maps.
81 * Since the information managed by this module is
82 * also stored by the logical address mapping module,
83 * this module may throw away valid virtual-to-physical
84 * mappings at almost any time. However, invalidations
85 * of virtual-to-physical mappings must be done as
88 * In order to cope with hardware architectures which
89 * make virtual-to-physical map invalidates expensive,
90 * this module may delay invalidate or reduced protection
91 * operations until such time as they are actually
92 * necessary. This module is given full information as
93 * to which processors are currently using which maps,
94 * and to when physical maps must be made correct.
98 * Following the Linux model, region IDs are allocated in groups of
99 * eight so that a single region ID can be used for as many RRs as we
100 * want by encoding the RR number into the low bits of the ID.
102 * We reserve region ID 0 for the kernel and allocate the remaining
103 * IDs for user pmaps.
105 * Region 0-3: User virtually mapped
106 * Region 4: PBVM and special mappings
107 * Region 5: Kernel virtual memory
108 * Region 6: Direct-mapped uncacheable
109 * Region 7: Direct-mapped cacheable
112 /* XXX move to a header. */
113 extern uint64_t ia64_gateway_page[];
/*
 * PMAP_INLINE: inline helpers only when not building a DIAGNOSTIC kernel.
 * PV_STAT: pv-entry statistics hook; the first definition (under the
 * statistics-enabled build) evaluates its argument, the second is a no-op.
 * NOTE(review): the #else/#endif lines between the two definitions are
 * missing from this listing — confirm against the full source.
 */
115 #if !defined(DIAGNOSTIC)
116 #define PMAP_INLINE __inline
122 #define PV_STAT(x) do { x ; } while (0)
124 #define PV_STAT(x) do { } while (0)
/*
 * Read-only accessors for individual fields packed into the 64-bit
 * ia64_lpte PTE word (accessed/dirty/exec/managed/PPN/present/prot/wired).
 * pmap_prot() extracts the protection bits from bits 56 and up.
 */
127 #define pmap_accessed(lpte) ((lpte)->pte & PTE_ACCESSED)
128 #define pmap_dirty(lpte) ((lpte)->pte & PTE_DIRTY)
129 #define pmap_exec(lpte) ((lpte)->pte & PTE_AR_RX)
130 #define pmap_managed(lpte) ((lpte)->pte & PTE_MANAGED)
131 #define pmap_ppn(lpte) ((lpte)->pte & PTE_PPN_MASK)
132 #define pmap_present(lpte) ((lpte)->pte & PTE_PRESENT)
133 #define pmap_prot(lpte) (((lpte)->pte & PTE_PROT_MASK) >> 56)
134 #define pmap_wired(lpte) ((lpte)->pte & PTE_WIRED)
/* Mutators: clear or set single bits in the PTE word (not atomic). */
136 #define pmap_clear_accessed(lpte) (lpte)->pte &= ~PTE_ACCESSED
137 #define pmap_clear_dirty(lpte) (lpte)->pte &= ~PTE_DIRTY
138 #define pmap_clear_present(lpte) (lpte)->pte &= ~PTE_PRESENT
139 #define pmap_clear_wired(lpte) (lpte)->pte &= ~PTE_WIRED
141 #define pmap_set_wired(lpte) (lpte)->pte |= PTE_WIRED
144 * Individual PV entries are stored in per-pmap chunks. This saves
145 * space by eliminating the need to record the pmap within every PV
/*
 * struct pv_chunk: a page-sized container of pv_entry structures.
 * pc_map is a free bitmap (1 = entry free); _NPCM/_NPCPV are chosen per
 * PAGE_SIZE (see the #if PAGE_SIZE tests) so the chunk fills one page.
 * NOTE(review): the struct header line and the pc_pmap member are not
 * visible in this listing (PV_PMAP() below dereferences pc_pmap).
 */
148 #if PAGE_SIZE == 8192
152 #elif PAGE_SIZE == 16384
159 TAILQ_ENTRY(pv_chunk) pc_list;
160 u_long pc_map[_NPCM]; /* bitmap; 1 = free */
161 TAILQ_ENTRY(pv_chunk) pc_lru;
162 u_long pc_spare[_NPCS];
163 struct pv_entry pc_pventry[_NPCPV];
167 * The VHPT bucket head structure.
/* Statically allocated kernel pmap; kernel_pmap presumably points here. */
176 * Statically allocated kernel pmap
178 struct pmap kernel_pmap_store;
180 vm_offset_t virtual_avail; /* VA of first avail page (after kernel bss) */
181 vm_offset_t virtual_end; /* VA of last avail page (end of kernel AS) */
184 * Kernel virtual memory management.
/* nkpt (count of kernel PT pages) is declared in the elided lines here. */
187 extern struct ia64_lpte ***ia64_kptdir;
/*
 * Indices into the 3-level kernel page-table radix (dir0 -> dir1 -> leaf).
 * NOTE(review): shift constants assume 8-byte dir entries and 32-byte
 * ia64_lpte leaves — confirm against sizeof(struct ia64_lpte).
 */
189 #define KPTE_DIR0_INDEX(va) \
190 (((va) >> (3*PAGE_SHIFT-8)) & ((1<<(PAGE_SHIFT-3))-1))
191 #define KPTE_DIR1_INDEX(va) \
192 (((va) >> (2*PAGE_SHIFT-5)) & ((1<<(PAGE_SHIFT-3))-1))
193 #define KPTE_PTE_INDEX(va) \
194 (((va) >> PAGE_SHIFT) & ((1<<(PAGE_SHIFT-5))-1))
195 #define NKPTEPG (PAGE_SIZE / sizeof(struct ia64_lpte))
196 vm_offset_t kernel_vm_end;
/*
 * ptc.e (global TLB purge) loop parameters; overwritten from PAL in
 * pmap_bootstrap(). Defaults purge exactly one entry.
 */
199 /* Defaults for ptc.e. */
200 static uint64_t pmap_ptc_e_base = 0;
201 static uint32_t pmap_ptc_e_count1 = 1;
202 static uint32_t pmap_ptc_e_count2 = 1;
203 static uint32_t pmap_ptc_e_stride1 = 0;
204 static uint32_t pmap_ptc_e_stride2 = 0;
206 struct mtx pmap_ptc_mutex;
/* Region ID (RID) allocator state: a bitmap of in-use RIDs. */
209 * Data for the RID allocator
211 static int pmap_ridcount;
212 static int pmap_rididx;
213 static int pmap_ridmapsz;
214 static int pmap_ridmax;
215 static uint64_t *pmap_ridmap;
216 struct mtx pmap_ridmutex;
/* Global pv list lock; write-held by all pv list manipulation below. */
218 static struct rwlock_padalign pvh_global_lock;
221 * Data for the pv entry allocation mechanism
223 static TAILQ_HEAD(pch, pv_chunk) pv_chunks = TAILQ_HEAD_INITIALIZER(pv_chunks);
224 static int pv_entry_count;
227 * Data for allocating PTEs for user processes.
229 static uma_zone_t ptezone;
/*
 * Virtual Hash Page Table (VHPT): tunables and read-only sysctls under
 * machdep.vhpt.*. Sized/configured in pmap_bootstrap().
 */
232 * Virtual Hash Page Table (VHPT) data.
234 /* SYSCTL_DECL(_machdep); */
235 static SYSCTL_NODE(_machdep, OID_AUTO, vhpt, CTLFLAG_RD, 0, "");
237 struct ia64_bucket *pmap_vhpt_bucket;
239 int pmap_vhpt_nbuckets;
240 SYSCTL_INT(_machdep_vhpt, OID_AUTO, nbuckets, CTLFLAG_RD,
241 &pmap_vhpt_nbuckets, 0, "");
243 int pmap_vhpt_log2size = 0;
244 TUNABLE_INT("machdep.vhpt.log2size", &pmap_vhpt_log2size);
245 SYSCTL_INT(_machdep_vhpt, OID_AUTO, log2size, CTLFLAG_RD,
246 &pmap_vhpt_log2size, 0, "");
248 static int pmap_vhpt_inserts;
249 SYSCTL_INT(_machdep_vhpt, OID_AUTO, inserts, CTLFLAG_RD,
250 &pmap_vhpt_inserts, 0, "");
252 static int pmap_vhpt_population(SYSCTL_HANDLER_ARGS);
253 SYSCTL_PROC(_machdep_vhpt, OID_AUTO, population, CTLTYPE_INT | CTLFLAG_RD,
254 NULL, 0, pmap_vhpt_population, "I", "");
/* Forward declarations for the static helpers defined later in the file. */
256 static struct ia64_lpte *pmap_find_vhpt(vm_offset_t va);
258 static void free_pv_chunk(struct pv_chunk *pc);
259 static void free_pv_entry(pmap_t pmap, pv_entry_t pv);
260 static pv_entry_t get_pv_entry(pmap_t pmap, boolean_t try);
261 static vm_page_t pmap_pv_reclaim(pmap_t locked_pmap);
263 static void pmap_enter_quick_locked(pmap_t pmap, vm_offset_t va,
264 vm_page_t m, vm_prot_t prot);
265 static void pmap_free_pte(struct ia64_lpte *pte, vm_offset_t va);
266 static int pmap_remove_pte(pmap_t pmap, struct ia64_lpte *pte,
267 vm_offset_t va, pv_entry_t pv, int freepte);
268 static int pmap_remove_vhpt(vm_offset_t va);
269 static boolean_t pmap_try_insert_pv_entry(pmap_t pmap, vm_offset_t va,
/*
 * Initialize every VHPT slot at 'vhpt' to an invalid entry (tag bit 63
 * set) and point each slot's chain at its collision-bucket head.
 * NOTE(review): listing is fragmentary — braces and the loop index
 * declaration are in elided lines.
 */
273 pmap_initialize_vhpt(vm_offset_t vhpt)
275 struct ia64_lpte *pte;
278 pte = (struct ia64_lpte *)vhpt;
279 for (i = 0; i < pmap_vhpt_nbuckets; i++) {
282 pte[i].tag = 1UL << 63; /* Invalid tag */
283 pte[i].chain = (uintptr_t)(pmap_vhpt_bucket + i);
/*
 * Allocate a naturally-aligned, physically-contiguous VHPT of
 * 2^pmap_vhpt_log2size bytes, map it through region 7 (cacheable
 * direct map) and initialize its entries.
 * NOTE(review): return type, failure handling and return statement are
 * in elided lines.
 */
289 pmap_alloc_vhpt(void)
295 size = 1UL << pmap_vhpt_log2size;
296 m = vm_page_alloc_contig(NULL, 0, VM_ALLOC_SYSTEM | VM_ALLOC_NOOBJ |
297 VM_ALLOC_WIRED, atop(size), 0UL, ~0UL, size, 0UL,
300 vhpt = IA64_PHYS_TO_RR7(VM_PAGE_TO_PHYS(m));
301 pmap_initialize_vhpt(vhpt);
309 * Bootstrap the system enough to run with virtual memory.
/*
 * Runs once at boot, before the VM system is up:
 *  1. query PAL for ptc.e purge-loop parameters;
 *  2. set up the Region ID allocator (RIDs 0..7 reserved for kernel);
 *  3. allocate the initial kernel page-table directory;
 *  4. size, allocate and enable the VHPT (via the PTA register);
 *  5. initialize kernel_pmap and the global pv lock;
 *  6. flush stale TLB entries.
 * NOTE(review): local declarations, braces and several statements are in
 * elided lines of this listing.
 */
314 struct ia64_pal_result res;
320 * Query the PAL Code to find the loop parameters for the
323 res = ia64_call_pal_static(PAL_PTCE_INFO, 0, 0, 0);
324 if (res.pal_status != 0)
325 panic("Can't configure ptc.e parameters");
326 pmap_ptc_e_base = res.pal_result[0];
/* count1/stride1 live in the high 32 bits, count2/stride2 in the low. */
327 pmap_ptc_e_count1 = res.pal_result[1] >> 32;
328 pmap_ptc_e_count2 = res.pal_result[1];
329 pmap_ptc_e_stride1 = res.pal_result[2] >> 32;
330 pmap_ptc_e_stride2 = res.pal_result[2];
332 printf("ptc.e base=0x%lx, count1=%u, count2=%u, "
333 "stride1=0x%x, stride2=0x%x\n",
340 mtx_init(&pmap_ptc_mutex, "PTC.G mutex", NULL, MTX_SPIN);
343 * Setup RIDs. RIDs 0..7 are reserved for the kernel.
345 * We currently need at least 19 bits in the RID because PID_MAX
346 * can only be encoded in 17 bits and we need RIDs for 4 regions
347 * per process. With PID_MAX equalling 99999 this means that we
348 * need to be able to encode 399996 (=4*PID_MAX).
349 * The Itanium processor only has 18 bits and the architected
350 * minimum is exactly that. So, we cannot use a PID based scheme
351 * in those cases. Enter pmap_ridmap...
352 * We should avoid the map when running on a processor that has
353 * implemented enough bits. This means that we should pass the
354 * process/thread ID to pmap. This we currently don't do, so we
355 * use the map anyway. However, we don't want to allocate a map
356 * that is large enough to cover the range dictated by the number
357 * of bits in the RID, because that may result in a RID map of
358 * 2MB in size for a 24-bit RID. A 64KB map is enough.
359 * The bottomline: we create a 32KB map when the processor only
360 * implements 18 bits (or when we can't figure it out). Otherwise
361 * we create a 64KB map.
363 res = ia64_call_pal_static(PAL_VM_SUMMARY, 0, 0, 0);
364 if (res.pal_status != 0) {
366 printf("Can't read VM Summary - assuming 18 Region ID bits\n");
367 ridbits = 18; /* guaranteed minimum */
369 ridbits = (res.pal_result[1] >> 8) & 0xff;
371 printf("Processor supports %d Region ID bits\n",
377 pmap_ridmax = (1 << ridbits);
378 pmap_ridmapsz = pmap_ridmax / 64;
379 pmap_ridmap = ia64_physmem_alloc(pmap_ridmax / 8, PAGE_SIZE);
/* Mark RIDs 0..7 (kernel-reserved) as allocated. */
380 pmap_ridmap[0] |= 0xff;
383 mtx_init(&pmap_ridmutex, "RID allocator lock", NULL, MTX_DEF);
386 * Allocate some memory for initial kernel 'page tables'.
388 ia64_kptdir = ia64_physmem_alloc(PAGE_SIZE, PAGE_SIZE);
390 kernel_vm_end = VM_INIT_KERNEL_ADDRESS;
393 * Determine a valid (mappable) VHPT size.
/* Clamp log2size to [16, 28] and force it even. */
395 TUNABLE_INT_FETCH("machdep.vhpt.log2size", &pmap_vhpt_log2size);
396 if (pmap_vhpt_log2size == 0)
397 pmap_vhpt_log2size = 20;
398 else if (pmap_vhpt_log2size < 16)
399 pmap_vhpt_log2size = 16;
400 else if (pmap_vhpt_log2size > 28)
401 pmap_vhpt_log2size = 28;
402 if (pmap_vhpt_log2size & 1)
403 pmap_vhpt_log2size--;
405 size = 1UL << pmap_vhpt_log2size;
406 base = (uintptr_t)ia64_physmem_alloc(size, size);
408 panic("Unable to allocate VHPT");
410 PCPU_SET(md.vhpt, base);
412 printf("VHPT: address=%#lx, size=%#lx\n", base, size);
414 pmap_vhpt_nbuckets = size / sizeof(struct ia64_lpte);
415 pmap_vhpt_bucket = ia64_physmem_alloc(pmap_vhpt_nbuckets *
416 sizeof(struct ia64_bucket), PAGE_SIZE);
417 for (i = 0; i < pmap_vhpt_nbuckets; i++) {
418 /* Stolen memory is zeroed. */
419 mtx_init(&pmap_vhpt_bucket[i].mutex, "VHPT bucket lock", NULL,
420 MTX_NOWITNESS | MTX_SPIN);
423 pmap_initialize_vhpt(base);
/* Program the PTA register: base | size | enable bit. */
425 ia64_set_pta(base + (1 << 8) + (pmap_vhpt_log2size << 2) + 1);
428 virtual_avail = VM_INIT_KERNEL_ADDRESS;
429 virtual_end = VM_MAX_KERNEL_ADDRESS;
432 * Initialize the kernel pmap (which is statically allocated).
434 PMAP_LOCK_INIT(kernel_pmap);
435 for (i = 0; i < IA64_VM_MINKERN_REGION; i++)
436 kernel_pmap->pm_rid[i] = 0;
437 TAILQ_INIT(&kernel_pmap->pm_pvchunk);
438 PCPU_SET(md.current_pmap, kernel_pmap);
441 * Initialize the global pv list lock.
443 rw_init(&pvh_global_lock, "pmap pv global");
445 /* Region 5 is mapped via the VHPT. */
446 ia64_set_rr(IA64_RR_BASE(5), (5 << 8) | (PAGE_SHIFT << 2) | 1);
449 * Clear out any random TLB entries left over from booting.
451 pmap_invalidate_all();
/*
 * Sysctl handler for machdep.vhpt.population: sum the per-bucket chain
 * lengths and report the total via SYSCTL_OUT.
 */
457 pmap_vhpt_population(SYSCTL_HANDLER_ARGS)
462 for (i = 0; i < pmap_vhpt_nbuckets; i++)
463 count += pmap_vhpt_bucket[i].length;
465 error = SYSCTL_OUT(req, &count, sizeof(count));
/*
 * Return the direct-mapped KVA of page 'm': region 6 (uncacheable) when
 * the page's memattr is uncacheable, region 7 (cacheable) otherwise.
 */
470 pmap_page_to_va(vm_page_t m)
475 pa = VM_PAGE_TO_PHYS(m);
476 va = (m->md.memattr == VM_MEMATTR_UNCACHEABLE) ? IA64_PHYS_TO_RR6(pa) :
477 IA64_PHYS_TO_RR7(pa);
482 * Initialize a vm_page's machine-dependent fields.
/* Empty pv list, default memory attribute. */
485 pmap_page_init(vm_page_t m)
488 CTR2(KTR_PMAP, "%s(m=%p)", __func__, m);
490 TAILQ_INIT(&m->md.pv_list);
491 m->md.memattr = VM_MEMATTR_DEFAULT;
495 * Initialize the pmap module.
496 * Called by vm_init, to initialize any structures that the pmap
497 * system needs to map virtual memory.
/* Creates the UMA zone used for user-space ia64_lpte allocations. */
503 CTR1(KTR_PMAP, "%s()", __func__);
505 ptezone = uma_zcreate("PT ENTRY", sizeof (struct ia64_lpte),
506 NULL, NULL, NULL, NULL, UMA_ALIGN_PTR, UMA_ZONE_VM|UMA_ZONE_NOFREE);
510 /***************************************************
511 * Manipulate TLBs for a pmap
512 ***************************************************/
/*
 * Invalidate the mapping for 'va': atomically invalidate the matching
 * VHPT slot on every CPU (compare-and-swap the tag to the invalid
 * value), then issue a global ptc.ga purge under the PTC.G mutex.
 * NOTE(review): tag computation and serialization lines are elided.
 */
515 pmap_invalidate_page(vm_offset_t va)
517 struct ia64_lpte *pte;
524 vhpt_ofs = ia64_thash(va) - PCPU_GET(md.vhpt);
526 STAILQ_FOREACH(pc, &cpuhead, pc_allcpu) {
527 pte = (struct ia64_lpte *)(pc->pc_md.vhpt + vhpt_ofs);
528 atomic_cmpset_64(&pte->tag, tag, 1UL << 63);
531 mtx_lock_spin(&pmap_ptc_mutex);
533 ia64_ptc_ga(va, PAGE_SHIFT << 2);
537 mtx_unlock_spin(&pmap_ptc_mutex);
/*
 * Purge the entire local TLB by walking the ptc.e address grid obtained
 * from PAL (count1 x count2 entries, stride1/stride2 apart).
 * NOTE(review): the actual ia64_ptc_e() call sits in an elided line
 * inside the inner loop.
 */
545 pmap_invalidate_all(void)
550 addr = pmap_ptc_e_base;
551 for (i = 0; i < pmap_ptc_e_count1; i++) {
552 for (j = 0; j < pmap_ptc_e_count2; j++) {
554 addr += pmap_ptc_e_stride2;
556 addr += pmap_ptc_e_stride1;
/*
 * Allocate a free Region ID from the bitmap. Panics when all RIDs are
 * in use. Scans pmap_ridmap from pmap_rididx for a word with a clear
 * bit, marks it allocated, and returns the RID (under pmap_ridmutex).
 * NOTE(review): bit-scan, count update and return lines are elided.
 */
562 pmap_allocate_rid(void)
567 mtx_lock(&pmap_ridmutex);
568 if (pmap_ridcount == pmap_ridmax)
569 panic("pmap_allocate_rid: All Region IDs used");
571 /* Find an index with a free bit. */
572 while ((bits = pmap_ridmap[pmap_rididx]) == ~0UL) {
574 if (pmap_rididx == pmap_ridmapsz)
577 rid = pmap_rididx * 64;
579 /* Find a free bit. */
586 pmap_ridmap[pmap_rididx] |= bit;
588 mtx_unlock(&pmap_ridmutex);
/*
 * Return RID 'rid' to the allocator by clearing its bit in the map
 * word 'idx' (computed in an elided line, presumably rid / 64).
 */
594 pmap_free_rid(uint32_t rid)
600 bit = ~(1UL << (rid & 63));
602 mtx_lock(&pmap_ridmutex);
603 pmap_ridmap[idx] &= bit;
605 mtx_unlock(&pmap_ridmutex);
608 /***************************************************
609 * Page table page management routines.....
610 ***************************************************/
/*
 * Shared pmap initialization: allocate a RID per user region, reset the
 * pv-chunk list and zero the statistics.
 */
613 pmap_pinit_common(pmap_t pmap)
617 for (i = 0; i < IA64_VM_MINKERN_REGION; i++)
618 pmap->pm_rid[i] = pmap_allocate_rid();
619 TAILQ_INIT(&pmap->pm_pvchunk);
620 bzero(&pmap->pm_stats, sizeof pmap->pm_stats);
/* Initialize pmap0 (process 0): also initializes the pmap lock. */
624 pmap_pinit0(pmap_t pmap)
627 CTR2(KTR_PMAP, "%s(pm=%p)", __func__, pmap)\u003b
629 PMAP_LOCK_INIT(pmap);
630 pmap_pinit_common(pmap);
634 * Initialize a preallocated and zeroed pmap structure,
635 * such as one in a vmspace structure.
638 pmap_pinit(pmap_t pmap)
641 CTR2(KTR_PMAP, "%s(pm=%p)", __func__, pmap);
643 pmap_pinit_common(pmap);
647 /***************************************************
648 * Pmap allocation/deallocation routines.
649 ***************************************************/
652 * Release any resources held by the given physical map.
653 * Called when a pmap initialized by pmap_pinit is being released.
654 * Should only be called if the map contains no valid mappings.
/* Returns each user-region RID to the allocator. */
657 pmap_release(pmap_t pmap)
661 CTR2(KTR_PMAP, "%s(pm=%p)", __func__, pmap);
663 for (i = 0; i < IA64_VM_MINKERN_REGION; i++)
665 pmap_free_rid(pmap->pm_rid[i]);
669 * grow the number of kernel page table entries, if needed
/*
 * Extend the kernel page tables until kernel_vm_end covers 'addr'.
 * Per iteration: ensure a dir1 page exists for the address, then add a
 * leaf PTE page; each new page is allocated wired and zeroed. Panics if
 * the address space or a page allocation is exhausted.
 * NOTE(review): null checks on dir1/nkpg and some braces are elided.
 */
672 pmap_growkernel(vm_offset_t addr)
674 struct ia64_lpte **dir1;
675 struct ia64_lpte *leaf;
678 CTR2(KTR_PMAP, "%s(va=%#lx)", __func__, addr);
680 while (kernel_vm_end <= addr) {
681 if (nkpt == PAGE_SIZE/8 + PAGE_SIZE*PAGE_SIZE/64)
682 panic("%s: out of kernel address space", __func__);
684 dir1 = ia64_kptdir[KPTE_DIR0_INDEX(kernel_vm_end)];
686 nkpg = vm_page_alloc(NULL, nkpt++,
687 VM_ALLOC_NOOBJ|VM_ALLOC_INTERRUPT|VM_ALLOC_WIRED);
689 panic("%s: cannot add dir. page", __func__);
691 dir1 = (struct ia64_lpte **)pmap_page_to_va(nkpg);
692 bzero(dir1, PAGE_SIZE);
693 ia64_kptdir[KPTE_DIR0_INDEX(kernel_vm_end)] = dir1;
696 nkpg = vm_page_alloc(NULL, nkpt++,
697 VM_ALLOC_NOOBJ|VM_ALLOC_INTERRUPT|VM_ALLOC_WIRED);
699 panic("%s: cannot add PTE page", __func__);
701 leaf = (struct ia64_lpte *)pmap_page_to_va(nkpg);
702 bzero(leaf, PAGE_SIZE);
703 dir1[KPTE_DIR1_INDEX(kernel_vm_end)] = leaf;
705 kernel_vm_end += PAGE_SIZE * NKPTEPG;
709 /***************************************************
710 * page management routines.
711 ***************************************************/
/* A pv_chunk must fill exactly one page for the direct-map tricks below. */
713 CTASSERT(sizeof(struct pv_chunk) == PAGE_SIZE);
/* Recover the containing chunk from a pv_entry by masking the offset. */
715 static __inline struct pv_chunk *
716 pv_to_chunk(pv_entry_t pv)
719 return ((struct pv_chunk *)((uintptr_t)pv & ~(uintptr_t)PAGE_MASK));
722 #define PV_PMAP(pv) (pv_to_chunk(pv)->pc_pmap)
/*
 * Free-bitmap constants: PC_FREE_FULL is an all-free word; the last
 * word of pc_map is partial because _NPCPV isn't a multiple of 64.
 */
724 #define PC_FREE_FULL 0xfffffffffffffffful
725 #define PC_FREE_PARTIAL \
726 ((1UL << (_NPCPV - sizeof(u_long) * 8 * (_NPCM - 1))) - 1)
728 #if PAGE_SIZE == 8192
729 static const u_long pc_freemask[_NPCM] = {
730 PC_FREE_FULL, PC_FREE_FULL, PC_FREE_FULL,
731 PC_FREE_FULL, PC_FREE_FULL, PC_FREE_PARTIAL
733 #elif PAGE_SIZE == 16384
734 static const u_long pc_freemask[_NPCM] = {
735 PC_FREE_FULL, PC_FREE_FULL, PC_FREE_FULL,
736 PC_FREE_FULL, PC_FREE_FULL, PC_FREE_FULL,
737 PC_FREE_FULL, PC_FREE_FULL, PC_FREE_FULL,
738 PC_FREE_FULL, PC_FREE_PARTIAL
/* Read-only pv/chunk statistics exported under vm.pmap.*. */
742 static SYSCTL_NODE(_vm, OID_AUTO, pmap, CTLFLAG_RD, 0, "VM/pmap parameters");
744 SYSCTL_INT(_vm_pmap, OID_AUTO, pv_entry_count, CTLFLAG_RD, &pv_entry_count, 0,
745 "Current number of pv entries");
748 static int pc_chunk_count, pc_chunk_allocs, pc_chunk_frees, pc_chunk_tryfail;
750 SYSCTL_INT(_vm_pmap, OID_AUTO, pc_chunk_count, CTLFLAG_RD, &pc_chunk_count, 0,
751 "Current number of pv entry chunks");
752 SYSCTL_INT(_vm_pmap, OID_AUTO, pc_chunk_allocs, CTLFLAG_RD, &pc_chunk_allocs, 0,
753 "Current number of pv entry chunks allocated");
754 SYSCTL_INT(_vm_pmap, OID_AUTO, pc_chunk_frees, CTLFLAG_RD, &pc_chunk_frees, 0,
755 "Current number of pv entry chunks frees");
756 SYSCTL_INT(_vm_pmap, OID_AUTO, pc_chunk_tryfail, CTLFLAG_RD, &pc_chunk_tryfail, 0,
757 "Number of times tried to get a chunk page but failed.");
759 static long pv_entry_frees, pv_entry_allocs;
760 static int pv_entry_spare;
762 SYSCTL_LONG(_vm_pmap, OID_AUTO, pv_entry_frees, CTLFLAG_RD, &pv_entry_frees, 0,
763 "Current number of pv entry frees");
764 SYSCTL_LONG(_vm_pmap, OID_AUTO, pv_entry_allocs, CTLFLAG_RD, &pv_entry_allocs, 0,
765 "Current number of pv entry allocs");
766 SYSCTL_INT(_vm_pmap, OID_AUTO, pv_entry_spare, CTLFLAG_RD, &pv_entry_spare, 0,
767 "Current number of spare pv entries");
771 * We are in a serious low memory condition. Resort to
772 * drastic measures to free some pages so we can allocate
773 * another pv entry chunk.
/*
 * Walk the global pv_chunks LRU: for each chunk, lock its pmap (with
 * trylock to avoid deadlock against locked_pmap), destroy every
 * non-wired mapping recorded in it, and return a fully-freed chunk's
 * page to the caller for reuse. Chunks that survive are re-queued on a
 * temporary tail list and concatenated back at the end.
 * NOTE(review): several statements (pmap switch, wired check, dirty
 * handling, unlock paths, return) are in elided lines — this listing is
 * not a complete rendition of the function.
 */
776 pmap_pv_reclaim(pmap_t locked_pmap)
780 struct ia64_lpte *pte;
786 int bit, field, freed, idx;
788 PMAP_LOCK_ASSERT(locked_pmap, MA_OWNED);
791 TAILQ_INIT(&newtail);
792 while ((pc = TAILQ_FIRST(&pv_chunks)) != NULL) {
793 TAILQ_REMOVE(&pv_chunks, pc, pc_lru);
794 if (pmap != pc->pc_pmap) {
796 if (pmap != locked_pmap) {
797 pmap_switch(locked_pmap);
802 /* Avoid deadlock and lock recursion. */
803 if (pmap > locked_pmap)
805 else if (pmap != locked_pmap && !PMAP_TRYLOCK(pmap)) {
807 TAILQ_INSERT_TAIL(&newtail, pc, pc_lru);
814 * Destroy every non-wired, 8 KB page mapping in the chunk.
/* Iterate allocated entries: bits clear in pc_map but set in freemask. */
817 for (field = 0; field < _NPCM; field++) {
818 for (inuse = ~pc->pc_map[field] & pc_freemask[field];
819 inuse != 0; inuse &= ~(1UL << bit)) {
820 bit = ffsl(inuse) - 1;
821 idx = field * sizeof(inuse) * NBBY + bit;
822 pv = &pc->pc_pventry[idx];
824 pte = pmap_find_vhpt(va);
825 KASSERT(pte != NULL, ("pte"));
828 pmap_remove_vhpt(va);
829 pmap_invalidate_page(va);
830 m = PHYS_TO_VM_PAGE(pmap_ppn(pte));
831 if (pmap_accessed(pte))
832 vm_page_aflag_set(m, PGA_REFERENCED);
835 pmap_free_pte(pte, va);
836 TAILQ_REMOVE(&m->md.pv_list, pv, pv_list);
837 if (TAILQ_EMPTY(&m->md.pv_list))
838 vm_page_aflag_clear(m, PGA_WRITEABLE);
/* Mark the pv entry free again in the chunk bitmap. */
839 pc->pc_map[field] |= 1UL << bit;
844 TAILQ_INSERT_TAIL(&newtail, pc, pc_lru);
847 /* Every freed mapping is for a 8 KB page. */
848 pmap->pm_stats.resident_count -= freed;
849 PV_STAT(pv_entry_frees += freed);
850 PV_STAT(pv_entry_spare += freed);
851 pv_entry_count -= freed;
852 TAILQ_REMOVE(&pmap->pm_pvchunk, pc, pc_list);
853 for (field = 0; field < _NPCM; field++)
854 if (pc->pc_map[field] != pc_freemask[field]) {
855 TAILQ_INSERT_HEAD(&pmap->pm_pvchunk, pc,
857 TAILQ_INSERT_TAIL(&newtail, pc, pc_lru);
860 * One freed pv entry in locked_pmap is
863 if (pmap == locked_pmap)
867 if (field == _NPCM) {
868 PV_STAT(pv_entry_spare -= _NPCPV);
869 PV_STAT(pc_chunk_count--);
870 PV_STAT(pc_chunk_frees++);
871 /* Entire chunk is free; return it. */
872 m_pc = PHYS_TO_VM_PAGE(IA64_RR_MASK((vm_offset_t)pc));
877 TAILQ_CONCAT(&pv_chunks, &newtail, pc_lru);
879 if (pmap != locked_pmap) {
880 pmap_switch(locked_pmap);
888 * free the pv_entry back to the free list
/*
 * Mark 'pv' free in its chunk's bitmap. If the chunk still has entries
 * in use, keep it at the head of the pmap's chunk list; a fully-free
 * chunk is removed (and, per free_pv_chunk below, released).
 * Requires the global pv lock and the pmap lock.
 */
891 free_pv_entry(pmap_t pmap, pv_entry_t pv)
896 rw_assert(&pvh_global_lock, RA_WLOCKED);
897 PMAP_LOCK_ASSERT(pmap, MA_OWNED);
898 PV_STAT(pv_entry_frees++);
899 PV_STAT(pv_entry_spare++);
901 pc = pv_to_chunk(pv);
902 idx = pv - &pc->pc_pventry[0];
903 field = idx / (sizeof(u_long) * NBBY);
904 bit = idx % (sizeof(u_long) * NBBY);
905 pc->pc_map[field] |= 1ul << bit;
906 for (idx = 0; idx < _NPCM; idx++)
907 if (pc->pc_map[idx] != pc_freemask[idx]) {
909 * 98% of the time, pc is already at the head of the
910 * list. If it isn't already, move it to the head.
912 if (__predict_false(TAILQ_FIRST(&pmap->pm_pvchunk) !=
914 TAILQ_REMOVE(&pmap->pm_pvchunk, pc, pc_list);
915 TAILQ_INSERT_HEAD(&pmap->pm_pvchunk, pc,
920 TAILQ_REMOVE(&pmap->pm_pvchunk, pc, pc_list);
/*
 * Release a completely-free pv chunk: unhook it from the global LRU,
 * update statistics, translate its direct-map address back to the
 * vm_page and unwire the page (freeing happens in an elided line).
 */
925 free_pv_chunk(struct pv_chunk *pc)
929 TAILQ_REMOVE(&pv_chunks, pc, pc_lru);
930 PV_STAT(pv_entry_spare -= _NPCPV);
931 PV_STAT(pc_chunk_count--);
932 PV_STAT(pc_chunk_frees++);
933 /* entire chunk is free, return it */
934 m = PHYS_TO_VM_PAGE(IA64_RR_MASK((vm_offset_t)pc));
935 vm_page_unwire(m, 0);
940 * get a new pv_entry, allocating a block from the system
/*
 * Allocate a pv_entry for 'pmap'. Fast path: take a free slot from the
 * chunk at the head of pm_pvchunk; if the chunk becomes full it is
 * moved to the tail. Slow path: allocate a new wired page for a fresh
 * chunk; when 'try' is set a failed allocation returns (NULL is in an
 * elided line), otherwise pmap_pv_reclaim() is invoked.
 * Requires the global pv lock and the pmap lock.
 */
944 get_pv_entry(pmap_t pmap, boolean_t try)
951 rw_assert(&pvh_global_lock, RA_WLOCKED);
952 PMAP_LOCK_ASSERT(pmap, MA_OWNED);
953 PV_STAT(pv_entry_allocs++);
956 pc = TAILQ_FIRST(&pmap->pm_pvchunk);
958 for (field = 0; field < _NPCM; field++) {
959 if (pc->pc_map[field]) {
960 bit = ffsl(pc->pc_map[field]) - 1;
965 idx = field * sizeof(pc->pc_map[field]) * NBBY + bit;
966 pv = &pc->pc_pventry[idx];
967 pc->pc_map[field] &= ~(1ul << bit);
968 /* If this was the last item, move it to tail */
969 for (field = 0; field < _NPCM; field++)
970 if (pc->pc_map[field] != 0) {
971 PV_STAT(pv_entry_spare--);
972 return (pv); /* not full, return */
974 TAILQ_REMOVE(&pmap->pm_pvchunk, pc, pc_list);
975 TAILQ_INSERT_TAIL(&pmap->pm_pvchunk, pc, pc_list);
976 PV_STAT(pv_entry_spare--);
980 /* No free items, allocate another chunk */
981 m = vm_page_alloc(NULL, 0, VM_ALLOC_NORMAL | VM_ALLOC_NOOBJ |
986 PV_STAT(pc_chunk_tryfail++);
989 m = pmap_pv_reclaim(pmap);
993 PV_STAT(pc_chunk_count++);
994 PV_STAT(pc_chunk_allocs++);
/* Chunk lives at the page's region-7 direct-map address. */
995 pc = (struct pv_chunk *)IA64_PHYS_TO_RR7(VM_PAGE_TO_PHYS(m));
997 pc->pc_map[0] = pc_freemask[0] & ~1ul; /* preallocated bit 0 */
998 for (field = 1; field < _NPCM; field++)
999 pc->pc_map[field] = pc_freemask[field];
1000 TAILQ_INSERT_TAIL(&pv_chunks, pc, pc_lru);
1001 pv = &pc->pc_pventry[0];
1002 TAILQ_INSERT_HEAD(&pmap->pm_pvchunk, pc, pc_list);
1003 PV_STAT(pv_entry_spare += _NPCPV - 1);
1008 * Conditionally create a pv entry.
/*
 * Non-sleeping variant: get a pv entry with try=TRUE and, on success,
 * link it onto the page's pv list (pv_va assignment is elided).
 * Returns a boolean — presumably TRUE on success, FALSE when no pv
 * entry could be allocated.
 */
1011 pmap_try_insert_pv_entry(pmap_t pmap, vm_offset_t va, vm_page_t m)
1015 PMAP_LOCK_ASSERT(pmap, MA_OWNED);
1016 rw_assert(&pvh_global_lock, RA_WLOCKED);
1017 if ((pv = get_pv_entry(pmap, TRUE)) != NULL) {
1019 TAILQ_INSERT_TAIL(&m->md.pv_list, pv, pv_list);
1026 * Add an ia64_lpte to the VHPT.
/*
 * Insert 'pte' at the head of the collision chain of the VHPT bucket
 * that hashes 'va' (chain links hold physical addresses). The physical
 * address is computed before taking the bucket spin lock because
 * ia64_tpa() can fault.
 */
1029 pmap_enter_vhpt(struct ia64_lpte *pte, vm_offset_t va)
1031 struct ia64_bucket *bckt;
1032 struct ia64_lpte *vhpte;
1035 /* Can fault, so get it out of the way. */
1036 pte_pa = ia64_tpa((vm_offset_t)pte);
1038 vhpte = (struct ia64_lpte *)ia64_thash(va);
1039 bckt = (struct ia64_bucket *)vhpte->chain;
1041 mtx_lock_spin(&bckt->mutex);
1042 pte->chain = bckt->chain;
1044 bckt->chain = pte_pa;
1046 pmap_vhpt_inserts++;
1048 mtx_unlock_spin(&bckt->mutex);
1052 * Remove the ia64_lpte matching va from the VHPT. Return zero if it
1053 * worked or an appropriate error code otherwise.
/*
 * Walk the bucket's collision chain (physical links mapped through
 * region 7) looking for the entry whose tag matches ia64_ttag(va), and
 * unsplice it — from the bucket head or from its predecessor 'lpte'.
 * NOTE(review): the not-found return, predecessor tracking and final
 * return value are in elided lines.
 */
1056 pmap_remove_vhpt(vm_offset_t va)
1058 struct ia64_bucket *bckt;
1059 struct ia64_lpte *pte;
1060 struct ia64_lpte *lpte;
1061 struct ia64_lpte *vhpte;
1062 uint64_t chain, tag;
1064 tag = ia64_ttag(va);
1065 vhpte = (struct ia64_lpte *)ia64_thash(va);
1066 bckt = (struct ia64_bucket *)vhpte->chain;
1069 mtx_lock_spin(&bckt->mutex);
1070 chain = bckt->chain;
1071 pte = (struct ia64_lpte *)IA64_PHYS_TO_RR7(chain);
1072 while (chain != 0 && pte->tag != tag) {
1075 pte = (struct ia64_lpte *)IA64_PHYS_TO_RR7(chain);
1078 mtx_unlock_spin(&bckt->mutex);
1082 /* Snip this pv_entry out of the collision chain. */
1084 bckt->chain = pte->chain;
1086 lpte->chain = pte->chain;
1090 mtx_unlock_spin(&bckt->mutex);
1095 * Find the ia64_lpte for the given va, if any.
/*
 * Search the VHPT collision chain for 'va' under the bucket spin lock.
 * Returns the matching ia64_lpte, or NULL when the chain is exhausted.
 */
1097 static struct ia64_lpte *
1098 pmap_find_vhpt(vm_offset_t va)
1100 struct ia64_bucket *bckt;
1101 struct ia64_lpte *pte;
1102 uint64_t chain, tag;
1104 tag = ia64_ttag(va);
1105 pte = (struct ia64_lpte *)ia64_thash(va);
1106 bckt = (struct ia64_bucket *)pte->chain;
1108 mtx_lock_spin(&bckt->mutex);
1109 chain = bckt->chain;
1110 pte = (struct ia64_lpte *)IA64_PHYS_TO_RR7(chain);
1111 while (chain != 0 && pte->tag != tag) {
1113 pte = (struct ia64_lpte *)IA64_PHYS_TO_RR7(chain);
1115 mtx_unlock_spin(&bckt->mutex);
1116 return ((chain != 0) ? pte : NULL);
1120 * Remove an entry from the list of managed mappings.
/*
 * If 'pv' is NULL, look it up on the page's pv list by (pmap, va);
 * unlink it, clear PGA_WRITEABLE when the list becomes empty, and free
 * the pv entry. NOTE(review): the NULL-pv test and the error return
 * path are in elided lines.
 */
1123 pmap_remove_entry(pmap_t pmap, vm_page_t m, vm_offset_t va, pv_entry_t pv)
1126 rw_assert(&pvh_global_lock, RA_WLOCKED);
1128 TAILQ_FOREACH(pv, &m->md.pv_list, pv_list) {
1129 if (pmap == PV_PMAP(pv) && va == pv->pv_va)
1135 TAILQ_REMOVE(&m->md.pv_list, pv, pv_list);
1136 if (TAILQ_FIRST(&m->md.pv_list) == NULL)
1137 vm_page_aflag_clear(m, PGA_WRITEABLE);
1139 free_pv_entry(pmap, pv);
1147 * Create a pv entry for page at pa for
/*
 * Unconditional variant of pmap_try_insert_pv_entry(): may sleep or
 * reclaim inside get_pv_entry(FALSE). pv_va assignment is elided.
 */
1151 pmap_insert_entry(pmap_t pmap, vm_offset_t va, vm_page_t m)
1155 rw_assert(&pvh_global_lock, RA_WLOCKED);
1156 pv = get_pv_entry(pmap, FALSE);
1158 TAILQ_INSERT_TAIL(&m->md.pv_list, pv, pv_list);
1162 * Routine: pmap_extract
1164 * Extract the physical page address associated
1165 * with the given map/virtual_address pair.
/*
 * Switch to 'pmap', look up the VHPT entry for 'va', and — if present —
 * return its physical address (the pa computation and locking are in
 * elided lines); 0 presumably results for unmapped addresses.
 */
1168 pmap_extract(pmap_t pmap, vm_offset_t va)
1170 struct ia64_lpte *pte;
1174 CTR3(KTR_PMAP, "%s(pm=%p, va=%#lx)", __func__, pmap, va);
1178 oldpmap = pmap_switch(pmap);
1179 pte = pmap_find_vhpt(va);
1180 if (pte != NULL && pmap_present(pte))
1182 pmap_switch(oldpmap);
1188 * Routine: pmap_extract_and_hold
1190 * Atomically extract and hold the physical page
1191 * with the given pmap and virtual address pair
1192 * if that mapping permits the given protection.
/*
 * Like pmap_extract(), but also wires down the result: when the mapping
 * exists and grants 'prot', the page is looked up and held under the
 * pa-tryrelock protocol (the vm_page_hold() call and lock/unlock
 * bookkeeping are in elided lines).
 */
1195 pmap_extract_and_hold(pmap_t pmap, vm_offset_t va, vm_prot_t prot)
1197 struct ia64_lpte *pte;
1202 CTR4(KTR_PMAP, "%s(pm=%p, va=%#lx, prot=%#x)", __func__, pmap, va,
1208 oldpmap = pmap_switch(pmap);
1210 pte = pmap_find_vhpt(va);
1211 if (pte != NULL && pmap_present(pte) &&
1212 (pmap_prot(pte) & prot) == prot) {
1213 m = PHYS_TO_VM_PAGE(pmap_ppn(pte));
1214 if (vm_page_pa_tryrelock(pmap, pmap_ppn(pte), &pa))
1219 pmap_switch(oldpmap);
1224 /***************************************************
1225 * Low level mapping routines.....
1226 ***************************************************/
1229 * Find the kernel lpte for mapping the given virtual address, which
1230 * must be in the part of region 5 which we can cover with our kernel
/*
 * Two-level walk of ia64_kptdir: dir0 -> dir1 -> leaf PTE. Asserts the
 * VA is a region-5 kernel address below kernel_vm_end; no NULL checks,
 * callers must have grown the tables first (see pmap_growkernel()).
 */
1233 static struct ia64_lpte *
1234 pmap_find_kpte(vm_offset_t va)
1236 struct ia64_lpte **dir1;
1237 struct ia64_lpte *leaf;
1239 KASSERT((va >> 61) == 5,
1240 ("kernel mapping 0x%lx not in region 5", va));
1241 KASSERT(va < kernel_vm_end,
1242 ("kernel mapping 0x%lx out of range", va));
1244 dir1 = ia64_kptdir[KPTE_DIR0_INDEX(va)];
1245 leaf = dir1[KPTE_DIR1_INDEX(va)];
1246 return (&leaf[KPTE_PTE_INDEX(va)]);
1250 * Find a pte suitable for mapping a user-space address. If one exists
1251 * in the VHPT, that one will be returned, otherwise a new pte is
/*
 * Kernel VAs are served by pmap_find_kpte(); user VAs first try the
 * VHPT, and on a miss a zeroed pte is allocated from ptezone with an
 * invalid tag (M_NOWAIT — the NULL-check, if any, is in elided lines).
 */
1254 static struct ia64_lpte *
1255 pmap_find_pte(vm_offset_t va)
1257 struct ia64_lpte *pte;
1259 if (va >= VM_MAXUSER_ADDRESS)
1260 return pmap_find_kpte(va);
1262 pte = pmap_find_vhpt(va);
1264 pte = uma_zalloc(ptezone, M_NOWAIT | M_ZERO);
1265 pte->tag = 1UL << 63;
1271 * Free a pte which is now unused. This simply returns it to the zone
1272 * allocator if it is a user mapping. For kernel mappings, clear the
1273 * valid bit to make it clear that the mapping is not currently used.
1276 pmap_free_pte(struct ia64_lpte *pte, vm_offset_t va)
1278 if (va < VM_MAXUSER_ADDRESS)
1279 uma_zfree(ptezone, pte);
1281 pmap_clear_present(pte);
/*
 * Encode 'prot' into the PTE: protection bits at bit 56, privilege
 * level (kernel for VM_PROT_NONE or the kernel pmap, else user), and
 * access rights from a table indexed by (prot >> 1) — i.e. by the
 * WRITE and EXECUTE bits; READ-only and NONE share prot2ar[0].
 */
1284 static PMAP_INLINE void
1285 pmap_pte_prot(pmap_t pm, struct ia64_lpte *pte, vm_prot_t prot)
1287 static long prot2ar[4] = {
1288 PTE_AR_R, /* VM_PROT_NONE */
1289 PTE_AR_RW, /* VM_PROT_WRITE */
1290 PTE_AR_RX|PTE_ED, /* VM_PROT_EXECUTE */
1291 PTE_AR_RWX|PTE_ED /* VM_PROT_WRITE|VM_PROT_EXECUTE */
1294 pte->pte &= ~(PTE_PROT_MASK | PTE_PL_MASK | PTE_AR_MASK | PTE_ED);
1295 pte->pte |= (uint64_t)(prot & VM_PROT_ALL) << 56;
1296 pte->pte |= (prot == VM_PROT_NONE || pm == kernel_pmap)
1297 ? PTE_PL_KERN : PTE_PL_USER;
1298 pte->pte |= prot2ar[(prot & VM_PROT_ALL) >> 1];
/* Replace the PTE's memory-attribute field with 'ma'. */
1301 static PMAP_INLINE void
1302 pmap_pte_attr(struct ia64_lpte *pte, vm_memattr_t ma)
1305 pte->pte &= ~PTE_MA_MASK;
1306 pte->pte |= (ma & PTE_MA_MASK);
1310 * Set a pte to contain a valid mapping and enter it in the VHPT. If
1311 * the pte was orginally valid, then its assumed to already be in the
1313 * This functions does not set the protection bits. It's expected
1314 * that those have been set correctly prior to calling this function.
/*
 * Keep only protection/attr/PL/AR/ED bits, then set PRESENT, the PPN,
 * and either MANAGED (pv-tracked) or pre-set DIRTY|ACCESSED for
 * unmanaged mappings; itir encodes the page size. The VHPT insertion
 * for previously-invalid PTEs is in elided lines before the tag store.
 */
1317 pmap_set_pte(struct ia64_lpte *pte, vm_offset_t va, vm_offset_t pa,
1318 boolean_t wired, boolean_t managed)
1321 pte->pte &= PTE_PROT_MASK | PTE_MA_MASK | PTE_PL_MASK |
1322 PTE_AR_MASK | PTE_ED;
1323 pte->pte |= PTE_PRESENT;
1324 pte->pte |= (managed) ? PTE_MANAGED : (PTE_DIRTY | PTE_ACCESSED);
1325 pte->pte |= (wired) ? PTE_WIRED : 0;
1326 pte->pte |= pa & PTE_PPN_MASK;
1328 pte->itir = PAGE_SHIFT << 2;
1332 pte->tag = ia64_ttag(va);
1336 * Remove the (possibly managed) mapping represented by pte from the
/*
 * Teardown for one mapping: unhook from the VHPT, invalidate the TLB,
 * adjust wired/resident counters, propagate dirty/accessed bits to the
 * vm_page for managed mappings, remove the pv entry, and optionally
 * free the pte itself (when 'freepte' is set — the test is elided).
 */
1340 pmap_remove_pte(pmap_t pmap, struct ia64_lpte *pte, vm_offset_t va,
1341 pv_entry_t pv, int freepte)
1347 * First remove from the VHPT.
1349 error = pmap_remove_vhpt(va);
1350 KASSERT(error == 0, ("%s: pmap_remove_vhpt returned %d",
1353 pmap_invalidate_page(va);
1355 if (pmap_wired(pte))
1356 pmap->pm_stats.wired_count -= 1;
1358 pmap->pm_stats.resident_count -= 1;
1359 if (pmap_managed(pte)) {
1360 m = PHYS_TO_VM_PAGE(pmap_ppn(pte));
1361 if (pmap_dirty(pte))
1363 if (pmap_accessed(pte))
1364 vm_page_aflag_set(m, PGA_REFERENCED);
1366 error = pmap_remove_entry(pmap, m, va, pv);
1369 pmap_free_pte(pte, va);
1375 * Extract the physical page address associated with a kernel
/*
 * Handles, in order: the direct-mapped regions 6/7, the KVA region 5,
 * the PBVM page table itself, and finally the PBVM proper (translated
 * through the PBVM page table).  Returns 0 for unmapped addresses.
 */
1379 pmap_kextract(vm_offset_t va)
1381 struct ia64_lpte *pte;
1382 uint64_t *pbvm_pgtbl;
1386 CTR2(KTR_PMAP, "%s(va=%#lx)", __func__, va);
1388 KASSERT(va >= VM_MAXUSER_ADDRESS, ("Must be kernel VA"));
1390 /* Regions 6 and 7 are direct mapped. */
1391 if (va >= IA64_RR_BASE(6)) {
1392 pa = IA64_RR_MASK(va);
1396 /* Region 5 is our KVA. Bail out if the VA is beyond our limits. */
1397 if (va >= kernel_vm_end)
1399 if (va >= VM_INIT_KERNEL_ADDRESS) {
1400 pte = pmap_find_kpte(va);
1401 pa = pmap_present(pte) ? pmap_ppn(pte) | (va & PAGE_MASK) : 0;
1405 /* The PBVM page table. */
1406 if (va >= IA64_PBVM_PGTBL + bootinfo->bi_pbvm_pgtblsz)
1408 if (va >= IA64_PBVM_PGTBL) {
/* The page table pages themselves are physically contiguous. */
1409 pa = (va - IA64_PBVM_PGTBL) + bootinfo->bi_pbvm_pgtbl;
1413 /* The PBVM itself. */
1414 if (va >= IA64_PBVM_BASE) {
1415 pbvm_pgtbl = (void *)IA64_PBVM_PGTBL;
1416 idx = (va - IA64_PBVM_BASE) >> IA64_PBVM_PAGE_SHIFT;
/* Out-of-range or non-present PBVM entries fall through as unmapped. */
1417 if (idx >= (bootinfo->bi_pbvm_pgtblsz >> 3))
1419 if ((pbvm_pgtbl[idx] & PTE_PRESENT) == 0)
1421 pa = (pbvm_pgtbl[idx] & PTE_PPN_MASK) +
1422 (va & IA64_PBVM_PAGE_MASK);
1427 printf("XXX: %s: va=%#lx is invalid\n", __func__, va);
1436 * Add a list of wired pages to the kva this routine is only used for
1437 * temporary kernel mappings that do not need to have page modification
1438 * or references recorded. Note that old mappings are simply written
1439 * over. The page is effectively wired, but it's customary to not have
1440 * the PTE reflect that, nor update statistics.
1443 pmap_qenter(vm_offset_t va, vm_page_t *m, int count)
1445 struct ia64_lpte *pte;
1448 CTR4(KTR_PMAP, "%s(va=%#lx, m_p=%p, cnt=%d)", __func__, va, m, count);
1450 for (i = 0; i < count; i++) {
1451 pte = pmap_find_kpte(va);
/* An existing mapping only needs a TLB shootdown; otherwise enter VHPT. */
1452 if (pmap_present(pte))
1453 pmap_invalidate_page(va);
1455 pmap_enter_vhpt(pte, va);
1456 pmap_pte_prot(kernel_pmap, pte, VM_PROT_ALL);
1457 pmap_pte_attr(pte, m[i]->md.memattr);
/* wired=FALSE, managed=FALSE: see the "customary" note above. */
1458 pmap_set_pte(pte, va, VM_PAGE_TO_PHYS(m[i]), FALSE, FALSE);
1464 * this routine jerks page mappings from the
1465 * kernel -- it is meant only for temporary mappings.
1468 pmap_qremove(vm_offset_t va, int count)
1470 struct ia64_lpte *pte;
1473 CTR3(KTR_PMAP, "%s(va=%#lx, cnt=%d)", __func__, va, count);
1475 for (i = 0; i < count; i++) {
1476 pte = pmap_find_kpte(va);
1477 if (pmap_present(pte)) {
/* Drop the VHPT entry, shoot down the TLB, then mark not-present. */
1478 pmap_remove_vhpt(va);
1479 pmap_invalidate_page(va);
1480 pmap_clear_present(pte);
1487 * Add a wired page to the kva. As for pmap_qenter(), it's customary
1488 * to not have the PTE reflect that, nor update statistics.
1491 pmap_kenter(vm_offset_t va, vm_paddr_t pa)
1493 struct ia64_lpte *pte;
1495 CTR3(KTR_PMAP, "%s(va=%#lx, pa=%#lx)", __func__, va, pa);
1497 pte = pmap_find_kpte(va);
/* Overwriting a live mapping requires a TLB shootdown first. */
1498 if (pmap_present(pte))
1499 pmap_invalidate_page(va);
1501 pmap_enter_vhpt(pte, va);
1502 pmap_pte_prot(kernel_pmap, pte, VM_PROT_ALL);
/* Single-page variant of pmap_qenter() with default memory attributes. */
1503 pmap_pte_attr(pte, VM_MEMATTR_DEFAULT);
1504 pmap_set_pte(pte, va, pa, FALSE, FALSE);
1508 * Remove a page from the kva
1511 pmap_kremove(vm_offset_t va)
1513 struct ia64_lpte *pte;
1515 CTR2(KTR_PMAP, "%s(va=%#lx)", __func__, va);
1517 pte = pmap_find_kpte(va);
1518 if (pmap_present(pte)) {
/* Mirror of pmap_kenter(): VHPT removal, TLB shootdown, clear present. */
1519 pmap_remove_vhpt(va);
1520 pmap_invalidate_page(va);
1521 pmap_clear_present(pte);
1526 * Used to map a range of physical addresses into kernel
1527 * virtual address space.
1529 * The value passed in '*virt' is a suggested virtual address for
1530 * the mapping. Architectures which can support a direct-mapped
1531 * physical to virtual region can return the appropriate address
1532 * within that region, leaving '*virt' unchanged. Other
1533 * architectures should map the pages starting at '*virt' and
1534 * update '*virt' with the first usable address after the mapped
1538 pmap_map(vm_offset_t *virt, vm_offset_t start, vm_offset_t end, int prot)
1541 CTR5(KTR_PMAP, "%s(va_p=%p, sva=%#lx, eva=%#lx, prot=%#x)", __func__,
1542 virt, start, end, prot);
/* ia64 has a direct map (region 7); *virt is left unchanged. */
1544 return IA64_PHYS_TO_RR7(start);
1548 * Remove the given range of addresses from the specified map.
1550 * It is assumed that the start and end are properly
1551 * rounded to the page size.
1553 * Sparsely used ranges are inefficiently removed. The VHPT is
1554 * probed for every page within the range. XXX
1557 pmap_remove(pmap_t pmap, vm_offset_t sva, vm_offset_t eva)
1561 struct ia64_lpte *pte;
1563 CTR4(KTR_PMAP, "%s(pm=%p, sva=%#lx, eva=%#lx)", __func__, pmap, sva,
1567 * Perform an unsynchronized read. This is, however, safe.
1569 if (pmap->pm_stats.resident_count == 0)
1572 rw_wlock(&pvh_global_lock);
/* Switch to the target pmap so VHPT lookups see its translations. */
1574 oldpmap = pmap_switch(pmap);
1575 for (va = sva; va < eva; va += PAGE_SIZE) {
1576 pte = pmap_find_vhpt(va);
1578 pmap_remove_pte(pmap, pte, va, 0, 1);
1580 rw_wunlock(&pvh_global_lock);
1581 pmap_switch(oldpmap);
1586 * Routine: pmap_remove_all
1588 * Removes this physical page from
1589 * all physical maps in which it resides.
1590 * Reflects back modify bits to the pager.
1593 * Original versions of this routine were very
1594 * inefficient because they iteratively called
1595 * pmap_remove (slow...)
1598 pmap_remove_all(vm_page_t m)
1603 CTR2(KTR_PMAP, "%s(m=%p)", __func__, m);
1605 KASSERT((m->oflags & VPO_UNMANAGED) == 0,
1606 ("pmap_remove_all: page %p is not managed", m));
1607 rw_wlock(&pvh_global_lock);
/* Walk the page's pv list, removing each mapping in its own pmap. */
1608 while ((pv = TAILQ_FIRST(&m->md.pv_list)) != NULL) {
1609 struct ia64_lpte *pte;
1610 pmap_t pmap = PV_PMAP(pv);
1611 vm_offset_t va = pv->pv_va;
1614 oldpmap = pmap_switch(pmap);
1615 pte = pmap_find_vhpt(va);
1616 KASSERT(pte != NULL, ("pte"));
/* Sanity: the pv entry must reference this very physical page. */
1617 if (pmap_ppn(pte) != VM_PAGE_TO_PHYS(m))
1618 panic("pmap_remove_all: pv_table for %lx is inconsistent", VM_PAGE_TO_PHYS(m));
1619 pmap_remove_pte(pmap, pte, va, pv, 1);
1620 pmap_switch(oldpmap);
/* No mappings remain, so the page can no longer be written through a PTE. */
1623 vm_page_aflag_clear(m, PGA_WRITEABLE);
1624 rw_wunlock(&pvh_global_lock);
1628 * Set the physical protection on the
1629 * specified range of this map as requested.
1632 pmap_protect(pmap_t pmap, vm_offset_t sva, vm_offset_t eva, vm_prot_t prot)
1635 struct ia64_lpte *pte;
1637 CTR5(KTR_PMAP, "%s(pm=%p, sva=%#lx, eva=%#lx, prot=%#x)", __func__,
1638 pmap, sva, eva, prot);
/* Removing read access removes the mapping entirely. */
1640 if ((prot & VM_PROT_READ) == VM_PROT_NONE) {
1641 pmap_remove(pmap, sva, eva);
/* Full write+execute means nothing to restrict. */
1645 if ((prot & (VM_PROT_WRITE|VM_PROT_EXECUTE)) ==
1646 (VM_PROT_WRITE|VM_PROT_EXECUTE))
1649 if ((sva & PAGE_MASK) || (eva & PAGE_MASK))
1650 panic("pmap_protect: unaligned addresses");
1653 oldpmap = pmap_switch(pmap);
1654 for ( ; sva < eva; sva += PAGE_SIZE) {
1655 /* If page is invalid, skip this page */
1656 pte = pmap_find_vhpt(sva);
1660 /* If there's no change, skip it too */
1661 if (pmap_prot(pte) == prot)
/* When revoking write access, reflect the dirty bit back first. */
1664 if ((prot & VM_PROT_WRITE) == 0 &&
1665 pmap_managed(pte) && pmap_dirty(pte)) {
1666 vm_paddr_t pa = pmap_ppn(pte);
1667 vm_page_t m = PHYS_TO_VM_PAGE(pa);
1670 pmap_clear_dirty(pte);
/* Granting execute requires I-cache coherency on ia64. */
1673 if (prot & VM_PROT_EXECUTE)
1674 ia64_sync_icache(sva, PAGE_SIZE);
1676 pmap_pte_prot(pmap, pte, prot);
1677 pmap_invalidate_page(sva);
1679 pmap_switch(oldpmap);
1684 * Insert the given physical page (p) at
1685 * the specified virtual address (v) in the
1686 * target physical map with the protection requested.
1688 * If specified, the page will be wired down, meaning
1689 * that the related pte can not be reclaimed.
1691 * NB: This is the only routine which MAY NOT lazy-evaluate
1692 * or lose information. That is, this routine must actually
1693 * insert this page into the given map NOW.
1696 pmap_enter(pmap_t pmap, vm_offset_t va, vm_page_t m, vm_prot_t prot,
1697 u_int flags, int8_t psind __unused)
1702 struct ia64_lpte origpte;
1703 struct ia64_lpte *pte;
1704 boolean_t icache_inval, managed, wired;
1706 CTR5(KTR_PMAP, "pmap_enter(pm=%p, va=%#lx, m=%p, prot=%#x, "
1707 "flags=%u)", pmap, va, m, prot, flags);
1709 wired = (flags & PMAP_ENTER_WIRED) != 0;
1710 rw_wlock(&pvh_global_lock);
1712 oldpmap = pmap_switch(pmap);
1715 KASSERT(va <= VM_MAX_KERNEL_ADDRESS, ("pmap_enter: toobig"));
1716 if ((m->oflags & VPO_UNMANAGED) == 0 && !vm_page_xbusied(m))
1717 VM_OBJECT_ASSERT_LOCKED(m->object);
1720 * Find (or create) a pte for the given mapping.
/*
 * pmap_find_pte() may need to allocate; on failure drop the locks,
 * honor PMAP_ENTER_NOSLEEP, and retry after reacquiring everything.
 */
1722 while ((pte = pmap_find_pte(va)) == NULL) {
1723 pmap_switch(oldpmap);
1725 rw_wunlock(&pvh_global_lock);
1726 if ((flags & PMAP_ENTER_NOSLEEP) != 0)
1727 return (KERN_RESOURCE_SHORTAGE);
1729 rw_wlock(&pvh_global_lock);
1731 oldpmap = pmap_switch(pmap);
1734 if (!pmap_present(pte)) {
1736 pmap_enter_vhpt(pte, va);
1738 opa = pmap_ppn(pte);
1740 pa = VM_PAGE_TO_PHYS(m);
1742 icache_inval = (prot & VM_PROT_EXECUTE) ? TRUE : FALSE;
1745 * Mapping has not changed, must be protection or wiring change.
1749 * Wiring change, just update stats. We don't worry about
1750 * wiring PT pages as they remain resident as long as there
1751 * are valid mappings in them. Hence, if a user page is wired,
1752 * the PT page will be also.
1754 if (wired && !pmap_wired(&origpte))
1755 pmap->pm_stats.wired_count++;
1756 else if (!wired && pmap_wired(&origpte))
1757 pmap->pm_stats.wired_count--;
1759 managed = (pmap_managed(&origpte)) ? TRUE : FALSE;
1762 * We might be turning off write access to the page,
1763 * so we go ahead and sense modify status. Otherwise,
1764 * we can avoid I-cache invalidation if the page
1765 * already allowed execution.
1767 if (managed && pmap_dirty(&origpte))
1769 else if (pmap_exec(&origpte))
1770 icache_inval = FALSE;
1772 pmap_invalidate_page(va);
1777 * Mapping has changed, invalidate old range and fall
1778 * through to handle validating new mapping.
1781 pmap_remove_pte(pmap, pte, va, 0, 0);
1782 pmap_enter_vhpt(pte, va);
1786 * Enter on the PV list if part of our managed memory.
1788 if ((m->oflags & VPO_UNMANAGED) == 0) {
1789 KASSERT(va < kmi.clean_sva || va >= kmi.clean_eva,
1790 ("pmap_enter: managed mapping within the clean submap"));
1791 pmap_insert_entry(pmap, va, m);
1796 * Increment counters
1798 pmap->pm_stats.resident_count++;
1800 pmap->pm_stats.wired_count++;
1805 * Now validate mapping with desired protection/wiring. This
1806 * adds the pte to the VHPT if necessary.
1808 pmap_pte_prot(pmap, pte, prot);
1809 pmap_pte_attr(pte, m->md.memattr);
1810 pmap_set_pte(pte, va, pa, wired, managed);
1812 /* Invalidate the I-cache when needed. */
1814 ia64_sync_icache(va, PAGE_SIZE);
1816 if ((prot & VM_PROT_WRITE) != 0 && managed)
1817 vm_page_aflag_set(m, PGA_WRITEABLE);
1818 rw_wunlock(&pvh_global_lock);
1819 pmap_switch(oldpmap);
1821 return (KERN_SUCCESS);
1825 * Maps a sequence of resident pages belonging to the same object.
1826 * The sequence begins with the given page m_start. This page is
1827 * mapped at the given virtual address start. Each subsequent page is
1828 * mapped at a virtual address that is offset from start by the same
1829 * amount as the page is offset from m_start within the object. The
1830 * last page in the sequence is the page with the largest offset from
1831 * m_start that can be mapped at a virtual address less than the given
1832 * virtual address end. Not every virtual page between start and end
1833 * is mapped; only those for which a resident page exists with the
1834 * corresponding offset from m_start are mapped.
1837 pmap_enter_object(pmap_t pmap, vm_offset_t start, vm_offset_t end,
1838 vm_page_t m_start, vm_prot_t prot)
1842 vm_pindex_t diff, psize;
1844 CTR6(KTR_PMAP, "%s(pm=%p, sva=%#lx, eva=%#lx, m=%p, prot=%#x)",
1845 __func__, pmap, start, end, m_start, prot);
1847 VM_OBJECT_ASSERT_LOCKED(m_start->object);
1849 psize = atop(end - start);
1851 rw_wlock(&pvh_global_lock);
1853 oldpmap = pmap_switch(pmap);
/* Walk the object's resident pages in order until past the range. */
1854 while (m != NULL && (diff = m->pindex - m_start->pindex) < psize) {
1855 pmap_enter_quick_locked(pmap, start + ptoa(diff), m, prot);
1856 m = TAILQ_NEXT(m, listq);
1858 rw_wunlock(&pvh_global_lock);
1859 pmap_switch(oldpmap);
1864 * this code makes some *MAJOR* assumptions:
1865 * 1. Current pmap & pmap exists.
1868 * 4. No page table pages.
1869 * but is *MUCH* faster than pmap_enter...
/* Lock-acquiring wrapper around pmap_enter_quick_locked(). */
1872 pmap_enter_quick(pmap_t pmap, vm_offset_t va, vm_page_t m, vm_prot_t prot)
1876 CTR5(KTR_PMAP, "%s(pm=%p, va=%#lx, m=%p, prot=%#x)", __func__, pmap,
1879 rw_wlock(&pvh_global_lock);
1881 oldpmap = pmap_switch(pmap);
1882 pmap_enter_quick_locked(pmap, va, m, prot);
1883 rw_wunlock(&pvh_global_lock);
1884 pmap_switch(oldpmap);
/*
 * Best-effort mapping insertion with all locks held: gives up silently
 * if a pte can't be found, the mapping already exists, or a pv entry
 * can't be allocated.  Enters a read-only (R/X at most) mapping.
 */
1889 pmap_enter_quick_locked(pmap_t pmap, vm_offset_t va, vm_page_t m,
1892 struct ia64_lpte *pte;
1895 KASSERT(va < kmi.clean_sva || va >= kmi.clean_eva ||
1896 (m->oflags & VPO_UNMANAGED) != 0,
1897 ("pmap_enter_quick_locked: managed mapping within the clean submap"));
1898 rw_assert(&pvh_global_lock, RA_WLOCKED);
1899 PMAP_LOCK_ASSERT(pmap, MA_OWNED);
1901 if ((pte = pmap_find_pte(va)) == NULL)
1904 if (!pmap_present(pte)) {
1905 /* Enter on the PV list if the page is managed. */
1906 if ((m->oflags & VPO_UNMANAGED) == 0) {
/* pv allocation is best-effort here; on failure undo the pte. */
1907 if (!pmap_try_insert_pv_entry(pmap, va, m)) {
1908 pmap_free_pte(pte, va);
1915 /* Increment counters. */
1916 pmap->pm_stats.resident_count++;
1918 /* Initialise with R/O protection and enter into VHPT. */
1919 pmap_enter_vhpt(pte, va);
1920 pmap_pte_prot(pmap, pte,
1921 prot & (VM_PROT_READ | VM_PROT_EXECUTE));
1922 pmap_pte_attr(pte, m->md.memattr);
1923 pmap_set_pte(pte, va, VM_PAGE_TO_PHYS(m), FALSE, managed);
1925 if (prot & VM_PROT_EXECUTE)
1926 ia64_sync_icache(va, PAGE_SIZE);
1931 * pmap_object_init_pt preloads the ptes for a given object
1932 * into the specified pmap. This eliminates the blast of soft
1933 * faults on process startup and immediately after an mmap.
/* No-op on ia64 beyond the sanity assertions below. */
1936 pmap_object_init_pt(pmap_t pmap, vm_offset_t addr, vm_object_t object,
1937 vm_pindex_t pindex, vm_size_t size)
1940 CTR6(KTR_PMAP, "%s(pm=%p, va=%#lx, obj=%p, idx=%lu, sz=%#lx)",
1941 __func__, pmap, addr, object, pindex, size);
1943 VM_OBJECT_ASSERT_WLOCKED(object);
1944 KASSERT(object->type == OBJT_DEVICE || object->type == OBJT_SG,
1945 ("pmap_object_init_pt: non-device object"));
1949 * Clear the wired attribute from the mappings for the specified range of
1950 * addresses in the given pmap. Every valid mapping within that range
1951 * must have the wired attribute set. In contrast, invalid mappings
1952 * cannot have the wired attribute set, so they are ignored.
1954 * The wired attribute of the page table entry is not a hardware feature,
1955 * so there is no need to invalidate any TLB entries.
1958 pmap_unwire(pmap_t pmap, vm_offset_t sva, vm_offset_t eva)
1961 struct ia64_lpte *pte;
1963 CTR4(KTR_PMAP, "%s(%p, %#x, %#x)", __func__, pmap, sva, eva);
1966 oldpmap = pmap_switch(pmap);
1967 for (; sva < eva; sva += PAGE_SIZE) {
1968 pte = pmap_find_vhpt(sva);
/* Per the contract above, any valid mapping here must be wired. */
1971 if (!pmap_wired(pte))
1972 panic("pmap_unwire: pte %p isn't wired", pte);
1973 pmap->pm_stats.wired_count--;
1974 pmap_clear_wired(pte);
1976 pmap_switch(oldpmap);
1981 * Copy the range specified by src_addr/len
1982 * from the source map to the range dst_addr/len
1983 * in the destination map.
1985 * This routine is only advisory and need not do anything.
/* Advisory only: intentionally does nothing besides tracing. */
1988 pmap_copy(pmap_t dst_pmap, pmap_t src_pmap, vm_offset_t dst_va, vm_size_t len,
1992 CTR6(KTR_PMAP, "%s(dpm=%p, spm=%p, dva=%#lx, sz=%#lx, sva=%#lx)",
1993 __func__, dst_pmap, src_pmap, dst_va, len, src_va);
1997 * pmap_zero_page zeros the specified hardware page by
1998 * mapping it into virtual memory and using bzero to clear
/* On ia64 the page is reached through the direct map (pmap_page_to_va). */
2002 pmap_zero_page(vm_page_t m)
2006 CTR2(KTR_PMAP, "%s(m=%p)", __func__, m);
2008 p = (void *)pmap_page_to_va(m);
2009 bzero(p, PAGE_SIZE);
2013 * pmap_zero_page_area zeros the specified hardware page by
2014 * mapping it into virtual memory and using bzero to clear
2017 * off and size must reside within a single page.
2020 pmap_zero_page_area(vm_page_t m, int off, int size)
2024 CTR4(KTR_PMAP, "%s(m=%p, ofs=%d, len=%d)", __func__, m, off, size);
2026 p = (void *)pmap_page_to_va(m);
2027 bzero(p + off, size);
2031 * pmap_zero_page_idle zeros the specified hardware page by
2032 * mapping it into virtual memory and using bzero to clear
2033 * its contents. This is for the vm_idlezero process.
2036 pmap_zero_page_idle(vm_page_t m)
2040 CTR2(KTR_PMAP, "%s(m=%p)", __func__, m);
2042 p = (void *)pmap_page_to_va(m);
2043 bzero(p, PAGE_SIZE);
2047 * pmap_copy_page copies the specified (machine independent)
2048 * page by mapping the page into virtual memory and using
2049 * bcopy to copy the page, one machine dependent page at a
2053 pmap_copy_page(vm_page_t msrc, vm_page_t mdst)
2057 CTR3(KTR_PMAP, "%s(sm=%p, dm=%p)", __func__, msrc, mdst);
2059 src = (void *)pmap_page_to_va(msrc);
2060 dst = (void *)pmap_page_to_va(mdst);
2061 bcopy(src, dst, PAGE_SIZE);
/*
 * pmap_copy_pages: copy xfersize bytes between two scatter/gather page
 * arrays at arbitrary byte offsets, one page-bounded chunk at a time.
 */
2065 pmap_copy_pages(vm_page_t ma[], vm_offset_t a_offset, vm_page_t mb[],
2066 vm_offset_t b_offset, int xfersize)
2069 vm_offset_t a_pg_offset, b_pg_offset;
2072 CTR6(KTR_PMAP, "%s(m0=%p, va0=%#lx, m1=%p, va1=%#lx, sz=%#x)",
2073 __func__, ma, a_offset, mb, b_offset, xfersize);
2075 while (xfersize > 0) {
2076 a_pg_offset = a_offset & PAGE_MASK;
/* Chunk is limited by the page-remainder on BOTH source and dest. */
2077 cnt = min(xfersize, PAGE_SIZE - a_pg_offset);
2078 a_cp = (char *)pmap_page_to_va(ma[a_offset >> PAGE_SHIFT]) +
2080 b_pg_offset = b_offset & PAGE_MASK;
2081 cnt = min(cnt, PAGE_SIZE - b_pg_offset);
2082 b_cp = (char *)pmap_page_to_va(mb[b_offset >> PAGE_SHIFT]) +
2084 bcopy(a_cp, b_cp, cnt);
2092 * Returns true if the pmap's pv is one of the first
2093 * 16 pvs linked to from this page. This count may
2094 * be changed upwards or downwards in the future; it
2095 * is only necessary that true be returned for a small
2096 * subset of pmaps for proper page aging.
2099 pmap_page_exists_quick(pmap_t pmap, vm_page_t m)
2105 CTR3(KTR_PMAP, "%s(pm=%p, m=%p)", __func__, pmap, m);
2107 KASSERT((m->oflags & VPO_UNMANAGED) == 0,
2108 ("pmap_page_exists_quick: page %p is not managed", m));
2110 rw_wlock(&pvh_global_lock);
/* Scan the pv list for an entry belonging to the given pmap. */
2111 TAILQ_FOREACH(pv, &m->md.pv_list, pv_list) {
2112 if (PV_PMAP(pv) == pmap) {
2120 rw_wunlock(&pvh_global_lock);
2125 * pmap_page_wired_mappings:
2127 * Return the number of managed mappings to the given physical page
2131 pmap_page_wired_mappings(vm_page_t m)
2133 struct ia64_lpte *pte;
2134 pmap_t oldpmap, pmap;
2138 CTR2(KTR_PMAP, "%s(m=%p)", __func__, m);
/* Unmanaged pages have no pv list to count. */
2141 if ((m->oflags & VPO_UNMANAGED) != 0)
2143 rw_wlock(&pvh_global_lock);
2144 TAILQ_FOREACH(pv, &m->md.pv_list, pv_list) {
/* Each lookup must run with the owning pmap's translations active. */
2147 oldpmap = pmap_switch(pmap);
2148 pte = pmap_find_vhpt(pv->pv_va);
2149 KASSERT(pte != NULL, ("pte"));
2150 if (pmap_wired(pte))
2152 pmap_switch(oldpmap);
2155 rw_wunlock(&pvh_global_lock);
2160 * Remove all pages from specified address space
2161 * this aids process exit speeds. Also, this code
2162 * is special cased for current process only, but
2163 * can have the more generic (and slightly slower)
2164 * mode enabled. This is much faster than pmap_remove
2165 * in the case of running down an entire address space.
2168 pmap_remove_pages(pmap_t pmap)
2170 struct pv_chunk *pc, *npc;
2171 struct ia64_lpte *pte;
2176 u_long inuse, bitmask;
2177 int allfree, bit, field, idx;
2179 CTR2(KTR_PMAP, "%s(pm=%p)", __func__, pmap);
2181 rw_wlock(&pvh_global_lock);
2183 oldpmap = pmap_switch(pmap);
/* Walk every pv chunk, decoding the in-use bitmap into pv entries. */
2184 TAILQ_FOREACH_SAFE(pc, &pmap->pm_pvchunk, pc_list, npc) {
2186 for (field = 0; field < _NPCM; field++) {
2187 inuse = ~pc->pc_map[field] & pc_freemask[field];
2188 while (inuse != 0) {
2189 bit = ffsl(inuse) - 1;
2190 bitmask = 1UL << bit;
2191 idx = field * sizeof(inuse) * NBBY + bit;
2192 pv = &pc->pc_pventry[idx];
2195 pte = pmap_find_vhpt(va);
2196 KASSERT(pte != NULL, ("pte"));
/* Wired mappings are left in place (chunk can't be freed then). */
2197 if (pmap_wired(pte)) {
2201 pmap_remove_vhpt(va);
2202 pmap_invalidate_page(va);
2203 m = PHYS_TO_VM_PAGE(pmap_ppn(pte));
2204 if (pmap_dirty(pte))
2206 pmap_free_pte(pte, va);
2208 PV_STAT(pv_entry_frees++);
2209 PV_STAT(pv_entry_spare++);
2211 pc->pc_map[field] |= bitmask;
2212 pmap->pm_stats.resident_count--;
2213 TAILQ_REMOVE(&m->md.pv_list, pv, pv_list);
2214 if (TAILQ_EMPTY(&m->md.pv_list))
2215 vm_page_aflag_clear(m, PGA_WRITEABLE);
/* Entirely-free chunks are unlinked from the pmap. */
2219 TAILQ_REMOVE(&pmap->pm_pvchunk, pc, pc_list);
2223 pmap_switch(oldpmap);
2225 rw_wunlock(&pvh_global_lock);
2229 * pmap_ts_referenced:
2231 * Return a count of reference bits for a page, clearing those bits.
2232 * It is not necessary for every reference bit to be cleared, but it
2233 * is necessary that 0 only be returned when there are truly no
2234 * reference bits set.
2236 * XXX: The exact number of bits to check and clear is a matter that
2237 * should be tested and standardized at some point in the future for
2238 * optimal aging of shared pages.
2241 pmap_ts_referenced(vm_page_t m)
2243 struct ia64_lpte *pte;
2244 pmap_t oldpmap, pmap;
2248 CTR2(KTR_PMAP, "%s(m=%p)", __func__, m);
2250 KASSERT((m->oflags & VPO_UNMANAGED) == 0,
2251 ("pmap_ts_referenced: page %p is not managed", m));
2252 rw_wlock(&pvh_global_lock);
/* Test-and-clear the accessed bit in every mapping of the page. */
2253 TAILQ_FOREACH(pv, &m->md.pv_list, pv_list) {
2256 oldpmap = pmap_switch(pmap);
2257 pte = pmap_find_vhpt(pv->pv_va);
2258 KASSERT(pte != NULL, ("pte"));
2259 if (pmap_accessed(pte)) {
2261 pmap_clear_accessed(pte);
/* TLB may cache the accessed bit, so invalidate after clearing. */
2262 pmap_invalidate_page(pv->pv_va);
2264 pmap_switch(oldpmap);
2267 rw_wunlock(&pvh_global_lock);
2274 * Return whether or not the specified physical page was modified
2275 * in any physical maps.
2278 pmap_is_modified(vm_page_t m)
2280 struct ia64_lpte *pte;
2281 pmap_t oldpmap, pmap;
2285 CTR2(KTR_PMAP, "%s(m=%p)", __func__, m);
2287 KASSERT((m->oflags & VPO_UNMANAGED) == 0,
2288 ("pmap_is_modified: page %p is not managed", m));
2292 * If the page is not exclusive busied, then PGA_WRITEABLE cannot be
2293 * concurrently set while the object is locked. Thus, if PGA_WRITEABLE
2294 * is clear, no PTEs can be dirty.
2296 VM_OBJECT_ASSERT_WLOCKED(m->object);
2297 if (!vm_page_xbusied(m) && (m->aflags & PGA_WRITEABLE) == 0)
2299 rw_wlock(&pvh_global_lock);
/* Stop at the first mapping whose dirty bit is set. */
2300 TAILQ_FOREACH(pv, &m->md.pv_list, pv_list) {
2303 oldpmap = pmap_switch(pmap);
2304 pte = pmap_find_vhpt(pv->pv_va);
2305 pmap_switch(oldpmap);
2306 KASSERT(pte != NULL, ("pte"));
2307 rv = pmap_dirty(pte) ? TRUE : FALSE;
2312 rw_wunlock(&pvh_global_lock);
2317 * pmap_is_prefaultable:
2319 * Return whether or not the specified virtual address is elgible
/* Eligible only when no valid mapping currently exists at addr. */
2323 pmap_is_prefaultable(pmap_t pmap, vm_offset_t addr)
2325 struct ia64_lpte *pte;
2327 CTR3(KTR_PMAP, "%s(pm=%p, va=%#lx)", __func__, pmap, addr);
2329 pte = pmap_find_vhpt(addr);
2330 if (pte != NULL && pmap_present(pte))
2336 * pmap_is_referenced:
2338 * Return whether or not the specified physical page was referenced
2339 * in any physical maps.
2342 pmap_is_referenced(vm_page_t m)
2344 struct ia64_lpte *pte;
2345 pmap_t oldpmap, pmap;
2349 CTR2(KTR_PMAP, "%s(m=%p)", __func__, m);
2351 KASSERT((m->oflags & VPO_UNMANAGED) == 0,
2352 ("pmap_is_referenced: page %p is not managed", m));
2354 rw_wlock(&pvh_global_lock);
/* Stop at the first mapping whose accessed bit is set. */
2355 TAILQ_FOREACH(pv, &m->md.pv_list, pv_list) {
2358 oldpmap = pmap_switch(pmap);
2359 pte = pmap_find_vhpt(pv->pv_va);
2360 pmap_switch(oldpmap);
2361 KASSERT(pte != NULL, ("pte"));
2362 rv = pmap_accessed(pte) ? TRUE : FALSE;
2367 rw_wunlock(&pvh_global_lock);
2372 * Apply the given advice to the specified range of addresses within the
2373 * given pmap. Depending on the advice, clear the referenced and/or
2374 * modified flags in each mapping and set the mapped page's dirty field.
2377 pmap_advise(pmap_t pmap, vm_offset_t sva, vm_offset_t eva, int advice)
2379 struct ia64_lpte *pte;
2383 CTR5(KTR_PMAP, "%s(pm=%p, sva=%#lx, eva=%#lx, adv=%d)", __func__,
2384 pmap, sva, eva, advice);
2387 oldpmap = pmap_switch(pmap);
2388 for (; sva < eva; sva += PAGE_SIZE) {
2389 /* If page is invalid, skip this page. */
2390 pte = pmap_find_vhpt(sva);
2394 /* If it isn't managed, skip it too. */
2395 if (!pmap_managed(pte))
2398 /* Clear its modified and referenced bits. */
2399 if (pmap_dirty(pte)) {
2400 if (advice == MADV_DONTNEED) {
2402 * Future calls to pmap_is_modified() can be
2403 * avoided by making the page dirty now.
2405 m = PHYS_TO_VM_PAGE(pmap_ppn(pte));
2408 pmap_clear_dirty(pte);
2409 } else if (!pmap_accessed(pte))
2411 pmap_clear_accessed(pte);
/* Flush the stale dirty/accessed state from the TLB. */
2412 pmap_invalidate_page(sva);
2414 pmap_switch(oldpmap);
2419 * Clear the modify bits on the specified physical page.
2422 pmap_clear_modify(vm_page_t m)
2424 struct ia64_lpte *pte;
2425 pmap_t oldpmap, pmap;
2428 CTR2(KTR_PMAP, "%s(m=%p)", __func__, m);
2430 KASSERT((m->oflags & VPO_UNMANAGED) == 0,
2431 ("pmap_clear_modify: page %p is not managed", m));
2432 VM_OBJECT_ASSERT_WLOCKED(m->object);
2433 KASSERT(!vm_page_xbusied(m),
2434 ("pmap_clear_modify: page %p is exclusive busied", m));
2437 * If the page is not PGA_WRITEABLE, then no PTEs can be modified.
2438 * If the object containing the page is locked and the page is not
2439 * exclusive busied, then PGA_WRITEABLE cannot be concurrently set.
2441 if ((m->aflags & PGA_WRITEABLE) == 0)
2443 rw_wlock(&pvh_global_lock);
/* Clear the dirty bit in every mapping of the page. */
2444 TAILQ_FOREACH(pv, &m->md.pv_list, pv_list) {
2447 oldpmap = pmap_switch(pmap);
2448 pte = pmap_find_vhpt(pv->pv_va);
2449 KASSERT(pte != NULL, ("pte"));
2450 if (pmap_dirty(pte)) {
2451 pmap_clear_dirty(pte);
2452 pmap_invalidate_page(pv->pv_va);
2454 pmap_switch(oldpmap);
2457 rw_wunlock(&pvh_global_lock);
2461 * Clear the write and modified bits in each of the given page's mappings.
2464 pmap_remove_write(vm_page_t m)
2466 struct ia64_lpte *pte;
2467 pmap_t oldpmap, pmap;
2471 CTR2(KTR_PMAP, "%s(m=%p)", __func__, m);
2473 KASSERT((m->oflags & VPO_UNMANAGED) == 0,
2474 ("pmap_remove_write: page %p is not managed", m));
2477 * If the page is not exclusive busied, then PGA_WRITEABLE cannot be
2478 * set by another thread while the object is locked. Thus,
2479 * if PGA_WRITEABLE is clear, no page table entries need updating.
2481 VM_OBJECT_ASSERT_WLOCKED(m->object);
2482 if (!vm_page_xbusied(m) && (m->aflags & PGA_WRITEABLE) == 0)
2484 rw_wlock(&pvh_global_lock);
2485 TAILQ_FOREACH(pv, &m->md.pv_list, pv_list) {
2488 oldpmap = pmap_switch(pmap);
2489 pte = pmap_find_vhpt(pv->pv_va);
2490 KASSERT(pte != NULL, ("pte"));
2491 prot = pmap_prot(pte);
2492 if ((prot & VM_PROT_WRITE) != 0) {
/* Reflect the dirty bit back to the page before dropping write access. */
2493 if (pmap_dirty(pte)) {
2495 pmap_clear_dirty(pte);
2497 prot &= ~VM_PROT_WRITE;
2498 pmap_pte_prot(pmap, pte, prot);
2499 pmap_pte_attr(pte, m->md.memattr);
2500 pmap_invalidate_page(pv->pv_va);
2502 pmap_switch(oldpmap);
/* No writable mappings remain. */
2505 vm_page_aflag_clear(m, PGA_WRITEABLE);
2506 rw_wunlock(&pvh_global_lock);
/*
 * pmap_mapdev_priv: translate a device physical range to a direct-mapped
 * VA, choosing cacheable region 7 only when the EFI memory descriptor
 * says the range supports write-back; otherwise the uncacheable region 6.
 * Caches the last answer in statics to skip repeated efi_md lookups.
 * NOTE(review): caching statics imply single-threaded or externally
 * serialized callers — confirm against call sites.
 */
2510 pmap_mapdev_priv(vm_paddr_t pa, vm_size_t sz, vm_memattr_t attr)
2512 static vm_offset_t last_va = 0;
2513 static vm_paddr_t last_pa = ~0UL;
2514 static vm_size_t last_sz = 0;
2517 if (pa == last_pa && sz == last_sz)
2520 md = efi_md_find(pa);
/* No descriptor: fall back to the uncacheable direct map. */
2522 printf("%s: [%#lx..%#lx] not covered by memory descriptor\n",
2523 __func__, pa, pa + sz - 1);
2524 return (IA64_PHYS_TO_RR6(pa));
2527 if (md->md_type == EFI_MD_TYPE_FREE) {
2528 printf("%s: [%#lx..%#lx] is in DRAM\n", __func__, pa,
2533 last_va = (md->md_attr & EFI_MD_ATTR_WB) ? IA64_PHYS_TO_RR7(pa) :
2534 IA64_PHYS_TO_RR6(pa);
2541 * Map a set of physical memory pages into the kernel virtual
2542 * address space. Return a pointer to where it is mapped. This
2543 * routine is intended to be used for mapping device memory,
2547 pmap_mapdev_attr(vm_paddr_t pa, vm_size_t sz, vm_memattr_t attr)
2551 CTR4(KTR_PMAP, "%s(pa=%#lx, sz=%#lx, attr=%#x)", __func__, pa, sz,
2554 va = pmap_mapdev_priv(pa, sz, attr);
2555 return ((void *)(uintptr_t)va);
2559 * 'Unmap' a range mapped by pmap_mapdev_attr().
/* Direct-mapped addresses need no teardown; tracing only. */
2562 pmap_unmapdev(vm_offset_t va, vm_size_t size)
2565 CTR3(KTR_PMAP, "%s(va=%#lx, sz=%#lx)", __func__, va, size);
2569 * Sets the memory attribute for the specified page.
/*
 * Per-CPU rendezvous callback: issue the PAL call named by arg with
 * interrupts disabled (used for PAL_PREFETCH_VISIBILITY / PAL_MC_DRAIN).
 */
2572 pmap_page_set_memattr_1(void *arg)
2574 struct ia64_pal_result res;
2576 uintptr_t pp = (uintptr_t)arg;
2578 is = intr_disable();
2579 res = ia64_call_pal_static(pp, 0, 0, 0);
/*
 * pmap_page_set_memattr: change the memory attribute of every existing
 * mapping of the page, then perform the PAL sequencing required when
 * transitioning to/through uncacheable attributes (prefetch visibility,
 * D-cache flush, machine-check drain), across all CPUs when SMP.
 */
2584 pmap_page_set_memattr(vm_page_t m, vm_memattr_t ma)
2586 struct ia64_lpte *pte;
2587 pmap_t oldpmap, pmap;
2591 CTR3(KTR_PMAP, "%s(m=%p, attr=%#x)", __func__, m, ma);
2593 rw_wlock(&pvh_global_lock);
2595 TAILQ_FOREACH(pv, &m->md.pv_list, pv_list) {
2598 oldpmap = pmap_switch(pmap);
2599 pte = pmap_find_vhpt(pv->pv_va);
2600 KASSERT(pte != NULL, ("pte"));
2601 pmap_pte_attr(pte, ma);
2602 pmap_invalidate_page(pv->pv_va);
2603 pmap_switch(oldpmap);
2606 rw_wunlock(&pvh_global_lock);
2608 if (ma == VM_MEMATTR_UNCACHEABLE) {
2610 smp_rendezvous(NULL, pmap_page_set_memattr_1, NULL,
2611 (void *)PAL_PREFETCH_VISIBILITY);
2613 pmap_page_set_memattr_1((void *)PAL_PREFETCH_VISIBILITY);
2615 va = (void *)pmap_page_to_va(m);
2617 cpu_flush_dcache(va, PAGE_SIZE);
2620 smp_rendezvous(NULL, pmap_page_set_memattr_1, NULL,
2621 (void *)PAL_MC_DRAIN);
2623 pmap_page_set_memattr_1((void *)PAL_MC_DRAIN);
2629 * perform the pmap work for mincore
2632 pmap_mincore(pmap_t pmap, vm_offset_t addr, vm_paddr_t *locked_pa)
2635 struct ia64_lpte *pte, tpte;
2639 CTR4(KTR_PMAP, "%s(pm=%p, va=%#lx, pa_p=%p)", __func__, pmap, addr,
2644 oldpmap = pmap_switch(pmap);
2645 pte = pmap_find_vhpt(addr);
2650 pmap_switch(oldpmap);
2651 if (pte == NULL || !pmap_present(pte)) {
2655 val = MINCORE_INCORE;
2656 if (pmap_dirty(pte))
2657 val |= MINCORE_MODIFIED | MINCORE_MODIFIED_OTHER;
2658 if (pmap_accessed(pte))
2659 val |= MINCORE_REFERENCED | MINCORE_REFERENCED_OTHER;
/*
 * For managed pages without both "other" bits already decided, the
 * caller also needs the page's object — take the page lock via the
 * tryrelock protocol so PHYS_TO_VM_PAGE(pa)->object stays stable.
 */
2660 if ((val & (MINCORE_MODIFIED_OTHER | MINCORE_REFERENCED_OTHER)) !=
2661 (MINCORE_MODIFIED_OTHER | MINCORE_REFERENCED_OTHER) &&
2662 pmap_managed(pte)) {
2664 /* Ensure that "PHYS_TO_VM_PAGE(pa)->object" doesn't change. */
2665 if (vm_page_pa_tryrelock(pmap, pa, locked_pa))
2669 PA_UNLOCK_COND(*locked_pa);
/* Activate the pmap of the given thread's process on this CPU. */
2678 pmap_activate(struct thread *td)
2681 CTR2(KTR_PMAP, "%s(td=%p)", __func__, td);
2683 pmap_switch(vmspace_pmap(td->td_proc->p_vmspace));
/*
 * pmap_switch: install pm's region IDs into the user region registers
 * (regions 0..IA64_VM_MINKERN_REGION-1) and record it as the current
 * pmap; returns the previously-current pmap so callers can restore it.
 * A NULL-ish/kernel case programs the default RIDs (i << 8) instead.
 */
2687 pmap_switch(pmap_t pm)
2693 prevpm = PCPU_GET(md.current_pmap);
2697 for (i = 0; i < IA64_VM_MINKERN_REGION; i++) {
2698 ia64_set_rr(IA64_RR_BASE(i),
2699 (i << 8)|(PAGE_SHIFT << 2)|1);
2702 for (i = 0; i < IA64_VM_MINKERN_REGION; i++) {
2703 ia64_set_rr(IA64_RR_BASE(i),
2704 (pm->pm_rid[i] << 8)|(PAGE_SHIFT << 2)|1);
2707 PCPU_SET(md.current_pmap, pm);
/*
 * pmap_sync_icache: make the I-cache coherent for [va, va+sz) in pm,
 * page by page, skipping unmapped pages.
 */
2719 pmap_sync_icache(pmap_t pm, vm_offset_t va, vm_size_t sz)
2722 struct ia64_lpte *pte;
2726 CTR4(KTR_PMAP, "%s(pm=%p, va=%#lx, sz=%#lx)", __func__, pm, va, sz);
/* Round the size up to the 32-byte cache-line granule. */
2730 sz = (sz + 31) & ~31;
2733 oldpm = pmap_switch(pm);
2735 lim = round_page(va);
2736 len = MIN(lim - va, sz);
2737 pte = pmap_find_vhpt(va);
2738 if (pte != NULL && pmap_present(pte))
2739 ia64_sync_icache(va, len);
2748 * Increase the starting virtual address of the given mapping if a
2749 * different alignment might result in more superpage mappings.
/* No superpage support here: tracing only. */
2752 pmap_align_superpage(vm_object_t object, vm_ooffset_t offset,
2753 vm_offset_t *addr, vm_size_t size)
2756 CTR5(KTR_PMAP, "%s(obj=%p, ofs=%#lx, va_p=%p, sz=%#lx)", __func__,
2757 object, offset, addr, size);
2760 #include "opt_ddb.h"
2764 #include <ddb/ddb.h>
/* Human-readable names for the 32 possible ia64 page-size encodings. */
2766 static const char* psnames[] = {
2767 "1B", "2B", "4B", "8B",
2768 "16B", "32B", "64B", "128B",
2769 "256B", "512B", "1K", "2K",
2770 "4K", "8K", "16K", "32K",
2771 "64K", "128K", "256K", "512K",
2772 "1M", "2M", "4M", "8M",
2773 "16M", "32M", "64M", "128M",
2774 "256M", "512M", "1G", "2G"
/*
 * NOTE(review): the listing elides the function header here — this is
 * presumably the body of a db_print_trs()-style helper that dumps the
 * instruction/data translation registers via PAL_VM_TR_READ; confirm
 * against the full source.
 */
2780 struct ia64_pal_result res;
2788 static const char *manames[] = {
2789 "WB", "bad", "bad", "bad",
2790 "UC", "UCE", "WC", "NaT",
2793 res = ia64_call_pal_static(PAL_VM_SUMMARY, 0, 0, 0);
2794 if (res.pal_status != 0) {
2795 db_printf("Can't get VM summary\n");
/* Max TR index comes from different result fields per TR type. */
2800 maxtr = (res.pal_result[0] >> 40) & 0xff;
2802 maxtr = (res.pal_result[0] >> 32) & 0xff;
2804 db_printf("V RID Virtual Page Physical Page PgSz ED AR PL D A MA P KEY\n");
2805 for (i = 0; i <= maxtr; i++) {
2806 bzero(&buf, sizeof(buf));
2807 res = ia64_pal_physical(PAL_VM_TR_READ, i, type,
2808 ia64_tpa((uint64_t)&buf));
/* PAL tells us which fields of the returned TR are valid. */
2809 if (!(res.pal_result[0] & 1))
2810 buf.pte &= ~PTE_AR_MASK;
2811 if (!(res.pal_result[0] & 2))
2812 buf.pte &= ~PTE_PL_MASK;
2813 if (!(res.pal_result[0] & 4))
2814 pmap_clear_dirty(&buf);
2815 if (!(res.pal_result[0] & 8))
2816 buf.pte &= ~PTE_MA_MASK;
2817 db_printf("%d %06x %013lx %013lx %4s %d %d %d %d %d %-3s "
2818 "%d %06x\n", (int)buf.ifa & 1, buf.rr.rr_rid,
2819 buf.ifa >> 12, (buf.pte & PTE_PPN_MASK) >> 12,
2820 psnames[(buf.itir & ITIR_PS_MASK) >> 2],
2821 (buf.pte & PTE_ED) ? 1 : 0,
2822 (int)(buf.pte & PTE_AR_MASK) >> 9,
2823 (int)(buf.pte & PTE_PL_MASK) >> 7,
2824 (pmap_dirty(&buf)) ? 1 : 0,
2825 (pmap_accessed(&buf)) ? 1 : 0,
2826 manames[(buf.pte & PTE_MA_MASK) >> 2],
2827 (pmap_present(&buf)) ? 1 : 0,
2828 (int)((buf.itir & ITIR_KEY_MASK) >> 8));
/* DDB commands: dump instruction/data TRs, region registers, VHPT hash/tag. */
2832 DB_COMMAND(itr, db_itr)
2837 DB_COMMAND(dtr, db_dtr)
2842 DB_COMMAND(rr, db_rr)
2848 printf("RR RID PgSz VE\n");
2849 for (i = 0; i < 8; i++) {
2850 __asm __volatile ("mov %0=rr[%1]"
2852 : "r"(IA64_RR_BASE(i)));
2853 *(uint64_t *) &rr = t;
2854 printf("%d %06x %4s %d\n",
2855 i, rr.rr_rid, psnames[rr.rr_ps], rr.rr_ve);
2859 DB_COMMAND(thash, db_thash)
2864 db_printf("%p\n", (void *) ia64_thash(addr));
2867 DB_COMMAND(ttag, db_ttag)
2872 db_printf("0x%lx\n", ia64_ttag(addr));
2875 DB_COMMAND(kpte, db_kpte)
2877 struct ia64_lpte *pte;
2880 db_printf("usage: kpte <kva>\n");
2883 if (addr < VM_INIT_KERNEL_ADDRESS) {
2884 db_printf("kpte: error: invalid <kva>\n");
2887 pte = pmap_find_kpte(addr);
2888 db_printf("kpte at %p:\n", pte);
2889 db_printf(" pte =%016lx\n", pte->pte);
2890 db_printf(" itir =%016lx\n", pte->itir);
2891 db_printf(" tag =%016lx\n", pte->tag);
2892 db_printf(" chain=%016lx\n", pte->chain);