 * Copyright (c) 1991 Regents of the University of California.
 * Copyright (c) 1994 John S. Dyson
 * Copyright (c) 1994 David Greenman
 * Copyright (c) 1998,2000 Doug Rabson
 * This code is derived from software contributed to Berkeley by
 * the Systems Programming Group of the University of Utah Computer
 * Science Department and William Jolitz of UUNET Technologies Inc.
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 * 3. All advertising materials mentioning features or use of this software
 *    must display the following acknowledgement:
 *    This product includes software developed by the University of
 *    California, Berkeley and its contributors.
 * 4. Neither the name of the University nor the names of its contributors
 *    may be used to endorse or promote products derived from this software
 *    without specific prior written permission.
 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
 * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
 * from: @(#)pmap.c 7.7 (Berkeley) 5/12/91
 * from: i386 Id: pmap.c,v 1.193 1998/04/19 15:22:48 bde Exp
 * with some ideas from NetBSD's alpha pmap
#include <sys/cdefs.h>
__FBSDID("$FreeBSD$");
#include <sys/param.h>
#include <sys/kernel.h>
#include <sys/mutex.h>
#include <sys/sysctl.h>
#include <sys/systm.h>
#include <vm/vm_page.h>
#include <vm/vm_map.h>
#include <vm/vm_object.h>
#include <vm/vm_pageout.h>
#include <machine/md_var.h>
#include <machine/pal.h>
 * Manages physical address maps.
 * In addition to hardware address maps, this
 * module is called upon to provide software-use-only
 * maps which may or may not be stored in the same
 * form as hardware maps. These pseudo-maps are
 * used to store intermediate results from copy
 * operations to and from address spaces.
 * Since the information managed by this module is
 * also stored by the logical address mapping module,
 * this module may throw away valid virtual-to-physical
 * mappings at almost any time. However, invalidations
 * of virtual-to-physical mappings must be done as
 * In order to cope with hardware architectures which
 * make virtual-to-physical map invalidates expensive,
 * this module may delay invalidate or reduced protection
 * operations until such time as they are actually
 * necessary. This module is given full information as
 * to which processors are currently using which maps,
 * and to when physical maps must be made correct.
 * Following the Linux model, region IDs are allocated in groups of
 * eight so that a single region ID can be used for as many RRs as we
 * want by encoding the RR number into the low bits of the ID.
 * We reserve region ID 0 for the kernel and allocate the remaining
 * IDs for user pmaps.
 * User virtually mapped
 * Kernel virtually mapped
 * Kernel physically mapped uncacheable
 * Kernel physically mapped cacheable
/* XXX move to a header. */
extern uint64_t ia64_gateway_page[];
#ifndef PMAP_SHPGPERPROC
#define PMAP_SHPGPERPROC 200
#if !defined(DIAGNOSTIC)
#define PMAP_INLINE __inline
#define pmap_accessed(lpte) ((lpte)->pte & PTE_ACCESSED)
#define pmap_dirty(lpte) ((lpte)->pte & PTE_DIRTY)
#define pmap_exec(lpte) ((lpte)->pte & PTE_AR_RX)
#define pmap_managed(lpte) ((lpte)->pte & PTE_MANAGED)
#define pmap_ppn(lpte) ((lpte)->pte & PTE_PPN_MASK)
#define pmap_present(lpte) ((lpte)->pte & PTE_PRESENT)
#define pmap_prot(lpte) (((lpte)->pte & PTE_PROT_MASK) >> 56)
#define pmap_wired(lpte) ((lpte)->pte & PTE_WIRED)
#define pmap_clear_accessed(lpte) (lpte)->pte &= ~PTE_ACCESSED
#define pmap_clear_dirty(lpte) (lpte)->pte &= ~PTE_DIRTY
#define pmap_clear_present(lpte) (lpte)->pte &= ~PTE_PRESENT
#define pmap_clear_wired(lpte) (lpte)->pte &= ~PTE_WIRED
#define pmap_set_wired(lpte) (lpte)->pte |= PTE_WIRED
 * The VHPT bucket head structure.
 * Statically allocated kernel pmap
struct pmap kernel_pmap_store;
vm_offset_t virtual_avail; /* VA of first avail page (after kernel bss) */
vm_offset_t virtual_end; /* VA of last avail page (end of kernel AS) */
 * Kernel virtual memory management.
struct ia64_lpte ***ia64_kptdir;
#define KPTE_DIR0_INDEX(va) \
(((va) >> (3*PAGE_SHIFT-8)) & ((1<<(PAGE_SHIFT-3))-1))
#define KPTE_DIR1_INDEX(va) \
(((va) >> (2*PAGE_SHIFT-5)) & ((1<<(PAGE_SHIFT-3))-1))
#define KPTE_PTE_INDEX(va) \
(((va) >> PAGE_SHIFT) & ((1<<(PAGE_SHIFT-5))-1))
#define NKPTEPG (PAGE_SIZE / sizeof(struct ia64_lpte))
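/*
 * The layout implied by the macros above: ia64_kptdir is a page of
 * pointers to first-level directory pages, each directory page holds
 * PAGE_SIZE / 8 pointers to leaf pages, and a leaf page holds
 * PAGE_SIZE / sizeof(struct ia64_lpte) (= NKPTEPG) PTEs.  With the
 * 32-byte ia64_lpte this gives index fields of PAGE_SHIFT-3 and
 * PAGE_SHIFT-5 bits, which is exactly what the shifts and masks in
 * KPTE_DIR0_INDEX, KPTE_DIR1_INDEX and KPTE_PTE_INDEX encode.
 */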
vm_offset_t kernel_vm_end;
/* Values for ptc.e. XXX values for SKI. */
static uint64_t pmap_ptc_e_base = 0x100000000;
static uint64_t pmap_ptc_e_count1 = 3;
static uint64_t pmap_ptc_e_count2 = 2;
static uint64_t pmap_ptc_e_stride1 = 0x2000;
static uint64_t pmap_ptc_e_stride2 = 0x100000000;
volatile u_long pmap_ptc_g_sem;
 * Data for the RID allocator
static int pmap_ridcount;
static int pmap_rididx;
static int pmap_ridmapsz;
static int pmap_ridmax;
static uint64_t *pmap_ridmap;
struct mtx pmap_ridmutex;
 * Data for the pv entry allocation mechanism
static uma_zone_t pvzone;
static int pv_entry_count = 0, pv_entry_max = 0, pv_entry_high_water = 0;
 * Data for allocating PTEs for user processes.
static uma_zone_t ptezone;
 * Virtual Hash Page Table (VHPT) data.
/* SYSCTL_DECL(_machdep); */
SYSCTL_NODE(_machdep, OID_AUTO, vhpt, CTLFLAG_RD, 0, "");
struct ia64_bucket *pmap_vhpt_bucket;
int pmap_vhpt_nbuckets;
SYSCTL_INT(_machdep_vhpt, OID_AUTO, nbuckets, CTLFLAG_RD,
&pmap_vhpt_nbuckets, 0, "");
int pmap_vhpt_log2size = 0;
TUNABLE_INT("machdep.vhpt.log2size", &pmap_vhpt_log2size);
SYSCTL_INT(_machdep_vhpt, OID_AUTO, log2size, CTLFLAG_RD,
&pmap_vhpt_log2size, 0, "");
static int pmap_vhpt_inserts;
SYSCTL_INT(_machdep_vhpt, OID_AUTO, inserts, CTLFLAG_RD,
&pmap_vhpt_inserts, 0, "");
static int pmap_vhpt_population(SYSCTL_HANDLER_ARGS);
SYSCTL_PROC(_machdep_vhpt, OID_AUTO, population, CTLTYPE_INT | CTLFLAG_RD,
NULL, 0, pmap_vhpt_population, "I", "");
static struct ia64_lpte *pmap_find_vhpt(vm_offset_t va);
static PMAP_INLINE void free_pv_entry(pv_entry_t pv);
static pv_entry_t get_pv_entry(pmap_t locked_pmap);
static void pmap_enter_quick_locked(pmap_t pmap, vm_offset_t va,
vm_page_t m, vm_prot_t prot);
static void pmap_free_pte(struct ia64_lpte *pte, vm_offset_t va);
static void pmap_invalidate_all(void);
static int pmap_remove_pte(pmap_t pmap, struct ia64_lpte *pte,
vm_offset_t va, pv_entry_t pv, int freepte);
static int pmap_remove_vhpt(vm_offset_t va);
static boolean_t pmap_try_insert_pv_entry(pmap_t pmap, vm_offset_t va,
pmap_steal_memory(vm_size_t size)
size = round_page(size);
bank_size = phys_avail[1] - phys_avail[0];
while (size > bank_size) {
for (i = 0; phys_avail[i+2]; i += 2) {
phys_avail[i] = phys_avail[i+2];
phys_avail[i+1] = phys_avail[i+3];
panic("pmap_steal_memory: out of memory");
bank_size = phys_avail[1] - phys_avail[0];
phys_avail[0] += size;
va = IA64_PHYS_TO_RR7(pa);
bzero((caddr_t) va, size);
pmap_initialize_vhpt(vm_offset_t vhpt)
struct ia64_lpte *pte;
pte = (struct ia64_lpte *)vhpt;
for (i = 0; i < pmap_vhpt_nbuckets; i++) {
pte[i].tag = 1UL << 63; /* Invalid tag */
pte[i].chain = (uintptr_t)(pmap_vhpt_bucket + i);
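/*
 * Note: bit 63 of the tag is the architected "ti" (tag invalid) bit,
 * so a tag of 1UL << 63 can never match a real translation.  The
 * chain field of each VHPT slot points at its bucket head, which is
 * how pmap_enter_vhpt() locates the bucket for a given address.
 */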
MALLOC_DECLARE(M_SMP);
pmap_alloc_vhpt(void)
size = 1UL << pmap_vhpt_log2size;
vhpt = (uintptr_t)contigmalloc(size, M_SMP, 0, 0UL, ~0UL, size, 0UL);
vhpt = IA64_PHYS_TO_RR7(ia64_tpa(vhpt));
pmap_initialize_vhpt(vhpt);
 * Bootstrap the system enough to run with virtual memory.
struct ia64_pal_result res;
int i, j, count, ridbits;
 * Query the PAL Code to find the loop parameters for the
res = ia64_call_pal_static(PAL_PTCE_INFO, 0, 0, 0);
if (res.pal_status != 0)
panic("Can't configure ptc.e parameters");
pmap_ptc_e_base = res.pal_result[0];
pmap_ptc_e_count1 = res.pal_result[1] >> 32;
pmap_ptc_e_count2 = res.pal_result[1] & ((1L<<32) - 1);
pmap_ptc_e_stride1 = res.pal_result[2] >> 32;
pmap_ptc_e_stride2 = res.pal_result[2] & ((1L<<32) - 1);
printf("ptc.e base=0x%lx, count1=%ld, count2=%ld, "
"stride1=0x%lx, stride2=0x%lx\n",
 * Setup RIDs. RIDs 0..7 are reserved for the kernel.
 * We currently need at least 19 bits in the RID because PID_MAX
 * can only be encoded in 17 bits and we need RIDs for 5 regions
 * per process. With PID_MAX equalling 99999 this means that we
 * need to be able to encode 499995 (=5*PID_MAX).
 * The Itanium processor only has 18 bits and the architected
 * minimum is exactly that. So, we cannot use a PID based scheme
 * in those cases. Enter pmap_ridmap...
 * We should avoid the map when running on a processor that has
 * implemented enough bits. This means that we should pass the
 * process/thread ID to pmap. This we currently don't do, so we
 * use the map anyway. However, we don't want to allocate a map
 * that is large enough to cover the range dictated by the number
 * of bits in the RID, because that may result in a RID map of
 * 2MB in size for a 24-bit RID. A 64KB map is enough.
 * The bottom line: we create a 32KB map when the processor only
 * implements 18 bits (or when we can't figure it out). Otherwise
 * we create a 64KB map.
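 * (With one bit per RID in pmap_ridmap, 2^18 RIDs take 2^18/8 = 32KB
 * of bitmap and 2^19 RIDs take 64KB, which is where those two figures
 * come from.)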
res = ia64_call_pal_static(PAL_VM_SUMMARY, 0, 0, 0);
if (res.pal_status != 0) {
printf("Can't read VM Summary - assuming 18 Region ID bits\n");
ridbits = 18; /* guaranteed minimum */
ridbits = (res.pal_result[1] >> 8) & 0xff;
printf("Processor supports %d Region ID bits\n",
pmap_ridmax = (1 << ridbits);
pmap_ridmapsz = pmap_ridmax / 64;
pmap_ridmap = (uint64_t *)pmap_steal_memory(pmap_ridmax / 8);
pmap_ridmap[0] |= 0xff;
mtx_init(&pmap_ridmutex, "RID allocator lock", NULL, MTX_DEF);
 * Allocate some memory for initial kernel 'page tables'.
ia64_kptdir = (void *)pmap_steal_memory(PAGE_SIZE);
kernel_vm_end = VM_MIN_KERNEL_ADDRESS - VM_GATEWAY_SIZE;
for (i = 0; phys_avail[i+2]; i += 2)
TUNABLE_INT_FETCH("machdep.vhpt.log2size", &pmap_vhpt_log2size);
if (pmap_vhpt_log2size == 0)
pmap_vhpt_log2size = 20;
else if (pmap_vhpt_log2size < 15)
pmap_vhpt_log2size = 15;
else if (pmap_vhpt_log2size > 61)
pmap_vhpt_log2size = 61;
size = 1UL << pmap_vhpt_log2size;
for (i = 0; i < count; i += 2) {
base = (phys_avail[i] + size - 1) & ~(size - 1);
if (base + size <= phys_avail[i+1])
panic("Unable to allocate VHPT");
if (base != phys_avail[i]) {
/* Split this region. */
for (j = count; j > i; j -= 2) {
phys_avail[j] = phys_avail[j-2];
phys_avail[j+1] = phys_avail[j-2+1];
phys_avail[i+1] = base;
phys_avail[i+2] = base + size;
phys_avail[i] = base + size;
base = IA64_PHYS_TO_RR7(base);
PCPU_SET(md.vhpt, base);
printf("VHPT: address=%#lx, size=%#lx\n", base, size);
pmap_vhpt_nbuckets = size / sizeof(struct ia64_lpte);
pmap_vhpt_bucket = (void *)pmap_steal_memory(pmap_vhpt_nbuckets *
sizeof(struct ia64_bucket));
for (i = 0; i < pmap_vhpt_nbuckets; i++) {
/* Stolen memory is zeroed. */
mtx_init(&pmap_vhpt_bucket[i].mutex, "VHPT bucket lock", NULL,
MTX_NOWITNESS | MTX_SPIN);
pmap_initialize_vhpt(base);
ia64_set_pta(base + (1 << 8) + (pmap_vhpt_log2size << 2) + 1);
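/*
 * The value written to cr.pta above follows the architected layout of
 * that register: bit 0 enables the VHPT walker (pta.ve), bits 7:2 hold
 * the log2 of the VHPT size, bit 8 selects the long format (pta.vf),
 * and the high-order bits carry the size-aligned VHPT base address.
 */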
virtual_avail = VM_MIN_KERNEL_ADDRESS;
virtual_end = VM_MAX_KERNEL_ADDRESS;
 * Initialize the kernel pmap (which is statically allocated).
PMAP_LOCK_INIT(kernel_pmap);
for (i = 0; i < 5; i++)
kernel_pmap->pm_rid[i] = 0;
TAILQ_INIT(&kernel_pmap->pm_pvlist);
PCPU_SET(md.current_pmap, kernel_pmap);
 * Region 5 is mapped via the vhpt.
ia64_set_rr(IA64_RR_BASE(5),
(5 << 8) | (PAGE_SHIFT << 2) | 1);
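/*
 * The region register values built here and in pmap_switch() follow
 * the architected format: region ID in bits 31:8, preferred page size
 * in bits 7:2 and the VHPT-enable bit in bit 0, i.e.
 * (rid << 8) | (ps << 2) | ve.
 */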
 * Region 6 is direct mapped UC and region 7 is direct mapped
 * WB (cacheable). The details of this are controlled by the Alt
 * {I,D}TLB handlers. Here we just make sure that they have the
 * largest possible page size to minimise TLB usage.
ia64_set_rr(IA64_RR_BASE(6), (6 << 8) | (IA64_ID_PAGE_SHIFT << 2));
ia64_set_rr(IA64_RR_BASE(7), (7 << 8) | (IA64_ID_PAGE_SHIFT << 2));
 * Clear out any random TLB entries left over from booting.
pmap_invalidate_all();
pmap_vhpt_population(SYSCTL_HANDLER_ARGS)
for (i = 0; i < pmap_vhpt_nbuckets; i++)
count += pmap_vhpt_bucket[i].length;
error = SYSCTL_OUT(req, &count, sizeof(count));
 * Initialize a vm_page's machine-dependent fields.
pmap_page_init(vm_page_t m)
TAILQ_INIT(&m->md.pv_list);
m->md.pv_list_count = 0;
 * Initialize the pmap module.
 * Called by vm_init, to initialize any structures that the pmap
 * system needs to map virtual memory.
int shpgperproc = PMAP_SHPGPERPROC;
 * Initialize the address space (zone) for the pv entries. Set a
 * high water mark so that the system can recover from excessive
 * numbers of pv entries.
pvzone = uma_zcreate("PV ENTRY", sizeof(struct pv_entry), NULL, NULL,
NULL, NULL, UMA_ALIGN_PTR, UMA_ZONE_VM | UMA_ZONE_NOFREE);
TUNABLE_INT_FETCH("vm.pmap.shpgperproc", &shpgperproc);
pv_entry_max = shpgperproc * maxproc + cnt.v_page_count;
TUNABLE_INT_FETCH("vm.pmap.pv_entries", &pv_entry_max);
pv_entry_high_water = 9 * (pv_entry_max / 10);
ptezone = uma_zcreate("PT ENTRY", sizeof (struct ia64_lpte),
NULL, NULL, NULL, NULL, UMA_ALIGN_PTR, UMA_ZONE_VM|UMA_ZONE_NOFREE);
/***************************************************
 * Manipulate TLBs for a pmap
 ***************************************************/
pmap_invalidate_page(vm_offset_t va)
struct ia64_lpte *pte;
vhpt_ofs = ia64_thash(va) - PCPU_GET(md.vhpt);
SLIST_FOREACH(pc, &cpuhead, pc_allcpu) {
pte = (struct ia64_lpte *)(pc->pc_md.vhpt + vhpt_ofs);
atomic_cmpset_64(&pte->tag, tag, 1UL << 63);
/* PTC.G enter exclusive */
/* Atomically assert writer after all writers have gone. */
/* Wait until there's no more writer. */
sem = atomic_load_acq_long(&pmap_ptc_g_sem);
tag = sem | (1ul << 63);
} while (sem == tag);
} while (!atomic_cmpset_rel_long(&pmap_ptc_g_sem, sem, tag));
/* Wait until all readers are gone. */
sem = atomic_load_acq_long(&pmap_ptc_g_sem);
} while (sem != tag);
ia64_ptc_ga(va, PAGE_SHIFT << 2);
/* PTC.G leave exclusive */
atomic_store_rel_long(&pmap_ptc_g_sem, 0);
pmap_invalidate_all_1(void *arg)
addr = pmap_ptc_e_base;
for (i = 0; i < pmap_ptc_e_count1; i++) {
for (j = 0; j < pmap_ptc_e_count2; j++) {
addr += pmap_ptc_e_stride2;
addr += pmap_ptc_e_stride1;
pmap_invalidate_all(void)
smp_rendezvous(NULL, pmap_invalidate_all_1, NULL, NULL);
pmap_invalidate_all_1(NULL);
pmap_allocate_rid(void)
mtx_lock(&pmap_ridmutex);
if (pmap_ridcount == pmap_ridmax)
panic("pmap_allocate_rid: All Region IDs used");
/* Find an index with a free bit. */
while ((bits = pmap_ridmap[pmap_rididx]) == ~0UL) {
if (pmap_rididx == pmap_ridmapsz)
rid = pmap_rididx * 64;
/* Find a free bit. */
pmap_ridmap[pmap_rididx] |= bit;
mtx_unlock(&pmap_ridmutex);
pmap_free_rid(uint32_t rid)
bit = ~(1UL << (rid & 63));
mtx_lock(&pmap_ridmutex);
pmap_ridmap[idx] &= bit;
mtx_unlock(&pmap_ridmutex);
/***************************************************
 * Page table page management routines.....
 ***************************************************/
pmap_pinit0(struct pmap *pmap)
/* kernel_pmap is the same as any other pmap. */
 * Initialize a preallocated and zeroed pmap structure,
 * such as one in a vmspace structure.
pmap_pinit(struct pmap *pmap)
PMAP_LOCK_INIT(pmap);
for (i = 0; i < 5; i++)
pmap->pm_rid[i] = pmap_allocate_rid();
TAILQ_INIT(&pmap->pm_pvlist);
bzero(&pmap->pm_stats, sizeof pmap->pm_stats);
/***************************************************
 * Pmap allocation/deallocation routines.
 ***************************************************/
 * Release any resources held by the given physical map.
 * Called when a pmap initialized by pmap_pinit is being released.
 * Should only be called if the map contains no valid mappings.
pmap_release(pmap_t pmap)
for (i = 0; i < 5; i++)
pmap_free_rid(pmap->pm_rid[i]);
PMAP_LOCK_DESTROY(pmap);
 * grow the number of kernel page table entries, if needed
pmap_growkernel(vm_offset_t addr)
struct ia64_lpte **dir1;
struct ia64_lpte *leaf;
while (kernel_vm_end <= addr) {
if (nkpt == PAGE_SIZE/8 + PAGE_SIZE*PAGE_SIZE/64)
panic("%s: out of kernel address space", __func__);
dir1 = ia64_kptdir[KPTE_DIR0_INDEX(kernel_vm_end)];
nkpg = vm_page_alloc(NULL, nkpt++,
VM_ALLOC_NOOBJ|VM_ALLOC_INTERRUPT|VM_ALLOC_WIRED);
panic("%s: cannot add dir. page", __func__);
dir1 = (struct ia64_lpte **)
IA64_PHYS_TO_RR7(VM_PAGE_TO_PHYS(nkpg));
bzero(dir1, PAGE_SIZE);
ia64_kptdir[KPTE_DIR0_INDEX(kernel_vm_end)] = dir1;
nkpg = vm_page_alloc(NULL, nkpt++,
VM_ALLOC_NOOBJ|VM_ALLOC_INTERRUPT|VM_ALLOC_WIRED);
panic("%s: cannot add PTE page", __func__);
leaf = (struct ia64_lpte *)
IA64_PHYS_TO_RR7(VM_PAGE_TO_PHYS(nkpg));
bzero(leaf, PAGE_SIZE);
dir1[KPTE_DIR1_INDEX(kernel_vm_end)] = leaf;
kernel_vm_end += PAGE_SIZE * NKPTEPG;
/***************************************************
 * page management routines.
 ***************************************************/
 * free the pv_entry back to the free list
static PMAP_INLINE void
free_pv_entry(pv_entry_t pv)
uma_zfree(pvzone, pv);
 * get a new pv_entry, allocating a block from the system
get_pv_entry(pmap_t locked_pmap)
static const struct timeval printinterval = { 60, 0 };
static struct timeval lastprint;
struct vpgqueues *vpq;
struct ia64_lpte *pte;
pmap_t oldpmap, pmap;
pv_entry_t allocated_pv, next_pv, pv;
PMAP_LOCK_ASSERT(locked_pmap, MA_OWNED);
mtx_assert(&vm_page_queue_mtx, MA_OWNED);
allocated_pv = uma_zalloc(pvzone, M_NOWAIT);
if (allocated_pv != NULL) {
if (pv_entry_count > pv_entry_high_water)
return (allocated_pv);
 * Reclaim pv entries: At first, destroy mappings to inactive
 * pages. After that, if a pv entry is still needed, destroy
 * mappings to active pages.
if (ratecheck(&lastprint, &printinterval))
printf("Approaching the limit on PV entries, "
"increase the vm.pmap.shpgperproc tunable.\n");
vpq = &vm_page_queues[PQ_INACTIVE];
TAILQ_FOREACH(m, &vpq->pl, pageq) {
if (m->hold_count || m->busy)
TAILQ_FOREACH_SAFE(pv, &m->md.pv_list, pv_list, next_pv) {
/* Avoid deadlock and lock recursion. */
if (pmap > locked_pmap)
else if (pmap != locked_pmap && !PMAP_TRYLOCK(pmap))
pmap->pm_stats.resident_count--;
oldpmap = pmap_switch(pmap);
pte = pmap_find_vhpt(va);
KASSERT(pte != NULL, ("pte"));
pmap_remove_vhpt(va);
pmap_invalidate_page(va);
pmap_switch(oldpmap);
if (pmap_accessed(pte))
vm_page_flag_set(m, PG_REFERENCED);
pmap_free_pte(pte, va);
TAILQ_REMOVE(&pmap->pm_pvlist, pv, pv_plist);
m->md.pv_list_count--;
TAILQ_REMOVE(&m->md.pv_list, pv, pv_list);
if (pmap != locked_pmap)
if (allocated_pv == NULL)
if (TAILQ_EMPTY(&m->md.pv_list))
vm_page_flag_clear(m, PG_WRITEABLE);
if (allocated_pv == NULL) {
if (vpq == &vm_page_queues[PQ_INACTIVE]) {
vpq = &vm_page_queues[PQ_ACTIVE];
panic("get_pv_entry: increase the vm.pmap.shpgperproc tunable");
return (allocated_pv);
 * Conditionally create a pv entry.
pmap_try_insert_pv_entry(pmap_t pmap, vm_offset_t va, vm_page_t m)
PMAP_LOCK_ASSERT(pmap, MA_OWNED);
mtx_assert(&vm_page_queue_mtx, MA_OWNED);
if (pv_entry_count < pv_entry_high_water &&
(pv = uma_zalloc(pvzone, M_NOWAIT)) != NULL) {
TAILQ_INSERT_TAIL(&pmap->pm_pvlist, pv, pv_plist);
TAILQ_INSERT_TAIL(&m->md.pv_list, pv, pv_list);
m->md.pv_list_count++;
 * Add an ia64_lpte to the VHPT.
pmap_enter_vhpt(struct ia64_lpte *pte, vm_offset_t va)
struct ia64_bucket *bckt;
struct ia64_lpte *vhpte;
/* Can fault, so get it out of the way. */
pte_pa = ia64_tpa((vm_offset_t)pte);
vhpte = (struct ia64_lpte *)ia64_thash(va);
bckt = (struct ia64_bucket *)vhpte->chain;
mtx_lock_spin(&bckt->mutex);
pte->chain = bckt->chain;
bckt->chain = pte_pa;
mtx_unlock_spin(&bckt->mutex);
 * Remove the ia64_lpte matching va from the VHPT. Return zero if it
 * worked or an appropriate error code otherwise.
pmap_remove_vhpt(vm_offset_t va)
struct ia64_bucket *bckt;
struct ia64_lpte *pte;
struct ia64_lpte *lpte;
struct ia64_lpte *vhpte;
vhpte = (struct ia64_lpte *)ia64_thash(va);
bckt = (struct ia64_bucket *)vhpte->chain;
mtx_lock_spin(&bckt->mutex);
pte = (struct ia64_lpte *)IA64_PHYS_TO_RR7(chain);
while (chain != 0 && pte->tag != tag) {
pte = (struct ia64_lpte *)IA64_PHYS_TO_RR7(chain);
mtx_unlock_spin(&bckt->mutex);
/* Snip this pv_entry out of the collision chain. */
bckt->chain = pte->chain;
lpte->chain = pte->chain;
mtx_unlock_spin(&bckt->mutex);
 * Find the ia64_lpte for the given va, if any.
static struct ia64_lpte *
pmap_find_vhpt(vm_offset_t va)
struct ia64_bucket *bckt;
struct ia64_lpte *pte;
pte = (struct ia64_lpte *)ia64_thash(va);
bckt = (struct ia64_bucket *)pte->chain;
mtx_lock_spin(&bckt->mutex);
pte = (struct ia64_lpte *)IA64_PHYS_TO_RR7(chain);
while (chain != 0 && pte->tag != tag) {
pte = (struct ia64_lpte *)IA64_PHYS_TO_RR7(chain);
mtx_unlock_spin(&bckt->mutex);
return ((chain != 0) ? pte : NULL);
 * Remove an entry from the list of managed mappings.
pmap_remove_entry(pmap_t pmap, vm_page_t m, vm_offset_t va, pv_entry_t pv)
if (m->md.pv_list_count < pmap->pm_stats.resident_count) {
TAILQ_FOREACH(pv, &m->md.pv_list, pv_list) {
if (pmap == pv->pv_pmap && va == pv->pv_va)
TAILQ_FOREACH(pv, &pmap->pm_pvlist, pv_plist) {
TAILQ_REMOVE(&m->md.pv_list, pv, pv_list);
m->md.pv_list_count--;
if (TAILQ_FIRST(&m->md.pv_list) == NULL)
vm_page_flag_clear(m, PG_WRITEABLE);
TAILQ_REMOVE(&pmap->pm_pvlist, pv, pv_plist);
 * Create a pv entry for page at pa for
pmap_insert_entry(pmap_t pmap, vm_offset_t va, vm_page_t m)
pv = get_pv_entry(pmap);
PMAP_LOCK_ASSERT(pmap, MA_OWNED);
mtx_assert(&vm_page_queue_mtx, MA_OWNED);
TAILQ_INSERT_TAIL(&pmap->pm_pvlist, pv, pv_plist);
TAILQ_INSERT_TAIL(&m->md.pv_list, pv, pv_list);
m->md.pv_list_count++;
 * Routine: pmap_extract
 * Extract the physical page address associated
 * with the given map/virtual_address pair.
pmap_extract(pmap_t pmap, vm_offset_t va)
struct ia64_lpte *pte;
oldpmap = pmap_switch(pmap);
pte = pmap_find_vhpt(va);
if (pte != NULL && pmap_present(pte))
pmap_switch(oldpmap);
 * Routine: pmap_extract_and_hold
 * Atomically extract and hold the physical page
 * with the given pmap and virtual address pair
 * if that mapping permits the given protection.
pmap_extract_and_hold(pmap_t pmap, vm_offset_t va, vm_prot_t prot)
struct ia64_lpte *pte;
vm_page_lock_queues();
oldpmap = pmap_switch(pmap);
pte = pmap_find_vhpt(va);
if (pte != NULL && pmap_present(pte) &&
(pmap_prot(pte) & prot) == prot) {
m = PHYS_TO_VM_PAGE(pmap_ppn(pte));
vm_page_unlock_queues();
pmap_switch(oldpmap);
/***************************************************
 * Low level mapping routines.....
 ***************************************************/
 * Find the kernel lpte for mapping the given virtual address, which
 * must be in the part of region 5 which we can cover with our kernel
static struct ia64_lpte *
pmap_find_kpte(vm_offset_t va)
struct ia64_lpte **dir1;
struct ia64_lpte *leaf;
KASSERT((va >> 61) == 5,
("kernel mapping 0x%lx not in region 5", va));
KASSERT(va < kernel_vm_end,
("kernel mapping 0x%lx out of range", va));
dir1 = ia64_kptdir[KPTE_DIR0_INDEX(va)];
leaf = dir1[KPTE_DIR1_INDEX(va)];
return (&leaf[KPTE_PTE_INDEX(va)]);
 * Find a pte suitable for mapping a user-space address. If one exists
 * in the VHPT, that one will be returned, otherwise a new pte is
static struct ia64_lpte *
pmap_find_pte(vm_offset_t va)
struct ia64_lpte *pte;
if (va >= VM_MAXUSER_ADDRESS)
return pmap_find_kpte(va);
pte = pmap_find_vhpt(va);
pte = uma_zalloc(ptezone, M_NOWAIT | M_ZERO);
pte->tag = 1UL << 63;
 * Free a pte which is now unused. This simply returns it to the zone
 * allocator if it is a user mapping. For kernel mappings, clear the
 * valid bit to make it clear that the mapping is not currently used.
pmap_free_pte(struct ia64_lpte *pte, vm_offset_t va)
if (va < VM_MAXUSER_ADDRESS)
uma_zfree(ptezone, pte);
pmap_clear_present(pte);
static PMAP_INLINE void
pmap_pte_prot(pmap_t pm, struct ia64_lpte *pte, vm_prot_t prot)
static long prot2ar[4] = {
PTE_AR_R, /* VM_PROT_NONE */
PTE_AR_RW, /* VM_PROT_WRITE */
PTE_AR_RX|PTE_ED, /* VM_PROT_EXECUTE */
PTE_AR_RWX|PTE_ED /* VM_PROT_WRITE|VM_PROT_EXECUTE */
pte->pte &= ~(PTE_PROT_MASK | PTE_PL_MASK | PTE_AR_MASK | PTE_ED);
pte->pte |= (uint64_t)(prot & VM_PROT_ALL) << 56;
pte->pte |= (prot == VM_PROT_NONE || pm == kernel_pmap)
? PTE_PL_KERN : PTE_PL_USER;
pte->pte |= prot2ar[(prot & VM_PROT_ALL) >> 1];
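/*
 * prot2ar above is indexed by the write and execute bits of the
 * protection: (prot & VM_PROT_ALL) >> 1 discards VM_PROT_READ, so
 * read-only, read/write, read/execute and read/write/execute select
 * entries 0 through 3 respectively.  The privilege level is forced to
 * kernel for kernel mappings and for VM_PROT_NONE.
 */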
 * Set a pte to contain a valid mapping and enter it in the VHPT. If
 * the pte was originally valid, then it's assumed to already be in the
 * This function does not set the protection bits. It's expected
 * that those have been set correctly prior to calling this function.
pmap_set_pte(struct ia64_lpte *pte, vm_offset_t va, vm_offset_t pa,
boolean_t wired, boolean_t managed)
pte->pte &= PTE_PROT_MASK | PTE_PL_MASK | PTE_AR_MASK | PTE_ED;
pte->pte |= PTE_PRESENT | PTE_MA_WB;
pte->pte |= (managed) ? PTE_MANAGED : (PTE_DIRTY | PTE_ACCESSED);
pte->pte |= (wired) ? PTE_WIRED : 0;
pte->pte |= pa & PTE_PPN_MASK;
pte->itir = PAGE_SHIFT << 2;
pte->tag = ia64_ttag(va);
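/*
 * PAGE_SHIFT lands in the ps field (bits 7:2) of the itir, and the
 * tag computed by ia64_ttag(va) is what the VHPT walker and
 * pmap_find_vhpt() compare against when this entry is looked up.
 */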
 * Remove the (possibly managed) mapping represented by pte from the
pmap_remove_pte(pmap_t pmap, struct ia64_lpte *pte, vm_offset_t va,
pv_entry_t pv, int freepte)
 * First remove from the VHPT.
error = pmap_remove_vhpt(va);
pmap_invalidate_page(va);
if (pmap_wired(pte))
pmap->pm_stats.wired_count -= 1;
pmap->pm_stats.resident_count -= 1;
if (pmap_managed(pte)) {
m = PHYS_TO_VM_PAGE(pmap_ppn(pte));
if (pmap_dirty(pte))
if (pmap_accessed(pte))
vm_page_flag_set(m, PG_REFERENCED);
error = pmap_remove_entry(pmap, m, va, pv);
pmap_free_pte(pte, va);
 * Extract the physical page address associated with a kernel
pmap_kextract(vm_offset_t va)
struct ia64_lpte *pte;
KASSERT(va >= IA64_RR_BASE(5), ("Must be kernel VA"));
/* Regions 6 and 7 are direct mapped. */
if (va >= IA64_RR_BASE(6))
return (IA64_RR_MASK(va));
/* EPC gateway page? */
gwpage = (vm_offset_t)ia64_get_k5();
if (va >= gwpage && va < gwpage + VM_GATEWAY_SIZE)
return (IA64_RR_MASK((vm_offset_t)ia64_gateway_page));
/* Bail out if the virtual address is beyond our limits. */
if (va >= kernel_vm_end)
pte = pmap_find_kpte(va);
if (!pmap_present(pte))
return (pmap_ppn(pte) | (va & PAGE_MASK));
 * Add a list of wired pages to the kva. This routine is only used for
 * temporary kernel mappings that do not need to have page modification
 * or references recorded. Note that old mappings are simply written
 * over. The page is effectively wired, but it's customary to not have
 * the PTE reflect that, nor update statistics.
pmap_qenter(vm_offset_t va, vm_page_t *m, int count)
struct ia64_lpte *pte;
for (i = 0; i < count; i++) {
pte = pmap_find_kpte(va);
if (pmap_present(pte))
pmap_invalidate_page(va);
pmap_enter_vhpt(pte, va);
pmap_pte_prot(kernel_pmap, pte, VM_PROT_ALL);
pmap_set_pte(pte, va, VM_PAGE_TO_PHYS(m[i]), FALSE, FALSE);
 * this routine jerks page mappings from the
 * kernel -- it is meant only for temporary mappings.
pmap_qremove(vm_offset_t va, int count)
struct ia64_lpte *pte;
for (i = 0; i < count; i++) {
pte = pmap_find_kpte(va);
if (pmap_present(pte)) {
pmap_remove_vhpt(va);
pmap_invalidate_page(va);
pmap_clear_present(pte);
 * Add a wired page to the kva. As for pmap_qenter(), it's customary
 * to not have the PTE reflect that, nor update statistics.
pmap_kenter(vm_offset_t va, vm_offset_t pa)
struct ia64_lpte *pte;
pte = pmap_find_kpte(va);
if (pmap_present(pte))
pmap_invalidate_page(va);
pmap_enter_vhpt(pte, va);
pmap_pte_prot(kernel_pmap, pte, VM_PROT_ALL);
pmap_set_pte(pte, va, pa, FALSE, FALSE);
 * Remove a page from the kva
pmap_kremove(vm_offset_t va)
struct ia64_lpte *pte;
pte = pmap_find_kpte(va);
if (pmap_present(pte)) {
pmap_remove_vhpt(va);
pmap_invalidate_page(va);
pmap_clear_present(pte);
 * Used to map a range of physical addresses into kernel
 * virtual address space.
 * The value passed in '*virt' is a suggested virtual address for
 * the mapping. Architectures which can support a direct-mapped
 * physical to virtual region can return the appropriate address
 * within that region, leaving '*virt' unchanged. Other
 * architectures should map the pages starting at '*virt' and
 * update '*virt' with the first usable address after the mapped
pmap_map(vm_offset_t *virt, vm_offset_t start, vm_offset_t end, int prot)
return IA64_PHYS_TO_RR7(start);
 * Remove the given range of addresses from the specified map.
 * It is assumed that the start and end are properly
 * rounded to the page size.
pmap_remove(pmap_t pmap, vm_offset_t sva, vm_offset_t eva)
struct ia64_lpte *pte;
if (pmap->pm_stats.resident_count == 0)
vm_page_lock_queues();
oldpmap = pmap_switch(pmap);
 * Special handling of removing one page: a very
 * common operation and easy to short circuit some
if (sva + PAGE_SIZE == eva) {
pte = pmap_find_vhpt(sva);
pmap_remove_pte(pmap, pte, sva, 0, 1);
if (pmap->pm_stats.resident_count < ((eva - sva) >> PAGE_SHIFT)) {
TAILQ_FOREACH_SAFE(pv, &pmap->pm_pvlist, pv_plist, npv) {
if (va >= sva && va < eva) {
pte = pmap_find_vhpt(va);
KASSERT(pte != NULL, ("pte"));
pmap_remove_pte(pmap, pte, va, pv, 1);
for (va = sva; va < eva; va += PAGE_SIZE) {
pte = pmap_find_vhpt(va);
pmap_remove_pte(pmap, pte, va, 0, 1);
vm_page_unlock_queues();
pmap_switch(oldpmap);
 * Routine: pmap_remove_all
 * Removes this physical page from
 * all physical maps in which it resides.
 * Reflects back modify bits to the pager.
 * Original versions of this routine were very
 * inefficient because they iteratively called
 * pmap_remove (slow...)
pmap_remove_all(vm_page_t m)
#if defined(DIAGNOSTIC)
 * XXX This makes pmap_remove_all() illegal for non-managed pages!
if (m->flags & PG_FICTITIOUS) {
panic("pmap_remove_all: illegal for unmanaged page, va: 0x%lx", VM_PAGE_TO_PHYS(m));
mtx_assert(&vm_page_queue_mtx, MA_OWNED);
while ((pv = TAILQ_FIRST(&m->md.pv_list)) != NULL) {
struct ia64_lpte *pte;
pmap_t pmap = pv->pv_pmap;
vm_offset_t va = pv->pv_va;
oldpmap = pmap_switch(pmap);
pte = pmap_find_vhpt(va);
KASSERT(pte != NULL, ("pte"));
if (pmap_ppn(pte) != VM_PAGE_TO_PHYS(m))
panic("pmap_remove_all: pv_table for %lx is inconsistent", VM_PAGE_TO_PHYS(m));
pmap_remove_pte(pmap, pte, va, pv, 1);
pmap_switch(oldpmap);
vm_page_flag_clear(m, PG_WRITEABLE);
 * Set the physical protection on the
 * specified range of this map as requested.
pmap_protect(pmap_t pmap, vm_offset_t sva, vm_offset_t eva, vm_prot_t prot)
struct ia64_lpte *pte;
if ((prot & VM_PROT_READ) == VM_PROT_NONE) {
pmap_remove(pmap, sva, eva);
if ((prot & (VM_PROT_WRITE|VM_PROT_EXECUTE)) ==
(VM_PROT_WRITE|VM_PROT_EXECUTE))
if ((sva & PAGE_MASK) || (eva & PAGE_MASK))
panic("pmap_protect: unaligned addresses");
vm_page_lock_queues();
oldpmap = pmap_switch(pmap);
for ( ; sva < eva; sva += PAGE_SIZE) {
/* If page is invalid, skip this page */
pte = pmap_find_vhpt(sva);
/* If there's no change, skip it too */
if (pmap_prot(pte) == prot)
if (pmap_managed(pte)) {
vm_offset_t pa = pmap_ppn(pte);
vm_page_t m = PHYS_TO_VM_PAGE(pa);
if (pmap_dirty(pte)) {
pmap_clear_dirty(pte);
if (pmap_accessed(pte)) {
vm_page_flag_set(m, PG_REFERENCED);
pmap_clear_accessed(pte);
if (prot & VM_PROT_EXECUTE)
ia64_sync_icache(sva, PAGE_SIZE);
pmap_pte_prot(pmap, pte, prot);
pmap_invalidate_page(sva);
vm_page_unlock_queues();
pmap_switch(oldpmap);
 * Insert the given physical page (p) at
 * the specified virtual address (v) in the
 * target physical map with the protection requested.
 * If specified, the page will be wired down, meaning
 * that the related pte can not be reclaimed.
 * NB: This is the only routine which MAY NOT lazy-evaluate
 * or lose information. That is, this routine must actually
 * insert this page into the given map NOW.
pmap_enter(pmap_t pmap, vm_offset_t va, vm_prot_t access, vm_page_t m,
vm_prot_t prot, boolean_t wired)
struct ia64_lpte origpte;
struct ia64_lpte *pte;
boolean_t icache_inval, managed;
vm_page_lock_queues();
oldpmap = pmap_switch(pmap);
if (va > VM_MAX_KERNEL_ADDRESS)
panic("pmap_enter: toobig");
 * Find (or create) a pte for the given mapping.
while ((pte = pmap_find_pte(va)) == NULL) {
pmap_switch(oldpmap);
vm_page_unlock_queues();
vm_page_lock_queues();
oldpmap = pmap_switch(pmap);
if (!pmap_present(pte)) {
pmap_enter_vhpt(pte, va);
opa = pmap_ppn(pte);
pa = VM_PAGE_TO_PHYS(m);
icache_inval = (prot & VM_PROT_EXECUTE) ? TRUE : FALSE;
 * Mapping has not changed, must be protection or wiring change.
 * Wiring change, just update stats. We don't worry about
 * wiring PT pages as they remain resident as long as there
 * are valid mappings in them. Hence, if a user page is wired,
 * the PT page will be also.
if (wired && !pmap_wired(&origpte))
pmap->pm_stats.wired_count++;
else if (!wired && pmap_wired(&origpte))
pmap->pm_stats.wired_count--;
managed = (pmap_managed(&origpte)) ? TRUE : FALSE;
 * We might be turning off write access to the page,
 * so we go ahead and sense modify status. Otherwise,
 * we can avoid I-cache invalidation if the page
 * already allowed execution.
if (managed && pmap_dirty(&origpte))
else if (pmap_exec(&origpte))
icache_inval = FALSE;
pmap_invalidate_page(va);
 * Mapping has changed, invalidate old range and fall
 * through to handle validating new mapping.
pmap_remove_pte(pmap, pte, va, 0, 0);
pmap_enter_vhpt(pte, va);
 * Enter on the PV list if part of our managed memory.
if ((m->flags & (PG_FICTITIOUS | PG_UNMANAGED)) == 0) {
KASSERT(va < kmi.clean_sva || va >= kmi.clean_eva,
("pmap_enter: managed mapping within the clean submap"));
pmap_insert_entry(pmap, va, m);
 * Increment counters
pmap->pm_stats.resident_count++;
pmap->pm_stats.wired_count++;
 * Now validate mapping with desired protection/wiring. This
 * adds the pte to the VHPT if necessary.
pmap_pte_prot(pmap, pte, prot);
pmap_set_pte(pte, va, pa, wired, managed);
/* Invalidate the I-cache when needed. */
ia64_sync_icache(va, PAGE_SIZE);
if ((prot & VM_PROT_WRITE) != 0)
vm_page_flag_set(m, PG_WRITEABLE);
vm_page_unlock_queues();
pmap_switch(oldpmap);
 * Maps a sequence of resident pages belonging to the same object.
 * The sequence begins with the given page m_start. This page is
 * mapped at the given virtual address start. Each subsequent page is
 * mapped at a virtual address that is offset from start by the same
 * amount as the page is offset from m_start within the object. The
 * last page in the sequence is the page with the largest offset from
 * m_start that can be mapped at a virtual address less than the given
 * virtual address end. Not every virtual page between start and end
 * is mapped; only those for which a resident page exists with the
 * corresponding offset from m_start are mapped.
pmap_enter_object(pmap_t pmap, vm_offset_t start, vm_offset_t end,
vm_page_t m_start, vm_prot_t prot)
vm_pindex_t diff, psize;
VM_OBJECT_LOCK_ASSERT(m_start->object, MA_OWNED);
psize = atop(end - start);
oldpmap = pmap_switch(pmap);
while (m != NULL && (diff = m->pindex - m_start->pindex) < psize) {
pmap_enter_quick_locked(pmap, start + ptoa(diff), m, prot);
m = TAILQ_NEXT(m, listq);
pmap_switch(oldpmap);
 * this code makes some *MAJOR* assumptions:
 * 1. Current pmap & pmap exists.
 * 4. No page table pages.
 * but is *MUCH* faster than pmap_enter...
pmap_enter_quick(pmap_t pmap, vm_offset_t va, vm_page_t m, vm_prot_t prot)
oldpmap = pmap_switch(pmap);
pmap_enter_quick_locked(pmap, va, m, prot);
pmap_switch(oldpmap);
pmap_enter_quick_locked(pmap_t pmap, vm_offset_t va, vm_page_t m,
struct ia64_lpte *pte;
KASSERT(va < kmi.clean_sva || va >= kmi.clean_eva ||
(m->flags & (PG_FICTITIOUS | PG_UNMANAGED)) != 0,
("pmap_enter_quick_locked: managed mapping within the clean submap"));
mtx_assert(&vm_page_queue_mtx, MA_OWNED);
PMAP_LOCK_ASSERT(pmap, MA_OWNED);
if ((pte = pmap_find_pte(va)) == NULL)
if (!pmap_present(pte)) {
/* Enter on the PV list if the page is managed. */
if ((m->flags & (PG_FICTITIOUS | PG_UNMANAGED)) == 0) {
if (!pmap_try_insert_pv_entry(pmap, va, m)) {
pmap_free_pte(pte, va);
/* Increment counters. */
pmap->pm_stats.resident_count++;
/* Initialise with R/O protection and enter into VHPT. */
pmap_enter_vhpt(pte, va);
pmap_pte_prot(pmap, pte,
prot & (VM_PROT_READ | VM_PROT_EXECUTE));
pmap_set_pte(pte, va, VM_PAGE_TO_PHYS(m), FALSE, managed);
if (prot & VM_PROT_EXECUTE)
ia64_sync_icache(va, PAGE_SIZE);
 * pmap_object_init_pt preloads the ptes for a given object
 * into the specified pmap. This eliminates the blast of soft
 * faults on process startup and immediately after an mmap.
pmap_object_init_pt(pmap_t pmap, vm_offset_t addr,
vm_object_t object, vm_pindex_t pindex,
VM_OBJECT_LOCK_ASSERT(object, MA_OWNED);
KASSERT(object->type == OBJT_DEVICE || object->type == OBJT_SG,
("pmap_object_init_pt: non-device object"));
 * Routine: pmap_change_wiring
 * Function: Change the wiring attribute for a map/virtual-address
 * In/out conditions:
 * The mapping must already exist in the pmap.
pmap_change_wiring(pmap, va, wired)
register pmap_t pmap;
struct ia64_lpte *pte;
oldpmap = pmap_switch(pmap);
pte = pmap_find_vhpt(va);
KASSERT(pte != NULL, ("pte"));
if (wired && !pmap_wired(pte)) {
pmap->pm_stats.wired_count++;
pmap_set_wired(pte);
} else if (!wired && pmap_wired(pte)) {
pmap->pm_stats.wired_count--;
pmap_clear_wired(pte);
pmap_switch(oldpmap);
 * Copy the range specified by src_addr/len
 * from the source map to the range dst_addr/len
 * in the destination map.
 * This routine is only advisory and need not do anything.
pmap_copy(pmap_t dst_pmap, pmap_t src_pmap, vm_offset_t dst_addr, vm_size_t len,
vm_offset_t src_addr)
 * pmap_zero_page zeros the specified hardware page by
 * mapping it into virtual memory and using bzero to clear
pmap_zero_page(vm_page_t m)
vm_offset_t va = IA64_PHYS_TO_RR7(VM_PAGE_TO_PHYS(m));
bzero((caddr_t) va, PAGE_SIZE);
 * pmap_zero_page_area zeros the specified hardware page by
 * mapping it into virtual memory and using bzero to clear
 * off and size must reside within a single page.
pmap_zero_page_area(vm_page_t m, int off, int size)
vm_offset_t va = IA64_PHYS_TO_RR7(VM_PAGE_TO_PHYS(m));
bzero((char *)(caddr_t)va + off, size);
 * pmap_zero_page_idle zeros the specified hardware page by
 * mapping it into virtual memory and using bzero to clear
 * its contents. This is for the vm_idlezero process.
pmap_zero_page_idle(vm_page_t m)
vm_offset_t va = IA64_PHYS_TO_RR7(VM_PAGE_TO_PHYS(m));
bzero((caddr_t) va, PAGE_SIZE);
 * pmap_copy_page copies the specified (machine independent)
 * page by mapping the page into virtual memory and using
 * bcopy to copy the page, one machine dependent page at a
pmap_copy_page(vm_page_t msrc, vm_page_t mdst)
vm_offset_t src = IA64_PHYS_TO_RR7(VM_PAGE_TO_PHYS(msrc));
vm_offset_t dst = IA64_PHYS_TO_RR7(VM_PAGE_TO_PHYS(mdst));
bcopy((caddr_t) src, (caddr_t) dst, PAGE_SIZE);
 * Returns true if the pmap's pv is one of the first
 * 16 pvs linked to from this page. This count may
 * be changed upwards or downwards in the future; it
 * is only necessary that true be returned for a small
 * subset of pmaps for proper page aging.
pmap_page_exists_quick(pmap_t pmap, vm_page_t m)
if (m->flags & PG_FICTITIOUS)
 * Not found, check current mappings returning immediately if found.
mtx_assert(&vm_page_queue_mtx, MA_OWNED);
TAILQ_FOREACH(pv, &m->md.pv_list, pv_list) {
if (pv->pv_pmap == pmap) {
 * pmap_page_wired_mappings:
 * Return the number of managed mappings to the given physical page
pmap_page_wired_mappings(vm_page_t m)
struct ia64_lpte *pte;
pmap_t oldpmap, pmap;
if ((m->flags & PG_FICTITIOUS) != 0)
mtx_assert(&vm_page_queue_mtx, MA_OWNED);
TAILQ_FOREACH(pv, &m->md.pv_list, pv_list) {
oldpmap = pmap_switch(pmap);
pte = pmap_find_vhpt(pv->pv_va);
KASSERT(pte != NULL, ("pte"));
if (pmap_wired(pte))
pmap_switch(oldpmap);
 * Remove all pages from the specified address space;
 * this aids process exit speeds. Also, this code
 * is special cased for current process only, but
 * can have the more generic (and slightly slower)
 * mode enabled. This is much faster than pmap_remove
 * in the case of running down an entire address space.
pmap_remove_pages(pmap_t pmap)
if (pmap != vmspace_pmap(curthread->td_proc->p_vmspace)) {
printf("warning: %s called with non-current pmap\n",
vm_page_lock_queues();
oldpmap = pmap_switch(pmap);
for (pv = TAILQ_FIRST(&pmap->pm_pvlist); pv; pv = npv) {
struct ia64_lpte *pte;
npv = TAILQ_NEXT(pv, pv_plist);
pte = pmap_find_vhpt(pv->pv_va);
KASSERT(pte != NULL, ("pte"));
if (!pmap_wired(pte))
pmap_remove_pte(pmap, pte, pv->pv_va, pv, 1);
pmap_switch(oldpmap);
vm_page_unlock_queues();
 * pmap_ts_referenced:
 * Return a count of reference bits for a page, clearing those bits.
 * It is not necessary for every reference bit to be cleared, but it
 * is necessary that 0 only be returned when there are truly no
 * reference bits set.
 * XXX: The exact number of bits to check and clear is a matter that
 * should be tested and standardized at some point in the future for
 * optimal aging of shared pages.
pmap_ts_referenced(vm_page_t m)
struct ia64_lpte *pte;
if (m->flags & PG_FICTITIOUS)
TAILQ_FOREACH(pv, &m->md.pv_list, pv_list) {
PMAP_LOCK(pv->pv_pmap);
oldpmap = pmap_switch(pv->pv_pmap);
pte = pmap_find_vhpt(pv->pv_va);
KASSERT(pte != NULL, ("pte"));
if (pmap_accessed(pte)) {
pmap_clear_accessed(pte);
pmap_invalidate_page(pv->pv_va);
pmap_switch(oldpmap);
PMAP_UNLOCK(pv->pv_pmap);
 * Return whether or not the specified physical page was modified
 * in any physical maps.
pmap_is_modified(vm_page_t m)
struct ia64_lpte *pte;
if (m->flags & PG_FICTITIOUS)
TAILQ_FOREACH(pv, &m->md.pv_list, pv_list) {
PMAP_LOCK(pv->pv_pmap);
oldpmap = pmap_switch(pv->pv_pmap);
pte = pmap_find_vhpt(pv->pv_va);
pmap_switch(oldpmap);
KASSERT(pte != NULL, ("pte"));
rv = pmap_dirty(pte) ? TRUE : FALSE;
PMAP_UNLOCK(pv->pv_pmap);
 * pmap_is_prefaultable:
 * Return whether or not the specified virtual address is eligible
pmap_is_prefaultable(pmap_t pmap, vm_offset_t addr)
struct ia64_lpte *pte;
pte = pmap_find_vhpt(addr);
if (pte != NULL && pmap_present(pte))
 * Clear the modify bits on the specified physical page.
pmap_clear_modify(vm_page_t m)
struct ia64_lpte *pte;
if (m->flags & PG_FICTITIOUS)
TAILQ_FOREACH(pv, &m->md.pv_list, pv_list) {
PMAP_LOCK(pv->pv_pmap);
oldpmap = pmap_switch(pv->pv_pmap);
pte = pmap_find_vhpt(pv->pv_va);
KASSERT(pte != NULL, ("pte"));
if (pmap_dirty(pte)) {
pmap_clear_dirty(pte);
pmap_invalidate_page(pv->pv_va);
pmap_switch(oldpmap);
PMAP_UNLOCK(pv->pv_pmap);
 * pmap_clear_reference:
 * Clear the reference bit on the specified physical page.
pmap_clear_reference(vm_page_t m)
struct ia64_lpte *pte;
if (m->flags & PG_FICTITIOUS)
TAILQ_FOREACH(pv, &m->md.pv_list, pv_list) {
PMAP_LOCK(pv->pv_pmap);
oldpmap = pmap_switch(pv->pv_pmap);
pte = pmap_find_vhpt(pv->pv_va);
KASSERT(pte != NULL, ("pte"));
if (pmap_accessed(pte)) {
pmap_clear_accessed(pte);
pmap_invalidate_page(pv->pv_va);
pmap_switch(oldpmap);
PMAP_UNLOCK(pv->pv_pmap);
 * Clear the write and modified bits in each of the given page's mappings.
pmap_remove_write(vm_page_t m)
struct ia64_lpte *pte;
pmap_t oldpmap, pmap;
mtx_assert(&vm_page_queue_mtx, MA_OWNED);
if ((m->flags & PG_FICTITIOUS) != 0 ||
(m->flags & PG_WRITEABLE) == 0)
TAILQ_FOREACH(pv, &m->md.pv_list, pv_list) {
oldpmap = pmap_switch(pmap);
pte = pmap_find_vhpt(pv->pv_va);
KASSERT(pte != NULL, ("pte"));
prot = pmap_prot(pte);
if ((prot & VM_PROT_WRITE) != 0) {
if (pmap_dirty(pte)) {
pmap_clear_dirty(pte);
prot &= ~VM_PROT_WRITE;
pmap_pte_prot(pmap, pte, prot);
pmap_invalidate_page(pv->pv_va);
pmap_switch(oldpmap);
vm_page_flag_clear(m, PG_WRITEABLE);
 * Map a set of physical memory pages into the kernel virtual
 * address space. Return a pointer to where it is mapped. This
 * routine is intended to be used for mapping device memory,
pmap_mapdev(vm_paddr_t pa, vm_size_t size)
va = pa | IA64_RR_BASE(6);
return ((void *)va);
 * 'Unmap' a range mapped by pmap_mapdev().
pmap_unmapdev(vm_offset_t va, vm_size_t size)
 * perform the pmap work for mincore
pmap_mincore(pmap_t pmap, vm_offset_t addr)
struct ia64_lpte *pte, tpte;
oldpmap = pmap_switch(pmap);
pte = pmap_find_vhpt(addr);
pmap_switch(oldpmap);
if (pmap_present(pte)) {
val = MINCORE_INCORE;
if (!pmap_managed(pte))
m = PHYS_TO_VM_PAGE(pa);
if (pmap_dirty(pte))
val |= MINCORE_MODIFIED|MINCORE_MODIFIED_OTHER;
 * Modified by someone
vm_page_lock_queues();
if (pmap_is_modified(m))
val |= MINCORE_MODIFIED_OTHER;
vm_page_unlock_queues();
if (pmap_accessed(pte))
val |= MINCORE_REFERENCED|MINCORE_REFERENCED_OTHER;
 * Referenced by someone
vm_page_lock_queues();
if (pmap_ts_referenced(m)) {
val |= MINCORE_REFERENCED_OTHER;
vm_page_flag_set(m, PG_REFERENCED);
vm_page_unlock_queues();
pmap_activate(struct thread *td)
pmap_switch(vmspace_pmap(td->td_proc->p_vmspace));
pmap_switch(pmap_t pm)
prevpm = PCPU_GET(md.current_pmap);
for (i = 0; i < 5; i++) {
ia64_set_rr(IA64_RR_BASE(i),
(i << 8)|(PAGE_SHIFT << 2)|1);
for (i = 0; i < 5; i++) {
ia64_set_rr(IA64_RR_BASE(i),
(pm->pm_rid[i] << 8)|(PAGE_SHIFT << 2)|1);
PCPU_SET(md.current_pmap, pm);
pmap_sync_icache(pmap_t pm, vm_offset_t va, vm_size_t sz)
struct ia64_lpte *pte;
sz = (sz + 31) & ~31;
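/*
 * Round sz up to a multiple of 32 bytes, presumably the cache-flush
 * granularity this code assumes for ia64_sync_icache().
 */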
oldpm = pmap_switch(pm);
lim = round_page(va);
len = MIN(lim - va, sz);
pte = pmap_find_vhpt(va);
if (pte != NULL && pmap_present(pte))
ia64_sync_icache(va, len);
 * Increase the starting virtual address of the given mapping if a
 * different alignment might result in more superpage mappings.
pmap_align_superpage(vm_object_t object, vm_ooffset_t offset,
vm_offset_t *addr, vm_size_t size)
#include "opt_ddb.h"
#include <ddb/ddb.h>
static const char* psnames[] = {
"1B", "2B", "4B", "8B",
"16B", "32B", "64B", "128B",
"256B", "512B", "1K", "2K",
"4K", "8K", "16K", "32K",
"64K", "128K", "256K", "512K",
"1M", "2M", "4M", "8M",
"16M", "32M", "64M", "128M",
"256M", "512M", "1G", "2G"
struct ia64_pal_result res;
static const char *manames[] = {
"WB", "bad", "bad", "bad",
"UC", "UCE", "WC", "NaT",
res = ia64_call_pal_static(PAL_VM_SUMMARY, 0, 0, 0);
if (res.pal_status != 0) {
db_printf("Can't get VM summary\n");
maxtr = (res.pal_result[0] >> 40) & 0xff;
maxtr = (res.pal_result[0] >> 32) & 0xff;
db_printf("V RID Virtual Page Physical Page PgSz ED AR PL D A MA P KEY\n");
for (i = 0; i <= maxtr; i++) {
bzero(&buf, sizeof(buf));
res = ia64_call_pal_stacked_physical
(PAL_VM_TR_READ, i, type, ia64_tpa((uint64_t) &buf));
if (!(res.pal_result[0] & 1))
buf.pte &= ~PTE_AR_MASK;
if (!(res.pal_result[0] & 2))
buf.pte &= ~PTE_PL_MASK;
if (!(res.pal_result[0] & 4))
pmap_clear_dirty(&buf);
if (!(res.pal_result[0] & 8))
buf.pte &= ~PTE_MA_MASK;
db_printf("%d %06x %013lx %013lx %4s %d %d %d %d %d %-3s "
"%d %06x\n", (int)buf.ifa & 1, buf.rr.rr_rid,
buf.ifa >> 12, (buf.pte & PTE_PPN_MASK) >> 12,
psnames[(buf.itir & ITIR_PS_MASK) >> 2],
(buf.pte & PTE_ED) ? 1 : 0,
(int)(buf.pte & PTE_AR_MASK) >> 9,
(int)(buf.pte & PTE_PL_MASK) >> 7,
(pmap_dirty(&buf)) ? 1 : 0,
(pmap_accessed(&buf)) ? 1 : 0,
manames[(buf.pte & PTE_MA_MASK) >> 2],
(pmap_present(&buf)) ? 1 : 0,
(int)((buf.itir & ITIR_KEY_MASK) >> 8));
DB_COMMAND(itr, db_itr)
DB_COMMAND(dtr, db_dtr)
DB_COMMAND(rr, db_rr)
printf("RR RID PgSz VE\n");
for (i = 0; i < 8; i++) {
__asm __volatile ("mov %0=rr[%1]"
: "r"(IA64_RR_BASE(i)));
*(uint64_t *) &rr = t;
printf("%d %06x %4s %d\n",
i, rr.rr_rid, psnames[rr.rr_ps], rr.rr_ve);
DB_COMMAND(thash, db_thash)
db_printf("%p\n", (void *) ia64_thash(addr));
DB_COMMAND(ttag, db_ttag)
db_printf("0x%lx\n", ia64_ttag(addr));
DB_COMMAND(kpte, db_kpte)
struct ia64_lpte *pte;
db_printf("usage: kpte <kva>\n");
if (addr < VM_MIN_KERNEL_ADDRESS) {
db_printf("kpte: error: invalid <kva>\n");
pte = pmap_find_kpte(addr);
db_printf("kpte at %p:\n", pte);
db_printf(" pte =%016lx\n", pte->pte);
db_printf(" itir =%016lx\n", pte->itir);
db_printf(" tag =%016lx\n", pte->tag);
db_printf(" chain=%016lx\n", pte->chain);