2 * Copyright (c) 1991 Regents of the University of California.
4 * Copyright (c) 1994 John S. Dyson
6 * Copyright (c) 1994 David Greenman
8 * Copyright (c) 1998,2000 Doug Rabson
11 * This code is derived from software contributed to Berkeley by
12 * the Systems Programming Group of the University of Utah Computer
13 * Science Department and William Jolitz of UUNET Technologies Inc.
15 * Redistribution and use in source and binary forms, with or without
16 * modification, are permitted provided that the following conditions
18 * 1. Redistributions of source code must retain the above copyright
19 * notice, this list of conditions and the following disclaimer.
20 * 2. Redistributions in binary form must reproduce the above copyright
21 * notice, this list of conditions and the following disclaimer in the
22 * documentation and/or other materials provided with the distribution.
23 * 3. All advertising materials mentioning features or use of this software
24 * must display the following acknowledgement:
25 * This product includes software developed by the University of
26 * California, Berkeley and its contributors.
27 * 4. Neither the name of the University nor the names of its contributors
28 * may be used to endorse or promote products derived from this software
29 * without specific prior written permission.
31 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
32 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
33 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
34 * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
35 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
36 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
37 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
38 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
39 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
40 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
43 * from: @(#)pmap.c 7.7 (Berkeley) 5/12/91
44 * from: i386 Id: pmap.c,v 1.193 1998/04/19 15:22:48 bde Exp
45 * with some ideas from NetBSD's alpha pmap
48 #include <sys/cdefs.h>
49 __FBSDID("$FreeBSD$");
51 #include <sys/param.h>
52 #include <sys/kernel.h>
55 #include <sys/mutex.h>
58 #include <sys/sysctl.h>
59 #include <sys/systm.h>
62 #include <vm/vm_page.h>
63 #include <vm/vm_map.h>
64 #include <vm/vm_object.h>
65 #include <vm/vm_pageout.h>
68 #include <machine/md_var.h>
69 #include <machine/pal.h>
72 * Manages physical address maps.
74 * In addition to hardware address maps, this
75 * module is called upon to provide software-use-only
76 * maps which may or may not be stored in the same
77 * form as hardware maps. These pseudo-maps are
78 * used to store intermediate results from copy
79 * operations to and from address spaces.
81 * Since the information managed by this module is
82 * also stored by the logical address mapping module,
83 * this module may throw away valid virtual-to-physical
84 * mappings at almost any time. However, invalidations
85 * of virtual-to-physical mappings must be done as
88 * In order to cope with hardware architectures which
89 * make virtual-to-physical map invalidates expensive,
90 * this module may delay invalidation or protection-reduction
91 * operations until such time as they are actually
92 * necessary. This module is given full information as
93 * to which processors are currently using which maps,
94 * and to when physical maps must be made correct.
98 * Following the Linux model, region IDs are allocated in groups of
99 * eight so that a single region ID can be used for as many RRs as we
100 * want by encoding the RR number into the low bits of the ID.
102 * We reserve region ID 0 for the kernel and allocate the remaining
103 * IDs for user pmaps.
106 * User virtually mapped
109 * Kernel virtually mapped
112 * Kernel physically mapped uncacheable
115 * Kernel physically mapped cacheable
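*
* (For the two direct-mapped regions, as a reading aid: IA64_PHYS_TO_RR7(pa)
* yields the cacheable kernel alias of physical address pa, and
* pa | IA64_RR_BASE(6) the uncacheable alias used by pmap_mapdev() below.)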
118 /* XXX move to a header. */
119 extern uint64_t ia64_gateway_page[];
121 #ifndef PMAP_SHPGPERPROC
122 #define PMAP_SHPGPERPROC 200
125 #if !defined(DIAGNOSTIC)
126 #define PMAP_INLINE __inline
131 #define pmap_accessed(lpte) ((lpte)->pte & PTE_ACCESSED)
132 #define pmap_dirty(lpte) ((lpte)->pte & PTE_DIRTY)
133 #define pmap_exec(lpte) ((lpte)->pte & PTE_AR_RX)
134 #define pmap_managed(lpte) ((lpte)->pte & PTE_MANAGED)
135 #define pmap_ppn(lpte) ((lpte)->pte & PTE_PPN_MASK)
136 #define pmap_present(lpte) ((lpte)->pte & PTE_PRESENT)
137 #define pmap_prot(lpte) (((lpte)->pte & PTE_PROT_MASK) >> 56)
138 #define pmap_wired(lpte) ((lpte)->pte & PTE_WIRED)
140 #define pmap_clear_accessed(lpte) (lpte)->pte &= ~PTE_ACCESSED
141 #define pmap_clear_dirty(lpte) (lpte)->pte &= ~PTE_DIRTY
142 #define pmap_clear_present(lpte) (lpte)->pte &= ~PTE_PRESENT
143 #define pmap_clear_wired(lpte) (lpte)->pte &= ~PTE_WIRED
145 #define pmap_set_wired(lpte) (lpte)->pte |= PTE_WIRED
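/*
 * The 'pte' word follows the architected long-format PTE layout: present
 * (bit 0), memory attribute (bits 2..4), accessed (5), dirty (6), privilege
 * level (7..8), access rights (9..11), ppn (12..49) and ed (52).  Bits
 * 53..63 are ignored by hardware, and that is where the software-only flags
 * used by the accessors above (PTE_WIRED, PTE_MANAGED and the saved VM
 * protection that pmap_prot() recovers with the '>> 56') live.  Bit
 * positions are quoted from the architecture manual as a reading aid only.
 */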
148 * The VHPT bucket head structure.
157 * Statically allocated kernel pmap
159 struct pmap kernel_pmap_store;
161 vm_offset_t virtual_avail; /* VA of first avail page (after kernel bss) */
162 vm_offset_t virtual_end; /* VA of last avail page (end of kernel AS) */
165 * Kernel virtual memory management.
168 struct ia64_lpte ***ia64_kptdir;
169 #define KPTE_DIR0_INDEX(va) \
170 (((va) >> (3*PAGE_SHIFT-8)) & ((1<<(PAGE_SHIFT-3))-1))
171 #define KPTE_DIR1_INDEX(va) \
172 (((va) >> (2*PAGE_SHIFT-5)) & ((1<<(PAGE_SHIFT-3))-1))
173 #define KPTE_PTE_INDEX(va) \
174 (((va) >> PAGE_SHIFT) & ((1<<(PAGE_SHIFT-5))-1))
175 #define NKPTEPG (PAGE_SIZE / sizeof(struct ia64_lpte))
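/*
 * Illustration, assuming the default 8KB pages (PAGE_SHIFT == 13) and
 * 32-byte PTEs: a leaf page holds NKPTEPG = 256 PTEs and maps 2MB, a
 * first-level directory page holds 1024 leaf pointers and maps 2GB, and
 * ia64_kptdir holds 1024 directory pointers for 2TB of kernel VA.  A
 * kernel PTE lives at
 *
 *	&ia64_kptdir[KPTE_DIR0_INDEX(va)][KPTE_DIR1_INDEX(va)][KPTE_PTE_INDEX(va)]
 *
 * which is what pmap_find_kpte() computes.
 */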
177 vm_offset_t kernel_vm_end;
179 /* Values for ptc.e. XXX values for SKI. */
180 static uint64_t pmap_ptc_e_base = 0x100000000;
181 static uint64_t pmap_ptc_e_count1 = 3;
182 static uint64_t pmap_ptc_e_count2 = 2;
183 static uint64_t pmap_ptc_e_stride1 = 0x2000;
184 static uint64_t pmap_ptc_e_stride2 = 0x100000000;
185 struct mtx pmap_ptcmutex;
188 * Data for the RID allocator
190 static int pmap_ridcount;
191 static int pmap_rididx;
192 static int pmap_ridmapsz;
193 static int pmap_ridmax;
194 static uint64_t *pmap_ridmap;
195 struct mtx pmap_ridmutex;
198 * Data for the pv entry allocation mechanism
200 static uma_zone_t pvzone;
201 static int pv_entry_count = 0, pv_entry_max = 0, pv_entry_high_water = 0;
204 * Data for allocating PTEs for user processes.
206 static uma_zone_t ptezone;
209 * Virtual Hash Page Table (VHPT) data.
211 /* SYSCTL_DECL(_machdep); */
212 SYSCTL_NODE(_machdep, OID_AUTO, vhpt, CTLFLAG_RD, 0, "");
214 struct ia64_bucket *pmap_vhpt_bucket;
216 int pmap_vhpt_nbuckets;
217 SYSCTL_INT(_machdep_vhpt, OID_AUTO, nbuckets, CTLFLAG_RD,
218 &pmap_vhpt_nbuckets, 0, "");
220 int pmap_vhpt_log2size = 0;
221 TUNABLE_INT("machdep.vhpt.log2size", &pmap_vhpt_log2size);
222 SYSCTL_INT(_machdep_vhpt, OID_AUTO, log2size, CTLFLAG_RD,
223 &pmap_vhpt_log2size, 0, "");
225 static int pmap_vhpt_inserts;
226 SYSCTL_INT(_machdep_vhpt, OID_AUTO, inserts, CTLFLAG_RD,
227 &pmap_vhpt_inserts, 0, "");
229 static int pmap_vhpt_population(SYSCTL_HANDLER_ARGS);
230 SYSCTL_PROC(_machdep_vhpt, OID_AUTO, population, CTLTYPE_INT | CTLFLAG_RD,
231 NULL, 0, pmap_vhpt_population, "I", "");
233 static struct ia64_lpte *pmap_find_vhpt(vm_offset_t va);
235 static PMAP_INLINE void free_pv_entry(pv_entry_t pv);
236 static pv_entry_t get_pv_entry(pmap_t locked_pmap);
238 static void pmap_enter_quick_locked(pmap_t pmap, vm_offset_t va,
239 vm_page_t m, vm_prot_t prot);
240 static void pmap_free_pte(struct ia64_lpte *pte, vm_offset_t va);
241 static void pmap_invalidate_all(void);
242 static int pmap_remove_pte(pmap_t pmap, struct ia64_lpte *pte,
243 vm_offset_t va, pv_entry_t pv, int freepte);
244 static int pmap_remove_vhpt(vm_offset_t va);
245 static boolean_t pmap_try_insert_pv_entry(pmap_t pmap, vm_offset_t va,
249 pmap_steal_memory(vm_size_t size)
254 size = round_page(size);
256 bank_size = phys_avail[1] - phys_avail[0];
257 while (size > bank_size) {
259 for (i = 0; phys_avail[i+2]; i+= 2) {
260 phys_avail[i] = phys_avail[i+2];
261 phys_avail[i+1] = phys_avail[i+3];
266 panic("pmap_steal_memory: out of memory");
267 bank_size = phys_avail[1] - phys_avail[0];
271 phys_avail[0] += size;
273 va = IA64_PHYS_TO_RR7(pa);
274 bzero((caddr_t) va, size);
279 pmap_initialize_vhpt(vm_offset_t vhpt)
281 struct ia64_lpte *pte;
284 pte = (struct ia64_lpte *)vhpt;
285 for (i = 0; i < pmap_vhpt_nbuckets; i++) {
288 pte[i].tag = 1UL << 63; /* Invalid tag */
289 pte[i].chain = (uintptr_t)(pmap_vhpt_bucket + i);
294 MALLOC_DECLARE(M_SMP);
297 pmap_alloc_vhpt(void)
302 size = 1UL << pmap_vhpt_log2size;
303 vhpt = (uintptr_t)contigmalloc(size, M_SMP, 0, 0UL, ~0UL, size, 0UL);
305 vhpt = IA64_PHYS_TO_RR7(ia64_tpa(vhpt));
306 pmap_initialize_vhpt(vhpt);
313 * Bootstrap the system enough to run with virtual memory.
318 struct ia64_pal_result res;
321 int i, j, count, ridbits;
324 * Query the PAL Code to find the loop parameters for the
327 res = ia64_call_pal_static(PAL_PTCE_INFO, 0, 0, 0);
328 if (res.pal_status != 0)
329 panic("Can't configure ptc.e parameters");
330 pmap_ptc_e_base = res.pal_result[0];
331 pmap_ptc_e_count1 = res.pal_result[1] >> 32;
332 pmap_ptc_e_count2 = res.pal_result[1] & ((1L<<32) - 1);
333 pmap_ptc_e_stride1 = res.pal_result[2] >> 32;
334 pmap_ptc_e_stride2 = res.pal_result[2] & ((1L<<32) - 1);
336 printf("ptc.e base=0x%lx, count1=%ld, count2=%ld, "
337 "stride1=0x%lx, stride2=0x%lx\n",
343 mtx_init(&pmap_ptcmutex, "Global PTC lock", NULL, MTX_SPIN);
346 * Setup RIDs. RIDs 0..7 are reserved for the kernel.
348 * We currently need at least 19 bits in the RID because PID_MAX
349 * can only be encoded in 17 bits and we need RIDs for 5 regions
350 * per process. With PID_MAX equalling 99999 this means that we
351 * need to be able to encode 499995 (=5*PID_MAX).
352 * The Itanium processor only has 18 bits and the architected
353 * minimum is exactly that. So, we cannot use a PID based scheme
354 * in those cases. Enter pmap_ridmap...
355 * We should avoid the map when running on a processor that has
356 * implemented enough bits. This means that we should pass the
357 * process/thread ID to pmap. This we currently don't do, so we
358 * use the map anyway. However, we don't want to allocate a map
359 * that is large enough to cover the range dictated by the number
360 * of bits in the RID, because that may result in a RID map of
361 * 2MB in size for a 24-bit RID. A 64KB map is enough.
362 * The bottom line: we create a 32KB map when the processor only
363 * implements 18 bits (or when we can't figure it out). Otherwise
364 * we create a 64KB map.
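*
* (The arithmetic behind those numbers: the map needs one bit per RID, so
* 2^18 RIDs take 2^18/8 = 32KB, 2^19 take 64KB, and a full 24-bit RID
* space would take 2^24/8 = 2MB.)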
366 res = ia64_call_pal_static(PAL_VM_SUMMARY, 0, 0, 0);
367 if (res.pal_status != 0) {
369 printf("Can't read VM Summary - assuming 18 Region ID bits\n");
370 ridbits = 18; /* guaranteed minimum */
372 ridbits = (res.pal_result[1] >> 8) & 0xff;
374 printf("Processor supports %d Region ID bits\n",
380 pmap_ridmax = (1 << ridbits);
381 pmap_ridmapsz = pmap_ridmax / 64;
382 pmap_ridmap = (uint64_t *)pmap_steal_memory(pmap_ridmax / 8);
383 pmap_ridmap[0] |= 0xff;
386 mtx_init(&pmap_ridmutex, "RID allocator lock", NULL, MTX_DEF);
389 * Allocate some memory for initial kernel 'page tables'.
391 ia64_kptdir = (void *)pmap_steal_memory(PAGE_SIZE);
393 kernel_vm_end = VM_MIN_KERNEL_ADDRESS - VM_GATEWAY_SIZE;
395 for (i = 0; phys_avail[i+2]; i+= 2)
399 TUNABLE_INT_FETCH("machdep.vhpt.log2size", &pmap_vhpt_log2size);
400 if (pmap_vhpt_log2size == 0)
401 pmap_vhpt_log2size = 20;
402 else if (pmap_vhpt_log2size < 15)
403 pmap_vhpt_log2size = 15;
404 else if (pmap_vhpt_log2size > 61)
405 pmap_vhpt_log2size = 61;
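/*
 * For illustration: the default log2size of 20 gives a 1MB VHPT, which at
 * 32 bytes per long-format entry comes to 32768 buckets (see the
 * pmap_vhpt_nbuckets computation below).
 */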
408 size = 1UL << pmap_vhpt_log2size;
409 for (i = 0; i < count; i += 2) {
410 base = (phys_avail[i] + size - 1) & ~(size - 1);
411 if (base + size <= phys_avail[i+1])
415 panic("Unable to allocate VHPT");
417 if (base != phys_avail[i]) {
418 /* Split this region. */
419 for (j = count; j > i; j -= 2) {
420 phys_avail[j] = phys_avail[j-2];
421 phys_avail[j+1] = phys_avail[j-2+1];
423 phys_avail[i+1] = base;
424 phys_avail[i+2] = base + size;
426 phys_avail[i] = base + size;
428 base = IA64_PHYS_TO_RR7(base);
429 PCPU_SET(md.vhpt, base);
431 printf("VHPT: address=%#lx, size=%#lx\n", base, size);
433 pmap_vhpt_nbuckets = size / sizeof(struct ia64_lpte);
434 pmap_vhpt_bucket = (void *)pmap_steal_memory(pmap_vhpt_nbuckets *
435 sizeof(struct ia64_bucket));
436 for (i = 0; i < pmap_vhpt_nbuckets; i++) {
437 /* Stolen memory is zeroed. */
438 mtx_init(&pmap_vhpt_bucket[i].mutex, "VHPT bucket lock", NULL,
439 MTX_NOWITNESS | MTX_SPIN);
442 pmap_initialize_vhpt(base);
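/*
 * Program the PTA register.  Per the architected layout (quoted here only
 * as a reading aid): bit 0 enables the VHPT walker, bits 2..7 hold the
 * log2 size of the table, bit 8 selects the long format, and the upper
 * bits hold the size-aligned base -- which is what the expression below
 * assembles.
 */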
444 ia64_set_pta(base + (1 << 8) + (pmap_vhpt_log2size << 2) + 1);
447 virtual_avail = VM_MIN_KERNEL_ADDRESS;
448 virtual_end = VM_MAX_KERNEL_ADDRESS;
451 * Initialize the kernel pmap (which is statically allocated).
453 PMAP_LOCK_INIT(kernel_pmap);
454 for (i = 0; i < 5; i++)
455 kernel_pmap->pm_rid[i] = 0;
456 TAILQ_INIT(&kernel_pmap->pm_pvlist);
457 PCPU_SET(md.current_pmap, kernel_pmap);
460 * Region 5 is mapped via the vhpt.
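* The region register format is: RID in bits 8..31, preferred page size in
* bits 2..7 and the VHPT-walker enable in bit 0, hence the
* (rid << 8) | (ps << 2) | 1 expressions below.  Regions 6 and 7 leave
* bit 0 clear because they are physically mapped and never walked via the
* VHPT.  (Field positions per the architecture manual, noted as a reading
* aid.)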
462 ia64_set_rr(IA64_RR_BASE(5),
463 (5 << 8) | (PAGE_SHIFT << 2) | 1);
466 * Region 6 is direct mapped UC and region 7 is direct mapped
467 * WB. The details of this are controlled by the Alt {I,D}TLB
468 * handlers. Here we just make sure that they have the largest
469 * possible page size to minimise TLB usage.
471 ia64_set_rr(IA64_RR_BASE(6), (6 << 8) | (IA64_ID_PAGE_SHIFT << 2));
472 ia64_set_rr(IA64_RR_BASE(7), (7 << 8) | (IA64_ID_PAGE_SHIFT << 2));
476 * Clear out any random TLB entries left over from booting.
478 pmap_invalidate_all();
484 pmap_vhpt_population(SYSCTL_HANDLER_ARGS)
489 for (i = 0; i < pmap_vhpt_nbuckets; i++)
490 count += pmap_vhpt_bucket[i].length;
492 error = SYSCTL_OUT(req, &count, sizeof(count));
497 * Initialize a vm_page's machine-dependent fields.
500 pmap_page_init(vm_page_t m)
503 TAILQ_INIT(&m->md.pv_list);
504 m->md.pv_list_count = 0;
508 * Initialize the pmap module.
509 * Called by vm_init, to initialize any structures that the pmap
510 * system needs to map virtual memory.
515 int shpgperproc = PMAP_SHPGPERPROC;
518 * Initialize the address space (zone) for the pv entries. Set a
519 * high water mark so that the system can recover from excessive
520 * numbers of pv entries.
522 pvzone = uma_zcreate("PV ENTRY", sizeof(struct pv_entry), NULL, NULL,
523 NULL, NULL, UMA_ALIGN_PTR, UMA_ZONE_VM | UMA_ZONE_NOFREE);
524 TUNABLE_INT_FETCH("vm.pmap.shpgperproc", &shpgperproc);
525 pv_entry_max = shpgperproc * maxproc + cnt.v_page_count;
526 TUNABLE_INT_FETCH("vm.pmap.pv_entries", &pv_entry_max);
527 pv_entry_high_water = 9 * (pv_entry_max / 10);
529 ptezone = uma_zcreate("PT ENTRY", sizeof (struct ia64_lpte),
530 NULL, NULL, NULL, NULL, UMA_ALIGN_PTR, UMA_ZONE_VM|UMA_ZONE_NOFREE);
534 /***************************************************
535 * Manipulate TLBs for a pmap
536 ***************************************************/
539 pmap_invalidate_page(vm_offset_t va)
541 struct ia64_lpte *pte;
547 vhpt_ofs = ia64_thash(va) - PCPU_GET(md.vhpt);
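/*
 * ia64_thash(va) returns this CPU's VHPT slot for va; applying the same
 * offset to every CPU's VHPT base marks the cached translation invalid
 * (tag bit 63) everywhere, after which ptc.ga purges the TLBs under the
 * global PTC lock.
 */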
549 SLIST_FOREACH(pc, &cpuhead, pc_allcpu) {
550 pte = (struct ia64_lpte *)(pc->pc_md.vhpt + vhpt_ofs);
551 atomic_cmpset_64(&pte->tag, tag, 1UL << 63);
554 mtx_lock_spin(&pmap_ptcmutex);
555 ia64_ptc_ga(va, PAGE_SHIFT << 2);
556 mtx_unlock_spin(&pmap_ptcmutex);
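/*
 * Purge the entire local TLB by walking the architected ptc.e sequence:
 * starting at the PAL-supplied base, issue ptc.e count1 x count2 times,
 * stepping by stride2 inside the inner loop and by stride1 after it.
 */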
560 pmap_invalidate_all_1(void *arg)
566 addr = pmap_ptc_e_base;
567 for (i = 0; i < pmap_ptc_e_count1; i++) {
568 for (j = 0; j < pmap_ptc_e_count2; j++) {
570 addr += pmap_ptc_e_stride2;
572 addr += pmap_ptc_e_stride1;
578 pmap_invalidate_all(void)
583 smp_rendezvous(NULL, pmap_invalidate_all_1, NULL, NULL);
587 pmap_invalidate_all_1(NULL);
591 pmap_allocate_rid(void)
596 mtx_lock(&pmap_ridmutex);
597 if (pmap_ridcount == pmap_ridmax)
598 panic("pmap_allocate_rid: All Region IDs used");
600 /* Find an index with a free bit. */
601 while ((bits = pmap_ridmap[pmap_rididx]) == ~0UL) {
603 if (pmap_rididx == pmap_ridmapsz)
606 rid = pmap_rididx * 64;
608 /* Find a free bit. */
615 pmap_ridmap[pmap_rididx] |= bit;
617 mtx_unlock(&pmap_ridmutex);
623 pmap_free_rid(uint32_t rid)
629 bit = ~(1UL << (rid & 63));
631 mtx_lock(&pmap_ridmutex);
632 pmap_ridmap[idx] &= bit;
634 mtx_unlock(&pmap_ridmutex);
637 /***************************************************
638 * Page table page management routines.....
639 ***************************************************/
642 pmap_pinit0(struct pmap *pmap)
644 /* kernel_pmap is the same as any other pmap. */
649 * Initialize a preallocated and zeroed pmap structure,
650 * such as one in a vmspace structure.
653 pmap_pinit(struct pmap *pmap)
657 PMAP_LOCK_INIT(pmap);
658 for (i = 0; i < 5; i++)
659 pmap->pm_rid[i] = pmap_allocate_rid();
660 TAILQ_INIT(&pmap->pm_pvlist);
661 bzero(&pmap->pm_stats, sizeof pmap->pm_stats);
665 /***************************************************
666 * Pmap allocation/deallocation routines.
667 ***************************************************/
670 * Release any resources held by the given physical map.
671 * Called when a pmap initialized by pmap_pinit is being released.
672 * Should only be called if the map contains no valid mappings.
675 pmap_release(pmap_t pmap)
679 for (i = 0; i < 5; i++)
681 pmap_free_rid(pmap->pm_rid[i]);
682 PMAP_LOCK_DESTROY(pmap);
686 * grow the number of kernel page table entries, if needed
689 pmap_growkernel(vm_offset_t addr)
691 struct ia64_lpte **dir1;
692 struct ia64_lpte *leaf;
695 while (kernel_vm_end <= addr) {
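/*
 * ia64_kptdir holds PAGE_SIZE/8 pointers to first-level pages, each of
 * which holds PAGE_SIZE/8 leaf pointers, so the number of page-table
 * pages is bounded by PAGE_SIZE/8 + (PAGE_SIZE/8)^2, i.e. the expression
 * checked here.
 */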
696 if (nkpt == PAGE_SIZE/8 + PAGE_SIZE*PAGE_SIZE/64)
697 panic("%s: out of kernel address space", __func__);
699 dir1 = ia64_kptdir[KPTE_DIR0_INDEX(kernel_vm_end)];
701 nkpg = vm_page_alloc(NULL, nkpt++,
702 VM_ALLOC_NOOBJ|VM_ALLOC_INTERRUPT|VM_ALLOC_WIRED);
704 panic("%s: cannot add dir. page", __func__);
706 dir1 = (struct ia64_lpte **)
707 IA64_PHYS_TO_RR7(VM_PAGE_TO_PHYS(nkpg));
708 bzero(dir1, PAGE_SIZE);
709 ia64_kptdir[KPTE_DIR0_INDEX(kernel_vm_end)] = dir1;
712 nkpg = vm_page_alloc(NULL, nkpt++,
713 VM_ALLOC_NOOBJ|VM_ALLOC_INTERRUPT|VM_ALLOC_WIRED);
715 panic("%s: cannot add PTE page", __func__);
717 leaf = (struct ia64_lpte *)
718 IA64_PHYS_TO_RR7(VM_PAGE_TO_PHYS(nkpg));
719 bzero(leaf, PAGE_SIZE);
720 dir1[KPTE_DIR1_INDEX(kernel_vm_end)] = leaf;
722 kernel_vm_end += PAGE_SIZE * NKPTEPG;
726 /***************************************************
727 * page management routines.
728 ***************************************************/
731 * free the pv_entry back to the free list
733 static PMAP_INLINE void
734 free_pv_entry(pv_entry_t pv)
737 uma_zfree(pvzone, pv);
741 * get a new pv_entry, allocating a block from the system
745 get_pv_entry(pmap_t locked_pmap)
747 static const struct timeval printinterval = { 60, 0 };
748 static struct timeval lastprint;
749 struct vpgqueues *vpq;
750 struct ia64_lpte *pte;
751 pmap_t oldpmap, pmap;
752 pv_entry_t allocated_pv, next_pv, pv;
756 PMAP_LOCK_ASSERT(locked_pmap, MA_OWNED);
757 mtx_assert(&vm_page_queue_mtx, MA_OWNED);
758 allocated_pv = uma_zalloc(pvzone, M_NOWAIT);
759 if (allocated_pv != NULL) {
761 if (pv_entry_count > pv_entry_high_water)
764 return (allocated_pv);
768 * Reclaim pv entries: At first, destroy mappings to inactive
769 * pages. After that, if a pv entry is still needed, destroy
770 * mappings to active pages.
772 if (ratecheck(&lastprint, &printinterval))
773 printf("Approaching the limit on PV entries, "
774 "increase the vm.pmap.shpgperproc tunable.\n");
775 vpq = &vm_page_queues[PQ_INACTIVE];
777 TAILQ_FOREACH(m, &vpq->pl, pageq) {
778 if (m->hold_count || m->busy)
780 TAILQ_FOREACH_SAFE(pv, &m->md.pv_list, pv_list, next_pv) {
783 /* Avoid deadlock and lock recursion. */
784 if (pmap > locked_pmap)
786 else if (pmap != locked_pmap && !PMAP_TRYLOCK(pmap))
788 pmap->pm_stats.resident_count--;
789 oldpmap = pmap_switch(pmap);
790 pte = pmap_find_vhpt(va);
791 KASSERT(pte != NULL, ("pte"));
792 pmap_remove_vhpt(va);
793 pmap_invalidate_page(va);
794 pmap_switch(oldpmap);
795 if (pmap_accessed(pte))
796 vm_page_flag_set(m, PG_REFERENCED);
799 pmap_free_pte(pte, va);
800 TAILQ_REMOVE(&pmap->pm_pvlist, pv, pv_plist);
801 m->md.pv_list_count--;
802 TAILQ_REMOVE(&m->md.pv_list, pv, pv_list);
803 if (TAILQ_EMPTY(&m->md.pv_list))
804 vm_page_flag_clear(m, PG_WRITEABLE);
805 if (pmap != locked_pmap)
807 if (allocated_pv == NULL)
813 if (allocated_pv == NULL) {
814 if (vpq == &vm_page_queues[PQ_INACTIVE]) {
815 vpq = &vm_page_queues[PQ_ACTIVE];
818 panic("get_pv_entry: increase the vm.pmap.shpgperproc tunable");
820 return (allocated_pv);
824 * Conditionally create a pv entry.
827 pmap_try_insert_pv_entry(pmap_t pmap, vm_offset_t va, vm_page_t m)
831 PMAP_LOCK_ASSERT(pmap, MA_OWNED);
832 mtx_assert(&vm_page_queue_mtx, MA_OWNED);
833 if (pv_entry_count < pv_entry_high_water &&
834 (pv = uma_zalloc(pvzone, M_NOWAIT)) != NULL) {
838 TAILQ_INSERT_TAIL(&pmap->pm_pvlist, pv, pv_plist);
839 TAILQ_INSERT_TAIL(&m->md.pv_list, pv, pv_list);
840 m->md.pv_list_count++;
847 * Add an ia64_lpte to the VHPT.
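* ia64_thash(va) hashes va to its VHPT line; that line's chain field points
* at the per-line ia64_bucket, and the new pte is linked (by its physical
* address) at the head of the bucket's collision chain under the bucket
* lock.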
850 pmap_enter_vhpt(struct ia64_lpte *pte, vm_offset_t va)
852 struct ia64_bucket *bckt;
853 struct ia64_lpte *vhpte;
856 /* Can fault, so get it out of the way. */
857 pte_pa = ia64_tpa((vm_offset_t)pte);
859 vhpte = (struct ia64_lpte *)ia64_thash(va);
860 bckt = (struct ia64_bucket *)vhpte->chain;
862 mtx_lock_spin(&bckt->mutex);
863 pte->chain = bckt->chain;
865 bckt->chain = pte_pa;
869 mtx_unlock_spin(&bckt->mutex);
873 * Remove the ia64_lpte matching va from the VHPT. Return zero if it
874 * worked or an appropriate error code otherwise.
877 pmap_remove_vhpt(vm_offset_t va)
879 struct ia64_bucket *bckt;
880 struct ia64_lpte *pte;
881 struct ia64_lpte *lpte;
882 struct ia64_lpte *vhpte;
886 vhpte = (struct ia64_lpte *)ia64_thash(va);
887 bckt = (struct ia64_bucket *)vhpte->chain;
890 mtx_lock_spin(&bckt->mutex);
892 pte = (struct ia64_lpte *)IA64_PHYS_TO_RR7(chain);
893 while (chain != 0 && pte->tag != tag) {
896 pte = (struct ia64_lpte *)IA64_PHYS_TO_RR7(chain);
899 mtx_unlock_spin(&bckt->mutex);
903 /* Snip this pte out of the collision chain. */
905 bckt->chain = pte->chain;
907 lpte->chain = pte->chain;
911 mtx_unlock_spin(&bckt->mutex);
916 * Find the ia64_lpte for the given va, if any.
918 static struct ia64_lpte *
919 pmap_find_vhpt(vm_offset_t va)
921 struct ia64_bucket *bckt;
922 struct ia64_lpte *pte;
926 pte = (struct ia64_lpte *)ia64_thash(va);
927 bckt = (struct ia64_bucket *)pte->chain;
929 mtx_lock_spin(&bckt->mutex);
931 pte = (struct ia64_lpte *)IA64_PHYS_TO_RR7(chain);
932 while (chain != 0 && pte->tag != tag) {
934 pte = (struct ia64_lpte *)IA64_PHYS_TO_RR7(chain);
936 mtx_unlock_spin(&bckt->mutex);
937 return ((chain != 0) ? pte : NULL);
941 * Remove an entry from the list of managed mappings.
944 pmap_remove_entry(pmap_t pmap, vm_page_t m, vm_offset_t va, pv_entry_t pv)
947 if (m->md.pv_list_count < pmap->pm_stats.resident_count) {
948 TAILQ_FOREACH(pv, &m->md.pv_list, pv_list) {
949 if (pmap == pv->pv_pmap && va == pv->pv_va)
953 TAILQ_FOREACH(pv, &pmap->pm_pvlist, pv_plist) {
961 TAILQ_REMOVE(&m->md.pv_list, pv, pv_list);
962 m->md.pv_list_count--;
963 if (TAILQ_FIRST(&m->md.pv_list) == NULL)
964 vm_page_flag_clear(m, PG_WRITEABLE);
966 TAILQ_REMOVE(&pmap->pm_pvlist, pv, pv_plist);
975 * Create a pv entry for page at pa for
979 pmap_insert_entry(pmap_t pmap, vm_offset_t va, vm_page_t m)
983 pv = get_pv_entry(pmap);
987 PMAP_LOCK_ASSERT(pmap, MA_OWNED);
988 mtx_assert(&vm_page_queue_mtx, MA_OWNED);
989 TAILQ_INSERT_TAIL(&pmap->pm_pvlist, pv, pv_plist);
990 TAILQ_INSERT_TAIL(&m->md.pv_list, pv, pv_list);
991 m->md.pv_list_count++;
995 * Routine: pmap_extract
997 * Extract the physical page address associated
998 * with the given map/virtual_address pair.
1001 pmap_extract(pmap_t pmap, vm_offset_t va)
1003 struct ia64_lpte *pte;
1009 oldpmap = pmap_switch(pmap);
1010 pte = pmap_find_vhpt(va);
1011 if (pte != NULL && pmap_present(pte))
1013 pmap_switch(oldpmap);
1019 * Routine: pmap_extract_and_hold
1021 * Atomically extract and hold the physical page
1022 * with the given pmap and virtual address pair
1023 * if that mapping permits the given protection.
1026 pmap_extract_and_hold(pmap_t pmap, vm_offset_t va, vm_prot_t prot)
1028 struct ia64_lpte *pte;
1033 vm_page_lock_queues();
1035 oldpmap = pmap_switch(pmap);
1036 pte = pmap_find_vhpt(va);
1037 if (pte != NULL && pmap_present(pte) &&
1038 (pmap_prot(pte) & prot) == prot) {
1039 m = PHYS_TO_VM_PAGE(pmap_ppn(pte));
1042 vm_page_unlock_queues();
1043 pmap_switch(oldpmap);
1048 /***************************************************
1049 * Low level mapping routines.....
1050 ***************************************************/
1053 * Find the kernel lpte for mapping the given virtual address, which
1054 * must be in the part of region 5 which we can cover with our kernel
1057 static struct ia64_lpte *
1058 pmap_find_kpte(vm_offset_t va)
1060 struct ia64_lpte **dir1;
1061 struct ia64_lpte *leaf;
1063 KASSERT((va >> 61) == 5,
1064 ("kernel mapping 0x%lx not in region 5", va));
1065 KASSERT(va < kernel_vm_end,
1066 ("kernel mapping 0x%lx out of range", va));
1068 dir1 = ia64_kptdir[KPTE_DIR0_INDEX(va)];
1069 leaf = dir1[KPTE_DIR1_INDEX(va)];
1070 return (&leaf[KPTE_PTE_INDEX(va)]);
1074 * Find a pte suitable for mapping a user-space address. If one exists
1075 * in the VHPT, that one will be returned, otherwise a new pte is
1078 static struct ia64_lpte *
1079 pmap_find_pte(vm_offset_t va)
1081 struct ia64_lpte *pte;
1083 if (va >= VM_MAXUSER_ADDRESS)
1084 return pmap_find_kpte(va);
1086 pte = pmap_find_vhpt(va);
1088 pte = uma_zalloc(ptezone, M_NOWAIT | M_ZERO);
1089 pte->tag = 1UL << 63;
1095 * Free a pte which is now unused. This simply returns it to the zone
1096 * allocator if it is a user mapping. For kernel mappings, clear the
1097 * valid bit to make it clear that the mapping is not currently used.
1100 pmap_free_pte(struct ia64_lpte *pte, vm_offset_t va)
1102 if (va < VM_MAXUSER_ADDRESS)
1103 uma_zfree(ptezone, pte);
1105 pmap_clear_present(pte);
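/*
 * Encode a VM protection into a PTE.  The VM_PROT_* bits themselves are
 * saved in the high, hardware-ignored bits (the '<< 56' that pmap_prot()
 * reverses), the privilege level is kernel for kernel or no-access
 * mappings and user otherwise, and the access rights come from prot2ar[]
 * indexed by (prot & VM_PROT_ALL) >> 1 -- the read bit is dropped because
 * read permission is always granted.
 */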
1108 static PMAP_INLINE void
1109 pmap_pte_prot(pmap_t pm, struct ia64_lpte *pte, vm_prot_t prot)
1111 static long prot2ar[4] = {
1112 PTE_AR_R, /* VM_PROT_NONE */
1113 PTE_AR_RW, /* VM_PROT_WRITE */
1114 PTE_AR_RX|PTE_ED, /* VM_PROT_EXECUTE */
1115 PTE_AR_RWX|PTE_ED /* VM_PROT_WRITE|VM_PROT_EXECUTE */
1118 pte->pte &= ~(PTE_PROT_MASK | PTE_PL_MASK | PTE_AR_MASK | PTE_ED);
1119 pte->pte |= (uint64_t)(prot & VM_PROT_ALL) << 56;
1120 pte->pte |= (prot == VM_PROT_NONE || pm == kernel_pmap)
1121 ? PTE_PL_KERN : PTE_PL_USER;
1122 pte->pte |= prot2ar[(prot & VM_PROT_ALL) >> 1];
1126 * Set a pte to contain a valid mapping and enter it in the VHPT. If
1127 * the pte was originally valid, then it's assumed to already be in the
1129 * This function does not set the protection bits. It's expected
1130 * that those have been set correctly prior to calling this function.
1133 pmap_set_pte(struct ia64_lpte *pte, vm_offset_t va, vm_offset_t pa,
1134 boolean_t wired, boolean_t managed)
1137 pte->pte &= PTE_PROT_MASK | PTE_PL_MASK | PTE_AR_MASK | PTE_ED;
1138 pte->pte |= PTE_PRESENT | PTE_MA_WB;
1139 pte->pte |= (managed) ? PTE_MANAGED : (PTE_DIRTY | PTE_ACCESSED);
1140 pte->pte |= (wired) ? PTE_WIRED : 0;
1141 pte->pte |= pa & PTE_PPN_MASK;
1143 pte->itir = PAGE_SHIFT << 2;
1145 pte->tag = ia64_ttag(va);
1149 * Remove the (possibly managed) mapping represented by pte from the
1153 pmap_remove_pte(pmap_t pmap, struct ia64_lpte *pte, vm_offset_t va,
1154 pv_entry_t pv, int freepte)
1160 * First remove from the VHPT.
1162 error = pmap_remove_vhpt(va);
1166 pmap_invalidate_page(va);
1168 if (pmap_wired(pte))
1169 pmap->pm_stats.wired_count -= 1;
1171 pmap->pm_stats.resident_count -= 1;
1172 if (pmap_managed(pte)) {
1173 m = PHYS_TO_VM_PAGE(pmap_ppn(pte));
1174 if (pmap_dirty(pte))
1176 if (pmap_accessed(pte))
1177 vm_page_flag_set(m, PG_REFERENCED);
1179 error = pmap_remove_entry(pmap, m, va, pv);
1182 pmap_free_pte(pte, va);
1188 * Extract the physical page address associated with a kernel
1192 pmap_kextract(vm_offset_t va)
1194 struct ia64_lpte *pte;
1197 KASSERT(va >= IA64_RR_BASE(5), ("Must be kernel VA"));
1199 /* Regions 6 and 7 are direct mapped. */
1200 if (va >= IA64_RR_BASE(6))
1201 return (IA64_RR_MASK(va));
1203 /* EPC gateway page? */
1204 gwpage = (vm_offset_t)ia64_get_k5();
1205 if (va >= gwpage && va < gwpage + VM_GATEWAY_SIZE)
1206 return (IA64_RR_MASK((vm_offset_t)ia64_gateway_page));
1208 /* Bail out if the virtual address is beyond our limits. */
1209 if (va >= kernel_vm_end)
1212 pte = pmap_find_kpte(va);
1213 if (!pmap_present(pte))
1215 return (pmap_ppn(pte) | (va & PAGE_MASK));
1219 * Add a list of wired pages to the kva. This routine is only used for
1220 * temporary kernel mappings that do not need to have page modification
1221 * or references recorded. Note that old mappings are simply written
1222 * over. The page is effectively wired, but it's customary to not have
1223 * the PTE reflect that, nor update statistics.
1226 pmap_qenter(vm_offset_t va, vm_page_t *m, int count)
1228 struct ia64_lpte *pte;
1231 for (i = 0; i < count; i++) {
1232 pte = pmap_find_kpte(va);
1233 if (pmap_present(pte))
1234 pmap_invalidate_page(va);
1236 pmap_enter_vhpt(pte, va);
1237 pmap_pte_prot(kernel_pmap, pte, VM_PROT_ALL);
1238 pmap_set_pte(pte, va, VM_PAGE_TO_PHYS(m[i]), FALSE, FALSE);
1244 * this routine jerks page mappings from the
1245 * kernel -- it is meant only for temporary mappings.
1248 pmap_qremove(vm_offset_t va, int count)
1250 struct ia64_lpte *pte;
1253 for (i = 0; i < count; i++) {
1254 pte = pmap_find_kpte(va);
1255 if (pmap_present(pte)) {
1256 pmap_remove_vhpt(va);
1257 pmap_invalidate_page(va);
1258 pmap_clear_present(pte);
1265 * Add a wired page to the kva. As for pmap_qenter(), it's customary
1266 * to not have the PTE reflect that, nor update statistics.
1269 pmap_kenter(vm_offset_t va, vm_offset_t pa)
1271 struct ia64_lpte *pte;
1273 pte = pmap_find_kpte(va);
1274 if (pmap_present(pte))
1275 pmap_invalidate_page(va);
1277 pmap_enter_vhpt(pte, va);
1278 pmap_pte_prot(kernel_pmap, pte, VM_PROT_ALL);
1279 pmap_set_pte(pte, va, pa, FALSE, FALSE);
1283 * Remove a page from the kva
1286 pmap_kremove(vm_offset_t va)
1288 struct ia64_lpte *pte;
1290 pte = pmap_find_kpte(va);
1291 if (pmap_present(pte)) {
1292 pmap_remove_vhpt(va);
1293 pmap_invalidate_page(va);
1294 pmap_clear_present(pte);
1299 * Used to map a range of physical addresses into kernel
1300 * virtual address space.
1302 * The value passed in '*virt' is a suggested virtual address for
1303 * the mapping. Architectures which can support a direct-mapped
1304 * physical to virtual region can return the appropriate address
1305 * within that region, leaving '*virt' unchanged. Other
1306 * architectures should map the pages starting at '*virt' and
1307 * update '*virt' with the first usable address after the mapped
1311 pmap_map(vm_offset_t *virt, vm_offset_t start, vm_offset_t end, int prot)
1313 return IA64_PHYS_TO_RR7(start);
1317 * Remove the given range of addresses from the specified map.
1319 * It is assumed that the start and end are properly
1320 * rounded to the page size.
1323 pmap_remove(pmap_t pmap, vm_offset_t sva, vm_offset_t eva)
1328 struct ia64_lpte *pte;
1330 if (pmap->pm_stats.resident_count == 0)
1333 vm_page_lock_queues();
1335 oldpmap = pmap_switch(pmap);
1338 * Special-case the removal of a single page, a very
1339 * common operation that is easy to short-circuit.
1342 if (sva + PAGE_SIZE == eva) {
1343 pte = pmap_find_vhpt(sva);
1345 pmap_remove_pte(pmap, pte, sva, 0, 1);
1349 if (pmap->pm_stats.resident_count < ((eva - sva) >> PAGE_SHIFT)) {
1350 TAILQ_FOREACH_SAFE(pv, &pmap->pm_pvlist, pv_plist, npv) {
1352 if (va >= sva && va < eva) {
1353 pte = pmap_find_vhpt(va);
1354 KASSERT(pte != NULL, ("pte"));
1355 pmap_remove_pte(pmap, pte, va, pv, 1);
1359 for (va = sva; va < eva; va += PAGE_SIZE) {
1360 pte = pmap_find_vhpt(va);
1362 pmap_remove_pte(pmap, pte, va, 0, 1);
1367 vm_page_unlock_queues();
1368 pmap_switch(oldpmap);
1373 * Routine: pmap_remove_all
1375 * Removes this physical page from
1376 * all physical maps in which it resides.
1377 * Reflects back modify bits to the pager.
1380 * Original versions of this routine were very
1381 * inefficient because they iteratively called
1382 * pmap_remove (slow...)
1386 pmap_remove_all(vm_page_t m)
1391 #if defined(DIAGNOSTIC)
1393 * XXX This makes pmap_remove_all() illegal for non-managed pages!
1395 if (m->flags & PG_FICTITIOUS) {
1396 panic("pmap_remove_all: illegal for unmanaged page, va: 0x%lx", VM_PAGE_TO_PHYS(m));
1399 mtx_assert(&vm_page_queue_mtx, MA_OWNED);
1400 while ((pv = TAILQ_FIRST(&m->md.pv_list)) != NULL) {
1401 struct ia64_lpte *pte;
1402 pmap_t pmap = pv->pv_pmap;
1403 vm_offset_t va = pv->pv_va;
1406 oldpmap = pmap_switch(pmap);
1407 pte = pmap_find_vhpt(va);
1408 KASSERT(pte != NULL, ("pte"));
1409 if (pmap_ppn(pte) != VM_PAGE_TO_PHYS(m))
1410 panic("pmap_remove_all: pv_table for %lx is inconsistent", VM_PAGE_TO_PHYS(m));
1411 pmap_remove_pte(pmap, pte, va, pv, 1);
1412 pmap_switch(oldpmap);
1415 vm_page_flag_clear(m, PG_WRITEABLE);
1419 * Set the physical protection on the
1420 * specified range of this map as requested.
1423 pmap_protect(pmap_t pmap, vm_offset_t sva, vm_offset_t eva, vm_prot_t prot)
1426 struct ia64_lpte *pte;
1428 if ((prot & VM_PROT_READ) == VM_PROT_NONE) {
1429 pmap_remove(pmap, sva, eva);
1433 if ((prot & (VM_PROT_WRITE|VM_PROT_EXECUTE)) ==
1434 (VM_PROT_WRITE|VM_PROT_EXECUTE))
1437 if ((sva & PAGE_MASK) || (eva & PAGE_MASK))
1438 panic("pmap_protect: unaligned addresses");
1440 vm_page_lock_queues();
1442 oldpmap = pmap_switch(pmap);
1443 for ( ; sva < eva; sva += PAGE_SIZE) {
1444 /* If page is invalid, skip this page */
1445 pte = pmap_find_vhpt(sva);
1449 /* If there's no change, skip it too */
1450 if (pmap_prot(pte) == prot)
1453 if (pmap_managed(pte)) {
1454 vm_offset_t pa = pmap_ppn(pte);
1455 vm_page_t m = PHYS_TO_VM_PAGE(pa);
1457 if (pmap_dirty(pte)) {
1459 pmap_clear_dirty(pte);
1462 if (pmap_accessed(pte)) {
1463 vm_page_flag_set(m, PG_REFERENCED);
1464 pmap_clear_accessed(pte);
1468 if (prot & VM_PROT_EXECUTE)
1469 ia64_sync_icache(sva, PAGE_SIZE);
1471 pmap_pte_prot(pmap, pte, prot);
1472 pmap_invalidate_page(sva);
1474 vm_page_unlock_queues();
1475 pmap_switch(oldpmap);
1480 * Insert the given physical page (p) at
1481 * the specified virtual address (v) in the
1482 * target physical map with the protection requested.
1484 * If specified, the page will be wired down, meaning
1485 * that the related pte can not be reclaimed.
1487 * NB: This is the only routine which MAY NOT lazy-evaluate
1488 * or lose information. That is, this routine must actually
1489 * insert this page into the given map NOW.
1492 pmap_enter(pmap_t pmap, vm_offset_t va, vm_prot_t access, vm_page_t m,
1493 vm_prot_t prot, boolean_t wired)
1498 struct ia64_lpte origpte;
1499 struct ia64_lpte *pte;
1500 boolean_t icache_inval, managed;
1502 vm_page_lock_queues();
1504 oldpmap = pmap_switch(pmap);
1508 if (va > VM_MAX_KERNEL_ADDRESS)
1509 panic("pmap_enter: toobig");
1513 * Find (or create) a pte for the given mapping.
1515 while ((pte = pmap_find_pte(va)) == NULL) {
1516 pmap_switch(oldpmap);
1518 vm_page_unlock_queues();
1520 vm_page_lock_queues();
1522 oldpmap = pmap_switch(pmap);
1525 if (!pmap_present(pte)) {
1527 pmap_enter_vhpt(pte, va);
1529 opa = pmap_ppn(pte);
1531 pa = VM_PAGE_TO_PHYS(m);
1533 icache_inval = (prot & VM_PROT_EXECUTE) ? TRUE : FALSE;
1536 * Mapping has not changed, must be protection or wiring change.
1540 * Wiring change, just update stats. We don't worry about
1541 * wiring PT pages as they remain resident as long as there
1542 * are valid mappings in them. Hence, if a user page is wired,
1543 * the PT page will be also.
1545 if (wired && !pmap_wired(&origpte))
1546 pmap->pm_stats.wired_count++;
1547 else if (!wired && pmap_wired(&origpte))
1548 pmap->pm_stats.wired_count--;
1550 managed = (pmap_managed(&origpte)) ? TRUE : FALSE;
1553 * We might be turning off write access to the page,
1554 * so we go ahead and sense modify status. Otherwise,
1555 * we can avoid I-cache invalidation if the page
1556 * already allowed execution.
1558 if (managed && pmap_dirty(&origpte))
1560 else if (pmap_exec(&origpte))
1561 icache_inval = FALSE;
1563 pmap_invalidate_page(va);
1568 * Mapping has changed, invalidate old range and fall
1569 * through to handle validating new mapping.
1572 pmap_remove_pte(pmap, pte, va, 0, 0);
1573 pmap_enter_vhpt(pte, va);
1577 * Enter on the PV list if part of our managed memory.
1579 if ((m->flags & (PG_FICTITIOUS | PG_UNMANAGED)) == 0) {
1580 KASSERT(va < kmi.clean_sva || va >= kmi.clean_eva,
1581 ("pmap_enter: managed mapping within the clean submap"));
1582 pmap_insert_entry(pmap, va, m);
1587 * Increment counters
1589 pmap->pm_stats.resident_count++;
1591 pmap->pm_stats.wired_count++;
1596 * Now validate mapping with desired protection/wiring. This
1597 * adds the pte to the VHPT if necessary.
1599 pmap_pte_prot(pmap, pte, prot);
1600 pmap_set_pte(pte, va, pa, wired, managed);
1602 /* Invalidate the I-cache when needed. */
1604 ia64_sync_icache(va, PAGE_SIZE);
1606 if ((prot & VM_PROT_WRITE) != 0)
1607 vm_page_flag_set(m, PG_WRITEABLE);
1608 vm_page_unlock_queues();
1609 pmap_switch(oldpmap);
1614 * Maps a sequence of resident pages belonging to the same object.
1615 * The sequence begins with the given page m_start. This page is
1616 * mapped at the given virtual address start. Each subsequent page is
1617 * mapped at a virtual address that is offset from start by the same
1618 * amount as the page is offset from m_start within the object. The
1619 * last page in the sequence is the page with the largest offset from
1620 * m_start that can be mapped at a virtual address less than the given
1621 * virtual address end. Not every virtual page between start and end
1622 * is mapped; only those for which a resident page exists with the
1623 * corresponding offset from m_start are mapped.
1626 pmap_enter_object(pmap_t pmap, vm_offset_t start, vm_offset_t end,
1627 vm_page_t m_start, vm_prot_t prot)
1631 vm_pindex_t diff, psize;
1633 VM_OBJECT_LOCK_ASSERT(m_start->object, MA_OWNED);
1634 psize = atop(end - start);
1637 oldpmap = pmap_switch(pmap);
1638 while (m != NULL && (diff = m->pindex - m_start->pindex) < psize) {
1639 pmap_enter_quick_locked(pmap, start + ptoa(diff), m, prot);
1640 m = TAILQ_NEXT(m, listq);
1642 pmap_switch(oldpmap);
1647 * this code makes some *MAJOR* assumptions:
1648 * 1. Current pmap & pmap exists.
1651 * 4. No page table pages.
1652 * but is *MUCH* faster than pmap_enter...
1656 pmap_enter_quick(pmap_t pmap, vm_offset_t va, vm_page_t m, vm_prot_t prot)
1661 oldpmap = pmap_switch(pmap);
1662 pmap_enter_quick_locked(pmap, va, m, prot);
1663 pmap_switch(oldpmap);
1668 pmap_enter_quick_locked(pmap_t pmap, vm_offset_t va, vm_page_t m,
1671 struct ia64_lpte *pte;
1674 KASSERT(va < kmi.clean_sva || va >= kmi.clean_eva ||
1675 (m->flags & (PG_FICTITIOUS | PG_UNMANAGED)) != 0,
1676 ("pmap_enter_quick_locked: managed mapping within the clean submap"));
1677 mtx_assert(&vm_page_queue_mtx, MA_OWNED);
1678 PMAP_LOCK_ASSERT(pmap, MA_OWNED);
1680 if ((pte = pmap_find_pte(va)) == NULL)
1683 if (!pmap_present(pte)) {
1684 /* Enter on the PV list if the page is managed. */
1685 if ((m->flags & (PG_FICTITIOUS | PG_UNMANAGED)) == 0) {
1686 if (!pmap_try_insert_pv_entry(pmap, va, m)) {
1687 pmap_free_pte(pte, va);
1694 /* Increment counters. */
1695 pmap->pm_stats.resident_count++;
1697 /* Initialise with R/O protection and enter into VHPT. */
1698 pmap_enter_vhpt(pte, va);
1699 pmap_pte_prot(pmap, pte,
1700 prot & (VM_PROT_READ | VM_PROT_EXECUTE));
1701 pmap_set_pte(pte, va, VM_PAGE_TO_PHYS(m), FALSE, managed);
1703 if (prot & VM_PROT_EXECUTE)
1704 ia64_sync_icache(va, PAGE_SIZE);
1709 * pmap_object_init_pt preloads the ptes for a given object
1710 * into the specified pmap. This eliminates the blast of soft
1711 * faults on process startup and immediately after an mmap.
1714 pmap_object_init_pt(pmap_t pmap, vm_offset_t addr,
1715 vm_object_t object, vm_pindex_t pindex,
1719 VM_OBJECT_LOCK_ASSERT(object, MA_OWNED);
1720 KASSERT(object->type == OBJT_DEVICE || object->type == OBJT_SG,
1721 ("pmap_object_init_pt: non-device object"));
1725 * Routine: pmap_change_wiring
1726 * Function: Change the wiring attribute for a map/virtual-address
1728 * In/out conditions:
1729 * The mapping must already exist in the pmap.
1732 pmap_change_wiring(pmap_t pmap, vm_offset_t va, boolean_t wired)
1738 struct ia64_lpte *pte;
1741 oldpmap = pmap_switch(pmap);
1743 pte = pmap_find_vhpt(va);
1744 KASSERT(pte != NULL, ("pte"));
1745 if (wired && !pmap_wired(pte)) {
1746 pmap->pm_stats.wired_count++;
1747 pmap_set_wired(pte);
1748 } else if (!wired && pmap_wired(pte)) {
1749 pmap->pm_stats.wired_count--;
1750 pmap_clear_wired(pte);
1753 pmap_switch(oldpmap);
1760 * Copy the range specified by src_addr/len
1761 * from the source map to the range dst_addr/len
1762 * in the destination map.
1764 * This routine is only advisory and need not do anything.
1768 pmap_copy(pmap_t dst_pmap, pmap_t src_pmap, vm_offset_t dst_addr, vm_size_t len,
1769 vm_offset_t src_addr)
1775 * pmap_zero_page zeros the specified hardware page by
1776 * mapping it into virtual memory and using bzero to clear
1781 pmap_zero_page(vm_page_t m)
1783 vm_offset_t va = IA64_PHYS_TO_RR7(VM_PAGE_TO_PHYS(m));
1784 bzero((caddr_t) va, PAGE_SIZE);
1789 * pmap_zero_page_area zeros the specified hardware page by
1790 * mapping it into virtual memory and using bzero to clear
1793 * off and size must reside within a single page.
1797 pmap_zero_page_area(vm_page_t m, int off, int size)
1799 vm_offset_t va = IA64_PHYS_TO_RR7(VM_PAGE_TO_PHYS(m));
1800 bzero((char *)(caddr_t)va + off, size);
1805 * pmap_zero_page_idle zeros the specified hardware page by
1806 * mapping it into virtual memory and using bzero to clear
1807 * its contents. This is for the vm_idlezero process.
1811 pmap_zero_page_idle(vm_page_t m)
1813 vm_offset_t va = IA64_PHYS_TO_RR7(VM_PAGE_TO_PHYS(m));
1814 bzero((caddr_t) va, PAGE_SIZE);
1819 * pmap_copy_page copies the specified (machine independent)
1820 * page by mapping the page into virtual memory and using
1821 * bcopy to copy the page, one machine dependent page at a
1825 pmap_copy_page(vm_page_t msrc, vm_page_t mdst)
1827 vm_offset_t src = IA64_PHYS_TO_RR7(VM_PAGE_TO_PHYS(msrc));
1828 vm_offset_t dst = IA64_PHYS_TO_RR7(VM_PAGE_TO_PHYS(mdst));
1829 bcopy((caddr_t) src, (caddr_t) dst, PAGE_SIZE);
1833 * Returns true if the pmap's pv is one of the first
1834 * 16 pvs linked to from this page. This count may
1835 * be changed upwards or downwards in the future; it
1836 * is only necessary that true be returned for a small
1837 * subset of pmaps for proper page aging.
1840 pmap_page_exists_quick(pmap_t pmap, vm_page_t m)
1845 if (m->flags & PG_FICTITIOUS)
1849 * Not found, check current mappings returning immediately if found.
1851 mtx_assert(&vm_page_queue_mtx, MA_OWNED);
1852 TAILQ_FOREACH(pv, &m->md.pv_list, pv_list) {
1853 if (pv->pv_pmap == pmap) {
1864 * pmap_page_wired_mappings:
1866 * Return the number of managed mappings to the given physical page
1870 pmap_page_wired_mappings(vm_page_t m)
1872 struct ia64_lpte *pte;
1873 pmap_t oldpmap, pmap;
1878 if ((m->flags & PG_FICTITIOUS) != 0)
1880 mtx_assert(&vm_page_queue_mtx, MA_OWNED);
1881 TAILQ_FOREACH(pv, &m->md.pv_list, pv_list) {
1884 oldpmap = pmap_switch(pmap);
1885 pte = pmap_find_vhpt(pv->pv_va);
1886 KASSERT(pte != NULL, ("pte"));
1887 if (pmap_wired(pte))
1889 pmap_switch(oldpmap);
1896 * Remove all pages from the specified address space;
1897 * this aids process exit speed. Also, this code
1898 * is special-cased for the current process only, but
1899 * can have the more generic (and slightly slower)
1900 * mode enabled. This is much faster than pmap_remove
1901 * in the case of running down an entire address space.
1904 pmap_remove_pages(pmap_t pmap)
1909 if (pmap != vmspace_pmap(curthread->td_proc->p_vmspace)) {
1910 printf("warning: %s called with non-current pmap\n",
1915 vm_page_lock_queues();
1917 oldpmap = pmap_switch(pmap);
1919 for (pv = TAILQ_FIRST(&pmap->pm_pvlist); pv; pv = npv) {
1920 struct ia64_lpte *pte;
1922 npv = TAILQ_NEXT(pv, pv_plist);
1924 pte = pmap_find_vhpt(pv->pv_va);
1925 KASSERT(pte != NULL, ("pte"));
1926 if (!pmap_wired(pte))
1927 pmap_remove_pte(pmap, pte, pv->pv_va, pv, 1);
1930 pmap_switch(oldpmap);
1932 vm_page_unlock_queues();
1936 * pmap_ts_referenced:
1938 * Return a count of reference bits for a page, clearing those bits.
1939 * It is not necessary for every reference bit to be cleared, but it
1940 * is necessary that 0 only be returned when there are truly no
1941 * reference bits set.
1943 * XXX: The exact number of bits to check and clear is a matter that
1944 * should be tested and standardized at some point in the future for
1945 * optimal aging of shared pages.
1948 pmap_ts_referenced(vm_page_t m)
1950 struct ia64_lpte *pte;
1955 if (m->flags & PG_FICTITIOUS)
1958 TAILQ_FOREACH(pv, &m->md.pv_list, pv_list) {
1959 PMAP_LOCK(pv->pv_pmap);
1960 oldpmap = pmap_switch(pv->pv_pmap);
1961 pte = pmap_find_vhpt(pv->pv_va);
1962 KASSERT(pte != NULL, ("pte"));
1963 if (pmap_accessed(pte)) {
1965 pmap_clear_accessed(pte);
1966 pmap_invalidate_page(pv->pv_va);
1968 pmap_switch(oldpmap);
1969 PMAP_UNLOCK(pv->pv_pmap);
1978 * Return whether or not the specified physical page was modified
1979 * in any physical maps.
1982 pmap_is_modified(vm_page_t m)
1984 struct ia64_lpte *pte;
1990 if (m->flags & PG_FICTITIOUS)
1993 TAILQ_FOREACH(pv, &m->md.pv_list, pv_list) {
1994 PMAP_LOCK(pv->pv_pmap);
1995 oldpmap = pmap_switch(pv->pv_pmap);
1996 pte = pmap_find_vhpt(pv->pv_va);
1997 pmap_switch(oldpmap);
1998 KASSERT(pte != NULL, ("pte"));
1999 rv = pmap_dirty(pte) ? TRUE : FALSE;
2000 PMAP_UNLOCK(pv->pv_pmap);
2009 * pmap_is_prefaultable:
2011 * Return whether or not the specified virtual address is eligible
2015 pmap_is_prefaultable(pmap_t pmap, vm_offset_t addr)
2017 struct ia64_lpte *pte;
2019 pte = pmap_find_vhpt(addr);
2020 if (pte != NULL && pmap_present(pte))
2026 * Clear the modify bits on the specified physical page.
2029 pmap_clear_modify(vm_page_t m)
2031 struct ia64_lpte *pte;
2035 if (m->flags & PG_FICTITIOUS)
2038 TAILQ_FOREACH(pv, &m->md.pv_list, pv_list) {
2039 PMAP_LOCK(pv->pv_pmap);
2040 oldpmap = pmap_switch(pv->pv_pmap);
2041 pte = pmap_find_vhpt(pv->pv_va);
2042 KASSERT(pte != NULL, ("pte"));
2043 if (pmap_dirty(pte)) {
2044 pmap_clear_dirty(pte);
2045 pmap_invalidate_page(pv->pv_va);
2047 pmap_switch(oldpmap);
2048 PMAP_UNLOCK(pv->pv_pmap);
2053 * pmap_clear_reference:
2055 * Clear the reference bit on the specified physical page.
2058 pmap_clear_reference(vm_page_t m)
2060 struct ia64_lpte *pte;
2064 if (m->flags & PG_FICTITIOUS)
2067 TAILQ_FOREACH(pv, &m->md.pv_list, pv_list) {
2068 PMAP_LOCK(pv->pv_pmap);
2069 oldpmap = pmap_switch(pv->pv_pmap);
2070 pte = pmap_find_vhpt(pv->pv_va);
2071 KASSERT(pte != NULL, ("pte"));
2072 if (pmap_accessed(pte)) {
2073 pmap_clear_accessed(pte);
2074 pmap_invalidate_page(pv->pv_va);
2076 pmap_switch(oldpmap);
2077 PMAP_UNLOCK(pv->pv_pmap);
2082 * Clear the write and modified bits in each of the given page's mappings.
2085 pmap_remove_write(vm_page_t m)
2087 struct ia64_lpte *pte;
2088 pmap_t oldpmap, pmap;
2092 mtx_assert(&vm_page_queue_mtx, MA_OWNED);
2093 if ((m->flags & PG_FICTITIOUS) != 0 ||
2094 (m->flags & PG_WRITEABLE) == 0)
2096 TAILQ_FOREACH(pv, &m->md.pv_list, pv_list) {
2099 oldpmap = pmap_switch(pmap);
2100 pte = pmap_find_vhpt(pv->pv_va);
2101 KASSERT(pte != NULL, ("pte"));
2102 prot = pmap_prot(pte);
2103 if ((prot & VM_PROT_WRITE) != 0) {
2104 if (pmap_dirty(pte)) {
2106 pmap_clear_dirty(pte);
2108 prot &= ~VM_PROT_WRITE;
2109 pmap_pte_prot(pmap, pte, prot);
2110 pmap_invalidate_page(pv->pv_va);
2112 pmap_switch(oldpmap);
2115 vm_page_flag_clear(m, PG_WRITEABLE);
2119 * Map a set of physical memory pages into the kernel virtual
2120 * address space. Return a pointer to where it is mapped. This
2121 * routine is intended to be used for mapping device memory,
2125 pmap_mapdev(vm_paddr_t pa, vm_size_t size)
2129 va = pa | IA64_RR_BASE(6);
2130 return ((void *)va);
2134 * 'Unmap' a range mapped by pmap_mapdev().
2137 pmap_unmapdev(vm_offset_t va, vm_size_t size)
2142 * perform the pmap work for mincore
2145 pmap_mincore(pmap_t pmap, vm_offset_t addr)
2148 struct ia64_lpte *pte, tpte;
2152 oldpmap = pmap_switch(pmap);
2153 pte = pmap_find_vhpt(addr);
2158 pmap_switch(oldpmap);
2164 if (pmap_present(pte)) {
2168 val = MINCORE_INCORE;
2169 if (!pmap_managed(pte))
2174 m = PHYS_TO_VM_PAGE(pa);
2179 if (pmap_dirty(pte))
2180 val |= MINCORE_MODIFIED|MINCORE_MODIFIED_OTHER;
2183 * Modified by someone
2185 vm_page_lock_queues();
2186 if (pmap_is_modified(m))
2187 val |= MINCORE_MODIFIED_OTHER;
2188 vm_page_unlock_queues();
2193 if (pmap_accessed(pte))
2194 val |= MINCORE_REFERENCED|MINCORE_REFERENCED_OTHER;
2197 * Referenced by someone
2199 vm_page_lock_queues();
2200 if (pmap_ts_referenced(m)) {
2201 val |= MINCORE_REFERENCED_OTHER;
2202 vm_page_flag_set(m, PG_REFERENCED);
2204 vm_page_unlock_queues();
2211 pmap_activate(struct thread *td)
2213 pmap_switch(vmspace_pmap(td->td_proc->p_vmspace));
2217 pmap_switch(pmap_t pm)
2223 prevpm = PCPU_GET(md.current_pmap);
2227 for (i = 0; i < 5; i++) {
2228 ia64_set_rr(IA64_RR_BASE(i),
2229 (i << 8)|(PAGE_SHIFT << 2)|1);
2232 for (i = 0; i < 5; i++) {
2233 ia64_set_rr(IA64_RR_BASE(i),
2234 (pm->pm_rid[i] << 8)|(PAGE_SHIFT << 2)|1);
2237 PCPU_SET(md.current_pmap, pm);
2246 pmap_sync_icache(pmap_t pm, vm_offset_t va, vm_size_t sz)
2249 struct ia64_lpte *pte;
2255 sz = (sz + 31) & ~31;
2258 oldpm = pmap_switch(pm);
2260 lim = round_page(va);
2261 len = MIN(lim - va, sz);
2262 pte = pmap_find_vhpt(va);
2263 if (pte != NULL && pmap_present(pte))
2264 ia64_sync_icache(va, len);
2273 * Increase the starting virtual address of the given mapping if a
2274 * different alignment might result in more superpage mappings.
2277 pmap_align_superpage(vm_object_t object, vm_ooffset_t offset,
2278 vm_offset_t *addr, vm_size_t size)
2282 #include "opt_ddb.h"
2286 #include <ddb/ddb.h>
2288 static const char* psnames[] = {
2289 "1B", "2B", "4B", "8B",
2290 "16B", "32B", "64B", "128B",
2291 "256B", "512B", "1K", "2K",
2292 "4K", "8K", "16K", "32K",
2293 "64K", "128K", "256K", "512K",
2294 "1M", "2M", "4M", "8M",
2295 "16M", "32M", "64M", "128M",
2296 "256M", "512M", "1G", "2G"
2302 struct ia64_pal_result res;
2310 static const char *manames[] = {
2311 "WB", "bad", "bad", "bad",
2312 "UC", "UCE", "WC", "NaT",
2315 res = ia64_call_pal_static(PAL_VM_SUMMARY, 0, 0, 0);
2316 if (res.pal_status != 0) {
2317 db_printf("Can't get VM summary\n");
2322 maxtr = (res.pal_result[0] >> 40) & 0xff;
2324 maxtr = (res.pal_result[0] >> 32) & 0xff;
2326 db_printf("V RID Virtual Page Physical Page PgSz ED AR PL D A MA P KEY\n");
2327 for (i = 0; i <= maxtr; i++) {
2328 bzero(&buf, sizeof(buf));
2329 res = ia64_call_pal_stacked_physical
2330 (PAL_VM_TR_READ, i, type, ia64_tpa((uint64_t) &buf));
2331 if (!(res.pal_result[0] & 1))
2332 buf.pte &= ~PTE_AR_MASK;
2333 if (!(res.pal_result[0] & 2))
2334 buf.pte &= ~PTE_PL_MASK;
2335 if (!(res.pal_result[0] & 4))
2336 pmap_clear_dirty(&buf);
2337 if (!(res.pal_result[0] & 8))
2338 buf.pte &= ~PTE_MA_MASK;
2339 db_printf("%d %06x %013lx %013lx %4s %d %d %d %d %d %-3s "
2340 "%d %06x\n", (int)buf.ifa & 1, buf.rr.rr_rid,
2341 buf.ifa >> 12, (buf.pte & PTE_PPN_MASK) >> 12,
2342 psnames[(buf.itir & ITIR_PS_MASK) >> 2],
2343 (buf.pte & PTE_ED) ? 1 : 0,
2344 (int)(buf.pte & PTE_AR_MASK) >> 9,
2345 (int)(buf.pte & PTE_PL_MASK) >> 7,
2346 (pmap_dirty(&buf)) ? 1 : 0,
2347 (pmap_accessed(&buf)) ? 1 : 0,
2348 manames[(buf.pte & PTE_MA_MASK) >> 2],
2349 (pmap_present(&buf)) ? 1 : 0,
2350 (int)((buf.itir & ITIR_KEY_MASK) >> 8));
2354 DB_COMMAND(itr, db_itr)
2359 DB_COMMAND(dtr, db_dtr)
2364 DB_COMMAND(rr, db_rr)
2370 printf("RR RID PgSz VE\n");
2371 for (i = 0; i < 8; i++) {
2372 __asm __volatile ("mov %0=rr[%1]"
2374 : "r"(IA64_RR_BASE(i)));
2375 *(uint64_t *) &rr = t;
2376 printf("%d %06x %4s %d\n",
2377 i, rr.rr_rid, psnames[rr.rr_ps], rr.rr_ve);
2381 DB_COMMAND(thash, db_thash)
2386 db_printf("%p\n", (void *) ia64_thash(addr));
2389 DB_COMMAND(ttag, db_ttag)
2394 db_printf("0x%lx\n", ia64_ttag(addr));
2397 DB_COMMAND(kpte, db_kpte)
2399 struct ia64_lpte *pte;
2402 db_printf("usage: kpte <kva>\n");
2405 if (addr < VM_MIN_KERNEL_ADDRESS) {
2406 db_printf("kpte: error: invalid <kva>\n");
2409 pte = pmap_find_kpte(addr);
2410 db_printf("kpte at %p:\n", pte);
2411 db_printf(" pte =%016lx\n", pte->pte);
2412 db_printf(" itir =%016lx\n", pte->itir);
2413 db_printf(" tag =%016lx\n", pte->tag);
2414 db_printf(" chain=%016lx\n", pte->chain);