/*
 * Copyright (c) 1991 Regents of the University of California.
 * Copyright (c) 1994 John S. Dyson
 * Copyright (c) 1994 David Greenman
 * Copyright (c) 1998,2000 Doug Rabson
 *
 * This code is derived from software contributed to Berkeley by
 * the Systems Programming Group of the University of Utah Computer
 * Science Department and William Jolitz of UUNET Technologies Inc.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 * 3. All advertising materials mentioning features or use of this software
 *    must display the following acknowledgement:
 *	This product includes software developed by the University of
 *	California, Berkeley and its contributors.
 * 4. Neither the name of the University nor the names of its contributors
 *    may be used to endorse or promote products derived from this software
 *    without specific prior written permission.
 *
 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
 * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
 * SUCH DAMAGE.
 *
 *	from:	@(#)pmap.c	7.7 (Berkeley)	5/12/91
 *	from:	i386 Id: pmap.c,v 1.193 1998/04/19 15:22:48 bde Exp
 *		with some ideas from NetBSD's alpha pmap
 */

#include <sys/cdefs.h>
__FBSDID("$FreeBSD$");

#include <sys/param.h>
#include <sys/kernel.h>
#include <sys/lock.h>
#include <sys/mutex.h>
#include <sys/proc.h>
#include <sys/smp.h>
#include <sys/sysctl.h>
#include <sys/systm.h>

#include <vm/vm.h>
#include <vm/vm_param.h>
#include <vm/vm_page.h>
#include <vm/vm_map.h>
#include <vm/vm_object.h>
#include <vm/vm_pageout.h>
#include <vm/uma.h>

#include <machine/md_var.h>
#include <machine/pal.h>

/*
 *	Manages physical address maps.
 *
 *	In addition to hardware address maps, this
 *	module is called upon to provide software-use-only
 *	maps which may or may not be stored in the same
 *	form as hardware maps.  These pseudo-maps are
 *	used to store intermediate results from copy
 *	operations to and from address spaces.
 *
 *	Since the information managed by this module is
 *	also stored by the logical address mapping module,
 *	this module may throw away valid virtual-to-physical
 *	mappings at almost any time.  However, invalidations
 *	of virtual-to-physical mappings must be done as
 *	requested.
 *
 *	In order to cope with hardware architectures which
 *	make virtual-to-physical map invalidates expensive,
 *	this module may delay invalidate or reduced protection
 *	operations until such time as they are actually
 *	necessary.  This module is given full information as
 *	to which processors are currently using which maps,
 *	and to when physical maps must be made correct.
 */

/*
 * Following the Linux model, region IDs are allocated in groups of
 * eight so that a single region ID can be used for as many RRs as we
 * want by encoding the RR number into the low bits of the ID.
 *
 * We reserve region ID 0 for the kernel and allocate the remaining
 * IDs for user pmaps.
 */

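/*
 * Illustrative sketch (not part of the original source): with IDs
 * handed out in aligned groups of eight, the effective region ID
 * for region register 'rr' is formed by putting the RR number in
 * the low bits of the group's base ID, e.g.:
 *
 *	static __inline uint32_t
 *	rid_for_rr(uint32_t base_id, int rr)
 *	{
 *		return (base_id | rr);	// base_id multiple of 8, rr 0..7
 *	}
 */
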
/*
 * Region 0..4
 *	User virtually mapped
 *
 * Region 5
 *	Kernel virtually mapped
 *
 * Region 6
 *	Kernel physically mapped uncacheable
 *
 * Region 7
 *	Kernel physically mapped cacheable
 */

/* XXX move to a header. */
extern uint64_t ia64_gateway_page[];

#ifndef PMAP_SHPGPERPROC
#define PMAP_SHPGPERPROC 200
#endif

#if !defined(DIAGNOSTIC)
#define PMAP_INLINE __inline
#else
#define PMAP_INLINE
#endif

#define	pmap_accessed(lpte)		((lpte)->pte & PTE_ACCESSED)
#define	pmap_dirty(lpte)		((lpte)->pte & PTE_DIRTY)
#define	pmap_exec(lpte)			((lpte)->pte & PTE_AR_RX)
#define	pmap_managed(lpte)		((lpte)->pte & PTE_MANAGED)
#define	pmap_ppn(lpte)			((lpte)->pte & PTE_PPN_MASK)
#define	pmap_present(lpte)		((lpte)->pte & PTE_PRESENT)
#define	pmap_prot(lpte)			(((lpte)->pte & PTE_PROT_MASK) >> 56)
#define	pmap_wired(lpte)		((lpte)->pte & PTE_WIRED)

#define	pmap_clear_accessed(lpte)	(lpte)->pte &= ~PTE_ACCESSED
#define	pmap_clear_dirty(lpte)		(lpte)->pte &= ~PTE_DIRTY
#define	pmap_clear_present(lpte)	(lpte)->pte &= ~PTE_PRESENT
#define	pmap_clear_wired(lpte)		(lpte)->pte &= ~PTE_WIRED

#define	pmap_set_wired(lpte)		(lpte)->pte |= PTE_WIRED

/*
 * The VHPT bucket head structure.
 */

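/*
 * Note: each bucket heads a singly-linked collision chain of
 * ia64_lpte entries. The chain links hold physical addresses (see
 * pmap_enter_vhpt()), which the walkers below turn back into region 7
 * virtual addresses with IA64_PHYS_TO_RR7(). A per-bucket spin lock
 * ('mutex') and a population count ('length') complete the bucket;
 * 'length' backs the machdep.vhpt.population sysctl.
 */
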
/*
 * Statically allocated kernel pmap
 */
struct pmap kernel_pmap_store;

vm_offset_t virtual_avail;	/* VA of first avail page (after kernel bss) */
vm_offset_t virtual_end;	/* VA of last avail page (end of kernel AS) */

/*
 * Kernel virtual memory management.
 */
static int nkpt;
struct ia64_lpte ***ia64_kptdir;

#define KPTE_DIR0_INDEX(va) \
	(((va) >> (3*PAGE_SHIFT-8)) & ((1<<(PAGE_SHIFT-3))-1))
#define KPTE_DIR1_INDEX(va) \
	(((va) >> (2*PAGE_SHIFT-5)) & ((1<<(PAGE_SHIFT-3))-1))
#define KPTE_PTE_INDEX(va) \
	(((va) >> PAGE_SHIFT) & ((1<<(PAGE_SHIFT-5))-1))
#define NKPTEPG		(PAGE_SIZE / sizeof(struct ia64_lpte))

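/*
 * Worked example (assuming the usual 8KB kernel page size, i.e.
 * PAGE_SHIFT == 13, and a 32-byte struct ia64_lpte): a leaf page
 * holds NKPTEPG = 8192/32 = 256 PTEs, so KPTE_PTE_INDEX() masks with
 * (1<<(13-5))-1 = 255, taking bits 13..20 of the VA. A directory
 * page holds 8192/8 = 1024 pointers, so KPTE_DIR1_INDEX() takes bits
 * 21..30 and KPTE_DIR0_INDEX() bits 31..40, each masked with 1023.
 * Together the two directory levels and the leaf level can map
 * 1024 * 1024 * 256 pages of kernel VA.
 */
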
vm_offset_t kernel_vm_end;

/* Values for ptc.e. XXX values for SKI. */
static uint64_t pmap_ptc_e_base = 0x100000000;
static uint64_t pmap_ptc_e_count1 = 3;
static uint64_t pmap_ptc_e_count2 = 2;
static uint64_t pmap_ptc_e_stride1 = 0x2000;
static uint64_t pmap_ptc_e_stride2 = 0x100000000;
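
/*
 * These parameters drive the nested loop in pmap_invalidate_all_1()
 * below, as prescribed by PAL_PTCE_INFO: a ptc.e is issued starting
 * at 'base', count1 by count2 times, advancing the address by
 * stride2 in the inner loop and by stride1 in the outer loop.
 */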
struct mtx pmap_ptcmutex;

/*
 * Data for the RID allocator
 */
static int pmap_ridcount;
static int pmap_rididx;
static int pmap_ridmapsz;
static int pmap_ridmax;
static uint64_t *pmap_ridmap;
struct mtx pmap_ridmutex;

/*
 * Data for the pv entry allocation mechanism
 */
static uma_zone_t pvzone;
static int pv_entry_count = 0, pv_entry_max = 0, pv_entry_high_water = 0;

/*
 * Data for allocating PTEs for user processes.
 */
static uma_zone_t ptezone;

/*
 * Virtual Hash Page Table (VHPT) data.
 */
/* SYSCTL_DECL(_machdep); */
SYSCTL_NODE(_machdep, OID_AUTO, vhpt, CTLFLAG_RD, 0, "");

struct ia64_bucket *pmap_vhpt_bucket;

int pmap_vhpt_nbuckets;
SYSCTL_INT(_machdep_vhpt, OID_AUTO, nbuckets, CTLFLAG_RD,
    &pmap_vhpt_nbuckets, 0, "");

uint64_t pmap_vhpt_base[MAXCPU];

int pmap_vhpt_log2size = 0;
TUNABLE_INT("machdep.vhpt.log2size", &pmap_vhpt_log2size);
SYSCTL_INT(_machdep_vhpt, OID_AUTO, log2size, CTLFLAG_RD,
    &pmap_vhpt_log2size, 0, "");

static int pmap_vhpt_inserts;
SYSCTL_INT(_machdep_vhpt, OID_AUTO, inserts, CTLFLAG_RD,
    &pmap_vhpt_inserts, 0, "");

static int pmap_vhpt_population(SYSCTL_HANDLER_ARGS);
SYSCTL_PROC(_machdep_vhpt, OID_AUTO, population, CTLTYPE_INT | CTLFLAG_RD,
    NULL, 0, pmap_vhpt_population, "I", "");

static struct ia64_lpte *pmap_find_vhpt(vm_offset_t va);

static PMAP_INLINE void	free_pv_entry(pv_entry_t pv);
static pv_entry_t	get_pv_entry(pmap_t locked_pmap);

static void	pmap_enter_quick_locked(pmap_t pmap, vm_offset_t va,
		    vm_page_t m, vm_prot_t prot);
static void	pmap_invalidate_all(pmap_t pmap);
static int	pmap_remove_pte(pmap_t pmap, struct ia64_lpte *pte,
		    vm_offset_t va, pv_entry_t pv, int freepte);
static boolean_t pmap_try_insert_pv_entry(pmap_t pmap, vm_offset_t va,
		    vm_page_t m);

static vm_offset_t
pmap_steal_memory(vm_size_t size)
{
	vm_size_t bank_size;
	vm_offset_t pa, va;

	size = round_page(size);

	bank_size = phys_avail[1] - phys_avail[0];
	while (size > bank_size) {
		int i;

		for (i = 0; phys_avail[i+2]; i+= 2) {
			phys_avail[i] = phys_avail[i+2];
			phys_avail[i+1] = phys_avail[i+3];
		}
		phys_avail[i] = 0;
		phys_avail[i+1] = 0;

		if (!phys_avail[0])
			panic("pmap_steal_memory: out of memory");

		bank_size = phys_avail[1] - phys_avail[0];
	}

	pa = phys_avail[0];
	phys_avail[0] += size;

	va = IA64_PHYS_TO_RR7(pa);
	bzero((caddr_t) va, size);
	return (va);
}

/*
 * Bootstrap the system enough to run with virtual memory.
 */
void
pmap_bootstrap(void)
{
	struct ia64_pal_result res;
	struct ia64_lpte *pte;
	vm_offset_t base, limit;
	vm_size_t size;
	int i, j, count, ridbits;

	/*
	 * Query the PAL Code to find the loop parameters for the
	 * ptc.e instruction.
	 */
	res = ia64_call_pal_static(PAL_PTCE_INFO, 0, 0, 0);
	if (res.pal_status != 0)
		panic("Can't configure ptc.e parameters");
	pmap_ptc_e_base = res.pal_result[0];
	pmap_ptc_e_count1 = res.pal_result[1] >> 32;
	pmap_ptc_e_count2 = res.pal_result[1] & ((1L<<32) - 1);
	pmap_ptc_e_stride1 = res.pal_result[2] >> 32;
	pmap_ptc_e_stride2 = res.pal_result[2] & ((1L<<32) - 1);

	printf("ptc.e base=0x%lx, count1=%ld, count2=%ld, "
	    "stride1=0x%lx, stride2=0x%lx\n",
	    pmap_ptc_e_base, pmap_ptc_e_count1, pmap_ptc_e_count2,
	    pmap_ptc_e_stride1, pmap_ptc_e_stride2);

	mtx_init(&pmap_ptcmutex, "Global PTC lock", NULL, MTX_SPIN);

	/*
	 * Setup RIDs. RIDs 0..7 are reserved for the kernel.
	 *
	 * We currently need at least 19 bits in the RID because PID_MAX
	 * can only be encoded in 17 bits and we need RIDs for 5 regions
	 * per process. With PID_MAX equalling 99999 this means that we
	 * need to be able to encode 499995 (=5*PID_MAX).
	 * The Itanium processor only has 18 bits and the architected
	 * minimum is exactly that. So, we cannot use a PID based scheme
	 * in those cases. Enter pmap_ridmap...
	 * We should avoid the map when running on a processor that has
	 * implemented enough bits. This means that we should pass the
	 * process/thread ID to pmap. This we currently don't do, so we
	 * use the map anyway. However, we don't want to allocate a map
	 * that is large enough to cover the range dictated by the number
	 * of bits in the RID, because that may result in a RID map of
	 * 2MB in size for a 24-bit RID. A 64KB map is enough.
	 * The bottom line: we create a 32KB map when the processor only
	 * implements 18 bits (or when we can't figure it out). Otherwise
	 * we create a 64KB map.
	 */
	res = ia64_call_pal_static(PAL_VM_SUMMARY, 0, 0, 0);
	if (res.pal_status != 0) {
		printf("Can't read VM Summary - assuming 18 Region ID bits\n");
		ridbits = 18; /* guaranteed minimum */
	} else {
		ridbits = (res.pal_result[1] >> 8) & 0xff;
		printf("Processor supports %d Region ID bits\n",
		    ridbits);
	}
	if (ridbits > 19)
		ridbits = 19;

	pmap_ridmax = (1 << ridbits);
	pmap_ridmapsz = pmap_ridmax / 64;
	pmap_ridmap = (uint64_t *)pmap_steal_memory(pmap_ridmax / 8);
	pmap_ridmap[0] |= 0xff;
	pmap_rididx = 0;
	pmap_ridcount = 8;
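	/*
	 * Note: setting the low eight bits of the first map word is what
	 * enforces the "RIDs 0..7 are reserved for the kernel" rule
	 * above; the allocator below can never hand them out.
	 */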
	mtx_init(&pmap_ridmutex, "RID allocator lock", NULL, MTX_DEF);

	/*
	 * Allocate some memory for initial kernel 'page tables'.
	 */
	ia64_kptdir = (void *)pmap_steal_memory(PAGE_SIZE);
	nkpt = 0;
	kernel_vm_end = VM_MIN_KERNEL_ADDRESS - VM_GATEWAY_SIZE;

	for (i = 0; phys_avail[i+2]; i+= 2)
		;
	count = i+2;

	/*
	 * Figure out a useful size for the VHPT, based on the size of
	 * physical memory and try to locate a region which is large
	 * enough to contain the VHPT (which must be a power of two in
	 * size and aligned to a natural boundary).
	 * We silently bump up the VHPT size to the minimum size if the
	 * user has set the tunable too small. Likewise, the VHPT size
	 * is silently capped to the maximum allowed.
	 */
	TUNABLE_INT_FETCH("machdep.vhpt.log2size", &pmap_vhpt_log2size);
	if (pmap_vhpt_log2size == 0) {
		pmap_vhpt_log2size = 15;
		size = 1UL << pmap_vhpt_log2size;
		while (size < Maxmem * 32) {
			pmap_vhpt_log2size++;
			size <<= 1;
		}
	} else if (pmap_vhpt_log2size < 15)
		pmap_vhpt_log2size = 15;
	if (pmap_vhpt_log2size > 61)
		pmap_vhpt_log2size = 61;

	pmap_vhpt_base[0] = 0;
	base = limit = 0;
	size = 1UL << pmap_vhpt_log2size;
	while (pmap_vhpt_base[0] == 0) {
		printf("Trying VHPT size 0x%lx\n", size);
		for (i = 0; i < count; i += 2) {
			base = (phys_avail[i] + size - 1) & ~(size - 1);
			limit = base + MAXCPU * size;
			if (limit <= phys_avail[i+1])
				/*
				 * VHPT can fit in this region
				 */
				break;
		}
		if (!phys_avail[i]) {
			/* Can't fit, try next smaller size. */
			pmap_vhpt_log2size--;
			size >>= 1;
		} else
			pmap_vhpt_base[0] = IA64_PHYS_TO_RR7(base);
	}
	if (pmap_vhpt_log2size < 15)
		panic("Can't find space for VHPT");

	printf("Putting VHPT at 0x%lx\n", base);

	if (base != phys_avail[i]) {
		/* Split this region. */
		printf("Splitting [%p-%p]\n", (void *)phys_avail[i],
		    (void *)phys_avail[i+1]);
		for (j = count; j > i; j -= 2) {
			phys_avail[j] = phys_avail[j-2];
			phys_avail[j+1] = phys_avail[j-2+1];
		}
		phys_avail[i+1] = base;
		phys_avail[i+2] = limit;
	} else
		phys_avail[i] = limit;

	pmap_vhpt_nbuckets = size / sizeof(struct ia64_lpte);

	pmap_vhpt_bucket = (void *)pmap_steal_memory(pmap_vhpt_nbuckets *
	    sizeof(struct ia64_bucket));
	pte = (struct ia64_lpte *)pmap_vhpt_base[0];
	for (i = 0; i < pmap_vhpt_nbuckets; i++) {
		pte[i].pte = 0;
		pte[i].itir = 0;
		pte[i].tag = 1UL << 63;	/* Invalid tag */
		pte[i].chain = (uintptr_t)(pmap_vhpt_bucket + i);
		/* Stolen memory is zeroed! */
		mtx_init(&pmap_vhpt_bucket[i].mutex, "VHPT bucket lock", NULL,
		    MTX_NOWITNESS | MTX_SPIN);
	}

	for (i = 1; i < MAXCPU; i++) {
		pmap_vhpt_base[i] = pmap_vhpt_base[i - 1] + size;
		bcopy((void *)pmap_vhpt_base[i - 1], (void *)pmap_vhpt_base[i],
		    size);
	}

	map_vhpt(pmap_vhpt_base[0]);
	ia64_set_pta(pmap_vhpt_base[0] + (1 << 8) +
	    (pmap_vhpt_log2size << 2) + 1);
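
	/*
	 * Layout of the PTA value written above (as architected): bit 0
	 * enables the VHPT walker, bits 7:2 hold the log2 of the VHPT
	 * size, bit 8 selects the long format, and the upper bits hold
	 * the naturally aligned VHPT base. Hence base + (1 << 8) +
	 * (log2size << 2) + 1.
	 */
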
	virtual_avail = VM_MIN_KERNEL_ADDRESS;
	virtual_end = VM_MAX_KERNEL_ADDRESS;

	/*
	 * Initialize the kernel pmap (which is statically allocated).
	 */
	PMAP_LOCK_INIT(kernel_pmap);
	for (i = 0; i < 5; i++)
		kernel_pmap->pm_rid[i] = 0;
	kernel_pmap->pm_active = 1;
	TAILQ_INIT(&kernel_pmap->pm_pvlist);
	PCPU_SET(current_pmap, kernel_pmap);

	/*
	 * Region 5 is mapped via the VHPT.
	 */
	ia64_set_rr(IA64_RR_BASE(5), (5 << 8) | (PAGE_SHIFT << 2) | 1);
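
	/*
	 * A region register value is (rid << 8) | (page_shift << 2) | ve:
	 * region ID in bits 31:8, preferred page size in bits 7:2, and
	 * the VHPT-enable bit in bit 0. Region 5 thus gets RID 5, the
	 * normal page size and the walker enabled; regions 6 and 7 below
	 * use the identity-mapping page size and leave the walker off.
	 */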

	/*
	 * Region 6 is direct mapped UC and region 7 is direct mapped
	 * WB. The details of this are controlled by the Alt {I,D}TLB
	 * handlers. Here we just make sure that they have the largest
	 * possible page size to minimise TLB usage.
	 */
	ia64_set_rr(IA64_RR_BASE(6), (6 << 8) | (IA64_ID_PAGE_SHIFT << 2));
	ia64_set_rr(IA64_RR_BASE(7), (7 << 8) | (IA64_ID_PAGE_SHIFT << 2));

	/*
	 * Clear out any random TLB entries left over from booting.
	 */
	pmap_invalidate_all(kernel_pmap);
}

static int
pmap_vhpt_population(SYSCTL_HANDLER_ARGS)
{
	int count, error, i;

	count = 0;
	for (i = 0; i < pmap_vhpt_nbuckets; i++)
		count += pmap_vhpt_bucket[i].length;

	error = SYSCTL_OUT(req, &count, sizeof(count));
	return (error);
}

/*
 * Initialize a vm_page's machine-dependent fields.
 */
void
pmap_page_init(vm_page_t m)
{

	TAILQ_INIT(&m->md.pv_list);
	m->md.pv_list_count = 0;
}

/*
 * Initialize the pmap module.
 * Called by vm_init, to initialize any structures that the pmap
 * system needs to map virtual memory.
 */
void
pmap_init(void)
{
	int shpgperproc = PMAP_SHPGPERPROC;

	/*
	 * Initialize the address space (zone) for the pv entries. Set a
	 * high water mark so that the system can recover from excessive
	 * numbers of pv entries.
	 */
	pvzone = uma_zcreate("PV ENTRY", sizeof(struct pv_entry), NULL, NULL,
	    NULL, NULL, UMA_ALIGN_PTR, UMA_ZONE_VM | UMA_ZONE_NOFREE);
	TUNABLE_INT_FETCH("vm.pmap.shpgperproc", &shpgperproc);
	pv_entry_max = shpgperproc * maxproc + cnt.v_page_count;
	TUNABLE_INT_FETCH("vm.pmap.pv_entries", &pv_entry_max);
	pv_entry_high_water = 9 * (pv_entry_max / 10);

	ptezone = uma_zcreate("PT ENTRY", sizeof (struct ia64_lpte),
	    NULL, NULL, NULL, NULL, UMA_ALIGN_PTR, UMA_ZONE_VM|UMA_ZONE_NOFREE);
}

/***************************************************
 * Manipulate TLBs for a pmap
 ***************************************************/

static void
pmap_invalidate_page(pmap_t pmap, vm_offset_t va)
{
	struct ia64_lpte *pte;
	vm_offset_t vhpt_ofs;
	int i;

	KASSERT((pmap == kernel_pmap || pmap == PCPU_GET(current_pmap)),
	    ("invalidating TLB for non-current pmap"));

	vhpt_ofs = ia64_thash(va) - pmap_vhpt_base[PCPU_GET(cpuid)];

	for (i = 0; i < MAXCPU; i++) {
		pte = (struct ia64_lpte *)(pmap_vhpt_base[i] + vhpt_ofs);
		if (pte->tag == ia64_ttag(va))
			pte->tag = 1UL << 63;
	}

	mtx_lock_spin(&pmap_ptcmutex);
	ia64_ptc_ga(va, PAGE_SHIFT << 2);
	mtx_unlock_spin(&pmap_ptcmutex);
}

static void
pmap_invalidate_all_1(void *arg)
{
	uint64_t addr;
	int i, j;

	addr = pmap_ptc_e_base;
	for (i = 0; i < pmap_ptc_e_count1; i++) {
		for (j = 0; j < pmap_ptc_e_count2; j++) {
			ia64_ptc_e(addr);
			addr += pmap_ptc_e_stride2;
		}
		addr += pmap_ptc_e_stride1;
	}
}

static void
pmap_invalidate_all(pmap_t pmap)
{

	KASSERT((pmap == kernel_pmap || pmap == PCPU_GET(current_pmap)),
	    ("invalidating TLB for non-current pmap"));

#ifdef SMP
	if (mp_started) {
		smp_rendezvous(NULL, pmap_invalidate_all_1, NULL, NULL);
		return;
	}
#endif
	pmap_invalidate_all_1(NULL);
}

static uint32_t
pmap_allocate_rid(void)
{
	uint64_t bit, bits;
	int rid;

	mtx_lock(&pmap_ridmutex);
	if (pmap_ridcount == pmap_ridmax)
		panic("pmap_allocate_rid: All Region IDs used");

	/* Find an index with a free bit. */
	while ((bits = pmap_ridmap[pmap_rididx]) == ~0UL) {
		pmap_rididx++;
		if (pmap_rididx == pmap_ridmapsz)
			pmap_rididx = 0;
	}
	rid = pmap_rididx * 64;

	/* Find a free bit. */
	bit = 1UL;
	while (bits & bit) {
		rid++;
		bit <<= 1;
	}

	pmap_ridmap[pmap_rididx] |= bit;
	pmap_ridcount++;
	mtx_unlock(&pmap_ridmutex);

	return (rid);
}

static void
pmap_free_rid(uint32_t rid)
{
	uint64_t bit;
	int idx;

	idx = rid / 64;
	bit = ~(1UL << (rid & 63));

	mtx_lock(&pmap_ridmutex);
	pmap_ridmap[idx] &= bit;
	pmap_ridcount--;
	mtx_unlock(&pmap_ridmutex);
}

/***************************************************
 * Page table page management routines.....
 ***************************************************/

void
pmap_pinit0(struct pmap *pmap)
{
	/* kernel_pmap is the same as any other pmap. */
	pmap_pinit(pmap);
}

/*
 * Initialize a preallocated and zeroed pmap structure,
 * such as one in a vmspace structure.
 */
void
pmap_pinit(struct pmap *pmap)
{
	int i;

	PMAP_LOCK_INIT(pmap);
	for (i = 0; i < 5; i++)
		pmap->pm_rid[i] = pmap_allocate_rid();
	pmap->pm_active = 0;
	TAILQ_INIT(&pmap->pm_pvlist);
	bzero(&pmap->pm_stats, sizeof pmap->pm_stats);
}

/***************************************************
 * Pmap allocation/deallocation routines.
 ***************************************************/

/*
 * Release any resources held by the given physical map.
 * Called when a pmap initialized by pmap_pinit is being released.
 * Should only be called if the map contains no valid mappings.
 */
void
pmap_release(pmap_t pmap)
{
	int i;

	for (i = 0; i < 5; i++)
		if (pmap->pm_rid[i])
			pmap_free_rid(pmap->pm_rid[i]);
	PMAP_LOCK_DESTROY(pmap);
}

/*
 * grow the number of kernel page table entries, if needed
 */
void
pmap_growkernel(vm_offset_t addr)
{
	struct ia64_lpte **dir1;
	struct ia64_lpte *leaf;
	vm_page_t nkpg;

	while (kernel_vm_end <= addr) {
		if (nkpt == PAGE_SIZE/8 + PAGE_SIZE*PAGE_SIZE/64)
			panic("%s: out of kernel address space", __func__);

		dir1 = ia64_kptdir[KPTE_DIR0_INDEX(kernel_vm_end)];
		if (dir1 == NULL) {
			nkpg = vm_page_alloc(NULL, nkpt++,
			    VM_ALLOC_NOOBJ|VM_ALLOC_INTERRUPT|VM_ALLOC_WIRED);
			if (!nkpg)
				panic("%s: cannot add dir. page", __func__);

			dir1 = (struct ia64_lpte **)
			    IA64_PHYS_TO_RR7(VM_PAGE_TO_PHYS(nkpg));
			bzero(dir1, PAGE_SIZE);
			ia64_kptdir[KPTE_DIR0_INDEX(kernel_vm_end)] = dir1;
		}

		nkpg = vm_page_alloc(NULL, nkpt++,
		    VM_ALLOC_NOOBJ|VM_ALLOC_INTERRUPT|VM_ALLOC_WIRED);
		if (!nkpg)
			panic("%s: cannot add PTE page", __func__);

		leaf = (struct ia64_lpte *)
		    IA64_PHYS_TO_RR7(VM_PAGE_TO_PHYS(nkpg));
		bzero(leaf, PAGE_SIZE);
		dir1[KPTE_DIR1_INDEX(kernel_vm_end)] = leaf;

		kernel_vm_end += PAGE_SIZE * NKPTEPG;
	}
}

/***************************************************
 * page management routines.
 ***************************************************/

/*
 * free the pv_entry back to the free list
 */
static PMAP_INLINE void
free_pv_entry(pv_entry_t pv)
{
	pv_entry_count--;
	uma_zfree(pvzone, pv);
}

/*
 * get a new pv_entry, allocating a block from the system
 * when needed.
 */
static pv_entry_t
get_pv_entry(pmap_t locked_pmap)
{
	static const struct timeval printinterval = { 60, 0 };
	static struct timeval lastprint;
	struct vpgqueues *vpq;
	struct ia64_lpte *pte;
	pmap_t oldpmap, pmap;
	pv_entry_t allocated_pv, next_pv, pv;
	vm_offset_t va;
	vm_page_t m;

	PMAP_LOCK_ASSERT(locked_pmap, MA_OWNED);
	mtx_assert(&vm_page_queue_mtx, MA_OWNED);
	allocated_pv = uma_zalloc(pvzone, M_NOWAIT);
	if (allocated_pv != NULL) {
		pv_entry_count++;
		if (pv_entry_count > pv_entry_high_water)
			pagedaemon_wakeup();
		else
			return (allocated_pv);
	}

	/*
	 * Reclaim pv entries: At first, destroy mappings to inactive
	 * pages.  After that, if a pv entry is still needed, destroy
	 * mappings to active pages.
	 */
	if (ratecheck(&lastprint, &printinterval))
		printf("Approaching the limit on PV entries, "
		    "increase the vm.pmap.shpgperproc tunable.\n");
	vpq = &vm_page_queues[PQ_INACTIVE];
retry:
	TAILQ_FOREACH(m, &vpq->pl, pageq) {
		if (m->hold_count || m->busy)
			continue;
		TAILQ_FOREACH_SAFE(pv, &m->md.pv_list, pv_list, next_pv) {
			va = pv->pv_va;
			pmap = pv->pv_pmap;
			/* Avoid deadlock and lock recursion. */
			if (pmap > locked_pmap)
				PMAP_LOCK(pmap);
			else if (pmap != locked_pmap && !PMAP_TRYLOCK(pmap))
				continue;
			oldpmap = pmap_switch(pmap);
			pte = pmap_find_vhpt(va);
			KASSERT(pte != NULL, ("pte"));
			pmap_remove_pte(pmap, pte, va, pv, 1);
			pmap_switch(oldpmap);
			if (pmap != locked_pmap)
				PMAP_UNLOCK(pmap);
			if (allocated_pv == NULL) {
				allocated_pv = uma_zalloc(pvzone, M_NOWAIT);
				if (allocated_pv != NULL)
					pv_entry_count++;
			}
		}
	}
	if (allocated_pv == NULL) {
		if (vpq == &vm_page_queues[PQ_INACTIVE]) {
			vpq = &vm_page_queues[PQ_ACTIVE];
			goto retry;
		}
		panic("get_pv_entry: increase the vm.pmap.shpgperproc tunable");
	}
	return (allocated_pv);
}

/*
 * Conditionally create a pv entry.
 */
static boolean_t
pmap_try_insert_pv_entry(pmap_t pmap, vm_offset_t va, vm_page_t m)
{
	pv_entry_t pv;

	PMAP_LOCK_ASSERT(pmap, MA_OWNED);
	mtx_assert(&vm_page_queue_mtx, MA_OWNED);
	if (pv_entry_count < pv_entry_high_water &&
	    (pv = uma_zalloc(pvzone, M_NOWAIT)) != NULL) {
		pv_entry_count++;
		pv->pv_va = va;
		pv->pv_pmap = pmap;
		TAILQ_INSERT_TAIL(&pmap->pm_pvlist, pv, pv_plist);
		TAILQ_INSERT_TAIL(&m->md.pv_list, pv, pv_list);
		m->md.pv_list_count++;
		return (TRUE);
	} else
		return (FALSE);
}

/*
 * Add an ia64_lpte to the VHPT.
 */
static void
pmap_enter_vhpt(struct ia64_lpte *pte, vm_offset_t va)
{
	struct ia64_bucket *bckt;
	struct ia64_lpte *vhpte;
	uint64_t pte_pa;

	/* Can fault, so get it out of the way. */
	pte_pa = ia64_tpa((vm_offset_t)pte);

	vhpte = (struct ia64_lpte *)ia64_thash(va);
	bckt = (struct ia64_bucket *)vhpte->chain;

	mtx_lock_spin(&bckt->mutex);
	pte->chain = bckt->chain;
	ia64_mf();
	bckt->chain = pte_pa;

	pmap_vhpt_inserts++;
	bckt->length++;
	mtx_unlock_spin(&bckt->mutex);
}

/*
 * Remove the ia64_lpte matching va from the VHPT. Return zero if it
 * worked or an appropriate error code otherwise.
 */
static int
pmap_remove_vhpt(vm_offset_t va)
{
	struct ia64_bucket *bckt;
	struct ia64_lpte *pte;
	struct ia64_lpte *lpte;
	struct ia64_lpte *vhpte;
	uint64_t chain, tag;

	tag = ia64_ttag(va);
	vhpte = (struct ia64_lpte *)ia64_thash(va);
	bckt = (struct ia64_bucket *)vhpte->chain;

	lpte = NULL;
	mtx_lock_spin(&bckt->mutex);
	chain = bckt->chain;
	pte = (struct ia64_lpte *)IA64_PHYS_TO_RR7(chain);
	while (chain != 0 && pte->tag != tag) {
		lpte = pte;
		chain = pte->chain;
		pte = (struct ia64_lpte *)IA64_PHYS_TO_RR7(chain);
	}
	if (chain == 0) {
		mtx_unlock_spin(&bckt->mutex);
		return (ENOENT);
	}

	/* Snip this pv_entry out of the collision chain. */
	if (lpte == NULL)
		bckt->chain = pte->chain;
	else
		lpte->chain = pte->chain;
	ia64_mf();

	bckt->length--;
	mtx_unlock_spin(&bckt->mutex);
	return (0);
}

/*
 * Find the ia64_lpte for the given va, if any.
 */
static struct ia64_lpte *
pmap_find_vhpt(vm_offset_t va)
{
	struct ia64_bucket *bckt;
	struct ia64_lpte *pte;
	uint64_t chain, tag;

	tag = ia64_ttag(va);
	pte = (struct ia64_lpte *)ia64_thash(va);
	bckt = (struct ia64_bucket *)pte->chain;

	mtx_lock_spin(&bckt->mutex);
	chain = bckt->chain;
	pte = (struct ia64_lpte *)IA64_PHYS_TO_RR7(chain);
	while (chain != 0 && pte->tag != tag) {
		chain = pte->chain;
		pte = (struct ia64_lpte *)IA64_PHYS_TO_RR7(chain);
	}
	mtx_unlock_spin(&bckt->mutex);
	return ((chain != 0) ? pte : NULL);
}

/*
 * Remove an entry from the list of managed mappings.
 */
static int
pmap_remove_entry(pmap_t pmap, vm_page_t m, vm_offset_t va, pv_entry_t pv)
{
	if (!pv) {
		if (m->md.pv_list_count < pmap->pm_stats.resident_count) {
			TAILQ_FOREACH(pv, &m->md.pv_list, pv_list) {
				if (pmap == pv->pv_pmap && va == pv->pv_va)
					break;
			}
		} else {
			TAILQ_FOREACH(pv, &pmap->pm_pvlist, pv_plist) {
				if (va == pv->pv_va)
					break;
			}
		}
	}

	if (pv == NULL)
		return (ENOENT);

	TAILQ_REMOVE(&m->md.pv_list, pv, pv_list);
	m->md.pv_list_count--;
	if (TAILQ_FIRST(&m->md.pv_list) == NULL)
		vm_page_flag_clear(m, PG_WRITEABLE);

	TAILQ_REMOVE(&pmap->pm_pvlist, pv, pv_plist);
	free_pv_entry(pv);
	return (0);
}

/*
 * Create a pv entry for page at pa for (pmap, va).
 */
static void
pmap_insert_entry(pmap_t pmap, vm_offset_t va, vm_page_t m)
{
	pv_entry_t pv;

	pv = get_pv_entry(pmap);
	pv->pv_pmap = pmap;
	pv->pv_va = va;

	PMAP_LOCK_ASSERT(pmap, MA_OWNED);
	mtx_assert(&vm_page_queue_mtx, MA_OWNED);
	TAILQ_INSERT_TAIL(&pmap->pm_pvlist, pv, pv_plist);
	TAILQ_INSERT_TAIL(&m->md.pv_list, pv, pv_list);
	m->md.pv_list_count++;
}

/*
 * Routine:	pmap_extract
 * Function:
 *	Extract the physical page address associated
 *	with the given map/virtual_address pair.
 */
vm_paddr_t
pmap_extract(pmap_t pmap, vm_offset_t va)
{
	struct ia64_lpte *pte;
	pmap_t oldpmap;
	vm_paddr_t pa;

	pa = 0;
	PMAP_LOCK(pmap);
	oldpmap = pmap_switch(pmap);
	pte = pmap_find_vhpt(va);
	if (pte != NULL && pmap_present(pte))
		pa = pmap_ppn(pte);
	pmap_switch(oldpmap);
	PMAP_UNLOCK(pmap);
	return (pa);
}

/*
 * Routine:	pmap_extract_and_hold
 * Function:
 *	Atomically extract and hold the physical page
 *	with the given pmap and virtual address pair
 *	if that mapping permits the given protection.
 */
vm_page_t
pmap_extract_and_hold(pmap_t pmap, vm_offset_t va, vm_prot_t prot)
{
	struct ia64_lpte *pte;
	pmap_t oldpmap;
	vm_page_t m;

	m = NULL;
	vm_page_lock_queues();
	PMAP_LOCK(pmap);
	oldpmap = pmap_switch(pmap);
	pte = pmap_find_vhpt(va);
	if (pte != NULL && pmap_present(pte) &&
	    (pmap_prot(pte) & prot) == prot) {
		m = PHYS_TO_VM_PAGE(pmap_ppn(pte));
		vm_page_hold(m);
	}
	vm_page_unlock_queues();
	pmap_switch(oldpmap);
	PMAP_UNLOCK(pmap);
	return (m);
}

/***************************************************
 * Low level mapping routines.....
 ***************************************************/

/*
 * Find the kernel lpte for mapping the given virtual address, which
 * must be in the part of region 5 which we can cover with our kernel
 * 'page tables'.
 */
static struct ia64_lpte *
pmap_find_kpte(vm_offset_t va)
{
	struct ia64_lpte **dir1;
	struct ia64_lpte *leaf;

	KASSERT((va >> 61) == 5,
	    ("kernel mapping 0x%lx not in region 5", va));
	KASSERT(va < kernel_vm_end,
	    ("kernel mapping 0x%lx out of range", va));

	dir1 = ia64_kptdir[KPTE_DIR0_INDEX(va)];
	leaf = dir1[KPTE_DIR1_INDEX(va)];
	return (&leaf[KPTE_PTE_INDEX(va)]);
}

/*
 * Find a pte suitable for mapping a user-space address. If one exists
 * in the VHPT, that one will be returned, otherwise a new pte is
 * allocated.
 */
static struct ia64_lpte *
pmap_find_pte(vm_offset_t va)
{
	struct ia64_lpte *pte;

	if (va >= VM_MAXUSER_ADDRESS)
		return pmap_find_kpte(va);

	pte = pmap_find_vhpt(va);
	if (pte == NULL) {
		pte = uma_zalloc(ptezone, M_NOWAIT | M_ZERO);
		if (pte != NULL)
			pte->tag = 1UL << 63;
	}
	return (pte);
}

/*
 * Free a pte which is now unused. This simply returns it to the zone
 * allocator if it is a user mapping. For kernel mappings, clear the
 * valid bit to make it clear that the mapping is not currently used.
 */
static void
pmap_free_pte(struct ia64_lpte *pte, vm_offset_t va)
{
	if (va < VM_MAXUSER_ADDRESS)
		uma_zfree(ptezone, pte);
	else
		pmap_clear_present(pte);
}

static PMAP_INLINE void
pmap_pte_prot(pmap_t pm, struct ia64_lpte *pte, vm_prot_t prot)
{
	static long prot2ar[4] = {
		PTE_AR_R,		/* VM_PROT_NONE */
		PTE_AR_RW,		/* VM_PROT_WRITE */
		PTE_AR_RX|PTE_ED,	/* VM_PROT_EXECUTE */
		PTE_AR_RWX|PTE_ED	/* VM_PROT_WRITE|VM_PROT_EXECUTE */
	};
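
	/*
	 * prot2ar[] is indexed by (prot & VM_PROT_ALL) >> 1, i.e. by the
	 * write and execute bits only (VM_PROT_READ is bit 0 and is
	 * implied). For example, prot = VM_PROT_READ|VM_PROT_EXECUTE
	 * (1|4 = 5) yields index 2 and thus PTE_AR_RX|PTE_ED.
	 */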
	pte->pte &= ~(PTE_PROT_MASK | PTE_PL_MASK | PTE_AR_MASK | PTE_ED);
	pte->pte |= (uint64_t)(prot & VM_PROT_ALL) << 56;
	pte->pte |= (prot == VM_PROT_NONE || pm == kernel_pmap)
	    ? PTE_PL_KERN : PTE_PL_USER;
	pte->pte |= prot2ar[(prot & VM_PROT_ALL) >> 1];
}

/*
 * Set a pte to contain a valid mapping and enter it in the VHPT. If
 * the pte was originally valid, then it's assumed to already be in the
 * VHPT.
 * This function does not set the protection bits. It's expected
 * that those have been set correctly prior to calling this function.
 */
static void
pmap_set_pte(struct ia64_lpte *pte, vm_offset_t va, vm_offset_t pa,
    boolean_t wired, boolean_t managed)
{

	pte->pte &= PTE_PROT_MASK | PTE_PL_MASK | PTE_AR_MASK | PTE_ED;
	pte->pte |= PTE_PRESENT | PTE_MA_WB;
	pte->pte |= (managed) ? PTE_MANAGED : (PTE_DIRTY | PTE_ACCESSED);
	pte->pte |= (wired) ? PTE_WIRED : 0;
	pte->pte |= pa & PTE_PPN_MASK;

	pte->itir = PAGE_SHIFT << 2;
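
	/*
	 * itir carries the mapping's page size (PAGE_SHIFT in bits 7:2);
	 * the tag set below is what the hardware walker compares against
	 * when it probes the VHPT for this va.
	 */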
	pte->tag = ia64_ttag(va);
}

/*
 * Remove the (possibly managed) mapping represented by pte from the
 * given pmap.
 */
static int
pmap_remove_pte(pmap_t pmap, struct ia64_lpte *pte, vm_offset_t va,
    pv_entry_t pv, int freepte)
{
	int error;
	vm_page_t m;

	KASSERT((pmap == kernel_pmap || pmap == PCPU_GET(current_pmap)),
	    ("removing pte for non-current pmap"));

	/*
	 * First remove from the VHPT.
	 */
	error = pmap_remove_vhpt(va);
	if (error)
		return (error);

	pmap_invalidate_page(pmap, va);

	if (pmap_wired(pte))
		pmap->pm_stats.wired_count -= 1;

	pmap->pm_stats.resident_count -= 1;
	if (pmap_managed(pte)) {
		m = PHYS_TO_VM_PAGE(pmap_ppn(pte));
		if (pmap_dirty(pte))
			vm_page_dirty(m);
		if (pmap_accessed(pte))
			vm_page_flag_set(m, PG_REFERENCED);

		error = pmap_remove_entry(pmap, m, va, pv);
	}
	if (freepte)
		pmap_free_pte(pte, va);

	return (error);
}

/*
 * Extract the physical page address associated with a kernel
 * virtual address.
 */
vm_paddr_t
pmap_kextract(vm_offset_t va)
{
	struct ia64_lpte *pte;
	vm_offset_t gwpage;

	KASSERT(va >= IA64_RR_BASE(5), ("Must be kernel VA"));

	/* Regions 6 and 7 are direct mapped. */
	if (va >= IA64_RR_BASE(6))
		return (IA64_RR_MASK(va));

	/* EPC gateway page? */
	gwpage = (vm_offset_t)ia64_get_k5();
	if (va >= gwpage && va < gwpage + VM_GATEWAY_SIZE)
		return (IA64_RR_MASK((vm_offset_t)ia64_gateway_page));

	/* Bail out if the virtual address is beyond our limits. */
	if (va >= kernel_vm_end)
		return (0);

	pte = pmap_find_kpte(va);
	if (!pmap_present(pte))
		return (0);
	return (pmap_ppn(pte) | (va & PAGE_MASK));
}

/*
 * Add a list of wired pages to the kva.  This routine is only used for
 * temporary kernel mappings that do not need to have page modification
 * or references recorded.  Note that old mappings are simply written
 * over.  The page is effectively wired, but it's customary to not have
 * the PTE reflect that, nor update statistics.
 */
void
pmap_qenter(vm_offset_t va, vm_page_t *m, int count)
{
	struct ia64_lpte *pte;
	int i;

	for (i = 0; i < count; i++) {
		pte = pmap_find_kpte(va);
		if (pmap_present(pte))
			pmap_invalidate_page(kernel_pmap, va);
		else
			pmap_enter_vhpt(pte, va);
		pmap_pte_prot(kernel_pmap, pte, VM_PROT_ALL);
		pmap_set_pte(pte, va, VM_PAGE_TO_PHYS(m[i]), FALSE, FALSE);
		va += PAGE_SIZE;
	}
}

/*
 * this routine jerks page mappings from the
 * kernel -- it is meant only for temporary mappings.
 */
void
pmap_qremove(vm_offset_t va, int count)
{
	struct ia64_lpte *pte;
	int i;

	for (i = 0; i < count; i++) {
		pte = pmap_find_kpte(va);
		if (pmap_present(pte)) {
			pmap_remove_vhpt(va);
			pmap_invalidate_page(kernel_pmap, va);
			pmap_clear_present(pte);
		}
		va += PAGE_SIZE;
	}
}

/*
 * Add a wired page to the kva.  As for pmap_qenter(), it's customary
 * to not have the PTE reflect that, nor update statistics.
 */
void
pmap_kenter(vm_offset_t va, vm_offset_t pa)
{
	struct ia64_lpte *pte;

	pte = pmap_find_kpte(va);
	if (pmap_present(pte))
		pmap_invalidate_page(kernel_pmap, va);
	else
		pmap_enter_vhpt(pte, va);
	pmap_pte_prot(kernel_pmap, pte, VM_PROT_ALL);
	pmap_set_pte(pte, va, pa, FALSE, FALSE);
}

/*
 * Remove a page from the kva
 */
void
pmap_kremove(vm_offset_t va)
{
	struct ia64_lpte *pte;

	pte = pmap_find_kpte(va);
	if (pmap_present(pte)) {
		pmap_remove_vhpt(va);
		pmap_invalidate_page(kernel_pmap, va);
		pmap_clear_present(pte);
	}
}

/*
 * Used to map a range of physical addresses into kernel
 * virtual address space.
 *
 * The value passed in '*virt' is a suggested virtual address for
 * the mapping. Architectures which can support a direct-mapped
 * physical to virtual region can return the appropriate address
 * within that region, leaving '*virt' unchanged. Other
 * architectures should map the pages starting at '*virt' and
 * update '*virt' with the first usable address after the mapped
 * region.
 */
vm_offset_t
pmap_map(vm_offset_t *virt, vm_offset_t start, vm_offset_t end, int prot)
{
	return IA64_PHYS_TO_RR7(start);
}

/*
 * Remove a single page from a process address space
 */
static void
pmap_remove_page(pmap_t pmap, vm_offset_t va)
{
	struct ia64_lpte *pte;

	KASSERT((pmap == kernel_pmap || pmap == PCPU_GET(current_pmap)),
	    ("removing page for non-current pmap"));

	pte = pmap_find_vhpt(va);
	if (pte != NULL)
		pmap_remove_pte(pmap, pte, va, 0, 1);
}

/*
 * Remove the given range of addresses from the specified map.
 *
 * It is assumed that the start and end are properly
 * rounded to the page size.
 */
void
pmap_remove(pmap_t pmap, vm_offset_t sva, vm_offset_t eva)
{
	pmap_t oldpmap;
	vm_offset_t va;
	pv_entry_t npv, pv;
	struct ia64_lpte *pte;

	if (pmap->pm_stats.resident_count == 0)
		return;

	vm_page_lock_queues();
	PMAP_LOCK(pmap);
	oldpmap = pmap_switch(pmap);

	/*
	 * Special handling of removing one page.  A very
	 * common operation and easy to short circuit some
	 * code.
	 */
	if (sva + PAGE_SIZE == eva) {
		pmap_remove_page(pmap, sva);
		goto out;
	}

	if (pmap->pm_stats.resident_count < ((eva - sva) >> PAGE_SHIFT)) {
		TAILQ_FOREACH_SAFE(pv, &pmap->pm_pvlist, pv_plist, npv) {
			va = pv->pv_va;
			if (va >= sva && va < eva) {
				pte = pmap_find_vhpt(va);
				KASSERT(pte != NULL, ("pte"));
				pmap_remove_pte(pmap, pte, va, pv, 1);
			}
		}
	} else {
		for (va = sva; va < eva; va += PAGE_SIZE) {
			pte = pmap_find_vhpt(va);
			if (pte != NULL)
				pmap_remove_pte(pmap, pte, va, 0, 1);
		}
	}

out:
	vm_page_unlock_queues();
	pmap_switch(oldpmap);
	PMAP_UNLOCK(pmap);
}

/*
 * Routine:	pmap_remove_all
 * Function:
 *		Removes this physical page from
 *		all physical maps in which it resides.
 *		Reflects back modify bits to the pager.
 *
 * Notes:
 *		Original versions of this routine were very
 *		inefficient because they iteratively called
 *		pmap_remove (slow...)
 */
void
pmap_remove_all(vm_page_t m)
{
	pmap_t oldpmap;
	pv_entry_t pv;

#if defined(DIAGNOSTIC)
	/*
	 * XXX This makes pmap_remove_all() illegal for non-managed pages!
	 */
	if (m->flags & PG_FICTITIOUS) {
		panic("pmap_remove_all: illegal for unmanaged page, va: 0x%lx",
		    VM_PAGE_TO_PHYS(m));
	}
#endif
	mtx_assert(&vm_page_queue_mtx, MA_OWNED);
	while ((pv = TAILQ_FIRST(&m->md.pv_list)) != NULL) {
		struct ia64_lpte *pte;
		pmap_t pmap = pv->pv_pmap;
		vm_offset_t va = pv->pv_va;

		PMAP_LOCK(pmap);
		oldpmap = pmap_switch(pmap);
		pte = pmap_find_vhpt(va);
		KASSERT(pte != NULL, ("pte"));
		if (pmap_ppn(pte) != VM_PAGE_TO_PHYS(m))
			panic("pmap_remove_all: pv_table for %lx is inconsistent",
			    VM_PAGE_TO_PHYS(m));
		pmap_remove_pte(pmap, pte, va, pv, 1);
		pmap_switch(oldpmap);
		PMAP_UNLOCK(pmap);
	}
	vm_page_flag_clear(m, PG_WRITEABLE);
}

/*
 * Set the physical protection on the
 * specified range of this map as requested.
 */
void
pmap_protect(pmap_t pmap, vm_offset_t sva, vm_offset_t eva, vm_prot_t prot)
{
	pmap_t oldpmap;
	struct ia64_lpte *pte;

	if ((prot & VM_PROT_READ) == VM_PROT_NONE) {
		pmap_remove(pmap, sva, eva);
		return;
	}

	if ((prot & (VM_PROT_WRITE|VM_PROT_EXECUTE)) ==
	    (VM_PROT_WRITE|VM_PROT_EXECUTE))
		return;

	if ((sva & PAGE_MASK) || (eva & PAGE_MASK))
		panic("pmap_protect: unaligned addresses");

	vm_page_lock_queues();
	PMAP_LOCK(pmap);
	oldpmap = pmap_switch(pmap);
	for ( ; sva < eva; sva += PAGE_SIZE) {
		/* If page is invalid, skip this page */
		pte = pmap_find_vhpt(sva);
		if (pte == NULL)
			continue;

		/* If there's no change, skip it too */
		if (pmap_prot(pte) == prot)
			continue;

		if (pmap_managed(pte)) {
			vm_offset_t pa = pmap_ppn(pte);
			vm_page_t m = PHYS_TO_VM_PAGE(pa);

			if (pmap_dirty(pte)) {
				vm_page_dirty(m);
				pmap_clear_dirty(pte);
			}

			if (pmap_accessed(pte)) {
				vm_page_flag_set(m, PG_REFERENCED);
				pmap_clear_accessed(pte);
			}
		}

		if (prot & VM_PROT_EXECUTE)
			ia64_invalidate_icache(sva, PAGE_SIZE);

		pmap_pte_prot(pmap, pte, prot);
		pmap_invalidate_page(pmap, sva);
	}
	vm_page_unlock_queues();
	pmap_switch(oldpmap);
	PMAP_UNLOCK(pmap);
}

/*
 * Insert the given physical page (p) at
 * the specified virtual address (v) in the
 * target physical map with the protection requested.
 *
 * If specified, the page will be wired down, meaning
 * that the related pte can not be reclaimed.
 *
 * NB: This is the only routine which MAY NOT lazy-evaluate
 * or lose information. That is, this routine must actually
 * insert this page into the given map NOW.
 */
void
pmap_enter(pmap_t pmap, vm_offset_t va, vm_prot_t access, vm_page_t m,
    vm_prot_t prot, boolean_t wired)
{
	pmap_t oldpmap;
	vm_offset_t opa, pa;
	struct ia64_lpte origpte;
	struct ia64_lpte *pte;
	boolean_t icache_inval, managed;

	vm_page_lock_queues();
	PMAP_LOCK(pmap);
	oldpmap = pmap_switch(pmap);

	if (va > VM_MAX_KERNEL_ADDRESS)
		panic("pmap_enter: toobig");

	/*
	 * Find (or create) a pte for the given mapping.
	 */
	while ((pte = pmap_find_pte(va)) == NULL) {
		pmap_switch(oldpmap);
		PMAP_UNLOCK(pmap);
		vm_page_unlock_queues();
		VM_WAIT;
		vm_page_lock_queues();
		PMAP_LOCK(pmap);
		oldpmap = pmap_switch(pmap);
	}

	origpte = *pte;
	managed = FALSE;
	if (!pmap_present(pte)) {
		opa = ~0UL;
		pmap_enter_vhpt(pte, va);
	} else
		opa = pmap_ppn(pte);

	pa = VM_PAGE_TO_PHYS(m);

	icache_inval = (prot & VM_PROT_EXECUTE) ? TRUE : FALSE;

	/*
	 * Mapping has not changed, must be protection or wiring change.
	 */
	if (opa == pa) {
		/*
		 * Wiring change, just update stats. We don't worry about
		 * wiring PT pages as they remain resident as long as there
		 * are valid mappings in them. Hence, if a user page is wired,
		 * the PT page will be also.
		 */
		if (wired && !pmap_wired(&origpte))
			pmap->pm_stats.wired_count++;
		else if (!wired && pmap_wired(&origpte))
			pmap->pm_stats.wired_count--;

		managed = (pmap_managed(&origpte)) ? TRUE : FALSE;

		/*
		 * We might be turning off write access to the page,
		 * so we go ahead and sense modify status. Otherwise,
		 * we can avoid I-cache invalidation if the page
		 * already allowed execution.
		 */
		if (managed && pmap_dirty(&origpte))
			vm_page_dirty(m);
		else if (pmap_exec(&origpte))
			icache_inval = FALSE;

		pmap_invalidate_page(pmap, va);
		goto validate;
	}

	/*
	 * Mapping has changed, invalidate old range and fall
	 * through to handle validating new mapping.
	 */
	if (opa != ~0UL) {
		pmap_remove_pte(pmap, pte, va, 0, 0);
		pmap_enter_vhpt(pte, va);
	}

	/*
	 * Enter on the PV list if part of our managed memory.
	 */
	if ((m->flags & (PG_FICTITIOUS | PG_UNMANAGED)) == 0) {
		KASSERT(va < kmi.clean_sva || va >= kmi.clean_eva,
		    ("pmap_enter: managed mapping within the clean submap"));
		pmap_insert_entry(pmap, va, m);
		managed = TRUE;
	}

	/*
	 * Increment counters
	 */
	pmap->pm_stats.resident_count++;
	if (wired)
		pmap->pm_stats.wired_count++;

validate:

	/*
	 * Now validate mapping with desired protection/wiring. This
	 * adds the pte to the VHPT if necessary.
	 */
	pmap_pte_prot(pmap, pte, prot);
	pmap_set_pte(pte, va, pa, wired, managed);

	/* Invalidate the I-cache when needed. */
	if (icache_inval)
		ia64_invalidate_icache(va, PAGE_SIZE);

	if ((prot & VM_PROT_WRITE) != 0)
		vm_page_flag_set(m, PG_WRITEABLE);
	vm_page_unlock_queues();
	pmap_switch(oldpmap);
	PMAP_UNLOCK(pmap);
}

/*
 * Maps a sequence of resident pages belonging to the same object.
 * The sequence begins with the given page m_start. This page is
 * mapped at the given virtual address start. Each subsequent page is
 * mapped at a virtual address that is offset from start by the same
 * amount as the page is offset from m_start within the object. The
 * last page in the sequence is the page with the largest offset from
 * m_start that can be mapped at a virtual address less than the given
 * virtual address end. Not every virtual page between start and end
 * is mapped; only those for which a resident page exists with the
 * corresponding offset from m_start are mapped.
 */
void
pmap_enter_object(pmap_t pmap, vm_offset_t start, vm_offset_t end,
    vm_page_t m_start, vm_prot_t prot)
{
	pmap_t oldpmap;
	vm_page_t m;
	vm_pindex_t diff, psize;

	VM_OBJECT_LOCK_ASSERT(m_start->object, MA_OWNED);
	psize = atop(end - start);
	m = m_start;
	PMAP_LOCK(pmap);
	oldpmap = pmap_switch(pmap);
	while (m != NULL && (diff = m->pindex - m_start->pindex) < psize) {
		pmap_enter_quick_locked(pmap, start + ptoa(diff), m, prot);
		m = TAILQ_NEXT(m, listq);
	}
	pmap_switch(oldpmap);
	PMAP_UNLOCK(pmap);
}

/*
 * this code makes some *MAJOR* assumptions:
 * 1. Current pmap & pmap exists.
 * 2. Not wired.
 * 3. Read access.
 * 4. No page table pages.
 * but is *MUCH* faster than pmap_enter...
 */
void
pmap_enter_quick(pmap_t pmap, vm_offset_t va, vm_page_t m, vm_prot_t prot)
{
	pmap_t oldpmap;

	PMAP_LOCK(pmap);
	oldpmap = pmap_switch(pmap);
	pmap_enter_quick_locked(pmap, va, m, prot);
	pmap_switch(oldpmap);
	PMAP_UNLOCK(pmap);
}

static void
pmap_enter_quick_locked(pmap_t pmap, vm_offset_t va, vm_page_t m,
    vm_prot_t prot)
{
	struct ia64_lpte *pte;
	boolean_t managed;

	KASSERT(va < kmi.clean_sva || va >= kmi.clean_eva ||
	    (m->flags & (PG_FICTITIOUS | PG_UNMANAGED)) != 0,
	    ("pmap_enter_quick_locked: managed mapping within the clean submap"));
	mtx_assert(&vm_page_queue_mtx, MA_OWNED);
	PMAP_LOCK_ASSERT(pmap, MA_OWNED);

	if ((pte = pmap_find_pte(va)) == NULL)
		return;

	if (!pmap_present(pte)) {
		/* Enter on the PV list if the page is managed. */
		if ((m->flags & (PG_FICTITIOUS | PG_UNMANAGED)) == 0) {
			if (!pmap_try_insert_pv_entry(pmap, va, m)) {
				pmap_free_pte(pte, va);
				return;
			}
			managed = TRUE;
		} else
			managed = FALSE;

		/* Increment counters. */
		pmap->pm_stats.resident_count++;

		/* Initialise with R/O protection and enter into VHPT. */
		pmap_enter_vhpt(pte, va);
		pmap_pte_prot(pmap, pte,
		    prot & (VM_PROT_READ | VM_PROT_EXECUTE));
		pmap_set_pte(pte, va, VM_PAGE_TO_PHYS(m), FALSE, managed);
	}
}

/*
 * pmap_object_init_pt preloads the ptes for a given object
 * into the specified pmap. This eliminates the blast of soft
 * faults on process startup and immediately after an mmap.
 */
void
pmap_object_init_pt(pmap_t pmap, vm_offset_t addr,
    vm_object_t object, vm_pindex_t pindex,
    vm_size_t size)
{

	VM_OBJECT_LOCK_ASSERT(object, MA_OWNED);
	KASSERT(object->type == OBJT_DEVICE,
	    ("pmap_object_init_pt: non-device object"));
}

/*
 * Routine:	pmap_change_wiring
 * Function:	Change the wiring attribute for a map/virtual-address
 *		pair.
 * In/out conditions:
 *		The mapping must already exist in the pmap.
 */
void
pmap_change_wiring(pmap_t pmap, vm_offset_t va, boolean_t wired)
{
	pmap_t oldpmap;
	struct ia64_lpte *pte;

	PMAP_LOCK(pmap);
	oldpmap = pmap_switch(pmap);

	pte = pmap_find_vhpt(va);
	KASSERT(pte != NULL, ("pte"));
	if (wired && !pmap_wired(pte)) {
		pmap->pm_stats.wired_count++;
		pmap_set_wired(pte);
	} else if (!wired && pmap_wired(pte)) {
		pmap->pm_stats.wired_count--;
		pmap_clear_wired(pte);
	}

	pmap_switch(oldpmap);
	PMAP_UNLOCK(pmap);
}

/*
 * Copy the range specified by src_addr/len
 * from the source map to the range dst_addr/len
 * in the destination map.
 *
 * This routine is only advisory and need not do anything.
 */
void
pmap_copy(pmap_t dst_pmap, pmap_t src_pmap, vm_offset_t dst_addr, vm_size_t len,
    vm_offset_t src_addr)
{
}

/*
 * pmap_zero_page zeros the specified hardware page by
 * mapping it into virtual memory and using bzero to clear
 * its contents.
 */
void
pmap_zero_page(vm_page_t m)
{
	vm_offset_t va = IA64_PHYS_TO_RR7(VM_PAGE_TO_PHYS(m));
	bzero((caddr_t) va, PAGE_SIZE);
}

/*
 * pmap_zero_page_area zeros the specified hardware page by
 * mapping it into virtual memory and using bzero to clear
 * its contents.
 *
 * off and size must reside within a single page.
 */
void
pmap_zero_page_area(vm_page_t m, int off, int size)
{
	vm_offset_t va = IA64_PHYS_TO_RR7(VM_PAGE_TO_PHYS(m));
	bzero((char *)(caddr_t)va + off, size);
}

/*
 * pmap_zero_page_idle zeros the specified hardware page by
 * mapping it into virtual memory and using bzero to clear
 * its contents. This is for the vm_idlezero process.
 */
void
pmap_zero_page_idle(vm_page_t m)
{
	vm_offset_t va = IA64_PHYS_TO_RR7(VM_PAGE_TO_PHYS(m));
	bzero((caddr_t) va, PAGE_SIZE);
}

/*
 * pmap_copy_page copies the specified (machine independent)
 * page by mapping the page into virtual memory and using
 * bcopy to copy the page, one machine dependent page at a
 * time.
 */
void
pmap_copy_page(vm_page_t msrc, vm_page_t mdst)
{
	vm_offset_t src = IA64_PHYS_TO_RR7(VM_PAGE_TO_PHYS(msrc));
	vm_offset_t dst = IA64_PHYS_TO_RR7(VM_PAGE_TO_PHYS(mdst));
	bcopy((caddr_t) src, (caddr_t) dst, PAGE_SIZE);
}

/*
 * Returns true if the pmap's pv is one of the first
 * 16 pvs linked to from this page. This count may
 * be changed upwards or downwards in the future; it
 * is only necessary that true be returned for a small
 * subset of pmaps for proper page aging.
 */
boolean_t
pmap_page_exists_quick(pmap_t pmap, vm_page_t m)
{
	pv_entry_t pv;
	int loops = 0;

	if (m->flags & PG_FICTITIOUS)
		return (FALSE);

	/*
	 * Not found, check current mappings returning immediately if found.
	 */
	mtx_assert(&vm_page_queue_mtx, MA_OWNED);
	TAILQ_FOREACH(pv, &m->md.pv_list, pv_list) {
		if (pv->pv_pmap == pmap)
			return (TRUE);
		loops++;
		if (loops >= 16)
			break;
	}
	return (FALSE);
}

/*
 * pmap_page_wired_mappings:
 *
 *	Return the number of managed mappings to the given physical page
 *	that are wired.
 */
int
pmap_page_wired_mappings(vm_page_t m)
{
	struct ia64_lpte *pte;
	pmap_t oldpmap, pmap;
	pv_entry_t pv;
	int count;

	count = 0;
	if ((m->flags & PG_FICTITIOUS) != 0)
		return (count);
	mtx_assert(&vm_page_queue_mtx, MA_OWNED);
	TAILQ_FOREACH(pv, &m->md.pv_list, pv_list) {
		pmap = pv->pv_pmap;
		PMAP_LOCK(pmap);
		oldpmap = pmap_switch(pmap);
		pte = pmap_find_vhpt(pv->pv_va);
		KASSERT(pte != NULL, ("pte"));
		if (pmap_wired(pte))
			count++;
		pmap_switch(oldpmap);
		PMAP_UNLOCK(pmap);
	}
	return (count);
}

/*
 * Remove all pages from specified address space
 * this aids process exit speeds. Also, this code
 * is special cased for current process only, but
 * can have the more generic (and slightly slower)
 * mode enabled. This is much faster than pmap_remove
 * in the case of running down an entire address space.
 */
void
pmap_remove_pages(pmap_t pmap)
{
	pmap_t oldpmap;
	pv_entry_t pv, npv;

	if (pmap != vmspace_pmap(curthread->td_proc->p_vmspace)) {
		printf("warning: pmap_remove_pages called with non-current pmap\n");
		return;
	}

	vm_page_lock_queues();
	PMAP_LOCK(pmap);
	oldpmap = pmap_switch(pmap);

	for (pv = TAILQ_FIRST(&pmap->pm_pvlist); pv; pv = npv) {
		struct ia64_lpte *pte;

		npv = TAILQ_NEXT(pv, pv_plist);

		pte = pmap_find_vhpt(pv->pv_va);
		KASSERT(pte != NULL, ("pte"));
		if (!pmap_wired(pte))
			pmap_remove_pte(pmap, pte, pv->pv_va, pv, 1);
	}

	pmap_switch(oldpmap);
	PMAP_UNLOCK(pmap);
	vm_page_unlock_queues();
}

/*
 * pmap_ts_referenced:
 *
 *	Return a count of reference bits for a page, clearing those bits.
 *	It is not necessary for every reference bit to be cleared, but it
 *	is necessary that 0 only be returned when there are truly no
 *	reference bits set.
 *
 *	XXX: The exact number of bits to check and clear is a matter that
 *	should be tested and standardized at some point in the future for
 *	optimal aging of shared pages.
 */
int
pmap_ts_referenced(vm_page_t m)
{
	struct ia64_lpte *pte;
	pmap_t oldpmap;
	pv_entry_t pv;
	int count = 0;

	if (m->flags & PG_FICTITIOUS)
		return (0);

	TAILQ_FOREACH(pv, &m->md.pv_list, pv_list) {
		PMAP_LOCK(pv->pv_pmap);
		oldpmap = pmap_switch(pv->pv_pmap);
		pte = pmap_find_vhpt(pv->pv_va);
		KASSERT(pte != NULL, ("pte"));
		if (pmap_accessed(pte)) {
			count++;
			pmap_clear_accessed(pte);
			pmap_invalidate_page(pv->pv_pmap, pv->pv_va);
		}
		pmap_switch(oldpmap);
		PMAP_UNLOCK(pv->pv_pmap);
	}

	return (count);
}

/*
 * pmap_is_modified:
 *
 *	Return whether or not the specified physical page was modified
 *	in any physical maps.
 */
boolean_t
pmap_is_modified(vm_page_t m)
{
	struct ia64_lpte *pte;
	pmap_t oldpmap;
	pv_entry_t pv;
	boolean_t rv;

	rv = FALSE;
	if (m->flags & PG_FICTITIOUS)
		return (rv);

	TAILQ_FOREACH(pv, &m->md.pv_list, pv_list) {
		PMAP_LOCK(pv->pv_pmap);
		oldpmap = pmap_switch(pv->pv_pmap);
		pte = pmap_find_vhpt(pv->pv_va);
		pmap_switch(oldpmap);
		KASSERT(pte != NULL, ("pte"));
		rv = pmap_dirty(pte) ? TRUE : FALSE;
		PMAP_UNLOCK(pv->pv_pmap);
		if (rv)
			break;
	}

	return (rv);
}

/*
 * pmap_is_prefaultable:
 *
 *	Return whether or not the specified virtual address is eligible
 *	for prefault.
 */
boolean_t
pmap_is_prefaultable(pmap_t pmap, vm_offset_t addr)
{
	struct ia64_lpte *pte;

	pte = pmap_find_vhpt(addr);
	if (pte != NULL && pmap_present(pte))
		return (FALSE);
	return (TRUE);
}

/*
 * Clear the modify bits on the specified physical page.
 */
void
pmap_clear_modify(vm_page_t m)
{
	struct ia64_lpte *pte;
	pmap_t oldpmap;
	pv_entry_t pv;

	if (m->flags & PG_FICTITIOUS)
		return;

	TAILQ_FOREACH(pv, &m->md.pv_list, pv_list) {
		PMAP_LOCK(pv->pv_pmap);
		oldpmap = pmap_switch(pv->pv_pmap);
		pte = pmap_find_vhpt(pv->pv_va);
		KASSERT(pte != NULL, ("pte"));
		if (pmap_dirty(pte)) {
			pmap_clear_dirty(pte);
			pmap_invalidate_page(pv->pv_pmap, pv->pv_va);
		}
		pmap_switch(oldpmap);
		PMAP_UNLOCK(pv->pv_pmap);
	}
}

/*
 * pmap_clear_reference:
 *
 *	Clear the reference bit on the specified physical page.
 */
void
pmap_clear_reference(vm_page_t m)
{
	struct ia64_lpte *pte;
	pmap_t oldpmap;
	pv_entry_t pv;

	if (m->flags & PG_FICTITIOUS)
		return;

	TAILQ_FOREACH(pv, &m->md.pv_list, pv_list) {
		PMAP_LOCK(pv->pv_pmap);
		oldpmap = pmap_switch(pv->pv_pmap);
		pte = pmap_find_vhpt(pv->pv_va);
		KASSERT(pte != NULL, ("pte"));
		if (pmap_accessed(pte)) {
			pmap_clear_accessed(pte);
			pmap_invalidate_page(pv->pv_pmap, pv->pv_va);
		}
		pmap_switch(oldpmap);
		PMAP_UNLOCK(pv->pv_pmap);
	}
}

/*
 * Clear the write and modified bits in each of the given page's mappings.
 */
void
pmap_remove_write(vm_page_t m)
{
	struct ia64_lpte *pte;
	pmap_t oldpmap, pmap;
	pv_entry_t pv;
	vm_prot_t prot;

	mtx_assert(&vm_page_queue_mtx, MA_OWNED);
	if ((m->flags & PG_FICTITIOUS) != 0 ||
	    (m->flags & PG_WRITEABLE) == 0)
		return;
	TAILQ_FOREACH(pv, &m->md.pv_list, pv_list) {
		pmap = pv->pv_pmap;
		PMAP_LOCK(pmap);
		oldpmap = pmap_switch(pmap);
		pte = pmap_find_vhpt(pv->pv_va);
		KASSERT(pte != NULL, ("pte"));
		prot = pmap_prot(pte);
		if ((prot & VM_PROT_WRITE) != 0) {
			if (pmap_dirty(pte)) {
				vm_page_dirty(m);
				pmap_clear_dirty(pte);
			}
			prot &= ~VM_PROT_WRITE;
			pmap_pte_prot(pmap, pte, prot);
			pmap_invalidate_page(pmap, pv->pv_va);
		}
		pmap_switch(oldpmap);
		PMAP_UNLOCK(pmap);
	}
	vm_page_flag_clear(m, PG_WRITEABLE);
}

/*
 * Map a set of physical memory pages into the kernel virtual
 * address space. Return a pointer to where it is mapped. This
 * routine is intended to be used for mapping device memory,
 * NOT real memory.
 */
void *
pmap_mapdev(vm_offset_t pa, vm_size_t size)
{
	return (void *)IA64_PHYS_TO_RR6(pa);
}

/*
 * 'Unmap' a range mapped by pmap_mapdev().
 */
void
pmap_unmapdev(vm_offset_t va, vm_size_t size)
{
}

/*
 * perform the pmap work for mincore
 */
int
pmap_mincore(pmap_t pmap, vm_offset_t addr)
{
	pmap_t oldpmap;
	struct ia64_lpte *pte, tpte;
	int val = 0;

	PMAP_LOCK(pmap);
	oldpmap = pmap_switch(pmap);
	pte = pmap_find_vhpt(addr);
	if (pte != NULL) {
		tpte = *pte;
		pte = &tpte;
	}
	pmap_switch(oldpmap);
	PMAP_UNLOCK(pmap);

	if (pte == NULL)
		return (0);

	if (pmap_present(pte)) {
		vm_page_t m;
		vm_offset_t pa;

		val = MINCORE_INCORE;
		if (!pmap_managed(pte))
			return (val);

		pa = pmap_ppn(pte);
		m = PHYS_TO_VM_PAGE(pa);

		/*
		 * Modified by us
		 */
		if (pmap_dirty(pte))
			val |= MINCORE_MODIFIED|MINCORE_MODIFIED_OTHER;
		else {
			/*
			 * Modified by someone
			 */
			vm_page_lock_queues();
			if (pmap_is_modified(m))
				val |= MINCORE_MODIFIED_OTHER;
			vm_page_unlock_queues();
		}

		/*
		 * Referenced by us
		 */
		if (pmap_accessed(pte))
			val |= MINCORE_REFERENCED|MINCORE_REFERENCED_OTHER;
		else {
			/*
			 * Referenced by someone
			 */
			vm_page_lock_queues();
			if (pmap_ts_referenced(m)) {
				val |= MINCORE_REFERENCED_OTHER;
				vm_page_flag_set(m, PG_REFERENCED);
			}
			vm_page_unlock_queues();
		}
	}
	return (val);
}

void
pmap_activate(struct thread *td)
{
	pmap_switch(vmspace_pmap(td->td_proc->p_vmspace));
}

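/*
 * Install a pmap's translation context by reloading region registers
 * 0..4: the NULL case reverts to the kernel's reserved RIDs (plain
 * RID i for region i, from the 0..7 range set aside at bootstrap),
 * while the normal case installs the five per-process RIDs allocated
 * in pmap_pinit(). The previous pmap is returned so callers can
 * restore it afterwards.
 */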
pmap_t
pmap_switch(pmap_t pm)
{
	pmap_t prevpm;
	int i;

	prevpm = PCPU_GET(current_pmap);
	if (prevpm == pm)
		return (prevpm);
	if (prevpm != NULL)
		atomic_clear_32(&prevpm->pm_active, PCPU_GET(cpumask));
	if (pm == NULL) {
		for (i = 0; i < 5; i++) {
			ia64_set_rr(IA64_RR_BASE(i),
			    (i << 8)|(PAGE_SHIFT << 2)|1);
		}
	} else {
		for (i = 0; i < 5; i++) {
			ia64_set_rr(IA64_RR_BASE(i),
			    (pm->pm_rid[i] << 8)|(PAGE_SHIFT << 2)|1);
		}
		atomic_set_32(&pm->pm_active, PCPU_GET(cpumask));
	}
	PCPU_SET(current_pmap, pm);
	return (prevpm);
}

/*
 * Increase the starting virtual address of the given mapping if a
 * different alignment might result in more superpage mappings.
 */
void
pmap_align_superpage(vm_object_t object, vm_ooffset_t offset,
    vm_offset_t *addr, vm_size_t size)
{
}

#include "opt_ddb.h"

#ifdef DDB

#include <ddb/ddb.h>

static const char *psnames[] = {
	"1B",	"2B",	"4B",	"8B",
	"16B",	"32B",	"64B",	"128B",
	"256B",	"512B",	"1K",	"2K",
	"4K",	"8K",	"16K",	"32K",
	"64K",	"128K",	"256K",	"512K",
	"1M",	"2M",	"4M",	"8M",
	"16M",	"32M",	"64M",	"128M",
	"256M",	"512M",	"1G",	"2G"
};

static void
print_trs(int type)
{
	struct ia64_pal_result res;
	int i, maxtr;
	struct {
		uint64_t	pte;
		uint64_t	itir;
		uint64_t	ifa;
		struct ia64_rr	rr;
	} buf;
	static const char *manames[] = {
		"WB",	"bad",	"bad",	"bad",
		"UC",	"UCE",	"WC",	"NaT",
	};

	res = ia64_call_pal_static(PAL_VM_SUMMARY, 0, 0, 0);
	if (res.pal_status != 0) {
		db_printf("Can't get VM summary\n");
		return;
	}

	if (type == 0)
		maxtr = (res.pal_result[0] >> 40) & 0xff;
	else
		maxtr = (res.pal_result[0] >> 32) & 0xff;

	db_printf("V RID Virtual Page Physical Page PgSz ED AR PL D A MA P KEY\n");
	for (i = 0; i <= maxtr; i++) {
		bzero(&buf, sizeof(buf));
		res = ia64_call_pal_stacked_physical
		    (PAL_VM_TR_READ, i, type, ia64_tpa((uint64_t)&buf));
		if (!(res.pal_result[0] & 1))
			buf.pte &= ~PTE_AR_MASK;
		if (!(res.pal_result[0] & 2))
			buf.pte &= ~PTE_PL_MASK;
		if (!(res.pal_result[0] & 4))
			pmap_clear_dirty(&buf);
		if (!(res.pal_result[0] & 8))
			buf.pte &= ~PTE_MA_MASK;
		db_printf("%d %06x %013lx %013lx %4s %d %d %d %d %d %-3s "
		    "%d %06x\n", (int)buf.ifa & 1, buf.rr.rr_rid,
		    buf.ifa >> 12, (buf.pte & PTE_PPN_MASK) >> 12,
		    psnames[(buf.itir & ITIR_PS_MASK) >> 2],
		    (buf.pte & PTE_ED) ? 1 : 0,
		    (int)(buf.pte & PTE_AR_MASK) >> 9,
		    (int)(buf.pte & PTE_PL_MASK) >> 7,
		    (pmap_dirty(&buf)) ? 1 : 0,
		    (pmap_accessed(&buf)) ? 1 : 0,
		    manames[(buf.pte & PTE_MA_MASK) >> 2],
		    (pmap_present(&buf)) ? 1 : 0,
		    (int)((buf.itir & ITIR_KEY_MASK) >> 8));
	}
}

DB_COMMAND(itr, db_itr)
{
	print_trs(0);
}

DB_COMMAND(dtr, db_dtr)
{
	print_trs(1);
}

DB_COMMAND(rr, db_rr)
{
	int i;
	uint64_t t;
	struct ia64_rr rr;

	printf("RR RID PgSz VE\n");
	for (i = 0; i < 8; i++) {
		__asm __volatile ("mov %0=rr[%1]"
		    : "=r"(t)
		    : "r"(IA64_RR_BASE(i)));
		*(uint64_t *)&rr = t;
		printf("%d %06x %4s %d\n",
		    i, rr.rr_rid, psnames[rr.rr_ps], rr.rr_ve);
	}
}

DB_COMMAND(thash, db_thash)
{
	if (!have_addr)
		return;

	db_printf("%p\n", (void *)ia64_thash(addr));
}

DB_COMMAND(ttag, db_ttag)
{
	if (!have_addr)
		return;

	db_printf("0x%lx\n", ia64_ttag(addr));
}

DB_COMMAND(kpte, db_kpte)
{
	struct ia64_lpte *pte;

	if (!have_addr) {
		db_printf("usage: kpte <kva>\n");
		return;
	}
	if (addr < VM_MIN_KERNEL_ADDRESS) {
		db_printf("kpte: error: invalid <kva>\n");
		return;
	}
	pte = pmap_find_kpte(addr);
	db_printf("kpte at %p:\n", pte);
	db_printf("  pte  =%016lx\n", pte->pte);
	db_printf("  itir =%016lx\n", pte->itir);
	db_printf("  tag  =%016lx\n", pte->tag);
	db_printf("  chain=%016lx\n", pte->chain);
}

#endif