/*-
 * Copyright (c) 1991 Regents of the University of California.
 * Copyright (c) 1994 John S. Dyson
 * Copyright (c) 1994 David Greenman
 * Copyright (c) 1998,2000 Doug Rabson
 *
 * This code is derived from software contributed to Berkeley by
 * the Systems Programming Group of the University of Utah Computer
 * Science Department and William Jolitz of UUNET Technologies Inc.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 * 3. All advertising materials mentioning features or use of this software
 *    must display the following acknowledgement:
 *	This product includes software developed by the University of
 *	California, Berkeley and its contributors.
 * 4. Neither the name of the University nor the names of its contributors
 *    may be used to endorse or promote products derived from this software
 *    without specific prior written permission.
 *
 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
 * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
 * SUCH DAMAGE.
 *
 * from:	@(#)pmap.c	7.7 (Berkeley)	5/12/91
 * from:	i386 Id: pmap.c,v 1.193 1998/04/19 15:22:48 bde Exp
 *		with some ideas from NetBSD's alpha pmap
 */
#include <sys/cdefs.h>
__FBSDID("$FreeBSD$");

#include <sys/param.h>
#include <sys/kernel.h>
#include <sys/lock.h>
#include <sys/mman.h>
#include <sys/mutex.h>
#include <sys/proc.h>
#include <sys/smp.h>
#include <sys/sysctl.h>
#include <sys/systm.h>
#include <sys/vmmeter.h>

#include <vm/vm.h>
#include <vm/vm_param.h>
#include <vm/vm_kern.h>
#include <vm/vm_page.h>
#include <vm/vm_map.h>
#include <vm/vm_object.h>
#include <vm/vm_extern.h>
#include <vm/vm_pageout.h>
#include <vm/uma.h>

#include <machine/md_var.h>
#include <machine/pal.h>
/*
 * Manages physical address maps.
 *
 * In addition to hardware address maps, this
 * module is called upon to provide software-use-only
 * maps which may or may not be stored in the same
 * form as hardware maps.  These pseudo-maps are
 * used to store intermediate results from copy
 * operations to and from address spaces.
 *
 * Since the information managed by this module is
 * also stored by the logical address mapping module,
 * this module may throw away valid virtual-to-physical
 * mappings at almost any time.  However, invalidations
 * of virtual-to-physical mappings must be done as
 * requested.
 *
 * In order to cope with hardware architectures which
 * make virtual-to-physical map invalidates expensive,
 * this module may delay invalidate or reduced protection
 * operations until such time as they are actually
 * necessary.  This module is given full information as
 * to which processors are currently using which maps,
 * and to when physical maps must be made correct.
 */

/*
 * Following the Linux model, region IDs are allocated in groups of
 * eight so that a single region ID can be used for as many RRs as we
 * want by encoding the RR number into the low bits of the ID.
 *
 * We reserve region ID 0 for the kernel and allocate the remaining
 * IDs for user pmaps.
 */
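
/*
 * A quick sketch of that encoding: a pmap holding region ID group N
 * would use RID (N << 3) | rr for region register rr.  The kernel
 * owns group 0, which is why pmap_bootstrap() immediately marks
 * RIDs 0..7 as used (pmap_ridmap[0] |= 0xff).
 */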
/*
 * Region 0..4
 *	User virtually mapped
 *
 * Region 5
 *	Kernel virtually mapped
 *
 * Region 6
 *	Kernel physically mapped uncacheable
 *
 * Region 7
 *	Kernel physically mapped cacheable
 */
/* XXX move to a header. */
extern uint64_t ia64_gateway_page[];

#ifndef PMAP_SHPGPERPROC
#define PMAP_SHPGPERPROC 200
#endif

#if !defined(DIAGNOSTIC)
#define PMAP_INLINE __inline
#else
#define PMAP_INLINE
#endif

#define pmap_accessed(lpte)		((lpte)->pte & PTE_ACCESSED)
#define pmap_dirty(lpte)		((lpte)->pte & PTE_DIRTY)
#define pmap_exec(lpte)			((lpte)->pte & PTE_AR_RX)
#define pmap_managed(lpte)		((lpte)->pte & PTE_MANAGED)
#define pmap_ppn(lpte)			((lpte)->pte & PTE_PPN_MASK)
#define pmap_present(lpte)		((lpte)->pte & PTE_PRESENT)
#define pmap_prot(lpte)			(((lpte)->pte & PTE_PROT_MASK) >> 56)
#define pmap_wired(lpte)		((lpte)->pte & PTE_WIRED)

#define pmap_clear_accessed(lpte)	(lpte)->pte &= ~PTE_ACCESSED
#define pmap_clear_dirty(lpte)		(lpte)->pte &= ~PTE_DIRTY
#define pmap_clear_present(lpte)	(lpte)->pte &= ~PTE_PRESENT
#define pmap_clear_wired(lpte)		(lpte)->pte &= ~PTE_WIRED

#define pmap_set_wired(lpte)		(lpte)->pte |= PTE_WIRED
/*
 * The VHPT bucket head structure.
 */
struct ia64_bucket {
	uint64_t	chain;
	struct mtx	mutex;
	u_int		length;
};

/*
 * Statically allocated kernel pmap
 */
struct pmap kernel_pmap_store;

vm_offset_t virtual_avail;	/* VA of first avail page (after kernel bss) */
vm_offset_t virtual_end;	/* VA of last avail page (end of kernel AS) */
/*
 * Kernel virtual memory management.
 */
static int nkpt;
struct ia64_lpte ***ia64_kptdir;

#define KPTE_DIR0_INDEX(va) \
	(((va) >> (3*PAGE_SHIFT-8)) & ((1<<(PAGE_SHIFT-3))-1))
#define KPTE_DIR1_INDEX(va) \
	(((va) >> (2*PAGE_SHIFT-5)) & ((1<<(PAGE_SHIFT-3))-1))
#define KPTE_PTE_INDEX(va) \
	(((va) >> PAGE_SHIFT) & ((1<<(PAGE_SHIFT-5))-1))
#define NKPTEPG		(PAGE_SIZE / sizeof(struct ia64_lpte))
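
/*
 * Worked example, assuming 8KB pages (PAGE_SHIFT == 13) and a 32-byte
 * struct ia64_lpte: a leaf page holds NKPTEPG == 256 PTEs and a
 * directory page holds 1024 pointers, so a region 5 VA decomposes as
 * dir0 = (va >> 31) & 1023, dir1 = (va >> 21) & 1023 and
 * pte = (va >> 13) & 255, covering 1024 * 1024 * 256 * 8KB = 2TB.
 */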
vm_offset_t kernel_vm_end;

/* Values for ptc.e. XXX values for SKI. */
static uint64_t pmap_ptc_e_base = 0x100000000;
static uint64_t pmap_ptc_e_count1 = 3;
static uint64_t pmap_ptc_e_count2 = 2;
static uint64_t pmap_ptc_e_stride1 = 0x2000;
static uint64_t pmap_ptc_e_stride2 = 0x100000000;
struct mtx pmap_ptcmutex;

/*
 * Data for the RID allocator
 */
static int pmap_ridcount;
static int pmap_rididx;
static int pmap_ridmapsz;
static int pmap_ridmax;
static uint64_t *pmap_ridmap;
struct mtx pmap_ridmutex;

/*
 * Data for the pv entry allocation mechanism
 */
static uma_zone_t pvzone;
static int pv_entry_count = 0, pv_entry_max = 0, pv_entry_high_water = 0;

/*
 * Data for allocating PTEs for user processes.
 */
static uma_zone_t ptezone;

/*
 * Virtual Hash Page Table (VHPT) data.
 */
/* SYSCTL_DECL(_machdep); */
SYSCTL_NODE(_machdep, OID_AUTO, vhpt, CTLFLAG_RD, 0, "");

struct ia64_bucket *pmap_vhpt_bucket;

int pmap_vhpt_nbuckets;
SYSCTL_INT(_machdep_vhpt, OID_AUTO, nbuckets, CTLFLAG_RD,
    &pmap_vhpt_nbuckets, 0, "");

uint64_t pmap_vhpt_base[MAXCPU];

int pmap_vhpt_log2size = 0;
TUNABLE_INT("machdep.vhpt.log2size", &pmap_vhpt_log2size);
SYSCTL_INT(_machdep_vhpt, OID_AUTO, log2size, CTLFLAG_RD,
    &pmap_vhpt_log2size, 0, "");

static int pmap_vhpt_inserts;
SYSCTL_INT(_machdep_vhpt, OID_AUTO, inserts, CTLFLAG_RD,
    &pmap_vhpt_inserts, 0, "");

static int pmap_vhpt_population(SYSCTL_HANDLER_ARGS);
SYSCTL_PROC(_machdep_vhpt, OID_AUTO, population, CTLTYPE_INT | CTLFLAG_RD,
    NULL, 0, pmap_vhpt_population, "I", "");
static struct ia64_lpte *pmap_find_vhpt(vm_offset_t va);

static PMAP_INLINE void	free_pv_entry(pv_entry_t pv);
static pv_entry_t get_pv_entry(pmap_t locked_pmap);

static void	pmap_enter_quick_locked(pmap_t pmap, vm_offset_t va,
		    vm_page_t m, vm_prot_t prot);
static void	pmap_free_pte(struct ia64_lpte *pte, vm_offset_t va);
static void	pmap_invalidate_all(pmap_t pmap);
static int	pmap_remove_pte(pmap_t pmap, struct ia64_lpte *pte,
		    vm_offset_t va, pv_entry_t pv, int freepte);
static int	pmap_remove_vhpt(vm_offset_t va);
static boolean_t pmap_try_insert_pv_entry(pmap_t pmap, vm_offset_t va,
		    vm_page_t m);
static vm_offset_t
pmap_steal_memory(vm_size_t size)
{
	vm_size_t bank_size;
	vm_offset_t pa, va;

	size = round_page(size);

	bank_size = phys_avail[1] - phys_avail[0];
	while (size > bank_size) {
		int i;
		for (i = 0; phys_avail[i+2]; i+= 2) {
			phys_avail[i] = phys_avail[i+2];
			phys_avail[i+1] = phys_avail[i+3];
		}
		phys_avail[i] = 0;
		phys_avail[i+1] = 0;
		if (!phys_avail[0])
			panic("pmap_steal_memory: out of memory");
		bank_size = phys_avail[1] - phys_avail[0];
	}

	pa = phys_avail[0];
	phys_avail[0] += size;

	va = IA64_PHYS_TO_RR7(pa);
	bzero((caddr_t)va, size);
	return (va);
}
/*
 * Bootstrap the system enough to run with virtual memory.
 */
void
pmap_bootstrap(void)
{
	struct ia64_pal_result res;
	struct ia64_lpte *pte;
	vm_offset_t base, limit;
	vm_size_t size;
	int i, j, count, ridbits;

	/*
	 * Query the PAL Code to find the loop parameters for the
	 * ptc.e instruction.
	 */
	res = ia64_call_pal_static(PAL_PTCE_INFO, 0, 0, 0);
	if (res.pal_status != 0)
		panic("Can't configure ptc.e parameters");
	pmap_ptc_e_base = res.pal_result[0];
	pmap_ptc_e_count1 = res.pal_result[1] >> 32;
	pmap_ptc_e_count2 = res.pal_result[1] & ((1L<<32) - 1);
	pmap_ptc_e_stride1 = res.pal_result[2] >> 32;
	pmap_ptc_e_stride2 = res.pal_result[2] & ((1L<<32) - 1);
	if (bootverbose)
		printf("ptc.e base=0x%lx, count1=%ld, count2=%ld, "
		    "stride1=0x%lx, stride2=0x%lx\n", pmap_ptc_e_base,
		    pmap_ptc_e_count1, pmap_ptc_e_count2,
		    pmap_ptc_e_stride1, pmap_ptc_e_stride2);

	mtx_init(&pmap_ptcmutex, "Global PTC lock", NULL, MTX_SPIN);
	/*
	 * Setup RIDs. RIDs 0..7 are reserved for the kernel.
	 *
	 * We currently need at least 19 bits in the RID because PID_MAX
	 * can only be encoded in 17 bits and we need RIDs for 5 regions
	 * per process. With PID_MAX equaling 99999 this means that we
	 * need to be able to encode 499995 (=5*PID_MAX).
	 * The Itanium processor only has 18 bits and the architected
	 * minimum is exactly that. So, we cannot use a PID based scheme
	 * in those cases. Enter pmap_ridmap...
	 * We should avoid the map when running on a processor that has
	 * implemented enough bits. This means that we should pass the
	 * process/thread ID to pmap. This we currently don't do, so we
	 * use the map anyway. However, we don't want to allocate a map
	 * that is large enough to cover the range dictated by the number
	 * of bits in the RID, because that may result in a RID map of
	 * 2MB in size for a 24-bit RID. A 64KB map is enough.
	 * The bottom line: we create a 32KB map when the processor only
	 * implements 18 bits (or when we can't figure it out). Otherwise
	 * we create a 64KB map.
	 */
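	/*
	 * Map-size arithmetic, one bit per RID: 2^18 / 8 = 32KB and
	 * 2^19 / 8 = 64KB, while a full 24-bit RID space would need
	 * 2^24 / 8 = 2MB -- hence the cap described above.
	 */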
	res = ia64_call_pal_static(PAL_VM_SUMMARY, 0, 0, 0);
	if (res.pal_status != 0) {
		if (bootverbose)
			printf("Can't read VM Summary - assuming 18 Region ID bits\n");
		ridbits = 18; /* guaranteed minimum */
	} else {
		ridbits = (res.pal_result[1] >> 8) & 0xff;
		if (bootverbose)
			printf("Processor supports %d Region ID bits\n",
			    ridbits);
	}
	if (ridbits > 19)
		ridbits = 19;

	pmap_ridmax = (1 << ridbits);
	pmap_ridmapsz = pmap_ridmax / 64;
	pmap_ridmap = (uint64_t *)pmap_steal_memory(pmap_ridmax / 8);
	pmap_ridmap[0] |= 0xff;
	pmap_rididx = 0;
	pmap_ridcount = 8;
	mtx_init(&pmap_ridmutex, "RID allocator lock", NULL, MTX_DEF);

	/*
	 * Allocate some memory for initial kernel 'page tables'.
	 */
	ia64_kptdir = (void *)pmap_steal_memory(PAGE_SIZE);
	nkpt = 0;
	kernel_vm_end = VM_MIN_KERNEL_ADDRESS - VM_GATEWAY_SIZE;

	for (i = 0; phys_avail[i+2]; i+= 2)
		;
	count = i+2;

	/*
	 * Figure out a useful size for the VHPT, based on the size of
	 * physical memory and try to locate a region which is large
	 * enough to contain the VHPT (which must be a power of two in
	 * size and aligned to a natural boundary).
	 * We silently bump up the VHPT size to the minimum size if the
	 * user has set the tunable too small. Likewise, the VHPT size
	 * is silently capped to the maximum allowed.
	 */
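	/*
	 * Sizing sketch: Maxmem is a page count and a struct ia64_lpte
	 * is 32 bytes, so the loop below aims for roughly one VHPT
	 * entry per physical page.  E.g. 1GB of 8KB pages gives
	 * Maxmem = 131072, Maxmem * 32 = 4MB, i.e. a log2size of 22.
	 */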
	TUNABLE_INT_FETCH("machdep.vhpt.log2size", &pmap_vhpt_log2size);
	if (pmap_vhpt_log2size == 0) {
		pmap_vhpt_log2size = 15;
		size = 1UL << pmap_vhpt_log2size;
		while (size < Maxmem * 32) {
			pmap_vhpt_log2size++;
			size <<= 1;
		}
	} else if (pmap_vhpt_log2size < 15)
		pmap_vhpt_log2size = 15;
	if (pmap_vhpt_log2size > 61)
		pmap_vhpt_log2size = 61;

	pmap_vhpt_base[0] = 0;
	base = limit = 0;
	size = 1UL << pmap_vhpt_log2size;
	while (pmap_vhpt_base[0] == 0) {
		if (bootverbose)
			printf("Trying VHPT size 0x%lx\n", size);
		for (i = 0; i < count; i += 2) {
			base = (phys_avail[i] + size - 1) & ~(size - 1);
			limit = base + MAXCPU * size;
			if (limit <= phys_avail[i+1])
				/*
				 * VHPT can fit in this region
				 */
				break;
		}
		if (!phys_avail[i]) {
			/* Can't fit, try next smaller size. */
			pmap_vhpt_log2size--;
			size >>= 1;
		} else
			pmap_vhpt_base[0] = IA64_PHYS_TO_RR7(base);

		if (pmap_vhpt_log2size < 15)
			panic("Can't find space for VHPT");
	}

	if (bootverbose)
		printf("Putting VHPT at 0x%lx\n", base);

	if (base != phys_avail[i]) {
		/* Split this region. */
		if (bootverbose)
			printf("Splitting [%p-%p]\n", (void *)phys_avail[i],
			    (void *)phys_avail[i+1]);
		for (j = count; j > i; j -= 2) {
			phys_avail[j] = phys_avail[j-2];
			phys_avail[j+1] = phys_avail[j-2+1];
		}
		phys_avail[i+1] = base;
		phys_avail[i+2] = limit;
	} else
		phys_avail[i] = limit;
	pmap_vhpt_nbuckets = size / sizeof(struct ia64_lpte);

	pmap_vhpt_bucket = (void *)pmap_steal_memory(pmap_vhpt_nbuckets *
	    sizeof(struct ia64_bucket));
	pte = (struct ia64_lpte *)pmap_vhpt_base[0];
	for (i = 0; i < pmap_vhpt_nbuckets; i++) {
		pte[i].pte = 0;
		pte[i].itir = 0;
		pte[i].tag = 1UL << 63; /* Invalid tag */
		pte[i].chain = (uintptr_t)(pmap_vhpt_bucket + i);
		/* Stolen memory is zeroed! */
		mtx_init(&pmap_vhpt_bucket[i].mutex, "VHPT bucket lock", NULL,
		    MTX_NOWITNESS | MTX_SPIN);
	}

	for (i = 1; i < MAXCPU; i++) {
		pmap_vhpt_base[i] = pmap_vhpt_base[i - 1] + size;
		bcopy((void *)pmap_vhpt_base[i - 1], (void *)pmap_vhpt_base[i],
		    size);
	}

	map_vhpt(pmap_vhpt_base[0]);
	ia64_set_pta(pmap_vhpt_base[0] + (1 << 8) +
	    (pmap_vhpt_log2size << 2) + 1);
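	/*
	 * PTA refresher: bit 0 enables the VHPT walker, bits 7:2 hold
	 * log2 of the table size, bit 8 selects the long format and
	 * the upper bits hold the (aligned) table base -- hence
	 * base + (1 << 8) + (log2size << 2) + 1 above.
	 */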
	virtual_avail = VM_MIN_KERNEL_ADDRESS;
	virtual_end = VM_MAX_KERNEL_ADDRESS;

	/*
	 * Initialize the kernel pmap (which is statically allocated).
	 */
	PMAP_LOCK_INIT(kernel_pmap);
	for (i = 0; i < 5; i++)
		kernel_pmap->pm_rid[i] = 0;
	kernel_pmap->pm_active = 1;
	TAILQ_INIT(&kernel_pmap->pm_pvlist);
	PCPU_SET(current_pmap, kernel_pmap);

	/*
	 * Region 5 is mapped via the VHPT.
	 */
	ia64_set_rr(IA64_RR_BASE(5), (5 << 8) | (PAGE_SHIFT << 2) | 1);

	/*
	 * Region 6 is direct mapped UC and region 7 is direct mapped
	 * WB (cacheable).  The details of this are controlled by the
	 * Alt {I,D}TLB handlers.  Here we just make sure that they have
	 * the largest possible page size to minimize TLB usage.
	 */
	ia64_set_rr(IA64_RR_BASE(6), (6 << 8) | (IA64_ID_PAGE_SHIFT << 2));
	ia64_set_rr(IA64_RR_BASE(7), (7 << 8) | (IA64_ID_PAGE_SHIFT << 2));
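	/*
	 * In the region register values above, bits 31:8 hold the RID,
	 * bits 7:2 the preferred page size and bit 0 the per-region
	 * VHPT walker enable -- set for region 5, clear for the
	 * identity-mapped regions 6 and 7.
	 */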
	/*
	 * Clear out any random TLB entries left over from booting.
	 */
	pmap_invalidate_all(kernel_pmap);
}

static int
pmap_vhpt_population(SYSCTL_HANDLER_ARGS)
{
	int count, error, i;

	count = 0;
	for (i = 0; i < pmap_vhpt_nbuckets; i++)
		count += pmap_vhpt_bucket[i].length;

	error = SYSCTL_OUT(req, &count, sizeof(count));
	return (error);
}

/*
 * Initialize a vm_page's machine-dependent fields.
 */
void
pmap_page_init(vm_page_t m)
{

	TAILQ_INIT(&m->md.pv_list);
	m->md.pv_list_count = 0;
}

/*
 * Initialize the pmap module.
 * Called by vm_init, to initialize any structures that the pmap
 * system needs to map virtual memory.
 */
void
pmap_init(void)
{
	int shpgperproc = PMAP_SHPGPERPROC;

	/*
	 * Initialize the address space (zone) for the pv entries. Set a
	 * high water mark so that the system can recover from excessive
	 * numbers of pv entries.
	 */
	pvzone = uma_zcreate("PV ENTRY", sizeof(struct pv_entry), NULL, NULL,
	    NULL, NULL, UMA_ALIGN_PTR, UMA_ZONE_VM | UMA_ZONE_NOFREE);
	TUNABLE_INT_FETCH("vm.pmap.shpgperproc", &shpgperproc);
	pv_entry_max = shpgperproc * maxproc + cnt.v_page_count;
	TUNABLE_INT_FETCH("vm.pmap.pv_entries", &pv_entry_max);
	pv_entry_high_water = 9 * (pv_entry_max / 10);

	ptezone = uma_zcreate("PT ENTRY", sizeof (struct ia64_lpte),
	    NULL, NULL, NULL, NULL, UMA_ALIGN_PTR, UMA_ZONE_VM|UMA_ZONE_NOFREE);
}
/***************************************************
 * Manipulate TLBs for a pmap
 ***************************************************/

static void
pmap_invalidate_page(pmap_t pmap, vm_offset_t va)
{
	struct ia64_lpte *pte;
	uint64_t vhpt_ofs;
	int i;

	KASSERT((pmap == kernel_pmap || pmap == PCPU_GET(current_pmap)),
	    ("invalidating TLB for non-current pmap"));

	vhpt_ofs = ia64_thash(va) - pmap_vhpt_base[PCPU_GET(cpuid)];

	for (i = 0; i < MAXCPU; i++) {
		pte = (struct ia64_lpte *)(pmap_vhpt_base[i] + vhpt_ofs);
		if (pte->tag == ia64_ttag(va))
			pte->tag = 1UL << 63;
	}

	mtx_lock_spin(&pmap_ptcmutex);
	ia64_ptc_ga(va, PAGE_SHIFT << 2);
	mtx_unlock_spin(&pmap_ptcmutex);
}

static void
pmap_invalidate_all_1(void *arg)
{
	uint64_t addr;
	int i, j;

	critical_enter();
	addr = pmap_ptc_e_base;
	for (i = 0; i < pmap_ptc_e_count1; i++) {
		for (j = 0; j < pmap_ptc_e_count2; j++) {
			ia64_ptc_e(addr);
			addr += pmap_ptc_e_stride2;
		}
		addr += pmap_ptc_e_stride1;
	}
	critical_exit();
}

static void
pmap_invalidate_all(pmap_t pmap)
{

	KASSERT((pmap == kernel_pmap || pmap == PCPU_GET(current_pmap)),
	    ("invalidating TLB for non-current pmap"));

#ifdef SMP
	if (mp_ncpus > 1)
		smp_rendezvous(NULL, pmap_invalidate_all_1, NULL, NULL);
	else
#endif
		pmap_invalidate_all_1(NULL);
}
static uint32_t
pmap_allocate_rid(void)
{
	uint64_t bit, bits;
	int rid;

	mtx_lock(&pmap_ridmutex);
	if (pmap_ridcount == pmap_ridmax)
		panic("pmap_allocate_rid: All Region IDs used");

	/* Find an index with a free bit. */
	while ((bits = pmap_ridmap[pmap_rididx]) == ~0UL) {
		pmap_rididx++;
		if (pmap_rididx == pmap_ridmapsz)
			pmap_rididx = 0;
	}
	rid = pmap_rididx * 64;

	/* Find a free bit. */
	bit = 1UL;
	while (bits & bit) {
		rid++;
		bit <<= 1;
	}

	pmap_ridmap[pmap_rididx] |= bit;
	pmap_ridcount++;
	mtx_unlock(&pmap_ridmutex);

	return rid;
}

static void
pmap_free_rid(uint32_t rid)
{
	uint64_t bit;
	int idx;

	idx = rid / 64;
	bit = ~(1UL << (rid & 63));

	mtx_lock(&pmap_ridmutex);
	pmap_ridmap[idx] &= bit;
	pmap_ridcount--;
	mtx_unlock(&pmap_ridmutex);
}

/***************************************************
 * Page table page management routines.....
 ***************************************************/

void
pmap_pinit0(struct pmap *pmap)
{

	/* kernel_pmap is the same as any other pmap. */
	pmap_pinit(pmap);
}
/*
 * Initialize a preallocated and zeroed pmap structure,
 * such as one in a vmspace structure.
 */
int
pmap_pinit(struct pmap *pmap)
{
	int i;

	PMAP_LOCK_INIT(pmap);
	for (i = 0; i < 5; i++)
		pmap->pm_rid[i] = pmap_allocate_rid();
	pmap->pm_active = 0;
	TAILQ_INIT(&pmap->pm_pvlist);
	bzero(&pmap->pm_stats, sizeof pmap->pm_stats);
	return (1);
}

/***************************************************
 * Pmap allocation/deallocation routines.
 ***************************************************/

/*
 * Release any resources held by the given physical map.
 * Called when a pmap initialized by pmap_pinit is being released.
 * Should only be called if the map contains no valid mappings.
 */
void
pmap_release(pmap_t pmap)
{
	int i;

	for (i = 0; i < 5; i++)
		if (pmap->pm_rid[i])
			pmap_free_rid(pmap->pm_rid[i]);
	PMAP_LOCK_DESTROY(pmap);
}

/*
 * grow the number of kernel page table entries, if needed
 */
void
pmap_growkernel(vm_offset_t addr)
{
	struct ia64_lpte **dir1;
	struct ia64_lpte *leaf;
	vm_page_t nkpg;

	while (kernel_vm_end <= addr) {
		if (nkpt == PAGE_SIZE/8 + PAGE_SIZE*PAGE_SIZE/64)
			panic("%s: out of kernel address space", __func__);

		dir1 = ia64_kptdir[KPTE_DIR0_INDEX(kernel_vm_end)];
		if (dir1 == NULL) {
			nkpg = vm_page_alloc(NULL, nkpt++,
			    VM_ALLOC_NOOBJ|VM_ALLOC_INTERRUPT|VM_ALLOC_WIRED);
			if (!nkpg)
				panic("%s: cannot add dir. page", __func__);

			dir1 = (struct ia64_lpte **)
			    IA64_PHYS_TO_RR7(VM_PAGE_TO_PHYS(nkpg));
			bzero(dir1, PAGE_SIZE);
			ia64_kptdir[KPTE_DIR0_INDEX(kernel_vm_end)] = dir1;
		}

		nkpg = vm_page_alloc(NULL, nkpt++,
		    VM_ALLOC_NOOBJ|VM_ALLOC_INTERRUPT|VM_ALLOC_WIRED);
		if (!nkpg)
			panic("%s: cannot add PTE page", __func__);

		leaf = (struct ia64_lpte *)
		    IA64_PHYS_TO_RR7(VM_PAGE_TO_PHYS(nkpg));
		bzero(leaf, PAGE_SIZE);
		dir1[KPTE_DIR1_INDEX(kernel_vm_end)] = leaf;

		kernel_vm_end += PAGE_SIZE * NKPTEPG;
	}
}
/***************************************************
 * page management routines.
 ***************************************************/

/*
 * free the pv_entry back to the free list
 */
static PMAP_INLINE void
free_pv_entry(pv_entry_t pv)
{

	pv_entry_count--;
	uma_zfree(pvzone, pv);
}

/*
 * get a new pv_entry, allocating a block from the system
 * when needed.
 */
static pv_entry_t
get_pv_entry(pmap_t locked_pmap)
{
	static const struct timeval printinterval = { 60, 0 };
	static struct timeval lastprint;
	struct vpgqueues *vpq;
	struct ia64_lpte *pte;
	pmap_t oldpmap, pmap;
	pv_entry_t allocated_pv, next_pv, pv;
	vm_offset_t va;
	vm_page_t m;

	PMAP_LOCK_ASSERT(locked_pmap, MA_OWNED);
	mtx_assert(&vm_page_queue_mtx, MA_OWNED);
	allocated_pv = uma_zalloc(pvzone, M_NOWAIT);
	if (allocated_pv != NULL) {
		pv_entry_count++;
		if (pv_entry_count > pv_entry_high_water)
			pagedaemon_wakeup();
		else
			return (allocated_pv);
	}
	/*
	 * Reclaim pv entries: At first, destroy mappings to inactive
	 * pages.  After that, if a pv entry is still needed, destroy
	 * mappings to active pages.
	 */
	if (ratecheck(&lastprint, &printinterval))
		printf("Approaching the limit on PV entries, "
		    "increase the vm.pmap.shpgperproc tunable.\n");
	vpq = &vm_page_queues[PQ_INACTIVE];
retry:
	TAILQ_FOREACH(m, &vpq->pl, pageq) {
		if (m->hold_count || m->busy)
			continue;
		TAILQ_FOREACH_SAFE(pv, &m->md.pv_list, pv_list, next_pv) {
			va = pv->pv_va;
			pmap = pv->pv_pmap;
			/* Avoid deadlock and lock recursion. */
			if (pmap > locked_pmap)
				PMAP_LOCK(pmap);
			else if (pmap != locked_pmap && !PMAP_TRYLOCK(pmap))
				continue;
			pmap->pm_stats.resident_count--;
			oldpmap = pmap_switch(pmap);
			pte = pmap_find_vhpt(va);
			KASSERT(pte != NULL, ("pte"));
			pmap_remove_vhpt(va);
			pmap_invalidate_page(pmap, va);
			pmap_switch(oldpmap);
			if (pmap_accessed(pte))
				vm_page_flag_set(m, PG_REFERENCED);
			if (pmap_dirty(pte))
				vm_page_dirty(m);
			pmap_free_pte(pte, va);
			TAILQ_REMOVE(&pmap->pm_pvlist, pv, pv_plist);
			m->md.pv_list_count--;
			TAILQ_REMOVE(&m->md.pv_list, pv, pv_list);
			if (TAILQ_EMPTY(&m->md.pv_list))
				vm_page_flag_clear(m, PG_WRITEABLE);
			if (pmap != locked_pmap)
				PMAP_UNLOCK(pmap);
			if (allocated_pv == NULL)
				allocated_pv = pv;
			else
				free_pv_entry(pv);
		}
	}
	if (allocated_pv == NULL) {
		if (vpq == &vm_page_queues[PQ_INACTIVE]) {
			vpq = &vm_page_queues[PQ_ACTIVE];
			goto retry;
		}
		panic("get_pv_entry: increase the vm.pmap.shpgperproc tunable");
	}
	return (allocated_pv);
}
/*
 * Conditionally create a pv entry.
 */
static boolean_t
pmap_try_insert_pv_entry(pmap_t pmap, vm_offset_t va, vm_page_t m)
{
	pv_entry_t pv;

	PMAP_LOCK_ASSERT(pmap, MA_OWNED);
	mtx_assert(&vm_page_queue_mtx, MA_OWNED);
	if (pv_entry_count < pv_entry_high_water &&
	    (pv = uma_zalloc(pvzone, M_NOWAIT)) != NULL) {
		pv_entry_count++;
		pv->pv_va = va;
		pv->pv_pmap = pmap;
		TAILQ_INSERT_TAIL(&pmap->pm_pvlist, pv, pv_plist);
		TAILQ_INSERT_TAIL(&m->md.pv_list, pv, pv_list);
		m->md.pv_list_count++;
		return (TRUE);
	}
	return (FALSE);
}

/*
 * Add an ia64_lpte to the VHPT.
 */
static void
pmap_enter_vhpt(struct ia64_lpte *pte, vm_offset_t va)
{
	struct ia64_bucket *bckt;
	struct ia64_lpte *vhpte;
	uint64_t pte_pa;

	/* Can fault, so get it out of the way. */
	pte_pa = ia64_tpa((vm_offset_t)pte);

	vhpte = (struct ia64_lpte *)ia64_thash(va);
	bckt = (struct ia64_bucket *)vhpte->chain;

	mtx_lock_spin(&bckt->mutex);
	pte->chain = bckt->chain;
	ia64_mf();
	bckt->chain = pte_pa;

	pmap_vhpt_inserts++;
	bckt->length++;
	mtx_unlock_spin(&bckt->mutex);
}

/*
 * Remove the ia64_lpte matching va from the VHPT. Return zero if it
 * worked or an appropriate error code otherwise.
 */
static int
pmap_remove_vhpt(vm_offset_t va)
{
	struct ia64_bucket *bckt;
	struct ia64_lpte *pte;
	struct ia64_lpte *lpte;
	struct ia64_lpte *vhpte;
	uint64_t chain, tag;

	tag = ia64_ttag(va);
	vhpte = (struct ia64_lpte *)ia64_thash(va);
	bckt = (struct ia64_bucket *)vhpte->chain;

	lpte = NULL;
	mtx_lock_spin(&bckt->mutex);
	chain = bckt->chain;
	pte = (struct ia64_lpte *)IA64_PHYS_TO_RR7(chain);
	while (chain != 0 && pte->tag != tag) {
		lpte = pte;
		chain = pte->chain;
		pte = (struct ia64_lpte *)IA64_PHYS_TO_RR7(chain);
	}
	if (chain == 0) {
		mtx_unlock_spin(&bckt->mutex);
		return (ENOENT);
	}

	/* Snip this pv_entry out of the collision chain. */
	if (lpte == NULL)
		bckt->chain = pte->chain;
	else
		lpte->chain = pte->chain;
	ia64_mf();

	bckt->length--;
	mtx_unlock_spin(&bckt->mutex);
	return (0);
}
/*
 * Find the ia64_lpte for the given va, if any.
 */
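/*
 * (A note on the machinery used below: ia64_thash(va) yields the VHPT
 * line for va -- and, via its chain field, the collision bucket --
 * while ia64_ttag(va) yields the translation tag identifying va within
 * that bucket.  Chain links are physical addresses, hence the
 * IA64_PHYS_TO_RR7() hops when walking a chain.)
 */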
static struct ia64_lpte *
pmap_find_vhpt(vm_offset_t va)
{
	struct ia64_bucket *bckt;
	struct ia64_lpte *pte;
	uint64_t chain, tag;

	tag = ia64_ttag(va);
	pte = (struct ia64_lpte *)ia64_thash(va);
	bckt = (struct ia64_bucket *)pte->chain;

	mtx_lock_spin(&bckt->mutex);
	chain = bckt->chain;
	pte = (struct ia64_lpte *)IA64_PHYS_TO_RR7(chain);
	while (chain != 0 && pte->tag != tag) {
		chain = pte->chain;
		pte = (struct ia64_lpte *)IA64_PHYS_TO_RR7(chain);
	}
	mtx_unlock_spin(&bckt->mutex);
	return ((chain != 0) ? pte : NULL);
}
/*
 * Remove an entry from the list of managed mappings.
 */
static int
pmap_remove_entry(pmap_t pmap, vm_page_t m, vm_offset_t va, pv_entry_t pv)
{

	if (!pv) {
		if (m->md.pv_list_count < pmap->pm_stats.resident_count) {
			TAILQ_FOREACH(pv, &m->md.pv_list, pv_list) {
				if (pmap == pv->pv_pmap && va == pv->pv_va)
					break;
			}
		} else {
			TAILQ_FOREACH(pv, &pmap->pm_pvlist, pv_plist) {
				if (va == pv->pv_va)
					break;
			}
		}
	}

	if (pv) {
		TAILQ_REMOVE(&m->md.pv_list, pv, pv_list);
		m->md.pv_list_count--;
		if (TAILQ_FIRST(&m->md.pv_list) == NULL)
			vm_page_flag_clear(m, PG_WRITEABLE);

		TAILQ_REMOVE(&pmap->pm_pvlist, pv, pv_plist);
		free_pv_entry(pv);
		return (0);
	}
	return (ENOENT);
}

/*
 * Create a pv entry for page at pa for
 * (pmap, va).
 */
static void
pmap_insert_entry(pmap_t pmap, vm_offset_t va, vm_page_t m)
{
	pv_entry_t pv;

	pv = get_pv_entry(pmap);
	pv->pv_pmap = pmap;
	pv->pv_va = va;

	PMAP_LOCK_ASSERT(pmap, MA_OWNED);
	mtx_assert(&vm_page_queue_mtx, MA_OWNED);
	TAILQ_INSERT_TAIL(&pmap->pm_pvlist, pv, pv_plist);
	TAILQ_INSERT_TAIL(&m->md.pv_list, pv, pv_list);
	m->md.pv_list_count++;
}

/*
 * Routine:	pmap_extract
 * Function:
 *	Extract the physical page address associated
 *	with the given map/virtual_address pair.
 */
vm_paddr_t
pmap_extract(pmap_t pmap, vm_offset_t va)
{
	struct ia64_lpte *pte;
	pmap_t oldpmap;
	vm_paddr_t pa;

	pa = 0;
	PMAP_LOCK(pmap);
	oldpmap = pmap_switch(pmap);
	pte = pmap_find_vhpt(va);
	if (pte != NULL && pmap_present(pte))
		pa = pmap_ppn(pte);
	pmap_switch(oldpmap);
	PMAP_UNLOCK(pmap);
	return (pa);
}
/*
 * Routine:	pmap_extract_and_hold
 * Function:
 *	Atomically extract and hold the physical page
 *	with the given pmap and virtual address pair
 *	if that mapping permits the given protection.
 */
vm_page_t
pmap_extract_and_hold(pmap_t pmap, vm_offset_t va, vm_prot_t prot)
{
	struct ia64_lpte *pte;
	pmap_t oldpmap;
	vm_page_t m;

	m = NULL;
	vm_page_lock_queues();
	PMAP_LOCK(pmap);
	oldpmap = pmap_switch(pmap);
	pte = pmap_find_vhpt(va);
	if (pte != NULL && pmap_present(pte) &&
	    (pmap_prot(pte) & prot) == prot) {
		m = PHYS_TO_VM_PAGE(pmap_ppn(pte));
		vm_page_hold(m);
	}
	vm_page_unlock_queues();
	pmap_switch(oldpmap);
	PMAP_UNLOCK(pmap);
	return (m);
}

/***************************************************
 * Low level mapping routines.....
 ***************************************************/

/*
 * Find the kernel lpte for mapping the given virtual address, which
 * must be in the part of region 5 which we can cover with our kernel
 * 'page tables'.
 */
static struct ia64_lpte *
pmap_find_kpte(vm_offset_t va)
{
	struct ia64_lpte **dir1;
	struct ia64_lpte *leaf;

	KASSERT((va >> 61) == 5,
	    ("kernel mapping 0x%lx not in region 5", va));
	KASSERT(va < kernel_vm_end,
	    ("kernel mapping 0x%lx out of range", va));

	dir1 = ia64_kptdir[KPTE_DIR0_INDEX(va)];
	leaf = dir1[KPTE_DIR1_INDEX(va)];
	return (&leaf[KPTE_PTE_INDEX(va)]);
}
/*
 * Find a pte suitable for mapping a user-space address. If one exists
 * in the VHPT, that one will be returned, otherwise a new pte is
 * allocated.
 */
static struct ia64_lpte *
pmap_find_pte(vm_offset_t va)
{
	struct ia64_lpte *pte;

	if (va >= VM_MAXUSER_ADDRESS)
		return pmap_find_kpte(va);

	pte = pmap_find_vhpt(va);
	if (pte == NULL) {
		pte = uma_zalloc(ptezone, M_NOWAIT | M_ZERO);
		if (pte != NULL)
			pte->tag = 1UL << 63;
	}
	return (pte);
}

/*
 * Free a pte which is now unused. This simply returns it to the zone
 * allocator if it is a user mapping. For kernel mappings, clear the
 * valid bit to make it clear that the mapping is not currently used.
 */
static void
pmap_free_pte(struct ia64_lpte *pte, vm_offset_t va)
{

	if (va < VM_MAXUSER_ADDRESS)
		uma_zfree(ptezone, pte);
	else
		pmap_clear_present(pte);
}
static PMAP_INLINE void
pmap_pte_prot(pmap_t pm, struct ia64_lpte *pte, vm_prot_t prot)
{
	static long prot2ar[4] = {
		PTE_AR_R,		/* VM_PROT_NONE */
		PTE_AR_RW,		/* VM_PROT_WRITE */
		PTE_AR_RX|PTE_ED,	/* VM_PROT_EXECUTE */
		PTE_AR_RWX|PTE_ED	/* VM_PROT_WRITE|VM_PROT_EXECUTE */
	};
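	/*
	 * Index sketch: VM_PROT_READ/WRITE/EXECUTE are 1/2/4, so the
	 * "(prot & VM_PROT_ALL) >> 1" below folds the read bit away:
	 * NONE/R -> 0 (AR_R), W/RW -> 1 (AR_RW), X/RX -> 2 (AR_RX)
	 * and WX/RWX -> 3 (AR_RWX).
	 */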

	pte->pte &= ~(PTE_PROT_MASK | PTE_PL_MASK | PTE_AR_MASK | PTE_ED);
	pte->pte |= (uint64_t)(prot & VM_PROT_ALL) << 56;
	pte->pte |= (prot == VM_PROT_NONE || pm == kernel_pmap)
	    ? PTE_PL_KERN : PTE_PL_USER;
	pte->pte |= prot2ar[(prot & VM_PROT_ALL) >> 1];
}
/*
 * Set a pte to contain a valid mapping and enter it in the VHPT. If
 * the pte was originally valid, then it's assumed to already be in the
 * VHPT.
 * This function does not set the protection bits.  It's expected
 * that those have been set correctly prior to calling this function.
 */
static void
pmap_set_pte(struct ia64_lpte *pte, vm_offset_t va, vm_offset_t pa,
    boolean_t wired, boolean_t managed)
{

	pte->pte &= PTE_PROT_MASK | PTE_PL_MASK | PTE_AR_MASK | PTE_ED;
	pte->pte |= PTE_PRESENT | PTE_MA_WB;
	pte->pte |= (managed) ? PTE_MANAGED : (PTE_DIRTY | PTE_ACCESSED);
	pte->pte |= (wired) ? PTE_WIRED : 0;
	pte->pte |= pa & PTE_PPN_MASK;

	pte->itir = PAGE_SHIFT << 2;
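	/*
	 * ITIR note: the page size goes in bits 7:2 (hence
	 * PAGE_SHIFT << 2); the protection key in bits 31:8 is left
	 * at zero here.
	 */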
	pte->tag = ia64_ttag(va);
}
/*
 * Remove the (possibly managed) mapping represented by pte from the
 * given pmap.
 */
static int
pmap_remove_pte(pmap_t pmap, struct ia64_lpte *pte, vm_offset_t va,
		pv_entry_t pv, int freepte)
{
	int error;
	vm_page_t m;

	KASSERT((pmap == kernel_pmap || pmap == PCPU_GET(current_pmap)),
	    ("removing pte for non-current pmap"));

	/*
	 * First remove from the VHPT.
	 */
	error = pmap_remove_vhpt(va);
	if (error)
		return (error);

	pmap_invalidate_page(pmap, va);

	if (pmap_wired(pte))
		pmap->pm_stats.wired_count -= 1;

	pmap->pm_stats.resident_count -= 1;
	if (pmap_managed(pte)) {
		m = PHYS_TO_VM_PAGE(pmap_ppn(pte));
		if (pmap_dirty(pte))
			vm_page_dirty(m);
		if (pmap_accessed(pte))
			vm_page_flag_set(m, PG_REFERENCED);

		error = pmap_remove_entry(pmap, m, va, pv);
	}
	if (freepte)
		pmap_free_pte(pte, va);

	return (error);
}
/*
 * Extract the physical page address associated with a kernel
 * virtual address.
 */
vm_paddr_t
pmap_kextract(vm_offset_t va)
{
	struct ia64_lpte *pte;
	vm_offset_t gwpage;

	KASSERT(va >= IA64_RR_BASE(5), ("Must be kernel VA"));

	/* Regions 6 and 7 are direct mapped. */
	if (va >= IA64_RR_BASE(6))
		return (IA64_RR_MASK(va));
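	/*
	 * (IA64_RR_MASK() merely clears the region bits, va<63:61>,
	 * which is all it takes to turn an identity-mapped address
	 * back into a physical address.)
	 */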

	/* EPC gateway page? */
	gwpage = (vm_offset_t)ia64_get_k5();
	if (va >= gwpage && va < gwpage + VM_GATEWAY_SIZE)
		return (IA64_RR_MASK((vm_offset_t)ia64_gateway_page));

	/* Bail out if the virtual address is beyond our limits. */
	if (va >= kernel_vm_end)
		return (0);

	pte = pmap_find_kpte(va);
	if (!pmap_present(pte))
		return (0);
	return (pmap_ppn(pte) | (va & PAGE_MASK));
}
/*
 * Add a list of wired pages to the kva.  This routine is only used for
 * temporary kernel mappings that do not need to have page modification
 * or references recorded.  Note that old mappings are simply written
 * over.  The page is effectively wired, but it's customary to not have
 * the PTE reflect that, nor update statistics.
 */
void
pmap_qenter(vm_offset_t va, vm_page_t *m, int count)
{
	struct ia64_lpte *pte;
	int i;

	for (i = 0; i < count; i++) {
		pte = pmap_find_kpte(va);
		if (pmap_present(pte))
			pmap_invalidate_page(kernel_pmap, va);
		else
			pmap_enter_vhpt(pte, va);
		pmap_pte_prot(kernel_pmap, pte, VM_PROT_ALL);
		pmap_set_pte(pte, va, VM_PAGE_TO_PHYS(m[i]), FALSE, FALSE);
		va += PAGE_SIZE;
	}
}

/*
 * this routine jerks page mappings from the
 * kernel -- it is meant only for temporary mappings.
 */
void
pmap_qremove(vm_offset_t va, int count)
{
	struct ia64_lpte *pte;
	int i;

	for (i = 0; i < count; i++) {
		pte = pmap_find_kpte(va);
		if (pmap_present(pte)) {
			pmap_remove_vhpt(va);
			pmap_invalidate_page(kernel_pmap, va);
			pmap_clear_present(pte);
		}
		va += PAGE_SIZE;
	}
}

/*
 * Add a wired page to the kva.  As for pmap_qenter(), it's customary
 * to not have the PTE reflect that, nor update statistics.
 */
void
pmap_kenter(vm_offset_t va, vm_offset_t pa)
{
	struct ia64_lpte *pte;

	pte = pmap_find_kpte(va);
	if (pmap_present(pte))
		pmap_invalidate_page(kernel_pmap, va);
	else
		pmap_enter_vhpt(pte, va);
	pmap_pte_prot(kernel_pmap, pte, VM_PROT_ALL);
	pmap_set_pte(pte, va, pa, FALSE, FALSE);
}

/*
 * Remove a page from the kva
 */
void
pmap_kremove(vm_offset_t va)
{
	struct ia64_lpte *pte;

	pte = pmap_find_kpte(va);
	if (pmap_present(pte)) {
		pmap_remove_vhpt(va);
		pmap_invalidate_page(kernel_pmap, va);
		pmap_clear_present(pte);
	}
}

/*
 * Used to map a range of physical addresses into kernel
 * virtual address space.
 *
 * The value passed in '*virt' is a suggested virtual address for
 * the mapping. Architectures which can support a direct-mapped
 * physical to virtual region can return the appropriate address
 * within that region, leaving '*virt' unchanged. Other
 * architectures should map the pages starting at '*virt' and
 * update '*virt' with the first usable address after the mapped
 * region.
 */
vm_offset_t
pmap_map(vm_offset_t *virt, vm_offset_t start, vm_offset_t end, int prot)
{
	return IA64_PHYS_TO_RR7(start);
}
/*
 * Remove a single page from a process address space
 */
static void
pmap_remove_page(pmap_t pmap, vm_offset_t va)
{
	struct ia64_lpte *pte;

	KASSERT((pmap == kernel_pmap || pmap == PCPU_GET(current_pmap)),
	    ("removing page for non-current pmap"));

	pte = pmap_find_vhpt(va);
	if (pte != NULL)
		pmap_remove_pte(pmap, pte, va, 0, 1);
}

/*
 * Remove the given range of addresses from the specified map.
 *
 * It is assumed that the start and end are properly
 * rounded to the page size.
 */
void
pmap_remove(pmap_t pmap, vm_offset_t sva, vm_offset_t eva)
{
	pmap_t oldpmap;
	vm_offset_t va;
	pv_entry_t npv, pv;
	struct ia64_lpte *pte;

	if (pmap->pm_stats.resident_count == 0)
		return;

	vm_page_lock_queues();
	PMAP_LOCK(pmap);
	oldpmap = pmap_switch(pmap);

	/*
	 * Special handling of removing one page: a very common
	 * operation that is easy to short circuit some code for.
	 */
	if (sva + PAGE_SIZE == eva) {
		pmap_remove_page(pmap, sva);
		goto out;
	}

	if (pmap->pm_stats.resident_count < ((eva - sva) >> PAGE_SHIFT)) {
		TAILQ_FOREACH_SAFE(pv, &pmap->pm_pvlist, pv_plist, npv) {
			va = pv->pv_va;
			if (va >= sva && va < eva) {
				pte = pmap_find_vhpt(va);
				KASSERT(pte != NULL, ("pte"));
				pmap_remove_pte(pmap, pte, va, pv, 1);
			}
		}
	} else {
		for (va = sva; va < eva; va += PAGE_SIZE) {
			pte = pmap_find_vhpt(va);
			if (pte != NULL)
				pmap_remove_pte(pmap, pte, va, 0, 1);
		}
	}

out:
	vm_page_unlock_queues();
	pmap_switch(oldpmap);
	PMAP_UNLOCK(pmap);
}
/*
 * Routine:	pmap_remove_all
 * Function:
 *		Removes this physical page from
 *		all physical maps in which it resides.
 *		Reflects back modify bits to the pager.
 *
 * Notes:
 *		Original versions of this routine were very
 *		inefficient because they iteratively called
 *		pmap_remove (slow...)
 */
void
pmap_remove_all(vm_page_t m)
{
	pmap_t oldpmap;
	pv_entry_t pv;

#if defined(DIAGNOSTIC)
	/*
	 * XXX This makes pmap_remove_all() illegal for non-managed pages!
	 */
	if (m->flags & PG_FICTITIOUS) {
		panic("pmap_remove_all: illegal for unmanaged page, va: 0x%lx",
		    VM_PAGE_TO_PHYS(m));
	}
#endif
	mtx_assert(&vm_page_queue_mtx, MA_OWNED);
	while ((pv = TAILQ_FIRST(&m->md.pv_list)) != NULL) {
		struct ia64_lpte *pte;
		pmap_t pmap = pv->pv_pmap;
		vm_offset_t va = pv->pv_va;

		PMAP_LOCK(pmap);
		oldpmap = pmap_switch(pmap);
		pte = pmap_find_vhpt(va);
		KASSERT(pte != NULL, ("pte"));
		if (pmap_ppn(pte) != VM_PAGE_TO_PHYS(m))
			panic("pmap_remove_all: pv_table for %lx is inconsistent",
			    VM_PAGE_TO_PHYS(m));
		pmap_remove_pte(pmap, pte, va, pv, 1);
		pmap_switch(oldpmap);
		PMAP_UNLOCK(pmap);
	}
	vm_page_flag_clear(m, PG_WRITEABLE);
}

/*
 * Set the physical protection on the
 * specified range of this map as requested.
 */
void
pmap_protect(pmap_t pmap, vm_offset_t sva, vm_offset_t eva, vm_prot_t prot)
{
	pmap_t oldpmap;
	struct ia64_lpte *pte;

	if ((prot & VM_PROT_READ) == VM_PROT_NONE) {
		pmap_remove(pmap, sva, eva);
		return;
	}

	if ((prot & (VM_PROT_WRITE|VM_PROT_EXECUTE)) ==
	    (VM_PROT_WRITE|VM_PROT_EXECUTE))
		return;

	if ((sva & PAGE_MASK) || (eva & PAGE_MASK))
		panic("pmap_protect: unaligned addresses");

	vm_page_lock_queues();
	PMAP_LOCK(pmap);
	oldpmap = pmap_switch(pmap);
	for ( ; sva < eva; sva += PAGE_SIZE) {
		/* If page is invalid, skip this page */
		pte = pmap_find_vhpt(sva);
		if (pte == NULL)
			continue;

		/* If there's no change, skip it too */
		if (pmap_prot(pte) == prot)
			continue;

		if (pmap_managed(pte)) {
			vm_offset_t pa = pmap_ppn(pte);
			vm_page_t m = PHYS_TO_VM_PAGE(pa);

			if (pmap_dirty(pte)) {
				vm_page_dirty(m);
				pmap_clear_dirty(pte);
			}

			if (pmap_accessed(pte)) {
				vm_page_flag_set(m, PG_REFERENCED);
				pmap_clear_accessed(pte);
			}
		}

		if (prot & VM_PROT_EXECUTE)
			ia64_sync_icache(sva, PAGE_SIZE);

		pmap_pte_prot(pmap, pte, prot);
		pmap_invalidate_page(pmap, sva);
	}
	vm_page_unlock_queues();
	pmap_switch(oldpmap);
	PMAP_UNLOCK(pmap);
}
/*
 * Insert the given physical page (p) at
 * the specified virtual address (v) in the
 * target physical map with the protection requested.
 *
 * If specified, the page will be wired down, meaning
 * that the related pte can not be reclaimed.
 *
 * NB:  This is the only routine which MAY NOT lazy-evaluate
 * or lose information.  That is, this routine must actually
 * insert this page into the given map NOW.
 */
void
pmap_enter(pmap_t pmap, vm_offset_t va, vm_prot_t access, vm_page_t m,
    vm_prot_t prot, boolean_t wired)
{
	pmap_t oldpmap;
	vm_offset_t pa;
	vm_offset_t opa;
	struct ia64_lpte origpte;
	struct ia64_lpte *pte;
	boolean_t icache_inval, managed;

	vm_page_lock_queues();
	PMAP_LOCK(pmap);
	oldpmap = pmap_switch(pmap);

	va &= ~PAGE_MASK;
#ifdef DIAGNOSTIC
	if (va > VM_MAX_KERNEL_ADDRESS)
		panic("pmap_enter: toobig");
#endif

	/*
	 * Find (or create) a pte for the given mapping.
	 */
	while ((pte = pmap_find_pte(va)) == NULL) {
		pmap_switch(oldpmap);
		PMAP_UNLOCK(pmap);
		vm_page_unlock_queues();
		VM_WAIT;
		vm_page_lock_queues();
		PMAP_LOCK(pmap);
		oldpmap = pmap_switch(pmap);
	}
	origpte = *pte;
	if (!pmap_present(pte)) {
		opa = ~0UL;
		pmap_enter_vhpt(pte, va);
	} else
		opa = pmap_ppn(pte);
	managed = FALSE;
	pa = VM_PAGE_TO_PHYS(m);

	icache_inval = (prot & VM_PROT_EXECUTE) ? TRUE : FALSE;

	/*
	 * Mapping has not changed, must be protection or wiring change.
	 */
	if (opa == pa) {
		/*
		 * Wiring change, just update stats. We don't worry about
		 * wiring PT pages as they remain resident as long as there
		 * are valid mappings in them. Hence, if a user page is wired,
		 * the PT page will be also.
		 */
		if (wired && !pmap_wired(&origpte))
			pmap->pm_stats.wired_count++;
		else if (!wired && pmap_wired(&origpte))
			pmap->pm_stats.wired_count--;

		managed = (pmap_managed(&origpte)) ? TRUE : FALSE;

		/*
		 * We might be turning off write access to the page,
		 * so we go ahead and sense modify status. Otherwise,
		 * we can avoid I-cache invalidation if the page
		 * already allowed execution.
		 */
		if (managed && pmap_dirty(&origpte))
			vm_page_dirty(m);
		else if (pmap_exec(&origpte))
			icache_inval = FALSE;

		pmap_invalidate_page(pmap, va);
		goto validate;
	}

	/*
	 * Mapping has changed, invalidate old range and fall
	 * through to handle validating new mapping.
	 */
	if (opa != ~0UL) {
		pmap_remove_pte(pmap, pte, va, 0, 0);
		pmap_enter_vhpt(pte, va);
	}

	/*
	 * Enter on the PV list if part of our managed memory.
	 */
	if ((m->flags & (PG_FICTITIOUS | PG_UNMANAGED)) == 0) {
		KASSERT(va < kmi.clean_sva || va >= kmi.clean_eva,
		    ("pmap_enter: managed mapping within the clean submap"));
		pmap_insert_entry(pmap, va, m);
		managed = TRUE;
	}

	/*
	 * Increment counters
	 */
	pmap->pm_stats.resident_count++;
	if (wired)
		pmap->pm_stats.wired_count++;

validate:

	/*
	 * Now validate mapping with desired protection/wiring. This
	 * adds the pte to the VHPT if necessary.
	 */
	pmap_pte_prot(pmap, pte, prot);
	pmap_set_pte(pte, va, pa, wired, managed);

	/* Invalidate the I-cache when needed. */
	if (icache_inval)
		ia64_sync_icache(va, PAGE_SIZE);

	if ((prot & VM_PROT_WRITE) != 0)
		vm_page_flag_set(m, PG_WRITEABLE);
	vm_page_unlock_queues();
	pmap_switch(oldpmap);
	PMAP_UNLOCK(pmap);
}
/*
 * Maps a sequence of resident pages belonging to the same object.
 * The sequence begins with the given page m_start.  This page is
 * mapped at the given virtual address start.  Each subsequent page is
 * mapped at a virtual address that is offset from start by the same
 * amount as the page is offset from m_start within the object.  The
 * last page in the sequence is the page with the largest offset from
 * m_start that can be mapped at a virtual address less than the given
 * virtual address end.  Not every virtual page between start and end
 * is mapped; only those for which a resident page exists with the
 * corresponding offset from m_start are mapped.
 */
void
pmap_enter_object(pmap_t pmap, vm_offset_t start, vm_offset_t end,
    vm_page_t m_start, vm_prot_t prot)
{
	pmap_t oldpmap;
	vm_page_t m;
	vm_pindex_t diff, psize;

	VM_OBJECT_LOCK_ASSERT(m_start->object, MA_OWNED);
	psize = atop(end - start);
	m = m_start;
	PMAP_LOCK(pmap);
	oldpmap = pmap_switch(pmap);
	while (m != NULL && (diff = m->pindex - m_start->pindex) < psize) {
		pmap_enter_quick_locked(pmap, start + ptoa(diff), m, prot);
		m = TAILQ_NEXT(m, listq);
	}
	pmap_switch(oldpmap);
	PMAP_UNLOCK(pmap);
}

/*
 * this code makes some *MAJOR* assumptions:
 * 1. Current pmap & pmap exists.
 * 2. Not wired.
 * 3. Read access.
 * 4. No page table pages.
 * but is *MUCH* faster than pmap_enter...
 */
void
pmap_enter_quick(pmap_t pmap, vm_offset_t va, vm_page_t m, vm_prot_t prot)
{
	pmap_t oldpmap;

	PMAP_LOCK(pmap);
	oldpmap = pmap_switch(pmap);
	pmap_enter_quick_locked(pmap, va, m, prot);
	pmap_switch(oldpmap);
	PMAP_UNLOCK(pmap);
}
static void
pmap_enter_quick_locked(pmap_t pmap, vm_offset_t va, vm_page_t m,
    vm_prot_t prot)
{
	struct ia64_lpte *pte;
	boolean_t managed;

	KASSERT(va < kmi.clean_sva || va >= kmi.clean_eva ||
	    (m->flags & (PG_FICTITIOUS | PG_UNMANAGED)) != 0,
	    ("pmap_enter_quick_locked: managed mapping within the clean submap"));
	mtx_assert(&vm_page_queue_mtx, MA_OWNED);
	PMAP_LOCK_ASSERT(pmap, MA_OWNED);

	if ((pte = pmap_find_pte(va)) == NULL)
		return;

	if (!pmap_present(pte)) {
		/* Enter on the PV list if the page is managed. */
		if ((m->flags & (PG_FICTITIOUS | PG_UNMANAGED)) == 0) {
			if (!pmap_try_insert_pv_entry(pmap, va, m)) {
				pmap_free_pte(pte, va);
				return;
			}
			managed = TRUE;
		} else
			managed = FALSE;

		/* Increment counters. */
		pmap->pm_stats.resident_count++;

		/* Initialize with R/O protection and enter into VHPT. */
		pmap_enter_vhpt(pte, va);
		pmap_pte_prot(pmap, pte,
		    prot & (VM_PROT_READ | VM_PROT_EXECUTE));
		pmap_set_pte(pte, va, VM_PAGE_TO_PHYS(m), FALSE, managed);

		if (prot & VM_PROT_EXECUTE)
			ia64_sync_icache(va, PAGE_SIZE);
	}
}

/*
 * pmap_object_init_pt preloads the ptes for a given object
 * into the specified pmap. This eliminates the blast of soft
 * faults on process startup and immediately after an mmap.
 */
void
pmap_object_init_pt(pmap_t pmap, vm_offset_t addr,
    vm_object_t object, vm_pindex_t pindex,
    vm_size_t size)
{

	VM_OBJECT_LOCK_ASSERT(object, MA_OWNED);
	KASSERT(object->type == OBJT_DEVICE || object->type == OBJT_SG,
	    ("pmap_object_init_pt: non-device object"));
}
/*
 * Routine:	pmap_change_wiring
 * Function:	Change the wiring attribute for a map/virtual-address
 *		pair.
 * In/out conditions:
 *		The mapping must already exist in the pmap.
 */
void
pmap_change_wiring(pmap_t pmap, vm_offset_t va, boolean_t wired)
{
	pmap_t oldpmap;
	struct ia64_lpte *pte;

	PMAP_LOCK(pmap);
	oldpmap = pmap_switch(pmap);

	pte = pmap_find_vhpt(va);
	KASSERT(pte != NULL, ("pte"));
	if (wired && !pmap_wired(pte)) {
		pmap->pm_stats.wired_count++;
		pmap_set_wired(pte);
	} else if (!wired && pmap_wired(pte)) {
		pmap->pm_stats.wired_count--;
		pmap_clear_wired(pte);
	}

	pmap_switch(oldpmap);
	PMAP_UNLOCK(pmap);
}

/*
 * Routine:	pmap_copy
 * Function:
 *	Copy the range specified by src_addr/len
 *	from the source map to the range dst_addr/len
 *	in the destination map.
 *
 *	This routine is only advisory and need not do anything.
 */
void
pmap_copy(pmap_t dst_pmap, pmap_t src_pmap, vm_offset_t dst_addr, vm_size_t len,
    vm_offset_t src_addr)
{
}
/*
 * pmap_zero_page zeros the specified hardware page by
 * mapping it into virtual memory and using bzero to clear
 * its contents.
 */
void
pmap_zero_page(vm_page_t m)
{
	vm_offset_t va = IA64_PHYS_TO_RR7(VM_PAGE_TO_PHYS(m));

	bzero((caddr_t)va, PAGE_SIZE);
}

/*
 * pmap_zero_page_area zeros the specified hardware page by
 * mapping it into virtual memory and using bzero to clear
 * its contents.
 *
 * off and size must reside within a single page.
 */
void
pmap_zero_page_area(vm_page_t m, int off, int size)
{
	vm_offset_t va = IA64_PHYS_TO_RR7(VM_PAGE_TO_PHYS(m));

	bzero((char *)(caddr_t)va + off, size);
}

/*
 * pmap_zero_page_idle zeros the specified hardware page by
 * mapping it into virtual memory and using bzero to clear
 * its contents.  This is for the vm_idlezero process.
 */
void
pmap_zero_page_idle(vm_page_t m)
{
	vm_offset_t va = IA64_PHYS_TO_RR7(VM_PAGE_TO_PHYS(m));

	bzero((caddr_t)va, PAGE_SIZE);
}

/*
 * pmap_copy_page copies the specified (machine independent)
 * page by mapping the page into virtual memory and using
 * bcopy to copy the page, one machine dependent page at a
 * time.
 */
void
pmap_copy_page(vm_page_t msrc, vm_page_t mdst)
{
	vm_offset_t src = IA64_PHYS_TO_RR7(VM_PAGE_TO_PHYS(msrc));
	vm_offset_t dst = IA64_PHYS_TO_RR7(VM_PAGE_TO_PHYS(mdst));

	bcopy((caddr_t)src, (caddr_t)dst, PAGE_SIZE);
}
/*
 * Returns true if the pmap's pv is one of the first
 * 16 pvs linked to from this page.  This count may
 * be changed upwards or downwards in the future; it
 * is only necessary that true be returned for a small
 * subset of pmaps for proper page aging.
 */
boolean_t
pmap_page_exists_quick(pmap_t pmap, vm_page_t m)
{
	pv_entry_t pv;
	int loops = 0;

	if (m->flags & PG_FICTITIOUS)
		return (FALSE);

	/*
	 * Not found, check current mappings returning immediately if found.
	 */
	mtx_assert(&vm_page_queue_mtx, MA_OWNED);
	TAILQ_FOREACH(pv, &m->md.pv_list, pv_list) {
		if (pv->pv_pmap == pmap) {
			return (TRUE);
		}
		loops++;
		if (loops >= 16)
			break;
	}
	return (FALSE);
}

/*
 * pmap_page_wired_mappings:
 *
 *	Return the number of managed mappings to the given physical page
 *	that are wired.
 */
int
pmap_page_wired_mappings(vm_page_t m)
{
	struct ia64_lpte *pte;
	pmap_t oldpmap, pmap;
	pv_entry_t pv;
	int count;

	count = 0;
	if ((m->flags & PG_FICTITIOUS) != 0)
		return (count);
	mtx_assert(&vm_page_queue_mtx, MA_OWNED);
	TAILQ_FOREACH(pv, &m->md.pv_list, pv_list) {
		pmap = pv->pv_pmap;
		PMAP_LOCK(pmap);
		oldpmap = pmap_switch(pmap);
		pte = pmap_find_vhpt(pv->pv_va);
		KASSERT(pte != NULL, ("pte"));
		if (pmap_wired(pte))
			count++;
		pmap_switch(oldpmap);
		PMAP_UNLOCK(pmap);
	}
	return (count);
}

/*
 * Remove all pages from specified address space.
 * This aids process exit speeds.  Also, this code
 * is special cased for current process only, but
 * can have the more generic (and slightly slower)
 * mode enabled.  This is much faster than pmap_remove
 * in the case of running down an entire address space.
 */
void
pmap_remove_pages(pmap_t pmap)
{
	pmap_t oldpmap;
	pv_entry_t pv, npv;

	if (pmap != vmspace_pmap(curthread->td_proc->p_vmspace)) {
		printf("warning: pmap_remove_pages called with non-current pmap\n");
		return;
	}

	vm_page_lock_queues();
	PMAP_LOCK(pmap);
	oldpmap = pmap_switch(pmap);

	for (pv = TAILQ_FIRST(&pmap->pm_pvlist); pv; pv = npv) {
		struct ia64_lpte *pte;

		npv = TAILQ_NEXT(pv, pv_plist);

		pte = pmap_find_vhpt(pv->pv_va);
		KASSERT(pte != NULL, ("pte"));
		if (!pmap_wired(pte))
			pmap_remove_pte(pmap, pte, pv->pv_va, pv, 1);
	}

	pmap_switch(oldpmap);
	PMAP_UNLOCK(pmap);
	vm_page_unlock_queues();
}
/*
 * pmap_ts_referenced:
 *
 *	Return a count of reference bits for a page, clearing those bits.
 *	It is not necessary for every reference bit to be cleared, but it
 *	is necessary that 0 only be returned when there are truly no
 *	reference bits set.
 *
 *	XXX: The exact number of bits to check and clear is a matter that
 *	should be tested and standardized at some point in the future for
 *	optimal aging of shared pages.
 */
int
pmap_ts_referenced(vm_page_t m)
{
	struct ia64_lpte *pte;
	pmap_t oldpmap;
	pv_entry_t pv;
	int count = 0;

	if (m->flags & PG_FICTITIOUS)
		return (0);

	TAILQ_FOREACH(pv, &m->md.pv_list, pv_list) {
		PMAP_LOCK(pv->pv_pmap);
		oldpmap = pmap_switch(pv->pv_pmap);
		pte = pmap_find_vhpt(pv->pv_va);
		KASSERT(pte != NULL, ("pte"));
		if (pmap_accessed(pte)) {
			count++;
			pmap_clear_accessed(pte);
			pmap_invalidate_page(pv->pv_pmap, pv->pv_va);
		}
		pmap_switch(oldpmap);
		PMAP_UNLOCK(pv->pv_pmap);
	}

	return (count);
}

/*
 * pmap_is_modified:
 *
 *	Return whether or not the specified physical page was modified
 *	in any physical maps.
 */
boolean_t
pmap_is_modified(vm_page_t m)
{
	struct ia64_lpte *pte;
	pmap_t oldpmap;
	pv_entry_t pv;
	boolean_t rv;

	rv = FALSE;
	if (m->flags & PG_FICTITIOUS)
		return (rv);

	TAILQ_FOREACH(pv, &m->md.pv_list, pv_list) {
		PMAP_LOCK(pv->pv_pmap);
		oldpmap = pmap_switch(pv->pv_pmap);
		pte = pmap_find_vhpt(pv->pv_va);
		pmap_switch(oldpmap);
		KASSERT(pte != NULL, ("pte"));
		rv = pmap_dirty(pte) ? TRUE : FALSE;
		PMAP_UNLOCK(pv->pv_pmap);
		if (rv)
			break;
	}

	return (rv);
}

/*
 * pmap_is_prefaultable:
 *
 *	Return whether or not the specified virtual address is eligible
 *	for prefault.
 */
boolean_t
pmap_is_prefaultable(pmap_t pmap, vm_offset_t addr)
{
	struct ia64_lpte *pte;

	pte = pmap_find_vhpt(addr);
	if (pte != NULL && pmap_present(pte))
		return (FALSE);
	return (TRUE);
}
/*
 * Clear the modify bits on the specified physical page.
 */
void
pmap_clear_modify(vm_page_t m)
{
	struct ia64_lpte *pte;
	pmap_t oldpmap;
	pv_entry_t pv;

	if (m->flags & PG_FICTITIOUS)
		return;

	TAILQ_FOREACH(pv, &m->md.pv_list, pv_list) {
		PMAP_LOCK(pv->pv_pmap);
		oldpmap = pmap_switch(pv->pv_pmap);
		pte = pmap_find_vhpt(pv->pv_va);
		KASSERT(pte != NULL, ("pte"));
		if (pmap_dirty(pte)) {
			pmap_clear_dirty(pte);
			pmap_invalidate_page(pv->pv_pmap, pv->pv_va);
		}
		pmap_switch(oldpmap);
		PMAP_UNLOCK(pv->pv_pmap);
	}
}

/*
 * pmap_clear_reference:
 *
 *	Clear the reference bit on the specified physical page.
 */
void
pmap_clear_reference(vm_page_t m)
{
	struct ia64_lpte *pte;
	pmap_t oldpmap;
	pv_entry_t pv;

	if (m->flags & PG_FICTITIOUS)
		return;

	TAILQ_FOREACH(pv, &m->md.pv_list, pv_list) {
		PMAP_LOCK(pv->pv_pmap);
		oldpmap = pmap_switch(pv->pv_pmap);
		pte = pmap_find_vhpt(pv->pv_va);
		KASSERT(pte != NULL, ("pte"));
		if (pmap_accessed(pte)) {
			pmap_clear_accessed(pte);
			pmap_invalidate_page(pv->pv_pmap, pv->pv_va);
		}
		pmap_switch(oldpmap);
		PMAP_UNLOCK(pv->pv_pmap);
	}
}

/*
 * Clear the write and modified bits in each of the given page's mappings.
 */
void
pmap_remove_write(vm_page_t m)
{
	struct ia64_lpte *pte;
	pmap_t oldpmap, pmap;
	pv_entry_t pv;
	vm_prot_t prot;

	mtx_assert(&vm_page_queue_mtx, MA_OWNED);
	if ((m->flags & PG_FICTITIOUS) != 0 ||
	    (m->flags & PG_WRITEABLE) == 0)
		return;
	TAILQ_FOREACH(pv, &m->md.pv_list, pv_list) {
		pmap = pv->pv_pmap;
		PMAP_LOCK(pmap);
		oldpmap = pmap_switch(pmap);
		pte = pmap_find_vhpt(pv->pv_va);
		KASSERT(pte != NULL, ("pte"));
		prot = pmap_prot(pte);
		if ((prot & VM_PROT_WRITE) != 0) {
			if (pmap_dirty(pte)) {
				vm_page_dirty(m);
				pmap_clear_dirty(pte);
			}
			prot &= ~VM_PROT_WRITE;
			pmap_pte_prot(pmap, pte, prot);
			pmap_invalidate_page(pmap, pv->pv_va);
		}
		pmap_switch(oldpmap);
		PMAP_UNLOCK(pmap);
	}
	vm_page_flag_clear(m, PG_WRITEABLE);
}
/*
 * Map a set of physical memory pages into the kernel virtual
 * address space. Return a pointer to where it is mapped. This
 * routine is intended to be used for mapping device memory,
 * NOT real memory.
 */
void *
pmap_mapdev(vm_offset_t pa, vm_size_t size)
{
	return (void *)IA64_PHYS_TO_RR6(pa);
}

/*
 * 'Unmap' a range mapped by pmap_mapdev().
 */
void
pmap_unmapdev(vm_offset_t va, vm_size_t size)
{
}

/*
 * perform the pmap work for mincore
 */
int
pmap_mincore(pmap_t pmap, vm_offset_t addr)
{
	pmap_t oldpmap;
	struct ia64_lpte *pte, tpte;
	int val = 0;

	PMAP_LOCK(pmap);
	oldpmap = pmap_switch(pmap);
	pte = pmap_find_vhpt(addr);
	if (pte != NULL) {
		tpte = *pte;
		pte = &tpte;
	}
	pmap_switch(oldpmap);
	PMAP_UNLOCK(pmap);

	if (pte == NULL)
		return (0);

	if (pmap_present(pte)) {
		vm_page_t m;
		vm_offset_t pa;

		val = MINCORE_INCORE;
		if (!pmap_managed(pte))
			return (val);

		pa = pmap_ppn(pte);
		m = PHYS_TO_VM_PAGE(pa);

		/*
		 * Modified by us
		 */
		if (pmap_dirty(pte))
			val |= MINCORE_MODIFIED|MINCORE_MODIFIED_OTHER;
		else {
			/*
			 * Modified by someone
			 */
			vm_page_lock_queues();
			if (pmap_is_modified(m))
				val |= MINCORE_MODIFIED_OTHER;
			vm_page_unlock_queues();
		}
		/*
		 * Referenced by us
		 */
		if (pmap_accessed(pte))
			val |= MINCORE_REFERENCED|MINCORE_REFERENCED_OTHER;
		else {
			/*
			 * Referenced by someone
			 */
			vm_page_lock_queues();
			if (pmap_ts_referenced(m)) {
				val |= MINCORE_REFERENCED_OTHER;
				vm_page_flag_set(m, PG_REFERENCED);
			}
			vm_page_unlock_queues();
		}
	}
	return (val);
}
void
pmap_activate(struct thread *td)
{
	pmap_switch(vmspace_pmap(td->td_proc->p_vmspace));
}

pmap_t
pmap_switch(pmap_t pm)
{
	pmap_t prevpm;
	int i;

	critical_enter();
	prevpm = PCPU_GET(current_pmap);
	if (prevpm == pm)
		goto out;
	if (prevpm != NULL)
		atomic_clear_32(&prevpm->pm_active, PCPU_GET(cpumask));
	if (pm == NULL) {
		for (i = 0; i < 5; i++) {
			ia64_set_rr(IA64_RR_BASE(i),
			    (i << 8)|(PAGE_SHIFT << 2)|1);
		}
	} else {
		for (i = 0; i < 5; i++) {
			ia64_set_rr(IA64_RR_BASE(i),
			    (pm->pm_rid[i] << 8)|(PAGE_SHIFT << 2)|1);
		}
		atomic_set_32(&pm->pm_active, PCPU_GET(cpumask));
	}
	PCPU_SET(current_pmap, pm);
out:
	critical_exit();
	return (prevpm);
}

/*
 * Increase the starting virtual address of the given mapping if a
 * different alignment might result in more superpage mappings.
 */
void
pmap_align_superpage(vm_object_t object, vm_ooffset_t offset,
    vm_offset_t *addr, vm_size_t size)
{
}
#include "opt_ddb.h"

#ifdef DDB

#include <ddb/ddb.h>

static const char *psnames[] = {
	"1B",	"2B",	"4B",	"8B",
	"16B",	"32B",	"64B",	"128B",
	"256B",	"512B",	"1K",	"2K",
	"4K",	"8K",	"16K",	"32K",
	"64K",	"128K",	"256K",	"512K",
	"1M",	"2M",	"4M",	"8M",
	"16M",	"32M",	"64M",	"128M",
	"256M",	"512M",	"1G",	"2G"
};
static void
print_trs(int type)
{
	struct ia64_pal_result res;
	int i, maxtr;
	struct {
		uint64_t	pte;
		uint64_t	itir;
		uint64_t	ifa;
		struct ia64_rr	rr;
	} buf;
	static const char *manames[] = {
		"WB",	"bad",	"bad",	"bad",
		"UC",	"UCE",	"WC",	"NaT",
	};

	res = ia64_call_pal_static(PAL_VM_SUMMARY, 0, 0, 0);
	if (res.pal_status != 0) {
		db_printf("Can't get VM summary\n");
		return;
	}

	if (type == 0)
		maxtr = (res.pal_result[0] >> 40) & 0xff;
	else
		maxtr = (res.pal_result[0] >> 32) & 0xff;

	db_printf("V RID    Virtual Page  Physical Page PgSz ED AR PL D A MA  P KEY\n");
	for (i = 0; i <= maxtr; i++) {
		bzero(&buf, sizeof(buf));
		res = ia64_call_pal_stacked_physical
			(PAL_VM_TR_READ, i, type, ia64_tpa((uint64_t)&buf));
		if (!(res.pal_result[0] & 1))
			buf.pte &= ~PTE_AR_MASK;
		if (!(res.pal_result[0] & 2))
			buf.pte &= ~PTE_PL_MASK;
		if (!(res.pal_result[0] & 4))
			pmap_clear_dirty(&buf);
		if (!(res.pal_result[0] & 8))
			buf.pte &= ~PTE_MA_MASK;
		db_printf("%d %06x %013lx %013lx %4s %d  %d  %d  %d %d %-3s "
		    "%d %06x\n", (int)buf.ifa & 1, buf.rr.rr_rid,
		    buf.ifa >> 12, (buf.pte & PTE_PPN_MASK) >> 12,
		    psnames[(buf.itir & ITIR_PS_MASK) >> 2],
		    (buf.pte & PTE_ED) ? 1 : 0,
		    (int)(buf.pte & PTE_AR_MASK) >> 9,
		    (int)(buf.pte & PTE_PL_MASK) >> 7,
		    (pmap_dirty(&buf)) ? 1 : 0,
		    (pmap_accessed(&buf)) ? 1 : 0,
		    manames[(buf.pte & PTE_MA_MASK) >> 2],
		    (pmap_present(&buf)) ? 1 : 0,
		    (int)((buf.itir & ITIR_KEY_MASK) >> 8));
	}
}
DB_COMMAND(itr, db_itr)
{

	print_trs(0);
}

DB_COMMAND(dtr, db_dtr)
{

	print_trs(1);
}

DB_COMMAND(rr, db_rr)
{
	struct ia64_rr rr;
	uint64_t t;
	int i;

	printf("RR RID    PgSz VE\n");
	for (i = 0; i < 8; i++) {
		__asm __volatile ("mov %0=rr[%1]"
		    : "=r"(t)
		    : "r"(IA64_RR_BASE(i)));
		*(uint64_t *)&rr = t;
		printf("%d  %06x %4s %d\n",
		    i, rr.rr_rid, psnames[rr.rr_ps], rr.rr_ve);
	}
}

DB_COMMAND(thash, db_thash)
{

	if (!have_addr)
		return;

	db_printf("%p\n", (void *)ia64_thash(addr));
}

DB_COMMAND(ttag, db_ttag)
{

	if (!have_addr)
		return;

	db_printf("0x%lx\n", ia64_ttag(addr));
}

DB_COMMAND(kpte, db_kpte)
{
	struct ia64_lpte *pte;

	if (!have_addr) {
		db_printf("usage: kpte <kva>\n");
		return;
	}
	if (addr < VM_MIN_KERNEL_ADDRESS) {
		db_printf("kpte: error: invalid <kva>\n");
		return;
	}
	pte = pmap_find_kpte(addr);
	db_printf("kpte at %p:\n", pte);
	db_printf("  pte  =%016lx\n", pte->pte);
	db_printf("  itir =%016lx\n", pte->itir);
	db_printf("  tag  =%016lx\n", pte->tag);
	db_printf("  chain=%016lx\n", pte->chain);
}

#endif