/*
 * Copyright (c) 1991 Regents of the University of California.
 * All rights reserved.
 * Copyright (c) 1994 John S. Dyson
 * All rights reserved.
 * Copyright (c) 1994 David Greenman
 * All rights reserved.
 * Copyright (c) 1998,2000 Doug Rabson
 * All rights reserved.
 *
 * This code is derived from software contributed to Berkeley by
 * the Systems Programming Group of the University of Utah Computer
 * Science Department and William Jolitz of UUNET Technologies Inc.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 * 3. All advertising materials mentioning features or use of this software
 *    must display the following acknowledgement:
 *	This product includes software developed by the University of
 *	California, Berkeley and its contributors.
 * 4. Neither the name of the University nor the names of its contributors
 *    may be used to endorse or promote products derived from this software
 *    without specific prior written permission.
 *
 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
 * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
 * SUCH DAMAGE.
 *
 * from:	@(#)pmap.c	7.7 (Berkeley)	5/12/91
 * from:	i386 Id: pmap.c,v 1.193 1998/04/19 15:22:48 bde Exp
 *		with some ideas from NetBSD's alpha pmap
 */

#include <sys/cdefs.h>
__FBSDID("$FreeBSD$");

#include <sys/param.h>
#include <sys/kernel.h>
#include <sys/lock.h>
#include <sys/malloc.h>
#include <sys/mman.h>
#include <sys/mutex.h>
#include <sys/proc.h>
#include <sys/smp.h>
#include <sys/sysctl.h>
#include <sys/systm.h>
#include <sys/vmmeter.h>

#include <vm/vm.h>
#include <vm/vm_page.h>
#include <vm/vm_map.h>
#include <vm/vm_object.h>
#include <vm/vm_extern.h>
#include <vm/vm_pageout.h>
#include <vm/uma.h>

#include <machine/md_var.h>
#include <machine/pal.h>

/*
 *	Manages physical address maps.
 *
 *	In addition to hardware address maps, this
 *	module is called upon to provide software-use-only
 *	maps which may or may not be stored in the same
 *	form as hardware maps.  These pseudo-maps are
 *	used to store intermediate results from copy
 *	operations to and from address spaces.
 *
 *	Since the information managed by this module is
 *	also stored by the logical address mapping module,
 *	this module may throw away valid virtual-to-physical
 *	mappings at almost any time.  However, invalidations
 *	of virtual-to-physical mappings must be done as
 *	requested.
 *
 *	In order to cope with hardware architectures which
 *	make virtual-to-physical map invalidates expensive,
 *	this module may delay invalidate or reduced protection
 *	operations until such time as they are actually
 *	necessary.  This module is given full information as
 *	to which processors are currently using which maps,
 *	and to when physical maps must be made correct.
 */

/*
 * Following the Linux model, region IDs are allocated in groups of
 * eight so that a single region ID can be used for as many RRs as we
 * want by encoding the RR number into the low bits of the ID.
 *
 * We reserve region ID 0 for the kernel and allocate the remaining
 * IDs for user pmaps.
 *
 * Region 0..4
 *	User virtually mapped
 *
 * Region 5
 *	Kernel virtually mapped
 *
 * Region 6
 *	Kernel physically mapped uncacheable
 *
 * Region 7
 *	Kernel physically mapped cacheable
 */
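
/*
 * Illustrative sketch (not part of the original file): how a region
 * register value is composed before being written with ia64_set_rr().
 * The field layout matches the ia64_set_rr() calls elsewhere in this
 * file: bit 0 is the VHPT-enable bit, bits 2..7 hold the log2 page
 * size, and bits 8 and up hold the region ID.  The helper name is
 * hypothetical.
 */
#if 0
static __inline uint64_t
rr_value_sketch(uint64_t rid, u_int log2ps, int vhpt_enable)
{
	return ((rid << 8) | ((uint64_t)log2ps << 2) |
	    (vhpt_enable ? 1 : 0));
}
#endif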

/* XXX move to a header. */
extern uint64_t ia64_gateway_page[];

MALLOC_DEFINE(M_PMAP, "PMAP", "PMAP Structures");

#ifndef PMAP_SHPGPERPROC
#define	PMAP_SHPGPERPROC 200
#endif

#if !defined(DIAGNOSTIC)
#define	PMAP_INLINE __inline
#else
#define	PMAP_INLINE
#endif

#define	pmap_accessed(lpte)		((lpte)->pte & PTE_ACCESSED)
#define	pmap_dirty(lpte)		((lpte)->pte & PTE_DIRTY)
#define	pmap_managed(lpte)		((lpte)->pte & PTE_MANAGED)
#define	pmap_ppn(lpte)			((lpte)->pte & PTE_PPN_MASK)
#define	pmap_present(lpte)		((lpte)->pte & PTE_PRESENT)
#define	pmap_prot(lpte)			(((lpte)->pte & PTE_PROT_MASK) >> 56)
#define	pmap_wired(lpte)		((lpte)->pte & PTE_WIRED)

#define	pmap_clear_accessed(lpte)	(lpte)->pte &= ~PTE_ACCESSED
#define	pmap_clear_dirty(lpte)		(lpte)->pte &= ~PTE_DIRTY
#define	pmap_clear_present(lpte)	(lpte)->pte &= ~PTE_PRESENT
#define	pmap_clear_wired(lpte)		(lpte)->pte &= ~PTE_WIRED

#define	pmap_set_wired(lpte)		(lpte)->pte |= PTE_WIRED
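
/*
 * Illustrative sketch (not part of the original file): typical use of
 * the accessor macros above when transferring a PTE's dirty state to
 * the VM page.  The function name is hypothetical; the calls mirror
 * what pmap_remove_pte() and pmap_protect() below actually do.
 */
#if 0
static __inline void
pte_sync_dirty_sketch(struct ia64_lpte *lpte, vm_page_t m)
{
	if (pmap_managed(lpte) && pmap_dirty(lpte)) {
		vm_page_dirty(m);	/* reflect the bit to the pager */
		pmap_clear_dirty(lpte);
	}
}
#endif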

/*
 * The VHPT bucket head structure.
 */
struct ia64_bucket {
	uint64_t	chain;
	struct mtx	mutex;
	u_int		length;
};

/*
 * Statically allocated kernel pmap
 */
struct pmap kernel_pmap_store;

vm_offset_t virtual_avail;	/* VA of first avail page (after kernel bss) */
vm_offset_t virtual_end;	/* VA of last avail page (end of kernel AS) */

/*
 * Kernel virtual memory management.
 */
static int nkpt;
struct ia64_lpte ***ia64_kptdir;
#define KPTE_DIR0_INDEX(va) \
	(((va) >> (3*PAGE_SHIFT-8)) & ((1<<(PAGE_SHIFT-3))-1))
#define KPTE_DIR1_INDEX(va) \
	(((va) >> (2*PAGE_SHIFT-5)) & ((1<<(PAGE_SHIFT-3))-1))
#define KPTE_PTE_INDEX(va) \
	(((va) >> PAGE_SHIFT) & ((1<<(PAGE_SHIFT-5))-1))
#define NKPTEPG		(PAGE_SIZE / sizeof(struct ia64_lpte))
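
/*
 * Illustrative sketch (not part of the original file): the three
 * macros above split a region 5 kernel VA into indexes for the two
 * directory levels and the leaf PTE page, exactly as pmap_find_kpte()
 * does further down.  The helper name is hypothetical.
 */
#if 0
static __inline struct ia64_lpte *
kpte_lookup_sketch(vm_offset_t va)
{
	struct ia64_lpte **dir1 = ia64_kptdir[KPTE_DIR0_INDEX(va)];
	struct ia64_lpte *leaf = dir1[KPTE_DIR1_INDEX(va)];

	return (&leaf[KPTE_PTE_INDEX(va)]);
}
#endif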

vm_offset_t kernel_vm_end;

/* Values for ptc.e. XXX values for SKI. */
static uint64_t pmap_ptc_e_base = 0x100000000;
static uint64_t pmap_ptc_e_count1 = 3;
static uint64_t pmap_ptc_e_count2 = 2;
static uint64_t pmap_ptc_e_stride1 = 0x2000;
static uint64_t pmap_ptc_e_stride2 = 0x100000000;
struct mtx pmap_ptcmutex;

/*
 * Data for the RID allocator
 */
static int pmap_ridcount;
static int pmap_rididx;
static int pmap_ridmapsz;
static int pmap_ridmax;
static uint64_t *pmap_ridmap;
struct mtx pmap_ridmutex;

/*
 * Data for the pv entry allocation mechanism
 */
static uma_zone_t pvzone;
static int pv_entry_count = 0, pv_entry_max = 0, pv_entry_high_water = 0;

/*
 * Data for allocating PTEs for user processes.
 */
static uma_zone_t ptezone;

/*
 * Virtual Hash Page Table (VHPT) data.
 */
/* SYSCTL_DECL(_machdep); */
SYSCTL_NODE(_machdep, OID_AUTO, vhpt, CTLFLAG_RD, 0, "");

struct ia64_bucket *pmap_vhpt_bucket;

int pmap_vhpt_nbuckets;
SYSCTL_INT(_machdep_vhpt, OID_AUTO, nbuckets, CTLFLAG_RD,
    &pmap_vhpt_nbuckets, 0, "");

uint64_t pmap_vhpt_base[MAXCPU];

int pmap_vhpt_log2size = 0;
TUNABLE_INT("machdep.vhpt.log2size", &pmap_vhpt_log2size);
SYSCTL_INT(_machdep_vhpt, OID_AUTO, log2size, CTLFLAG_RD,
    &pmap_vhpt_log2size, 0, "");

static int pmap_vhpt_inserts;
SYSCTL_INT(_machdep_vhpt, OID_AUTO, inserts, CTLFLAG_RD,
    &pmap_vhpt_inserts, 0, "");

static int pmap_vhpt_population(SYSCTL_HANDLER_ARGS);
SYSCTL_PROC(_machdep_vhpt, OID_AUTO, population, CTLTYPE_INT | CTLFLAG_RD,
    NULL, 0, pmap_vhpt_population, "I", "");

static struct ia64_lpte *pmap_find_vhpt(vm_offset_t va);

static PMAP_INLINE void	free_pv_entry(pv_entry_t pv);
static pv_entry_t get_pv_entry(pmap_t locked_pmap);

static void	pmap_enter_quick_locked(pmap_t pmap, vm_offset_t va,
		    vm_page_t m, vm_prot_t prot);
static void	pmap_invalidate_all(pmap_t pmap);
static int	pmap_remove_pte(pmap_t pmap, struct ia64_lpte *pte,
		    vm_offset_t va, pv_entry_t pv, int freepte);
static boolean_t pmap_try_insert_pv_entry(pmap_t pmap, vm_offset_t va,
		    vm_page_t m);

static vm_offset_t
pmap_steal_memory(vm_size_t size)
{
	vm_size_t bank_size;
	vm_offset_t pa, va;
	int i;

	size = round_page(size);

	bank_size = phys_avail[1] - phys_avail[0];
	while (size > bank_size) {
		/* Retire the current (too small) bank of memory. */
		for (i = 0; phys_avail[i+2]; i+= 2) {
			phys_avail[i] = phys_avail[i+2];
			phys_avail[i+1] = phys_avail[i+3];
		}
		phys_avail[i] = 0;
		phys_avail[i+1] = 0;
		if (!phys_avail[0])
			panic("pmap_steal_memory: out of memory");
		bank_size = phys_avail[1] - phys_avail[0];
	}

	pa = phys_avail[0];
	phys_avail[0] += size;

	va = IA64_PHYS_TO_RR7(pa);
	bzero((caddr_t)va, size);
	return va;
}

/*
 * Bootstrap the system enough to run with virtual memory.
 */
void
pmap_bootstrap(void)
{
	struct ia64_pal_result res;
	struct ia64_lpte *pte;
	vm_offset_t base, limit;
	vm_size_t size;
	int i, j, count, ridbits;

	/*
	 * Query the PAL Code to find the loop parameters for the
	 * ptc.e instruction.
	 */
	res = ia64_call_pal_static(PAL_PTCE_INFO, 0, 0, 0);
	if (res.pal_status != 0)
		panic("Can't configure ptc.e parameters");
	pmap_ptc_e_base = res.pal_result[0];
	pmap_ptc_e_count1 = res.pal_result[1] >> 32;
	pmap_ptc_e_count2 = res.pal_result[1] & ((1L<<32) - 1);
	pmap_ptc_e_stride1 = res.pal_result[2] >> 32;
	pmap_ptc_e_stride2 = res.pal_result[2] & ((1L<<32) - 1);
	if (bootverbose)
		printf("ptc.e base=0x%lx, count1=%ld, count2=%ld, "
		       "stride1=0x%lx, stride2=0x%lx\n",
		       pmap_ptc_e_base,
		       pmap_ptc_e_count1,
		       pmap_ptc_e_count2,
		       pmap_ptc_e_stride1,
		       pmap_ptc_e_stride2);
	mtx_init(&pmap_ptcmutex, "Global PTC lock", NULL, MTX_SPIN);

	/*
	 * Setup RIDs. RIDs 0..7 are reserved for the kernel.
	 *
	 * We currently need at least 19 bits in the RID because PID_MAX
	 * can only be encoded in 17 bits and we need RIDs for 5 regions
	 * per process. With PID_MAX equalling 99999 this means that we
	 * need to be able to encode 499995 (=5*PID_MAX).
	 * The Itanium processor only has 18 bits and the architected
	 * minimum is exactly that. So, we cannot use a PID based scheme
	 * in those cases. Enter pmap_ridmap...
	 * We should avoid the map when running on a processor that has
	 * implemented enough bits. This means that we should pass the
	 * process/thread ID to pmap. This we currently don't do, so we
	 * use the map anyway. However, we don't want to allocate a map
	 * that is large enough to cover the range dictated by the number
	 * of bits in the RID, because that may result in a RID map of
	 * 2MB in size for a 24-bit RID. A 64KB map is enough.
	 * The bottom line: we create a 32KB map when the processor only
	 * implements 18 bits (or when we can't figure it out). Otherwise
	 * we create a 64KB map.
	 */
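	/*
	 * Worked numbers for the map sizes mentioned above (assuming
	 * 64-bit map words, one bit per RID): 18 RID bits give 2^18
	 * RIDs, or 2^18/8 = 32KB of bitmap; 19 bits give 64KB; a
	 * 24-bit RID would need 2^24/8 = 2MB, hence the cap.
	 */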
	res = ia64_call_pal_static(PAL_VM_SUMMARY, 0, 0, 0);
	if (res.pal_status != 0) {
		if (bootverbose)
			printf("Can't read VM Summary - assuming 18 Region ID bits\n");
		ridbits = 18; /* guaranteed minimum */
	} else {
		ridbits = (res.pal_result[1] >> 8) & 0xff;
		if (bootverbose)
			printf("Processor supports %d Region ID bits\n",
			    ridbits);
	}
	if (ridbits > 19)
		ridbits = 19;

	pmap_ridmax = (1 << ridbits);
	pmap_ridmapsz = pmap_ridmax / 64;
	pmap_ridmap = (uint64_t *)pmap_steal_memory(pmap_ridmax / 8);
	pmap_ridmap[0] |= 0xff;		/* RIDs 0..7 stay reserved. */
	pmap_rididx = 0;
	pmap_ridcount = 8;
	mtx_init(&pmap_ridmutex, "RID allocator lock", NULL, MTX_DEF);

	/*
	 * Allocate some memory for initial kernel 'page tables'.
	 */
	ia64_kptdir = (void *)pmap_steal_memory(PAGE_SIZE);
	nkpt = 0;
	kernel_vm_end = VM_MIN_KERNEL_ADDRESS - VM_GATEWAY_SIZE;

	for (i = 0; phys_avail[i+2]; i+= 2)
		;
	count = i+2;

	/*
	 * Figure out a useful size for the VHPT, based on the size of
	 * physical memory and try to locate a region which is large
	 * enough to contain the VHPT (which must be a power of two in
	 * size and aligned to a natural boundary).
	 * We silently bump up the VHPT size to the minimum size if the
	 * user has set the tunable too small. Likewise, the VHPT size
	 * is silently capped to the maximum allowed.
	 */
	TUNABLE_INT_FETCH("machdep.vhpt.log2size", &pmap_vhpt_log2size);
	if (pmap_vhpt_log2size == 0) {
		pmap_vhpt_log2size = 15;
		size = 1UL << pmap_vhpt_log2size;
		while (size < Maxmem * 32) {
			pmap_vhpt_log2size++;
			size <<= 1;
		}
	} else if (pmap_vhpt_log2size < 15)
		pmap_vhpt_log2size = 15;
	if (pmap_vhpt_log2size > 61)
		pmap_vhpt_log2size = 61;

	pmap_vhpt_base[0] = 0;

	size = 1UL << pmap_vhpt_log2size;
	while (pmap_vhpt_base[0] == 0) {
		if (bootverbose)
			printf("Trying VHPT size 0x%lx\n", size);
		for (i = 0; i < count; i += 2) {
			base = (phys_avail[i] + size - 1) & ~(size - 1);
			limit = base + MAXCPU * size;
			if (limit <= phys_avail[i+1])
				/*
				 * VHPT can fit in this region
				 */
				break;
		}
		if (!phys_avail[i]) {
			/* Can't fit, try next smaller size. */
			pmap_vhpt_log2size--;
			size >>= 1;
		} else
			pmap_vhpt_base[0] = IA64_PHYS_TO_RR7(base);
	}
	if (pmap_vhpt_log2size < 15)
		panic("Can't find space for VHPT");

	if (bootverbose)
		printf("Putting VHPT at 0x%lx\n", base);

	if (base != phys_avail[i]) {
		/* Split this region. */
		if (bootverbose)
			printf("Splitting [%p-%p]\n", (void *)phys_avail[i],
			    (void *)phys_avail[i+1]);
		for (j = count; j > i; j -= 2) {
			phys_avail[j] = phys_avail[j-2];
			phys_avail[j+1] = phys_avail[j-2+1];
		}
		phys_avail[i+1] = base;
		phys_avail[i+2] = limit;
	} else
		phys_avail[i] = limit;

	pmap_vhpt_nbuckets = size / sizeof(struct ia64_lpte);

	pmap_vhpt_bucket = (void *)pmap_steal_memory(pmap_vhpt_nbuckets *
	    sizeof(struct ia64_bucket));
	pte = (struct ia64_lpte *)pmap_vhpt_base[0];
	for (i = 0; i < pmap_vhpt_nbuckets; i++) {
		pte[i].pte = 0;
		pte[i].itir = 0;
		pte[i].tag = 1UL << 63;	/* Invalid tag */
		pte[i].chain = (uintptr_t)(pmap_vhpt_bucket + i);
		/* Stolen memory is zeroed! */
		mtx_init(&pmap_vhpt_bucket[i].mutex, "VHPT bucket lock", NULL,
		    MTX_SPIN);
	}

	for (i = 1; i < MAXCPU; i++) {
		pmap_vhpt_base[i] = pmap_vhpt_base[i - 1] + size;
		bcopy((void *)pmap_vhpt_base[i - 1], (void *)pmap_vhpt_base[i],
		    size);
	}

	map_vhpt(pmap_vhpt_base[0]);
	ia64_set_pta(pmap_vhpt_base[0] + (1 << 8) +
	    (pmap_vhpt_log2size << 2) + 1);
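
	/*
	 * Editorial note (assumption, matching the bit layout used in
	 * the ia64_set_pta() call above): bit 0 enables the VHPT
	 * walker, bits 2..7 hold the log2 VHPT size, and bit 8 selects
	 * the long VHPT format used by struct ia64_lpte.
	 */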

	virtual_avail = VM_MIN_KERNEL_ADDRESS;
	virtual_end = VM_MAX_KERNEL_ADDRESS;

	/*
	 * Initialize the kernel pmap (which is statically allocated).
	 */
	PMAP_LOCK_INIT(kernel_pmap);
	for (i = 0; i < 5; i++)
		kernel_pmap->pm_rid[i] = 0;
	kernel_pmap->pm_active = 1;
	TAILQ_INIT(&kernel_pmap->pm_pvlist);
	PCPU_SET(current_pmap, kernel_pmap);

	/*
	 * Region 5 is mapped via the vhpt.
	 */
	ia64_set_rr(IA64_RR_BASE(5),
	    (5 << 8) | (PAGE_SHIFT << 2) | 1);

	/*
	 * Region 6 is direct mapped UC and region 7 is direct mapped
	 * WC. The details of this are controlled by the Alt {I,D}TLB
	 * handlers. Here we just make sure that they have the largest
	 * possible page size to minimise TLB usage.
	 */
	ia64_set_rr(IA64_RR_BASE(6), (6 << 8) | (IA64_ID_PAGE_SHIFT << 2));
	ia64_set_rr(IA64_RR_BASE(7), (7 << 8) | (IA64_ID_PAGE_SHIFT << 2));

	/*
	 * Clear out any random TLB entries left over from booting.
	 */
	pmap_invalidate_all(kernel_pmap);
}

static int
pmap_vhpt_population(SYSCTL_HANDLER_ARGS)
{
	int count, error, i;

	count = 0;
	for (i = 0; i < pmap_vhpt_nbuckets; i++)
		count += pmap_vhpt_bucket[i].length;

	error = SYSCTL_OUT(req, &count, sizeof(count));
	return (error);
}

/*
 *	Initialize a vm_page's machine-dependent fields.
 */
void
pmap_page_init(vm_page_t m)
{

	TAILQ_INIT(&m->md.pv_list);
	m->md.pv_list_count = 0;
}

/*
 *	Initialize the pmap module.
 *	Called by vm_init, to initialize any structures that the pmap
 *	system needs to map virtual memory.
 */
void
pmap_init(void)
{
	int shpgperproc = PMAP_SHPGPERPROC;

	/*
	 * Initialize the address space (zone) for the pv entries.  Set a
	 * high water mark so that the system can recover from excessive
	 * numbers of pv entries.
	 */
	pvzone = uma_zcreate("PV ENTRY", sizeof(struct pv_entry), NULL, NULL,
	    NULL, NULL, UMA_ALIGN_PTR, UMA_ZONE_VM | UMA_ZONE_NOFREE);
	TUNABLE_INT_FETCH("vm.pmap.shpgperproc", &shpgperproc);
	pv_entry_max = shpgperproc * maxproc + cnt.v_page_count;
	TUNABLE_INT_FETCH("vm.pmap.pv_entries", &pv_entry_max);
	pv_entry_high_water = 9 * (pv_entry_max / 10);

	ptezone = uma_zcreate("PT ENTRY", sizeof (struct ia64_lpte),
	    NULL, NULL, NULL, NULL, UMA_ALIGN_PTR, UMA_ZONE_VM|UMA_ZONE_NOFREE);
}

/***************************************************
 * Manipulate TLBs for a pmap
 ***************************************************/

#if 0	/* Assumption: this older rendezvous-based variant was disabled
	 * by preprocessor conditionals lost in extraction. */
static void
pmap_invalidate_page_locally(void *arg)
{
	vm_offset_t va = (uintptr_t)arg;
	struct ia64_lpte *pte;

	pte = (struct ia64_lpte *)ia64_thash(va);
	if (pte->tag == ia64_ttag(va))
		pte->tag = 1UL << 63;
	ia64_ptc_l(va, PAGE_SHIFT << 2);
}

static void
pmap_invalidate_page_1(void *arg)
{
	void **args = arg;
	pmap_t oldpmap;

	oldpmap = pmap_switch(args[0]);
	pmap_invalidate_page_locally(args[1]);
	pmap_switch(oldpmap);
}

static void
pmap_invalidate_page(pmap_t pmap, vm_offset_t va)
{

	KASSERT((pmap == kernel_pmap || pmap == PCPU_GET(current_pmap)),
		("invalidating TLB for non-current pmap"));

	if (mp_ncpus > 1) {
		void *args[2];
		args[0] = pmap;
		args[1] = (void *)va;
		smp_rendezvous(NULL, pmap_invalidate_page_1, NULL, args);
	} else
		pmap_invalidate_page_locally((void *)va);
}
#else
static void
pmap_invalidate_page(pmap_t pmap, vm_offset_t va)
{
	struct ia64_lpte *pte;
	int i, vhpt_ofs;

	KASSERT((pmap == kernel_pmap || pmap == PCPU_GET(current_pmap)),
		("invalidating TLB for non-current pmap"));

	vhpt_ofs = ia64_thash(va) - pmap_vhpt_base[PCPU_GET(cpuid)];

	/* Purge the matching entry from every CPU's copy of the VHPT. */
	for (i = 0; i < MAXCPU; i++) {
		pte = (struct ia64_lpte *)(pmap_vhpt_base[i] + vhpt_ofs);
		if (pte->tag == ia64_ttag(va))
			pte->tag = 1UL << 63;
	}

	mtx_lock_spin(&pmap_ptcmutex);
	ia64_ptc_ga(va, PAGE_SHIFT << 2);
	mtx_unlock_spin(&pmap_ptcmutex);
}
#endif

static void
pmap_invalidate_all_1(void *arg)
{
	uint64_t addr;
	int i, j;

	addr = pmap_ptc_e_base;
	for (i = 0; i < pmap_ptc_e_count1; i++) {
		for (j = 0; j < pmap_ptc_e_count2; j++) {
			ia64_ptc_e(addr);
			addr += pmap_ptc_e_stride2;
		}
		addr += pmap_ptc_e_stride1;
	}
}

static void
pmap_invalidate_all(pmap_t pmap)
{

	KASSERT((pmap == kernel_pmap || pmap == PCPU_GET(current_pmap)),
		("invalidating TLB for non-current pmap"));

	if (mp_ncpus > 1)
		smp_rendezvous(NULL, pmap_invalidate_all_1, NULL, NULL);
	else
		pmap_invalidate_all_1(NULL);
}

static uint32_t
pmap_allocate_rid(void)
{
	uint64_t bit, bits;
	int rid;

	mtx_lock(&pmap_ridmutex);
	if (pmap_ridcount == pmap_ridmax)
		panic("pmap_allocate_rid: All Region IDs used");

	/* Find an index with a free bit. */
	while ((bits = pmap_ridmap[pmap_rididx]) == ~0UL) {
		pmap_rididx++;
		if (pmap_rididx == pmap_ridmapsz)
			pmap_rididx = 0;
	}
	rid = pmap_rididx * 64;

	/* Find a free bit. */
	bit = 1UL;
	while (bits & bit) {
		rid++;
		bit <<= 1;
	}

	pmap_ridmap[pmap_rididx] |= bit;
	pmap_ridcount++;
	mtx_unlock(&pmap_ridmutex);

	return rid;
}

static void
pmap_free_rid(uint32_t rid)
{
	uint64_t bit;
	int idx;

	idx = rid / 64;
	bit = ~(1UL << (rid & 63));

	mtx_lock(&pmap_ridmutex);
	pmap_ridmap[idx] &= bit;
	pmap_ridcount--;
	mtx_unlock(&pmap_ridmutex);
}

/***************************************************
 * Page table page management routines.....
 ***************************************************/

void
pmap_pinit0(struct pmap *pmap)
{
	/* kernel_pmap is the same as any other pmap. */
	pmap_pinit(pmap);
}

/*
 * Initialize a preallocated and zeroed pmap structure,
 * such as one in a vmspace structure.
 */
int
pmap_pinit(struct pmap *pmap)
{
	int i;

	PMAP_LOCK_INIT(pmap);
	for (i = 0; i < 5; i++)
		pmap->pm_rid[i] = pmap_allocate_rid();
	pmap->pm_active = 0;
	TAILQ_INIT(&pmap->pm_pvlist);
	bzero(&pmap->pm_stats, sizeof pmap->pm_stats);
	return (1);
}

/***************************************************
 * Pmap allocation/deallocation routines.
 ***************************************************/

/*
 * Release any resources held by the given physical map.
 * Called when a pmap initialized by pmap_pinit is being released.
 * Should only be called if the map contains no valid mappings.
 */
void
pmap_release(pmap_t pmap)
{
	int i;

	for (i = 0; i < 5; i++)
		if (pmap->pm_rid[i])
			pmap_free_rid(pmap->pm_rid[i]);
	PMAP_LOCK_DESTROY(pmap);
}

/*
 * grow the number of kernel page table entries, if needed
 */
void
pmap_growkernel(vm_offset_t addr)
{
	struct ia64_lpte **dir1;
	struct ia64_lpte *leaf;
	vm_page_t nkpg;

	while (kernel_vm_end <= addr) {
		if (nkpt == PAGE_SIZE/8 + PAGE_SIZE*PAGE_SIZE/64)
			panic("%s: out of kernel address space", __func__);

		dir1 = ia64_kptdir[KPTE_DIR0_INDEX(kernel_vm_end)];
		if (dir1 == NULL) {
			nkpg = vm_page_alloc(NULL, nkpt++,
			    VM_ALLOC_NOOBJ|VM_ALLOC_INTERRUPT|VM_ALLOC_WIRED);
			if (!nkpg)
				panic("%s: cannot add dir. page", __func__);

			dir1 = (struct ia64_lpte **)
			    IA64_PHYS_TO_RR7(VM_PAGE_TO_PHYS(nkpg));
			bzero(dir1, PAGE_SIZE);
			ia64_kptdir[KPTE_DIR0_INDEX(kernel_vm_end)] = dir1;
		}

		nkpg = vm_page_alloc(NULL, nkpt++,
		    VM_ALLOC_NOOBJ|VM_ALLOC_INTERRUPT|VM_ALLOC_WIRED);
		if (!nkpg)
			panic("%s: cannot add PTE page", __func__);

		leaf = (struct ia64_lpte *)
		    IA64_PHYS_TO_RR7(VM_PAGE_TO_PHYS(nkpg));
		bzero(leaf, PAGE_SIZE);
		dir1[KPTE_DIR1_INDEX(kernel_vm_end)] = leaf;

		kernel_vm_end += PAGE_SIZE * NKPTEPG;
	}
}
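
/*
 * Worked numbers for the growth step above (assuming 8KB pages and the
 * 32-byte struct ia64_lpte): NKPTEPG = 8192/32 = 256 PTEs per leaf
 * page, so each loop iteration adds 256 * 8KB = 2MB of kernel VA.
 */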

/***************************************************
 * page management routines.
 ***************************************************/

/*
 * free the pv_entry back to the free list
 */
static PMAP_INLINE void
free_pv_entry(pv_entry_t pv)
{
	pv_entry_count--;
	uma_zfree(pvzone, pv);
}

/*
 * get a new pv_entry, allocating a block from the system
 * when needed.
 */
static pv_entry_t
get_pv_entry(pmap_t locked_pmap)
{
	static const struct timeval printinterval = { 60, 0 };
	static struct timeval lastprint;
	struct vpgqueues *vpq;
	struct ia64_lpte *pte;
	pmap_t oldpmap, pmap;
	pv_entry_t allocated_pv, next_pv, pv;
	vm_offset_t va;
	vm_page_t m;

	PMAP_LOCK_ASSERT(locked_pmap, MA_OWNED);
	mtx_assert(&vm_page_queue_mtx, MA_OWNED);
	allocated_pv = uma_zalloc(pvzone, M_NOWAIT);
	if (allocated_pv != NULL) {
		pv_entry_count++;
		if (pv_entry_count > pv_entry_high_water)
			pagedaemon_wakeup();
		else
			return (allocated_pv);
	}

	/*
	 * Reclaim pv entries: At first, destroy mappings to inactive
	 * pages.  After that, if a pv entry is still needed, destroy
	 * mappings to active pages.
	 */
	if (ratecheck(&lastprint, &printinterval))
		printf("Approaching the limit on PV entries, "
		    "increase the vm.pmap.shpgperproc tunable.\n");
	vpq = &vm_page_queues[PQ_INACTIVE];
retry:
	TAILQ_FOREACH(m, &vpq->pl, pageq) {
		if (m->hold_count || m->busy)
			continue;
		TAILQ_FOREACH_SAFE(pv, &m->md.pv_list, pv_list, next_pv) {
			va = pv->pv_va;
			pmap = pv->pv_pmap;
			/* Avoid deadlock and lock recursion. */
			if (pmap > locked_pmap)
				PMAP_LOCK(pmap);
			else if (pmap != locked_pmap && !PMAP_TRYLOCK(pmap))
				continue;
			oldpmap = pmap_switch(pmap);
			pte = pmap_find_vhpt(va);
			KASSERT(pte != NULL, ("pte"));
			pmap_remove_pte(pmap, pte, va, pv, 1);
			pmap_switch(oldpmap);
			if (pmap != locked_pmap)
				PMAP_UNLOCK(pmap);
			if (allocated_pv == NULL)
				allocated_pv = pv;
			else
				free_pv_entry(pv);
		}
	}
	if (allocated_pv == NULL) {
		if (vpq == &vm_page_queues[PQ_INACTIVE]) {
			vpq = &vm_page_queues[PQ_ACTIVE];
			goto retry;
		}
		panic("get_pv_entry: increase the vm.pmap.shpgperproc tunable");
	}
	return (allocated_pv);
}

/*
 * Conditionally create a pv entry.
 */
static boolean_t
pmap_try_insert_pv_entry(pmap_t pmap, vm_offset_t va, vm_page_t m)
{
	pv_entry_t pv;

	PMAP_LOCK_ASSERT(pmap, MA_OWNED);
	mtx_assert(&vm_page_queue_mtx, MA_OWNED);
	if (pv_entry_count < pv_entry_high_water &&
	    (pv = uma_zalloc(pvzone, M_NOWAIT)) != NULL) {
		pv_entry_count++;
		pv->pv_va = va;
		pv->pv_pmap = pmap;
		TAILQ_INSERT_TAIL(&pmap->pm_pvlist, pv, pv_plist);
		TAILQ_INSERT_TAIL(&m->md.pv_list, pv, pv_list);
		m->md.pv_list_count++;
		return (TRUE);
	} else
		return (FALSE);
}

/*
 * Add an ia64_lpte to the VHPT.
 */
static void
pmap_enter_vhpt(struct ia64_lpte *pte, vm_offset_t va)
{
	struct ia64_bucket *bckt;
	struct ia64_lpte *vhpte;
	uint64_t pte_pa;

	/* Can fault, so get it out of the way. */
	pte_pa = ia64_tpa((vm_offset_t)pte);

	vhpte = (struct ia64_lpte *)ia64_thash(va);
	bckt = (struct ia64_bucket *)vhpte->chain;

	mtx_lock_spin(&bckt->mutex);
	pte->chain = bckt->chain;
	ia64_mf();
	bckt->chain = pte_pa;

	pmap_vhpt_inserts++;
	bckt->length++;
	mtx_unlock_spin(&bckt->mutex);
}

/*
 * Remove the ia64_lpte matching va from the VHPT. Return zero if it
 * worked or an appropriate error code otherwise.
 */
static int
pmap_remove_vhpt(vm_offset_t va)
{
	struct ia64_bucket *bckt;
	struct ia64_lpte *pte;
	struct ia64_lpte *lpte;
	struct ia64_lpte *vhpte;
	uint64_t chain, tag;

	tag = ia64_ttag(va);
	vhpte = (struct ia64_lpte *)ia64_thash(va);
	bckt = (struct ia64_bucket *)vhpte->chain;

	lpte = NULL;
	mtx_lock_spin(&bckt->mutex);
	chain = bckt->chain;
	pte = (struct ia64_lpte *)IA64_PHYS_TO_RR7(chain);
	while (chain != 0 && pte->tag != tag) {
		lpte = pte;
		chain = pte->chain;
		pte = (struct ia64_lpte *)IA64_PHYS_TO_RR7(chain);
	}
	if (chain == 0) {
		mtx_unlock_spin(&bckt->mutex);
		return (ENOENT);
	}

	/* Snip this pv_entry out of the collision chain. */
	if (lpte == NULL)
		bckt->chain = pte->chain;
	else
		lpte->chain = pte->chain;
	ia64_mf();

	bckt->length--;
	mtx_unlock_spin(&bckt->mutex);
	return (0);
}

/*
 * Find the ia64_lpte for the given va, if any.
 */
static struct ia64_lpte *
pmap_find_vhpt(vm_offset_t va)
{
	struct ia64_bucket *bckt;
	struct ia64_lpte *pte;
	uint64_t chain, tag;

	tag = ia64_ttag(va);
	pte = (struct ia64_lpte *)ia64_thash(va);
	bckt = (struct ia64_bucket *)pte->chain;

	mtx_lock_spin(&bckt->mutex);
	chain = bckt->chain;
	pte = (struct ia64_lpte *)IA64_PHYS_TO_RR7(chain);
	while (chain != 0 && pte->tag != tag) {
		chain = pte->chain;
		pte = (struct ia64_lpte *)IA64_PHYS_TO_RR7(chain);
	}
	mtx_unlock_spin(&bckt->mutex);
	return ((chain != 0) ? pte : NULL);
}

/*
 * Remove an entry from the list of managed mappings.
 */
static int
pmap_remove_entry(pmap_t pmap, vm_page_t m, vm_offset_t va, pv_entry_t pv)
{
	if (!pv) {
		if (m->md.pv_list_count < pmap->pm_stats.resident_count) {
			TAILQ_FOREACH(pv, &m->md.pv_list, pv_list) {
				if (pmap == pv->pv_pmap && va == pv->pv_va)
					break;
			}
		} else {
			TAILQ_FOREACH(pv, &pmap->pm_pvlist, pv_plist) {
				if (va == pv->pv_va)
					break;
			}
		}
	}

	if (pv) {
		TAILQ_REMOVE(&m->md.pv_list, pv, pv_list);
		m->md.pv_list_count--;
		if (TAILQ_FIRST(&m->md.pv_list) == NULL)
			vm_page_flag_clear(m, PG_WRITEABLE);

		TAILQ_REMOVE(&pmap->pm_pvlist, pv, pv_plist);
		free_pv_entry(pv);
		return 0;
	} else {
		return ENOENT;
	}
}

/*
 * Create a pv entry for page at pa for
 * (pmap, va).
 */
static void
pmap_insert_entry(pmap_t pmap, vm_offset_t va, vm_page_t m)
{
	pv_entry_t pv;

	pv = get_pv_entry(pmap);
	pv->pv_pmap = pmap;
	pv->pv_va = va;

	PMAP_LOCK_ASSERT(pmap, MA_OWNED);
	mtx_assert(&vm_page_queue_mtx, MA_OWNED);
	TAILQ_INSERT_TAIL(&pmap->pm_pvlist, pv, pv_plist);
	TAILQ_INSERT_TAIL(&m->md.pv_list, pv, pv_list);
	m->md.pv_list_count++;
}

/*
 *	Routine:	pmap_extract
 *	Function:
 *		Extract the physical page address associated
 *		with the given map/virtual_address pair.
 */
vm_paddr_t
pmap_extract(pmap_t pmap, vm_offset_t va)
{
	struct ia64_lpte *pte;
	pmap_t oldpmap;
	vm_paddr_t pa;

	pa = 0;
	PMAP_LOCK(pmap);
	oldpmap = pmap_switch(pmap);
	pte = pmap_find_vhpt(va);
	if (pte != NULL && pmap_present(pte))
		pa = pmap_ppn(pte);
	pmap_switch(oldpmap);
	PMAP_UNLOCK(pmap);
	return (pa);
}

/*
 *	Routine:	pmap_extract_and_hold
 *	Function:
 *		Atomically extract and hold the physical page
 *		with the given pmap and virtual address pair
 *		if that mapping permits the given protection.
 */
vm_page_t
pmap_extract_and_hold(pmap_t pmap, vm_offset_t va, vm_prot_t prot)
{
	struct ia64_lpte *pte;
	pmap_t oldpmap;
	vm_page_t m;

	m = NULL;
	vm_page_lock_queues();
	PMAP_LOCK(pmap);
	oldpmap = pmap_switch(pmap);
	pte = pmap_find_vhpt(va);
	if (pte != NULL && pmap_present(pte) &&
	    (pmap_prot(pte) & prot) == prot) {
		m = PHYS_TO_VM_PAGE(pmap_ppn(pte));
		vm_page_hold(m);
	}
	vm_page_unlock_queues();
	pmap_switch(oldpmap);
	PMAP_UNLOCK(pmap);
	return (m);
}

/***************************************************
 * Low level mapping routines.....
 ***************************************************/

/*
 * Find the kernel lpte for mapping the given virtual address, which
 * must be in the part of region 5 which we can cover with our kernel
 * 'page tables'.
 */
static struct ia64_lpte *
pmap_find_kpte(vm_offset_t va)
{
	struct ia64_lpte **dir1;
	struct ia64_lpte *leaf;

	KASSERT((va >> 61) == 5,
		("kernel mapping 0x%lx not in region 5", va));
	KASSERT(va < kernel_vm_end,
		("kernel mapping 0x%lx out of range", va));

	dir1 = ia64_kptdir[KPTE_DIR0_INDEX(va)];
	leaf = dir1[KPTE_DIR1_INDEX(va)];
	return (&leaf[KPTE_PTE_INDEX(va)]);
}

/*
 * Find a pte suitable for mapping a user-space address. If one exists
 * in the VHPT, that one will be returned, otherwise a new pte is
 * allocated.
 */
static struct ia64_lpte *
pmap_find_pte(vm_offset_t va)
{
	struct ia64_lpte *pte;

	if (va >= VM_MAXUSER_ADDRESS)
		return pmap_find_kpte(va);

	pte = pmap_find_vhpt(va);
	if (pte == NULL) {
		pte = uma_zalloc(ptezone, M_NOWAIT | M_ZERO);
		if (pte != NULL)
			pte->tag = 1UL << 63;
	}
	return (pte);
}

/*
 * Free a pte which is now unused. This simply returns it to the zone
 * allocator if it is a user mapping. For kernel mappings, clear the
 * valid bit to make it clear that the mapping is not currently used.
 */
static void
pmap_free_pte(struct ia64_lpte *pte, vm_offset_t va)
{
	if (va < VM_MAXUSER_ADDRESS)
		uma_zfree(ptezone, pte);
	else
		pmap_clear_present(pte);
}

static PMAP_INLINE void
pmap_pte_prot(pmap_t pm, struct ia64_lpte *pte, vm_prot_t prot)
{
	static long prot2ar[4] = {
		PTE_AR_R,		/* VM_PROT_NONE */
		PTE_AR_RW,		/* VM_PROT_WRITE */
		PTE_AR_RX|PTE_ED,	/* VM_PROT_EXECUTE */
		PTE_AR_RWX|PTE_ED	/* VM_PROT_WRITE|VM_PROT_EXECUTE */
	};

	pte->pte &= ~(PTE_PROT_MASK | PTE_PL_MASK | PTE_AR_MASK | PTE_ED);
	pte->pte |= (uint64_t)(prot & VM_PROT_ALL) << 56;
	pte->pte |= (prot == VM_PROT_NONE || pm == kernel_pmap)
	    ? PTE_PL_KERN : PTE_PL_USER;
	pte->pte |= prot2ar[(prot & VM_PROT_ALL) >> 1];
}

/*
 * Set a pte to contain a valid mapping and enter it in the VHPT. If
 * the pte was originally valid, then it's assumed to already be in the
 * VHPT.
 * This function does not set the protection bits.  It's expected
 * that those have been set correctly prior to calling this function.
 */
static void
pmap_set_pte(struct ia64_lpte *pte, vm_offset_t va, vm_offset_t pa,
    boolean_t wired, boolean_t managed)
{

	pte->pte &= PTE_PROT_MASK | PTE_PL_MASK | PTE_AR_MASK | PTE_ED;
	pte->pte |= PTE_PRESENT | PTE_MA_WB;
	pte->pte |= (managed) ? PTE_MANAGED : (PTE_DIRTY | PTE_ACCESSED);
	pte->pte |= (wired) ? PTE_WIRED : 0;
	pte->pte |= pa & PTE_PPN_MASK;

	pte->itir = PAGE_SHIFT << 2;

	pte->tag = ia64_ttag(va);
}

/*
 * Remove the (possibly managed) mapping represented by pte from the
 * given pmap.
 */
static int
pmap_remove_pte(pmap_t pmap, struct ia64_lpte *pte, vm_offset_t va,
		pv_entry_t pv, int freepte)
{
	int error;
	vm_page_t m;

	KASSERT((pmap == kernel_pmap || pmap == PCPU_GET(current_pmap)),
		("removing pte for non-current pmap"));

	/*
	 * First remove from the VHPT.
	 */
	error = pmap_remove_vhpt(va);
	if (error)
		return (error);

	pmap_invalidate_page(pmap, va);

	if (pmap_wired(pte))
		pmap->pm_stats.wired_count -= 1;

	pmap->pm_stats.resident_count -= 1;
	if (pmap_managed(pte)) {
		m = PHYS_TO_VM_PAGE(pmap_ppn(pte));
		if (pmap_dirty(pte))
			vm_page_dirty(m);
		if (pmap_accessed(pte))
			vm_page_flag_set(m, PG_REFERENCED);

		error = pmap_remove_entry(pmap, m, va, pv);
	}
	if (freepte)
		pmap_free_pte(pte, va);

	return (error);
}

/*
 * Extract the physical page address associated with a kernel
 * virtual address.
 */
vm_paddr_t
pmap_kextract(vm_offset_t va)
{
	struct ia64_lpte *pte;
	vm_offset_t gwpage;

	KASSERT(va >= IA64_RR_BASE(5), ("Must be kernel VA"));

	/* Regions 6 and 7 are direct mapped. */
	if (va >= IA64_RR_BASE(6))
		return (IA64_RR_MASK(va));

	/* EPC gateway page? */
	gwpage = (vm_offset_t)ia64_get_k5();
	if (va >= gwpage && va < gwpage + VM_GATEWAY_SIZE)
		return (IA64_RR_MASK((vm_offset_t)ia64_gateway_page));

	/* Bail out if the virtual address is beyond our limits. */
	if (va >= kernel_vm_end)
		return (0);

	pte = pmap_find_kpte(va);
	if (!pmap_present(pte))
		return (0);
	return (pmap_ppn(pte) | (va & PAGE_MASK));
}
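
/*
 * Illustrative sketch (not part of the original file): the region 6/7
 * short-circuit above relies on the direct mapping, where the physical
 * address is recovered by masking off the three region-select bits.
 * IA64_RR_MASK is assumed to strip bits 61..63; the helper name is
 * hypothetical.
 */
#if 0
static __inline vm_paddr_t
rr7_to_phys_sketch(vm_offset_t va)
{
	return (va & ((1UL << 61) - 1));	/* like IA64_RR_MASK(va) */
}
#endif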

/*
 * Add a list of wired pages to the kva.  This routine is only used for
 * temporary kernel mappings that do not need to have page modification
 * or references recorded.  Note that old mappings are simply written
 * over.  The page is effectively wired, but it's customary to not have
 * the PTE reflect that, nor update statistics.
 */
void
pmap_qenter(vm_offset_t va, vm_page_t *m, int count)
{
	struct ia64_lpte *pte;
	int i;

	for (i = 0; i < count; i++) {
		pte = pmap_find_kpte(va);
		if (pmap_present(pte))
			pmap_invalidate_page(kernel_pmap, va);
		else
			pmap_enter_vhpt(pte, va);
		pmap_pte_prot(kernel_pmap, pte, VM_PROT_ALL);
		pmap_set_pte(pte, va, VM_PAGE_TO_PHYS(m[i]), FALSE, FALSE);
		va += PAGE_SIZE;
	}
}

/*
 * this routine jerks page mappings from the
 * kernel -- it is meant only for temporary mappings.
 */
void
pmap_qremove(vm_offset_t va, int count)
{
	struct ia64_lpte *pte;
	int i;

	for (i = 0; i < count; i++) {
		pte = pmap_find_kpte(va);
		if (pmap_present(pte)) {
			pmap_remove_vhpt(va);
			pmap_invalidate_page(kernel_pmap, va);
			pmap_clear_present(pte);
		}
		va += PAGE_SIZE;
	}
}

/*
 * Add a wired page to the kva.  As for pmap_qenter(), it's customary
 * to not have the PTE reflect that, nor update statistics.
 */
void
pmap_kenter(vm_offset_t va, vm_offset_t pa)
{
	struct ia64_lpte *pte;

	pte = pmap_find_kpte(va);
	if (pmap_present(pte))
		pmap_invalidate_page(kernel_pmap, va);
	else
		pmap_enter_vhpt(pte, va);
	pmap_pte_prot(kernel_pmap, pte, VM_PROT_ALL);
	pmap_set_pte(pte, va, pa, FALSE, FALSE);
}

/*
 * Remove a page from the kva
 */
void
pmap_kremove(vm_offset_t va)
{
	struct ia64_lpte *pte;

	pte = pmap_find_kpte(va);
	if (pmap_present(pte)) {
		pmap_remove_vhpt(va);
		pmap_invalidate_page(kernel_pmap, va);
		pmap_clear_present(pte);
	}
}

/*
 *	Used to map a range of physical addresses into kernel
 *	virtual address space.
 *
 *	The value passed in '*virt' is a suggested virtual address for
 *	the mapping. Architectures which can support a direct-mapped
 *	physical to virtual region can return the appropriate address
 *	within that region, leaving '*virt' unchanged. Other
 *	architectures should map the pages starting at '*virt' and
 *	update '*virt' with the first usable address after the mapped
 *	region.
 */
vm_offset_t
pmap_map(vm_offset_t *virt, vm_offset_t start, vm_offset_t end, int prot)
{
	return IA64_PHYS_TO_RR7(start);
}

/*
 * Remove a single page from a process address space
 */
static void
pmap_remove_page(pmap_t pmap, vm_offset_t va)
{
	struct ia64_lpte *pte;

	KASSERT((pmap == kernel_pmap || pmap == PCPU_GET(current_pmap)),
		("removing page for non-current pmap"));

	pte = pmap_find_vhpt(va);
	if (pte != NULL)
		pmap_remove_pte(pmap, pte, va, 0, 1);
}

/*
 *	Remove the given range of addresses from the specified map.
 *
 *	It is assumed that the start and end are properly
 *	rounded to the page size.
 */
void
pmap_remove(pmap_t pmap, vm_offset_t sva, vm_offset_t eva)
{
	pmap_t oldpmap;
	vm_offset_t va;
	pv_entry_t npv, pv;
	struct ia64_lpte *pte;

	if (pmap->pm_stats.resident_count == 0)
		return;

	vm_page_lock_queues();
	PMAP_LOCK(pmap);
	oldpmap = pmap_switch(pmap);

	/*
	 * special handling of removing one page.  a very
	 * common operation and easy to short circuit some
	 * code.
	 */
	if (sva + PAGE_SIZE == eva) {
		pmap_remove_page(pmap, sva);
		goto out;
	}

	if (pmap->pm_stats.resident_count < ((eva - sva) >> PAGE_SHIFT)) {
		TAILQ_FOREACH_SAFE(pv, &pmap->pm_pvlist, pv_plist, npv) {
			va = pv->pv_va;
			if (va >= sva && va < eva) {
				pte = pmap_find_vhpt(va);
				KASSERT(pte != NULL, ("pte"));
				pmap_remove_pte(pmap, pte, va, pv, 1);
			}
		}
	} else {
		for (va = sva; va < eva; va += PAGE_SIZE) {
			pte = pmap_find_vhpt(va);
			if (pte != NULL)
				pmap_remove_pte(pmap, pte, va, 0, 1);
		}
	}

out:
	vm_page_unlock_queues();
	pmap_switch(oldpmap);
	PMAP_UNLOCK(pmap);
}

/*
 *	Routine:	pmap_remove_all
 *	Function:
 *		Removes this physical page from
 *		all physical maps in which it resides.
 *		Reflects back modify bits to the pager.
 *
 *	Notes:
 *		Original versions of this routine were very
 *		inefficient because they iteratively called
 *		pmap_remove (slow...)
 */
void
pmap_remove_all(vm_page_t m)
{
	pmap_t oldpmap;
	pv_entry_t pv;

#if defined(DIAGNOSTIC)
	/*
	 * XXX This makes pmap_remove_all() illegal for non-managed pages!
	 */
	if (m->flags & PG_FICTITIOUS) {
		panic("pmap_remove_all: illegal for unmanaged page, va: 0x%lx",
		    VM_PAGE_TO_PHYS(m));
	}
#endif
	mtx_assert(&vm_page_queue_mtx, MA_OWNED);
	while ((pv = TAILQ_FIRST(&m->md.pv_list)) != NULL) {
		struct ia64_lpte *pte;
		pmap_t pmap = pv->pv_pmap;
		vm_offset_t va = pv->pv_va;

		PMAP_LOCK(pmap);
		oldpmap = pmap_switch(pmap);
		pte = pmap_find_vhpt(va);
		KASSERT(pte != NULL, ("pte"));
		if (pmap_ppn(pte) != VM_PAGE_TO_PHYS(m))
			panic("pmap_remove_all: pv_table for %lx is inconsistent",
			    VM_PAGE_TO_PHYS(m));
		pmap_remove_pte(pmap, pte, va, pv, 1);
		pmap_switch(oldpmap);
		PMAP_UNLOCK(pmap);
	}
	vm_page_flag_clear(m, PG_WRITEABLE);
}

/*
 *	Set the physical protection on the
 *	specified range of this map as requested.
 */
void
pmap_protect(pmap_t pmap, vm_offset_t sva, vm_offset_t eva, vm_prot_t prot)
{
	pmap_t oldpmap;
	struct ia64_lpte *pte;

	if ((prot & VM_PROT_READ) == VM_PROT_NONE) {
		pmap_remove(pmap, sva, eva);
		return;
	}

	if ((prot & (VM_PROT_WRITE|VM_PROT_EXECUTE)) ==
	    (VM_PROT_WRITE|VM_PROT_EXECUTE))
		return;

	if ((sva & PAGE_MASK) || (eva & PAGE_MASK))
		panic("pmap_protect: unaligned addresses");

	vm_page_lock_queues();
	PMAP_LOCK(pmap);
	oldpmap = pmap_switch(pmap);
	while (sva < eva) {
		/*
		 * If page is invalid, skip this page
		 */
		pte = pmap_find_vhpt(sva);
		if (pte == NULL) {
			sva += PAGE_SIZE;
			continue;
		}

		if (pmap_prot(pte) != prot) {
			if (pmap_managed(pte)) {
				vm_offset_t pa = pmap_ppn(pte);
				vm_page_t m = PHYS_TO_VM_PAGE(pa);
				if (pmap_dirty(pte)) {
					vm_page_dirty(m);
					pmap_clear_dirty(pte);
				}
				if (pmap_accessed(pte)) {
					vm_page_flag_set(m, PG_REFERENCED);
					pmap_clear_accessed(pte);
				}
			}
			pmap_pte_prot(pmap, pte, prot);
			pmap_invalidate_page(pmap, sva);
		}
		sva += PAGE_SIZE;
	}
	vm_page_unlock_queues();
	pmap_switch(oldpmap);
	PMAP_UNLOCK(pmap);
}

/*
 *	Insert the given physical page (p) at
 *	the specified virtual address (v) in the
 *	target physical map with the protection requested.
 *
 *	If specified, the page will be wired down, meaning
 *	that the related pte can not be reclaimed.
 *
 *	NB:  This is the only routine which MAY NOT lazy-evaluate
 *	or lose information.  That is, this routine must actually
 *	insert this page into the given map NOW.
 */
void
pmap_enter(pmap_t pmap, vm_offset_t va, vm_prot_t access, vm_page_t m,
    vm_prot_t prot, boolean_t wired)
{
	pmap_t oldpmap;
	vm_offset_t pa;
	vm_offset_t opa;
	struct ia64_lpte origpte;
	struct ia64_lpte *pte;
	boolean_t managed;

	vm_page_lock_queues();
	PMAP_LOCK(pmap);
	oldpmap = pmap_switch(pmap);

	va &= ~PAGE_MASK;
	if (va > VM_MAX_KERNEL_ADDRESS)
		panic("pmap_enter: toobig");

	/*
	 * Find (or create) a pte for the given mapping.
	 */
	while ((pte = pmap_find_pte(va)) == NULL) {
		pmap_switch(oldpmap);
		PMAP_UNLOCK(pmap);
		vm_page_unlock_queues();
		VM_WAIT;
		vm_page_lock_queues();
		PMAP_LOCK(pmap);
		oldpmap = pmap_switch(pmap);
	}
	origpte = *pte;
	if (!pmap_present(pte)) {
		opa = ~0UL;
		pmap_enter_vhpt(pte, va);
	} else
		opa = pmap_ppn(pte);
	managed = FALSE;
	pa = VM_PAGE_TO_PHYS(m);

	if (opa == pa) {
		/*
		 * Mapping has not changed, must be protection or wiring change.
		 */
		if (wired && !pmap_wired(&origpte))
			pmap->pm_stats.wired_count++;
		else if (!wired && pmap_wired(&origpte))
			pmap->pm_stats.wired_count--;

		managed = (pmap_managed(&origpte)) ? TRUE : FALSE;

		/*
		 * We might be turning off write access to the page,
		 * so we go ahead and sense modify status.
		 */
		if (managed && pmap_dirty(&origpte))
			vm_page_dirty(m);

		pmap_invalidate_page(pmap, va);
		goto validate;
	}

	/*
	 * Mapping has changed, invalidate old range and fall
	 * through to handle validating new mapping.
	 */
	if (opa != ~0UL) {
		pmap_remove_pte(pmap, pte, va, 0, 0);
		pmap_enter_vhpt(pte, va);
	}

	/*
	 * Enter on the PV list if part of our managed memory.
	 */
	if ((m->flags & (PG_FICTITIOUS | PG_UNMANAGED)) == 0) {
		KASSERT(va < kmi.clean_sva || va >= kmi.clean_eva,
		    ("pmap_enter: managed mapping within the clean submap"));
		pmap_insert_entry(pmap, va, m);
		managed = TRUE;
	}

	/*
	 * Increment counters
	 */
	pmap->pm_stats.resident_count++;
	if (wired)
		pmap->pm_stats.wired_count++;

validate:

	/*
	 * Now validate mapping with desired protection/wiring. This
	 * adds the pte to the VHPT if necessary.
	 */
	pmap_pte_prot(pmap, pte, prot);
	pmap_set_pte(pte, va, pa, wired, managed);

	if ((prot & VM_PROT_WRITE) != 0)
		vm_page_flag_set(m, PG_WRITEABLE);
	vm_page_unlock_queues();
	pmap_switch(oldpmap);
	PMAP_UNLOCK(pmap);
}

/*
 * Maps a sequence of resident pages belonging to the same object.
 * The sequence begins with the given page m_start.  This page is
 * mapped at the given virtual address start.  Each subsequent page is
 * mapped at a virtual address that is offset from start by the same
 * amount as the page is offset from m_start within the object.  The
 * last page in the sequence is the page with the largest offset from
 * m_start that can be mapped at a virtual address less than the given
 * virtual address end.  Not every virtual page between start and end
 * is mapped; only those for which a resident page exists with the
 * corresponding offset from m_start are mapped.
 */
void
pmap_enter_object(pmap_t pmap, vm_offset_t start, vm_offset_t end,
    vm_page_t m_start, vm_prot_t prot)
{
	pmap_t oldpmap;
	vm_page_t m;
	vm_pindex_t diff, psize;

	VM_OBJECT_LOCK_ASSERT(m_start->object, MA_OWNED);
	psize = atop(end - start);
	m = m_start;
	PMAP_LOCK(pmap);
	oldpmap = pmap_switch(pmap);
	while (m != NULL && (diff = m->pindex - m_start->pindex) < psize) {
		pmap_enter_quick_locked(pmap, start + ptoa(diff), m, prot);
		m = TAILQ_NEXT(m, listq);
	}
	pmap_switch(oldpmap);
	PMAP_UNLOCK(pmap);
}

/*
 * this code makes some *MAJOR* assumptions:
 * 1. Current pmap & pmap exists.
 * 2. Not wired.
 * 3. Read access.
 * 4. No page table pages.
 * but is *MUCH* faster than pmap_enter...
 */
void
pmap_enter_quick(pmap_t pmap, vm_offset_t va, vm_page_t m, vm_prot_t prot)
{
	pmap_t oldpmap;

	PMAP_LOCK(pmap);
	oldpmap = pmap_switch(pmap);
	pmap_enter_quick_locked(pmap, va, m, prot);
	pmap_switch(oldpmap);
	PMAP_UNLOCK(pmap);
}

static void
pmap_enter_quick_locked(pmap_t pmap, vm_offset_t va, vm_page_t m,
    vm_prot_t prot)
{
	struct ia64_lpte *pte;
	boolean_t managed;

	KASSERT(va < kmi.clean_sva || va >= kmi.clean_eva ||
	    (m->flags & (PG_FICTITIOUS | PG_UNMANAGED)) != 0,
	    ("pmap_enter_quick_locked: managed mapping within the clean submap"));
	mtx_assert(&vm_page_queue_mtx, MA_OWNED);
	PMAP_LOCK_ASSERT(pmap, MA_OWNED);

	if ((pte = pmap_find_pte(va)) == NULL)
		return;

	if (!pmap_present(pte)) {
		/* Enter on the PV list if the page is managed. */
		if ((m->flags & (PG_FICTITIOUS | PG_UNMANAGED)) == 0) {
			if (!pmap_try_insert_pv_entry(pmap, va, m)) {
				pmap_free_pte(pte, va);
				return;
			}
			managed = TRUE;
		} else
			managed = FALSE;

		/* Increment counters. */
		pmap->pm_stats.resident_count++;

		/* Initialise with R/O protection and enter into VHPT. */
		pmap_enter_vhpt(pte, va);
		pmap_pte_prot(pmap, pte,
		    prot & (VM_PROT_READ | VM_PROT_EXECUTE));
		pmap_set_pte(pte, va, VM_PAGE_TO_PHYS(m), FALSE, managed);
	}
}

/*
 * pmap_object_init_pt preloads the ptes for a given object
 * into the specified pmap.  This eliminates the blast of soft
 * faults on process startup and immediately after an mmap.
 */
void
pmap_object_init_pt(pmap_t pmap, vm_offset_t addr,
    vm_object_t object, vm_pindex_t pindex, vm_size_t size)
{

	VM_OBJECT_LOCK_ASSERT(object, MA_OWNED);
	KASSERT(object->type == OBJT_DEVICE,
	    ("pmap_object_init_pt: non-device object"));
}

/*
 *	Routine:	pmap_change_wiring
 *	Function:	Change the wiring attribute for a map/virtual-address
 *			pair.
 *	In/out conditions:
 *			The mapping must already exist in the pmap.
 */
void
pmap_change_wiring(pmap, va, wired)
	register pmap_t pmap;
	vm_offset_t va;
	boolean_t wired;
{
	pmap_t oldpmap;
	struct ia64_lpte *pte;

	PMAP_LOCK(pmap);
	oldpmap = pmap_switch(pmap);

	pte = pmap_find_vhpt(va);
	KASSERT(pte != NULL, ("pte"));
	if (wired && !pmap_wired(pte)) {
		pmap->pm_stats.wired_count++;
		pmap_set_wired(pte);
	} else if (!wired && pmap_wired(pte)) {
		pmap->pm_stats.wired_count--;
		pmap_clear_wired(pte);
	}

	pmap_switch(oldpmap);
	PMAP_UNLOCK(pmap);
}

/*
 *	Routine:	pmap_copy
 *	Function:
 *		Copy the range specified by src_addr/len
 *		from the source map to the range dst_addr/len
 *		in the destination map.
 *
 *	This routine is only advisory and need not do anything.
 */
void
pmap_copy(pmap_t dst_pmap, pmap_t src_pmap, vm_offset_t dst_addr, vm_size_t len,
    vm_offset_t src_addr)
{
}

/*
 *	pmap_zero_page zeros the specified hardware page by
 *	mapping it into virtual memory and using bzero to clear
 *	its contents.
 */
void
pmap_zero_page(vm_page_t m)
{
	vm_offset_t va = IA64_PHYS_TO_RR7(VM_PAGE_TO_PHYS(m));
	bzero((caddr_t)va, PAGE_SIZE);
}

/*
 *	pmap_zero_page_area zeros the specified hardware page by
 *	mapping it into virtual memory and using bzero to clear
 *	its contents.
 *
 *	off and size must reside within a single page.
 */
void
pmap_zero_page_area(vm_page_t m, int off, int size)
{
	vm_offset_t va = IA64_PHYS_TO_RR7(VM_PAGE_TO_PHYS(m));
	bzero((char *)(caddr_t)va + off, size);
}

/*
 *	pmap_zero_page_idle zeros the specified hardware page by
 *	mapping it into virtual memory and using bzero to clear
 *	its contents.  This is for the vm_idlezero process.
 */
void
pmap_zero_page_idle(vm_page_t m)
{
	vm_offset_t va = IA64_PHYS_TO_RR7(VM_PAGE_TO_PHYS(m));
	bzero((caddr_t)va, PAGE_SIZE);
}

/*
 *	pmap_copy_page copies the specified (machine independent)
 *	page by mapping the page into virtual memory and using
 *	bcopy to copy the page, one machine dependent page at a
 *	time.
 */
void
pmap_copy_page(vm_page_t msrc, vm_page_t mdst)
{
	vm_offset_t src = IA64_PHYS_TO_RR7(VM_PAGE_TO_PHYS(msrc));
	vm_offset_t dst = IA64_PHYS_TO_RR7(VM_PAGE_TO_PHYS(mdst));
	bcopy((caddr_t)src, (caddr_t)dst, PAGE_SIZE);
}

/*
 * Returns true if the pmap's pv is one of the first
 * 16 pvs linked to from this page.  This count may
 * be changed upwards or downwards in the future; it
 * is only necessary that true be returned for a small
 * subset of pmaps for proper page aging.
 */
boolean_t
pmap_page_exists_quick(pmap_t pmap, vm_page_t m)
{
	pv_entry_t pv;
	int loops = 0;

	if (m->flags & PG_FICTITIOUS)
		return FALSE;

	/*
	 * Not found, check current mappings returning immediately if found.
	 */
	mtx_assert(&vm_page_queue_mtx, MA_OWNED);
	TAILQ_FOREACH(pv, &m->md.pv_list, pv_list) {
		if (pv->pv_pmap == pmap) {
			return TRUE;
		}
		loops++;
		if (loops >= 16)
			break;
	}
	return (FALSE);
}

/*
 *	pmap_page_wired_mappings:
 *
 *	Return the number of managed mappings to the given physical page
 *	that are wired.
 */
int
pmap_page_wired_mappings(vm_page_t m)
{
	struct ia64_lpte *pte;
	pmap_t oldpmap, pmap;
	pv_entry_t pv;
	int count;

	count = 0;
	if ((m->flags & PG_FICTITIOUS) != 0)
		return (count);
	mtx_assert(&vm_page_queue_mtx, MA_OWNED);
	TAILQ_FOREACH(pv, &m->md.pv_list, pv_list) {
		pmap = pv->pv_pmap;
		PMAP_LOCK(pmap);
		oldpmap = pmap_switch(pmap);
		pte = pmap_find_vhpt(pv->pv_va);
		KASSERT(pte != NULL, ("pte"));
		if (pmap_wired(pte))
			count++;
		pmap_switch(oldpmap);
		PMAP_UNLOCK(pmap);
	}
	return (count);
}

/*
 * Remove all pages from specified address space
 * this aids process exit speeds.  Also, this code
 * is special cased for current process only, but
 * can have the more generic (and slightly slower)
 * mode enabled.  This is much faster than pmap_remove
 * in the case of running down an entire address space.
 */
void
pmap_remove_pages(pmap_t pmap)
{
	pmap_t oldpmap;
	pv_entry_t pv, npv;

	if (pmap != vmspace_pmap(curthread->td_proc->p_vmspace)) {
		printf("warning: pmap_remove_pages called with non-current pmap\n");
		return;
	}

	vm_page_lock_queues();
	PMAP_LOCK(pmap);
	oldpmap = pmap_switch(pmap);

	for (pv = TAILQ_FIRST(&pmap->pm_pvlist); pv; pv = npv) {
		struct ia64_lpte *pte;

		npv = TAILQ_NEXT(pv, pv_plist);

		pte = pmap_find_vhpt(pv->pv_va);
		KASSERT(pte != NULL, ("pte"));
		if (!pmap_wired(pte))
			pmap_remove_pte(pmap, pte, pv->pv_va, pv, 1);
	}

	pmap_switch(oldpmap);
	PMAP_UNLOCK(pmap);
	vm_page_unlock_queues();
}

/*
 *	pmap_ts_referenced:
 *
 *	Return a count of reference bits for a page, clearing those bits.
 *	It is not necessary for every reference bit to be cleared, but it
 *	is necessary that 0 only be returned when there are truly no
 *	reference bits set.
 *
 *	XXX: The exact number of bits to check and clear is a matter that
 *	should be tested and standardized at some point in the future for
 *	optimal aging of shared pages.
 */
int
pmap_ts_referenced(vm_page_t m)
{
	struct ia64_lpte *pte;
	pmap_t oldpmap;
	pv_entry_t pv;
	int count = 0;

	if (m->flags & PG_FICTITIOUS)
		return 0;

	TAILQ_FOREACH(pv, &m->md.pv_list, pv_list) {
		PMAP_LOCK(pv->pv_pmap);
		oldpmap = pmap_switch(pv->pv_pmap);
		pte = pmap_find_vhpt(pv->pv_va);
		KASSERT(pte != NULL, ("pte"));
		if (pmap_accessed(pte)) {
			count++;
			pmap_clear_accessed(pte);
			pmap_invalidate_page(pv->pv_pmap, pv->pv_va);
		}
		pmap_switch(oldpmap);
		PMAP_UNLOCK(pv->pv_pmap);
	}

	return count;
}

/*
 *	pmap_is_modified:
 *
 *	Return whether or not the specified physical page was modified
 *	in any physical maps.
 */
boolean_t
pmap_is_modified(vm_page_t m)
{
	struct ia64_lpte *pte;
	pmap_t oldpmap;
	pv_entry_t pv;
	boolean_t rv;

	rv = FALSE;
	if (m->flags & PG_FICTITIOUS)
		return (rv);

	TAILQ_FOREACH(pv, &m->md.pv_list, pv_list) {
		PMAP_LOCK(pv->pv_pmap);
		oldpmap = pmap_switch(pv->pv_pmap);
		pte = pmap_find_vhpt(pv->pv_va);
		pmap_switch(oldpmap);
		KASSERT(pte != NULL, ("pte"));
		rv = pmap_dirty(pte) ? TRUE : FALSE;
		PMAP_UNLOCK(pv->pv_pmap);
		if (rv)
			break;
	}

	return (rv);
}

/*
 *	pmap_is_prefaultable:
 *
 *	Return whether or not the specified virtual address is eligible
 *	for prefault.
 */
boolean_t
pmap_is_prefaultable(pmap_t pmap, vm_offset_t addr)
{
	struct ia64_lpte *pte;

	pte = pmap_find_vhpt(addr);
	if (pte != NULL && pmap_present(pte))
		return (FALSE);
	return (TRUE);
}

/*
 *	Clear the modify bits on the specified physical page.
 */
void
pmap_clear_modify(vm_page_t m)
{
	struct ia64_lpte *pte;
	pmap_t oldpmap;
	pv_entry_t pv;

	if (m->flags & PG_FICTITIOUS)
		return;

	TAILQ_FOREACH(pv, &m->md.pv_list, pv_list) {
		PMAP_LOCK(pv->pv_pmap);
		oldpmap = pmap_switch(pv->pv_pmap);
		pte = pmap_find_vhpt(pv->pv_va);
		KASSERT(pte != NULL, ("pte"));
		if (pmap_dirty(pte)) {
			pmap_clear_dirty(pte);
			pmap_invalidate_page(pv->pv_pmap, pv->pv_va);
		}
		pmap_switch(oldpmap);
		PMAP_UNLOCK(pv->pv_pmap);
	}
}

/*
 *	pmap_clear_reference:
 *
 *	Clear the reference bit on the specified physical page.
 */
void
pmap_clear_reference(vm_page_t m)
{
	struct ia64_lpte *pte;
	pmap_t oldpmap;
	pv_entry_t pv;

	if (m->flags & PG_FICTITIOUS)
		return;

	TAILQ_FOREACH(pv, &m->md.pv_list, pv_list) {
		PMAP_LOCK(pv->pv_pmap);
		oldpmap = pmap_switch(pv->pv_pmap);
		pte = pmap_find_vhpt(pv->pv_va);
		KASSERT(pte != NULL, ("pte"));
		if (pmap_accessed(pte)) {
			pmap_clear_accessed(pte);
			pmap_invalidate_page(pv->pv_pmap, pv->pv_va);
		}
		pmap_switch(oldpmap);
		PMAP_UNLOCK(pv->pv_pmap);
	}
}

/*
 * Clear the write and modified bits in each of the given page's mappings.
 */
void
pmap_remove_write(vm_page_t m)
{
	struct ia64_lpte *pte;
	pmap_t oldpmap, pmap;
	pv_entry_t pv;
	vm_prot_t prot;

	mtx_assert(&vm_page_queue_mtx, MA_OWNED);
	if ((m->flags & PG_FICTITIOUS) != 0 ||
	    (m->flags & PG_WRITEABLE) == 0)
		return;
	TAILQ_FOREACH(pv, &m->md.pv_list, pv_list) {
		pmap = pv->pv_pmap;
		PMAP_LOCK(pmap);
		oldpmap = pmap_switch(pmap);
		pte = pmap_find_vhpt(pv->pv_va);
		KASSERT(pte != NULL, ("pte"));
		prot = pmap_prot(pte);
		if ((prot & VM_PROT_WRITE) != 0) {
			if (pmap_dirty(pte)) {
				vm_page_dirty(m);
				pmap_clear_dirty(pte);
			}
			prot &= ~VM_PROT_WRITE;
			pmap_pte_prot(pmap, pte, prot);
			pmap_invalidate_page(pmap, pv->pv_va);
		}
		pmap_switch(oldpmap);
		PMAP_UNLOCK(pmap);
	}
	vm_page_flag_clear(m, PG_WRITEABLE);
}

/*
 * Map a set of physical memory pages into the kernel virtual
 * address space. Return a pointer to where it is mapped. This
 * routine is intended to be used for mapping device memory,
 * NOT real memory.
 */
void *
pmap_mapdev(vm_offset_t pa, vm_size_t size)
{
	return (void *)IA64_PHYS_TO_RR6(pa);
}

/*
 * 'Unmap' a range mapped by pmap_mapdev().
 */
void
pmap_unmapdev(vm_offset_t va, vm_size_t size)
{
}

/*
 * perform the pmap work for mincore
 */
int
pmap_mincore(pmap_t pmap, vm_offset_t addr)
{
	pmap_t oldpmap;
	struct ia64_lpte *pte, tpte;
	int val = 0;

	PMAP_LOCK(pmap);
	oldpmap = pmap_switch(pmap);
	pte = pmap_find_vhpt(addr);
	if (pte != NULL) {
		tpte = *pte;
		pte = &tpte;
	}
	pmap_switch(oldpmap);
	PMAP_UNLOCK(pmap);

	if (pte == NULL)
		return 0;

	if (pmap_present(pte)) {
		vm_page_t m;
		vm_offset_t pa;

		val = MINCORE_INCORE;
		if (!pmap_managed(pte))
			return val;

		pa = pmap_ppn(pte);

		m = PHYS_TO_VM_PAGE(pa);

		/*
		 * Modified by us
		 */
		if (pmap_dirty(pte))
			val |= MINCORE_MODIFIED|MINCORE_MODIFIED_OTHER;
		else {
			/*
			 * Modified by someone
			 */
			vm_page_lock_queues();
			if (pmap_is_modified(m))
				val |= MINCORE_MODIFIED_OTHER;
			vm_page_unlock_queues();
		}
		/*
		 * Referenced by us
		 */
		if (pmap_accessed(pte))
			val |= MINCORE_REFERENCED|MINCORE_REFERENCED_OTHER;
		else {
			/*
			 * Referenced by someone
			 */
			vm_page_lock_queues();
			if (pmap_ts_referenced(m)) {
				val |= MINCORE_REFERENCED_OTHER;
				vm_page_flag_set(m, PG_REFERENCED);
			}
			vm_page_unlock_queues();
		}
	}
	return val;
}

void
pmap_activate(struct thread *td)
{
	pmap_switch(vmspace_pmap(td->td_proc->p_vmspace));
}

pmap_t
pmap_switch(pmap_t pm)
{
	pmap_t prevpm;
	int i;

	critical_enter();
	prevpm = PCPU_GET(current_pmap);
	if (prevpm == pm)
		goto out;
	if (prevpm != NULL)
		atomic_clear_32(&prevpm->pm_active, PCPU_GET(cpumask));
	if (pm == NULL) {
		for (i = 0; i < 5; i++) {
			ia64_set_rr(IA64_RR_BASE(i),
			    (i << 8)|(PAGE_SHIFT << 2)|1);
		}
	} else {
		for (i = 0; i < 5; i++) {
			ia64_set_rr(IA64_RR_BASE(i),
			    (pm->pm_rid[i] << 8)|(PAGE_SHIFT << 2)|1);
		}
		atomic_set_32(&pm->pm_active, PCPU_GET(cpumask));
	}
	PCPU_SET(current_pmap, pm);
out:
	critical_exit();
	return (prevpm);
}

vm_offset_t
pmap_addr_hint(vm_object_t obj, vm_offset_t addr, vm_size_t size)
{
	return addr;
}

#include "opt_ddb.h"

#ifdef DDB

#include <ddb/ddb.h>

static const char *psnames[] = {
	"1B",	"2B",	"4B",	"8B",
	"16B",	"32B",	"64B",	"128B",
	"256B",	"512B",	"1K",	"2K",
	"4K",	"8K",	"16K",	"32K",
	"64K",	"128K",	"256K",	"512K",
	"1M",	"2M",	"4M",	"8M",
	"16M",	"32M",	"64M",	"128M",
	"256M",	"512M",	"1G",	"2G"
};

static void
print_trs(int type)
{
	struct ia64_pal_result res;
	int i, maxtr;
	struct {
		uint64_t	pte;
		uint64_t	itir;
		uint64_t	ifa;
		struct ia64_rr	rr;
	} buf;
	static const char *manames[] = {
		"WB",	"bad",	"bad",	"bad",
		"UC",	"UCE",	"WC",	"NaT",
	};

	res = ia64_call_pal_static(PAL_VM_SUMMARY, 0, 0, 0);
	if (res.pal_status != 0) {
		db_printf("Can't get VM summary\n");
		return;
	}

	if (type == 0)
		maxtr = (res.pal_result[0] >> 40) & 0xff;
	else
		maxtr = (res.pal_result[0] >> 32) & 0xff;

	db_printf("V RID    Virtual Page  Physical Page PgSz ED AR PL D A MA  P KEY\n");
	for (i = 0; i <= maxtr; i++) {
		bzero(&buf, sizeof(buf));
		res = ia64_call_pal_stacked_physical
			(PAL_VM_TR_READ, i, type, ia64_tpa((uint64_t)&buf));
		if (!(res.pal_result[0] & 1))
			buf.pte &= ~PTE_AR_MASK;
		if (!(res.pal_result[0] & 2))
			buf.pte &= ~PTE_PL_MASK;
		if (!(res.pal_result[0] & 4))
			pmap_clear_dirty(&buf);
		if (!(res.pal_result[0] & 8))
			buf.pte &= ~PTE_MA_MASK;
		db_printf("%d %06x %013lx %013lx %4s %d  %d  %d  %d %d %-3s "
		    "%d %06x\n", (int)buf.ifa & 1, buf.rr.rr_rid,
		    buf.ifa >> 12, (buf.pte & PTE_PPN_MASK) >> 12,
		    psnames[(buf.itir & ITIR_PS_MASK) >> 2],
		    (buf.pte & PTE_ED) ? 1 : 0,
		    (int)(buf.pte & PTE_AR_MASK) >> 9,
		    (int)(buf.pte & PTE_PL_MASK) >> 7,
		    (pmap_dirty(&buf)) ? 1 : 0,
		    (pmap_accessed(&buf)) ? 1 : 0,
		    manames[(buf.pte & PTE_MA_MASK) >> 2],
		    (pmap_present(&buf)) ? 1 : 0,
		    (int)((buf.itir & ITIR_KEY_MASK) >> 8));
	}
}

DB_COMMAND(itr, db_itr)
{
	print_trs(0);
}

DB_COMMAND(dtr, db_dtr)
{
	print_trs(1);
}

DB_COMMAND(rr, db_rr)
{
	int i;
	uint64_t t;
	struct ia64_rr rr;

	printf("RR RID    PgSz VE\n");
	for (i = 0; i < 8; i++) {
		__asm __volatile ("mov %0=rr[%1]"
				  : "=r"(t)
				  : "r"(IA64_RR_BASE(i)));
		*(uint64_t *)&rr = t;
		printf("%d  %06x %4s %d\n",
		    i, rr.rr_rid, psnames[rr.rr_ps], rr.rr_ve);
	}
}

DB_COMMAND(thash, db_thash)
{
	if (!have_addr)
		return;

	db_printf("%p\n", (void *)ia64_thash(addr));
}

DB_COMMAND(ttag, db_ttag)
{
	if (!have_addr)
		return;

	db_printf("0x%lx\n", ia64_ttag(addr));
}

DB_COMMAND(kpte, db_kpte)
{
	struct ia64_lpte *pte;

	if (!have_addr) {
		db_printf("usage: kpte <kva>\n");
		return;
	}
	if (addr < VM_MIN_KERNEL_ADDRESS) {
		db_printf("kpte: error: invalid <kva>\n");
		return;
	}
	pte = pmap_find_kpte(addr);
	db_printf("kpte at %p:\n", pte);
	db_printf("  pte  =%016lx\n", pte->pte);
	db_printf("  itir =%016lx\n", pte->itir);
	db_printf("  tag  =%016lx\n", pte->tag);
	db_printf("  chain=%016lx\n", pte->chain);
}

#endif