2 * Copyright (c) 2006 Kip Macy <kmacy@FreeBSD.org>
5 * Redistribution and use in source and binary forms, with or without
6 * modification, are permitted provided that the following conditions
8 * 1. Redistributions of source code must retain the above copyright
9 * notice, this list of conditions and the following disclaimer.
10 * 2. Redistributions in binary form must reproduce the above copyright
11 * notice, this list of conditions and the following disclaimer in the
12 * documentation and/or other materials provided with the distribution.
14 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
15 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
16 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
17 * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
18 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
19 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
20 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
21 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
22 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
23 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
28 #include <sys/cdefs.h>
29 __FBSDID("$FreeBSD$");
31 #include "opt_kstack_pages.h"
32 #include "opt_msgbuf.h"
34 #include "opt_trap_trace.h"
36 #include <sys/param.h>
37 #include <sys/kernel.h>
41 #include <sys/msgbuf.h>
42 #include <sys/mutex.h>
45 #include <sys/sched.h>
46 #include <sys/sysctl.h>
47 #include <sys/systm.h>
48 #include <sys/vmmeter.h>
50 #include <dev/ofw/openfirm.h>
53 #include <vm/vm_page.h>
54 #include <vm/vm_param.h>
55 #include <vm/vm_kern.h>
56 #include <vm/vm_map.h>
57 #include <vm/vm_object.h>
58 #include <vm/vm_extern.h>
59 #include <vm/vm_pageout.h>
60 #include <vm/vm_pager.h>
61 #include <vm/vm_phys.h>
64 #include <machine/cpu.h>
65 #include <machine/frame.h>
66 #include <machine/instr.h>
67 #include <machine/md_var.h>
68 #include <machine/metadata.h>
69 #include <machine/ofw_mem.h>
70 #include <machine/mmu.h>
71 #include <machine/smp.h>
72 #include <machine/tlb.h>
73 #include <machine/tte.h>
74 #include <machine/tte_hash.h>
75 #include <machine/pcb.h>
76 #include <machine/pstate.h>
77 #include <machine/tsb.h>
79 #include <machine/hypervisorvar.h>
80 #include <machine/hv_api.h>
83 void trap_trace_report(int);
89 #ifndef PMAP_SHPGPERPROC
90 #define PMAP_SHPGPERPROC 200
94 * Virtual and physical address of message buffer.
96 struct msgbuf *msgbufp;
97 vm_paddr_t msgbuf_phys;
100 * Map of physical memory regions.
102 vm_paddr_t phys_avail[128];
103 vm_paddr_t phys_avail_tmp[128];
104 static struct ofw_mem_region mra[128];
105 static struct ofw_map translations[128];
106 static int translations_size;
109 struct ofw_mem_region sparc64_memreg[128];
112 extern vm_paddr_t mmu_fault_status_area;
115 * First and last available kernel virtual addresses.
117 vm_offset_t virtual_avail;
118 vm_offset_t virtual_end;
119 vm_offset_t kernel_vm_end;
120 vm_offset_t vm_max_kernel_address;
122 #ifndef PMAP_SHPGPERPROC
123 #define PMAP_SHPGPERPROC 200
126 * Data for the pv entry allocation mechanism
/* UMA zone that pv entries are allocated from and returned to. */
128 static uma_zone_t pvzone;
129 static struct vm_object pvzone_obj;
/*
 * pv entry accounting: pv_entry_count is compared against
 * pv_entry_high_water by get_pv_entry() and pmap_copy().
 */
130 static int pv_entry_count = 0, pv_entry_max = 0, pv_entry_high_water = 0;
/* When non-zero, pmap_bootstrap() prints the phys_avail/real_phys_avail ranges. */
132 static int pmap_debug_range = 1;
/* When non-zero, pmap_bootstrap() uses 256MB pages for aligned direct mappings. */
133 static int use_256M_pages = 1;
/* Spin mutex taken around pushes and pops on ctx_stack. */
135 static struct mtx pmap_ctx_lock;
/* Stack of context numbers; ctx_stack_top is the index of the next push slot. */
136 static uint16_t ctx_stack[PMAP_CONTEXT_MAX];
137 static int ctx_stack_top;
/*
 * Nucleus (permanently mapped kernel) bookkeeping filled in by
 * pmap_bootstrap(): the number of permanent 4MB mappings, their total
 * size in bytes, and the physical address of each one.
 * NOTE(review): nucleus_mappings has 4 slots and is indexed with
 * permanent_mappings++; no bound check is visible in this excerpt.
 */
139 static int permanent_mappings = 0;
140 static uint64_t nucleus_memory;
141 static uint64_t nucleus_mappings[4];
145 struct pmap kernel_pmap_store;
147 hv_tsb_info_t kernel_td[MAX_TSB_INFO];
150 * This should be determined at boot time
151 * with tiny TLBS it doesn't make sense to try and selectively
152 * invalidate more than this
154 #define MAX_INVALIDATES 32
155 #define MAX_TSB_CLEARS 128
158 * Allocate physical memory for use in pmap_bootstrap.
160 static vm_paddr_t pmap_bootstrap_alloc(vm_size_t size);
163 * If a user pmap is processed with pmap_remove and the
164 * resident count drops to 0, there are no more pages to remove, so we
/*
 * Evaluates to true when pm is not the kernel pmap and its resident page
 * count is zero.  The argument is evaluated twice.
 */
167 #define PMAP_REMOVE_DONE(pm) \
168 ((pm) != kernel_pmap && (pm)->pm_stats.resident_count == 0)
171 * Kernel MMU interface
173 #define curthread_pmap vmspace_pmap(curthread->td_proc->p_vmspace)
176 #define KDPRINTF if (pmap_debug) printf
178 if (curthread_pmap && (curthread_pmap->pm_context != 0) && ((PCPU_GET(cpumask) & curthread_pmap->pm_active) == 0)) \
179 panic("cpumask(0x%x) & active (0x%x) == 0 pid == %d\n", \
180 PCPU_GET(cpumask), curthread_pmap->pm_active, curthread->td_proc->p_pid); \
181 if (pmap_debug) printf
186 #define KDPRINTF(...)
190 static void free_pv_entry(pv_entry_t pv);
191 static pv_entry_t get_pv_entry(pmap_t locked_pmap);
193 static void pmap_insert_entry(pmap_t pmap, vm_offset_t va, vm_page_t m);
194 static void pmap_remove_entry(struct pmap *pmap, vm_page_t m, vm_offset_t va);
195 static void pmap_remove_tte(pmap_t pmap, tte_t tte_data, vm_offset_t va);
196 static void pmap_enter_quick_locked(pmap_t pmap, vm_offset_t va, vm_page_t m, vm_prot_t prot);
197 static void pmap_tsb_reset(pmap_t pmap);
198 static void pmap_tsb_resize(pmap_t pmap);
199 static void pmap_tte_hash_resize(pmap_t pmap);
201 void pmap_set_ctx_panic(uint64_t error, vm_paddr_t tsb_ra, pmap_t pmap);
203 struct tsb_resize_info {
204 uint64_t tri_tsbscratch;
209 * Quick sort callout for comparing memory regions.
211 static int mr_cmp(const void *a, const void *b);
212 static int om_cmp(const void *a, const void *b);
/*
 * qsort() comparator for struct ofw_mem_region entries; it compares the
 * mr_start address of the two regions.  pmap_bootstrap() passes it to
 * qsort() to sort the "available" memory list.
 * The return statements are not visible in this excerpt.
 */
214 mr_cmp(const void *a, const void *b)
/* NOTE: the local name mra shadows the file-scope mra[] array in this function. */
216 const struct ofw_mem_region *mra;
217 const struct ofw_mem_region *mrb;
221 if (mra->mr_start < mrb->mr_start)
223 else if (mra->mr_start > mrb->mr_start)
/*
 * qsort() comparator for struct ofw_map entries; it compares the om_start
 * virtual address of the two translations.  pmap_bootstrap() passes it to
 * qsort() to sort the PROM "translations" list.
 * The return statements are not visible in this excerpt.
 */
229 om_cmp(const void *a, const void *b)
231 const struct ofw_map *oma;
232 const struct ofw_map *omb;
236 if (oma->om_start < omb->om_start)
238 else if (oma->om_start > omb->om_start)
/*
 * Return context number ctx to the free stack: store it at
 * ctx_stack[ctx_stack_top] and advance ctx_stack_top, holding the
 * pmap_ctx_lock spin mutex around the push.
 */
245 free_context(uint16_t ctx)
247 mtx_lock_spin(&pmap_ctx_lock);
248 ctx_stack[ctx_stack_top++] = ctx;
249 mtx_unlock_spin(&pmap_ctx_lock);
/*
 * NOTE(review): this check runs after the store and after the lock has
 * been dropped, so it reads ctx_stack_top unlocked and can only report an
 * overrun once the out-of-range write has already happened.
 */
251 KASSERT(ctx_stack_top < PMAP_CONTEXT_MAX,
252 ("context stack overrun - system error"));
/*
 * Pop a context number off ctx_stack while holding pmap_ctx_lock.
 * The function name line and the return statement are not visible in this
 * excerpt.
 */
255 static __inline uint16_t
260 mtx_lock_spin(&pmap_ctx_lock);
261 ctx = ctx_stack[--ctx_stack_top];
262 mtx_unlock_spin(&pmap_ctx_lock);
/*
 * Checked after the pop and outside the lock: the assertion fires once the
 * stack index has reached zero.
 */
264 KASSERT(ctx_stack_top > 0,
265 ("context stack underrun - need to implement context stealing"));
/*
 * Release a pv entry by handing it back to the pvzone UMA zone.
 * Any counter update that accompanies the free is not visible in this
 * excerpt.
 */
271 free_pv_entry(pv_entry_t pv)
274 uma_zfree(pvzone, pv);
278 * get a new pv_entry, allocating a block from the system
/*
 * Obtain a pv entry for use by locked_pmap.
 *
 * The caller must hold locked_pmap's lock and the page queues mutex; both
 * are asserted below.  A non-sleeping allocation from pvzone is tried
 * first.  If that fails, existing mappings are torn down (pages on the
 * inactive queue first, then the active queue) until an entry is
 * available, and the function panics if none can be found.
 */
282 get_pv_entry(pmap_t locked_pmap)
/* Rate limit for the warning printed below: one message per 60 seconds. */
284 static const struct timeval printinterval = { 60, 0 };
285 static struct timeval lastprint;
286 struct vpgqueues *vpq;
289 pv_entry_t allocated_pv, next_pv, pv;
293 PMAP_LOCK_ASSERT(locked_pmap, MA_OWNED);
294 mtx_assert(&vm_page_queue_mtx, MA_OWNED);
/* Fast path: M_NOWAIT allocation from the pv entry zone. */
295 allocated_pv = uma_zalloc(pvzone, M_NOWAIT);
296 if (allocated_pv != NULL) {
298 if (pv_entry_count > pv_entry_high_water)
301 return (allocated_pv);
305 * Reclaim pv entries: At first, destroy mappings to inactive
306 * pages. After that, if a pv entry is still needed, destroy
307 * mappings to active pages.
309 if (ratecheck(&lastprint, &printinterval))
310 printf("Approaching the limit on PV entries, "
311 "increase the vm.pmap.shpgperproc tunable.\n");
313 vpq = &vm_page_queues[PQ_INACTIVE];
/* Walk every page on the current queue, testing for held or busy pages. */
315 TAILQ_FOREACH(m, &vpq->pl, pageq) {
316 if (m->hold_count || m->busy)
318 TAILQ_FOREACH_SAFE(pv, &m->md.pv_list, pv_list, next_pv) {
321 /* Avoid deadlock and lock recursion. */
/*
 * Pmaps are compared by address; a pmap other than locked_pmap is only
 * ever acquired with a try-lock.
 */
322 if (pmap > locked_pmap)
324 else if (pmap != locked_pmap && !PMAP_TRYLOCK(pmap))
/* Tear the mapping down: drop it from the TTE hash and fix up accounting. */
326 pmap->pm_stats.resident_count--;
328 tte_data = tte_hash_delete(pmap->pm_hash, va);
330 KASSERT((tte_data & VTD_WIRED) == 0,
331 ("get_pv_entry: wired pte %#jx", (uintmax_t)tte_data));
/* Carry the referenced bit of the removed TTE over to the vm_page. */
332 if (tte_data & VTD_REF)
333 vm_page_flag_set(m, PG_REFERENCED);
334 if (tte_data & VTD_W) {
335 KASSERT((tte_data & VTD_SW_W),
336 ("get_pv_entry: modified page not writable: va: %lx, tte: %lx",
341 pmap_invalidate_page(pmap, va, TRUE);
/* Unlink the pv entry from both the per-pmap and the per-page lists. */
342 TAILQ_REMOVE(&pmap->pm_pvlist, pv, pv_plist);
343 TAILQ_REMOVE(&m->md.pv_list, pv, pv_list);
344 if (TAILQ_EMPTY(&m->md.pv_list))
345 vm_page_flag_clear(m, PG_WRITEABLE);
346 m->md.pv_list_count--;
348 if (pmap != locked_pmap)
350 if (allocated_pv == NULL)
/* Nothing reclaimed from this queue: move on to the active queue, else panic. */
356 if (allocated_pv == NULL) {
357 if (vpq == &vm_page_queues[PQ_INACTIVE]) {
358 vpq = &vm_page_queues[PQ_ACTIVE];
361 panic("get_pv_entry: increase the vm.pmap.shpgperproc tunable");
363 return (allocated_pv);
367 * Allocate a physical page of memory directly from the phys_avail map.
368 * Can only be called from pmap_bootstrap before avail start and end are
/*
 * Carve size bytes (rounded up to a whole number of pages) off the front
 * of a phys_avail range that is large enough, scrub the memory, and panic
 * if no range can satisfy the request.
 */
372 pmap_bootstrap_alloc(vm_size_t size)
377 size = round_page(size);
/* phys_avail holds (start, end) pairs and is terminated by a zero end. */
379 for (i = 0; phys_avail[i + 1] != 0; i += 2) {
380 if (phys_avail[i + 1] - phys_avail[i] < size)
/* Consume the allocation from the start of the chosen range. */
383 phys_avail[i] += size;
384 pmap_scrub_pages(pa, size);
387 panic("pmap_bootstrap_alloc");
391 * Activate a user pmap. The pmap must be activated before its address space
392 * can be accessed in any way.
/*
 * Make the pmap of td's process the current pmap on this CPU: update the
 * active-CPU masks of the old and new pmaps, load the new pmap's hash and
 * TSB scratchpad values, record it in the per-CPU curpmap, register its
 * TSB with the hypervisor and write its context number to the MMU.
 */
395 pmap_activate(struct thread *td)
397 pmap_t pmap, oldpmap;
401 pmap = vmspace_pmap(td->td_proc->p_vmspace);
402 oldpmap = PCPU_GET(curpmap);
/*
 * Two variants of the active-mask update follow: an atomic one keyed on
 * this CPU's mask and a plain one using bit 0.
 * NOTE(review): presumably selected by a preprocessor conditional (e.g.
 * SMP) whose lines are not visible in this excerpt; confirm.
 */
404 atomic_clear_int(&oldpmap->pm_active, PCPU_GET(cpumask));
405 atomic_set_int(&pmap->pm_tlbactive, PCPU_GET(cpumask));
406 atomic_set_int(&pmap->pm_active, PCPU_GET(cpumask));
408 oldpmap->pm_active &= ~1;
409 pmap->pm_active |= 1;
410 pmap->pm_tlbactive |= 1;
/* Point the user scratchpad values at this pmap's hash and TSB; reset miss counters. */
413 pmap->pm_hashscratch = tte_hash_set_scratchpad_user(pmap->pm_hash, pmap->pm_context);
414 pmap->pm_tsbscratch = tsb_set_scratchpad_user(&pmap->pm_tsb);
415 pmap->pm_tsb_miss_count = pmap->pm_tsb_cap_miss_count = 0;
417 PCPU_SET(curpmap, pmap);
/* Only non-kernel (non-zero) contexts register a TSB for the non-zero context. */
418 if (pmap->pm_context != 0)
419 if ((err = hv_mmu_tsb_ctxnon0(1, pmap->pm_tsb_ra)) != H_EOK)
420 panic("failed to set TSB 0x%lx - context == %ld\n",
421 pmap->pm_tsb_ra, pmap->pm_context);
/* Write the new context number into the MMU context register MMU_CID_S. */
422 stxa(MMU_CID_S, ASI_MMU_CONTEXTID, pmap->pm_context);
428 pmap_addr_hint(vm_object_t object, vm_offset_t va, vm_size_t size)
434 * Bootstrap the system enough to run with virtual memory.
/*
 * Bootstrap the kernel's virtual memory state.
 *
 * Visible phases, in order:
 *  - read and sort the PROM "translations" and install permanent 4MB
 *    mappings for those in the nucleus address window;
 *  - read and sort the /memory "available" list, apply the hw.physmem and
 *    hw.physmemstart tunables, and build real_phys_avail and phys_avail;
 *  - size kernel virtual memory and allocate the kernel hash table, the
 *    8K and 4M TSBs, the MMU fault status areas, the message buffer and
 *    thread0's kernel stack;
 *  - enter PROM and direct mappings into the TSBs, register the TSBs with
 *    the hypervisor, then mirror the mappings into the kernel TTE hash.
 *
 * ekva is rounded up to a 4MB boundary to give the first free kernel
 * virtual address.
 */
437 pmap_bootstrap(vm_offset_t ekva)
441 vm_paddr_t pa, tsb_8k_pa, tsb_4m_pa, kernel_hash_pa, nucleus_memory_start;
442 vm_size_t physsz, virtsz, kernel_hash_shift;
443 ihandle_t pmem, vmem;
445 uint64_t tsb_8k_size, tsb_4m_size, error, physmem_tunable, physmemstart_tunable;
446 vm_paddr_t real_phys_avail[128], tmp_phys_avail[128], bounds;
/* Fetch the PROM's virtual-to-physical translations and sort them by om_start. */
449 if ((vmem = OF_finddevice("/virtual-memory")) == -1)
450 panic("pmap_bootstrap: finddevice /virtual-memory");
451 if ((sz = OF_getproplen(vmem, "translations")) == -1)
452 panic("pmap_bootstrap: getproplen translations");
453 if (sizeof(translations) < sz)
454 panic("pmap_bootstrap: translations too small");
455 bzero(translations, sz);
456 if (OF_getprop(vmem, "translations", translations, sz) == -1)
457 panic("pmap_bootstrap: getprop /virtual-memory/translations");
/* Convert the byte length into an entry count and remember it for later loops. */
458 sz /= sizeof(*translations);
459 translations_size = sz;
460 nucleus_memory_start = 0;
461 CTR0(KTR_PMAP, "pmap_bootstrap: translations");
462 qsort(translations, sz, sizeof (*translations), om_cmp);
/*
 * Translations starting within [KERNBASE, KERNBASE + 3 * 4MB] are
 * installed as permanent 4MB mappings, one hypervisor call per 4MB step,
 * and recorded as nucleus memory.
 */
464 for (i = 0; i < sz; i++) {
465 KDPRINTF("om_size=%ld om_start=%lx om_tte=%lx\n",
466 translations[i].om_size, translations[i].om_start,
467 translations[i].om_tte);
468 if ((translations[i].om_start >= KERNBASE) &&
469 (translations[i].om_start <= KERNBASE + 3*PAGE_SIZE_4M)) {
470 for (j = 0; j < translations[i].om_size; j += PAGE_SIZE_4M) {
471 KDPRINTF("mapping permanent translation\n");
472 pa = TTE_GET_PA(translations[i].om_tte) + j;
473 va = translations[i].om_start + j;
474 error = hv_mmu_map_perm_addr(va, KCONTEXT,
475 pa | TTE_KERNEL | VTD_4M, MAP_ITLB | MAP_DTLB);
477 panic("map_perm_addr returned error=%ld", error);
/* Track the lowest physical address seen among the nucleus mappings. */
479 if ((nucleus_memory_start == 0) || (pa < nucleus_memory_start))
480 nucleus_memory_start = pa;
481 printf("nucleus_mappings[%d] = 0x%lx\n", permanent_mappings, pa);
482 nucleus_mappings[permanent_mappings++] = pa;
483 nucleus_memory += PAGE_SIZE_4M;
485 mp_add_nucleus_mapping(va, pa|TTE_KERNEL|VTD_4M);
492 * Find out what physical memory is available from the prom and
493 * initialize the phys_avail array. This must be done before
494 * pmap_bootstrap_alloc is called.
496 if ((pmem = OF_finddevice("/memory")) == -1)
497 panic("pmap_bootstrap: finddevice /memory");
/* From here on sz describes the /memory "available" property, not the translations. */
498 if ((sz = OF_getproplen(pmem, "available")) == -1)
499 panic("pmap_bootstrap: getproplen /memory/available");
500 if (sizeof(vm_paddr_t)*128 < sz) /* FIXME */
501 panic("pmap_bootstrap: phys_avail too small");
502 if (sizeof(mra) < sz)
503 panic("pmap_bootstrap: mra too small");
505 if (OF_getprop(pmem, "available", mra, sz) == -1)
506 panic("pmap_bootstrap: getprop /memory/available");
509 CTR0(KTR_PMAP, "pmap_bootstrap: physical memory");
511 qsort(mra, sz, sizeof (*mra), mr_cmp);
512 physmemstart_tunable = physmem_tunable = physmem = physsz = 0;
/* Optional loader tunables that restrict the physical memory used. */
514 if (TUNABLE_ULONG_FETCH("hw.physmemstart", &physmemstart_tunable)) {
515 KDPRINTF("desired physmemstart=0x%lx\n", physmemstart_tunable);
517 if (TUNABLE_ULONG_FETCH("hw.physmem", &physmem_tunable)) {
518 physmem = atop(physmem_tunable);
519 KDPRINTF("desired physmem=0x%lx\n", physmem_tunable);
/* With both tunables set, the memory limit is measured from the start offset. */
521 if ((physmem_tunable != 0) && (physmemstart_tunable != 0))
522 physmem_tunable += physmemstart_tunable;
524 bzero(real_phys_avail, sizeof(real_phys_avail));
525 bzero(tmp_phys_avail, sizeof(tmp_phys_avail));
/*
 * Build real_phys_avail as (start, end) pairs from the sorted regions:
 * regions are tested against the 4MB page size, unaligned ones are
 * trimmed to 4MB boundaries, and the running total physsz is capped at
 * physmem_tunable when that tunable is set.
 */
527 for (i = 0, j = 0; i < sz; i++) {
529 KDPRINTF("start=%#lx size=%#lx\n", mra[i].mr_start, mra[i].mr_size);
530 if (mra[i].mr_size < PAGE_SIZE_4M)
533 if ((mra[i].mr_start & PAGE_MASK_4M) || (mra[i].mr_size & PAGE_MASK_4M)) {
534 uint64_t newstart, roundup;
535 newstart = ((mra[i].mr_start + (PAGE_MASK_4M)) & ~PAGE_MASK_4M);
536 roundup = newstart - mra[i].mr_start;
537 size = (mra[i].mr_size - roundup) & ~PAGE_MASK_4M;
538 mra[i].mr_start = newstart;
539 if (size < PAGE_SIZE_4M)
541 mra[i].mr_size = size;
543 real_phys_avail[j] = mra[i].mr_start;
544 if (physmem_tunable != 0 && ((physsz + mra[i].mr_size) >= physmem_tunable)) {
545 mra[i].mr_size = physmem_tunable - physsz;
546 physsz = physmem_tunable;
547 real_phys_avail[j + 1] = mra[i].mr_start + mra[i].mr_size;
550 physsz += mra[i].mr_size;
551 real_phys_avail[j + 1] = mra[i].mr_start + mra[i].mr_size;
554 physmem = btoc(physsz - physmemstart_tunable);
557 * This is needed for versions of OFW that would allocate us memory
558 * and then forget to remove it from the available ranges ...
559 * as well as for compensating for the above move of nucleus pages
/*
 * Derive phys_avail from real_phys_avail: nucleus memory is cut out of
 * whichever range contains it, ranges are compared against
 * physmemstart_tunable, and ranges are split at multiples of 4GB
 * (bounds starts at 1UL << 32).
 */
561 for (i = 0, j = 0, bounds = (1UL<<32); real_phys_avail[i] != 0; i += 2) {
562 vm_paddr_t start = real_phys_avail[i];
563 uint64_t end = real_phys_avail[i + 1];
564 CTR2(KTR_PMAP, "start=%#lx size=%#lx\n", start, end);
565 KDPRINTF("real_phys start=%#lx end=%#lx\n", start, end);
567 * Is kernel memory at the beginning of range?
569 if (nucleus_memory_start == start) {
570 start += nucleus_memory;
573 * Is kernel memory at the end of range?
575 if (nucleus_memory_start == (end - nucleus_memory))
576 end -= nucleus_memory;
578 if (physmemstart_tunable != 0 &&
579 (end < physmemstart_tunable))
582 if (physmemstart_tunable != 0 &&
583 ((start < physmemstart_tunable))) {
584 start = physmemstart_tunable;
588 * Is kernel memory in the middle somewhere?
590 if ((nucleus_memory_start > start) &&
591 (nucleus_memory_start < end)) {
592 phys_avail[j] = start;
593 phys_avail[j+1] = nucleus_memory_start;
594 start = nucleus_memory_start + nucleus_memory;
598 * Break phys_avail up on 4GB boundaries to try
599 * to work around PCI-e allocation bug
600 * we rely on the fact that kernel memory is allocated
601 * from the first 4GB of physical memory
603 while (bounds < start)
606 while (bounds < end) {
607 phys_avail[j] = start;
608 phys_avail[j + 1] = bounds;
613 phys_avail[j] = start;
614 phys_avail[j + 1] = end;
619 * Merge nucleus memory in to real_phys_avail
/*
 * Extend any range that abuts the nucleus so that it covers the nucleus,
 * then coalesce ranges that have become adjacent by shifting the
 * remaining pairs down.
 */
622 for (i = 0; real_phys_avail[i] != 0; i += 2) {
623 if (real_phys_avail[i] == nucleus_memory_start + nucleus_memory)
624 real_phys_avail[i] -= nucleus_memory;
626 if (real_phys_avail[i + 1] == nucleus_memory_start)
627 real_phys_avail[i + 1] += nucleus_memory;
629 if (real_phys_avail[i + 1] == real_phys_avail[i + 2]) {
630 real_phys_avail[i + 1] = real_phys_avail[i + 3];
631 for (k = i + 2; real_phys_avail[k] != 0; k += 2) {
632 real_phys_avail[k] = real_phys_avail[k + 2];
633 real_phys_avail[k + 1] = real_phys_avail[k + 3];
637 for (i = 0; phys_avail[i] != 0; i += 2)
638 if (pmap_debug_range || pmap_debug)
639 printf("phys_avail[%d]=0x%lx phys_avail[%d]=0x%lx\n",
640 i, phys_avail[i], i+1, phys_avail[i+1]);
643 * Shuffle the memory range containing the 256MB page with
644 * nucleus_memory to the beginning of the phys_avail array
645 * so that physical memory from that page is preferentially
648 for (j = 0; phys_avail[j] != 0; j += 2)
649 if (nucleus_memory_start < phys_avail[j])
652 * Don't shuffle unless we have a full 256M page in the range
653 * our kernel malloc appears to be horribly brittle
655 if ((phys_avail[j + 1] - phys_avail[j]) <
656 (PAGE_SIZE_256M - nucleus_memory))
/* Rotate phys_avail so that entries from index j onward come first. */
659 for (i = j, k = 0; phys_avail[i] != 0; k++, i++)
660 tmp_phys_avail[k] = phys_avail[i];
661 for (i = 0; i < j; i++)
662 tmp_phys_avail[k + i] = phys_avail[i];
663 for (i = 0; i < 128; i++)
664 phys_avail[i] = tmp_phys_avail[i];
667 for (i = 0; real_phys_avail[i] != 0; i += 2)
668 if (pmap_debug_range || pmap_debug)
669 printf("real_phys_avail[%d]=0x%lx real_phys_avail[%d]=0x%lx\n",
670 i, real_phys_avail[i], i+1, real_phys_avail[i+1]);
672 for (i = 0; phys_avail[i] != 0; i += 2)
673 if (pmap_debug_range || pmap_debug)
674 printf("phys_avail[%d]=0x%lx phys_avail[%d]=0x%lx\n",
675 i, phys_avail[i], i+1, phys_avail[i+1]);
677 * Calculate the size of kernel virtual memory, and the size and mask
678 * for the kernel tsb.
680 virtsz = roundup(physsz, PAGE_SIZE_4M << (PAGE_SHIFT - TTE_SHIFT));
681 vm_max_kernel_address = VM_MIN_KERNEL_ADDRESS + virtsz;
684 * Set the start and end of kva. The kernel is loaded at the first
685 * available 4 meg super page, so round up to the end of the page.
687 virtual_avail = roundup2(ekva, PAGE_SIZE_4M);
688 virtual_end = vm_max_kernel_address;
689 kernel_vm_end = vm_max_kernel_address;
692 * Allocate and map a 4MB page for the kernel hashtable
/*
 * Two alternative hash sizes follow.
 * NOTE(review): presumably chosen by a preprocessor conditional that is
 * not visible in this excerpt; confirm.
 */
696 kernel_hash_shift = 10; /* PAGE_SIZE_4M*2 */
698 kernel_hash_shift = 6; /* PAGE_SIZE_8K*64 */
701 kernel_hash_pa = pmap_bootstrap_alloc((1<<(kernel_hash_shift + PAGE_SHIFT)));
702 if (kernel_hash_pa & PAGE_MASK_4M)
703 panic("pmap_bootstrap: hashtable pa unaligned\n");
705 * Set up TSB descriptors for the hypervisor
709 tsb_8k_size = virtsz >> (PAGE_SHIFT - TTE_SHIFT);
711 /* avoid alignment complaints from the hypervisor */
712 tsb_8k_size = PAGE_SIZE_4M;
715 tsb_8k_pa = pmap_bootstrap_alloc(tsb_8k_size);
716 if (tsb_8k_pa & PAGE_MASK_4M)
717 panic("pmap_bootstrap: tsb unaligned\n");
718 KDPRINTF("tsb_8k_size is 0x%lx, tsb_8k_pa is 0x%lx\n", tsb_8k_size, tsb_8k_pa);
720 tsb_4m_size = (virtsz >> (PAGE_SHIFT_4M - TTE_SHIFT)) << 3;
721 tsb_4m_pa = pmap_bootstrap_alloc(tsb_4m_size);
/* Hypervisor descriptor for the 8K-page kernel TSB. */
723 kernel_td[TSB8K_INDEX].hti_idxpgsz = TTE8K;
724 kernel_td[TSB8K_INDEX].hti_assoc = 1;
725 kernel_td[TSB8K_INDEX].hti_ntte = (tsb_8k_size >> TTE_SHIFT);
726 kernel_td[TSB8K_INDEX].hti_ctx_index = 0;
727 kernel_td[TSB8K_INDEX].hti_pgszs = TSB8K;
728 kernel_td[TSB8K_INDEX].hti_rsvd = 0;
729 kernel_td[TSB8K_INDEX].hti_ra = tsb_8k_pa;
732 * Initialize kernel's private TSB from 8K page TSB
735 kernel_pmap->pm_tsb.hti_idxpgsz = TTE8K;
736 kernel_pmap->pm_tsb.hti_assoc = 1;
737 kernel_pmap->pm_tsb.hti_ntte = (tsb_8k_size >> TTE_SHIFT);
738 kernel_pmap->pm_tsb.hti_ctx_index = 0;
739 kernel_pmap->pm_tsb.hti_pgszs = TSB8K;
740 kernel_pmap->pm_tsb.hti_rsvd = 0;
741 kernel_pmap->pm_tsb.hti_ra = tsb_8k_pa;
743 kernel_pmap->pm_tsb_ra = vtophys((vm_offset_t)&kernel_pmap->pm_tsb);
744 tsb_set_scratchpad_kernel(&kernel_pmap->pm_tsb);
747 * Initialize kernel TSB for 4M pages
748 * currently (not by design) used for permanent mappings
752 KDPRINTF("tsb_4m_pa is 0x%lx tsb_4m_size is 0x%lx\n", tsb_4m_pa, tsb_4m_size);
753 kernel_td[TSB4M_INDEX].hti_idxpgsz = TTE4M;
754 kernel_td[TSB4M_INDEX].hti_assoc = 1;
755 kernel_td[TSB4M_INDEX].hti_ntte = (tsb_4m_size >> TTE_SHIFT);
756 kernel_td[TSB4M_INDEX].hti_ctx_index = 0;
757 kernel_td[TSB4M_INDEX].hti_pgszs = TSB4M|TSB256M;
758 kernel_td[TSB4M_INDEX].hti_rsvd = 0;
759 kernel_td[TSB4M_INDEX].hti_ra = tsb_4m_pa;
761 * allocate MMU fault status areas for all CPUS
763 mmu_fault_status_area = pmap_bootstrap_alloc(MMFSA_SIZE*MAXCPU);
766 * Allocate and map the message buffer.
768 msgbuf_phys = pmap_bootstrap_alloc(MSGBUF_SIZE);
769 msgbufp = (struct msgbuf *)TLB_PHYS_TO_DIRECT(msgbuf_phys);
772 * Allocate a kernel stack with guard page for thread0 and map it into
775 pa = pmap_bootstrap_alloc(KSTACK_PAGES*PAGE_SIZE);
/* Skip the guard pages in KVA, then map each stack page into the 8K TSB. */
777 virtual_avail += KSTACK_GUARD_PAGES * PAGE_SIZE;
778 kstack0 = virtual_avail;
779 virtual_avail += KSTACK_PAGES * PAGE_SIZE;
780 for (i = 0; i < KSTACK_PAGES; i++) {
781 pa = kstack0_phys + i * PAGE_SIZE;
782 va = kstack0 + i * PAGE_SIZE;
783 tsb_set_tte_real(&kernel_td[TSB8K_INDEX], va, va,
784 pa | TTE_KERNEL | VTD_8K, 0);
787 * Calculate the last available physical address.
/* The loop leaves i at the last populated pair of phys_avail. */
789 for (i = 0; phys_avail[i + 2] != 0; i += 2)
790 KDPRINTF("phys_avail[%d]=0x%lx phys_avail[%d]=0x%lx\n",
791 i, phys_avail[i], i+1, phys_avail[i+1]);
792 KDPRINTF("phys_avail[%d]=0x%lx phys_avail[%d]=0x%lx\n",
793 i, phys_avail[i], i+1, phys_avail[i+1]);
795 Maxmem = sparc64_btop(phys_avail[i + 1]);
798 * Add the prom mappings to the kernel tsb.
/*
 * NOTE(review): sz was last assigned from the /memory "available"
 * property above, whereas the translation count was saved in
 * translations_size and the later hash-insertion loop uses that.
 * Confirm that sz is the intended bound for this loop over translations[].
 */
800 for (i = 0; i < sz; i++) {
802 "translation: start=%#lx size=%#lx tte=%#lx",
803 translations[i].om_start, translations[i].om_size,
804 translations[i].om_tte);
805 KDPRINTF("om_size=%ld om_start=%lx om_tte=%lx\n",
806 translations[i].om_size, translations[i].om_start,
807 translations[i].om_tte);
809 if (translations[i].om_start < VM_MIN_PROM_ADDRESS ||
810 translations[i].om_start > VM_MAX_PROM_ADDRESS)
813 for (off = 0; off < translations[i].om_size;
815 va = translations[i].om_start + off;
816 pa = TTE_GET_PA(translations[i].om_tte) + off;
817 tsb_assert_invalid(&kernel_td[TSB8K_INDEX], va);
818 tsb_set_tte_real(&kernel_td[TSB8K_INDEX], va, va, pa |
819 TTE_KERNEL | VTD_8K, 0);
/* Hand the kernel TSB descriptors to the hypervisor for context 0. */
823 if ((error = hv_mmu_tsb_ctx0(MAX_TSB_INFO,
824 vtophys((vm_offset_t)kernel_td))) != H_EOK)
825 panic("failed to set ctx0 TSBs error: %ld", error);
828 mp_set_tsb_desc_ra(vtophys((vm_offset_t)&kernel_td));
831 * setup direct mappings
/*
 * Enter each real_phys_avail range into the 4M TSB at its direct-map
 * address.  A 256MB page size is selected when use_256M_pages is set, pa
 * is 256MB-aligned and a whole 256MB fits in the range; size_bits
 * otherwise starts out as VTD_4M.
 */
834 for (i = 0, pa = real_phys_avail[i]; pa != 0; i += 2, pa = real_phys_avail[i]) {
835 vm_paddr_t tag_pa = 0, next_pa = 0;
836 uint64_t size_bits = VTD_4M;
837 while (pa < real_phys_avail[i + 1]) {
838 if (use_256M_pages &&
839 (pa & PAGE_MASK_256M) == 0 &&
840 ((pa + PAGE_SIZE_256M) <= real_phys_avail[i + 1])) {
842 size_bits = VTD_256M;
843 next_pa = pa + PAGE_SIZE_256M;
844 } else if (next_pa <= pa) {
848 tsb_assert_invalid(&kernel_td[TSB4M_INDEX], TLB_PHYS_TO_DIRECT(pa));
849 tsb_set_tte_real(&kernel_td[TSB4M_INDEX], TLB_PHYS_TO_DIRECT(pa),
850 TLB_PHYS_TO_DIRECT(pa),
851 tag_pa | TTE_KERNEL | size_bits, 0);
857 * Get the available physical memory ranges from /memory/reg. These
858 * are only used for kernel dumps, but it may not be wise to do prom
859 * calls in that situation.
861 if ((sz = OF_getproplen(pmem, "reg")) == -1)
862 panic("pmap_bootstrap: getproplen /memory/reg");
863 if (sizeof(sparc64_memreg) < sz)
864 panic("pmap_bootstrap: sparc64_memreg too small");
865 if (OF_getprop(pmem, "reg", sparc64_memreg, sz) == -1)
866 panic("pmap_bootstrap: getprop /memory/reg");
867 sparc64_nmemreg = sz / sizeof(*sparc64_memreg);
/* Kernel pmap initialisation; the assignment of pm is not visible in this excerpt. */
871 pm->pm_tlbactive = ~0;
873 PMAP_LOCK_INIT(kernel_pmap);
875 TAILQ_INIT(&kernel_pmap->pm_pvlist);
878 * This could happen earlier - but I put it here to avoid
879 * attempts to do updates until they're legal
881 pm->pm_hash = tte_hash_kernel_create(TLB_PHYS_TO_DIRECT(kernel_hash_pa), kernel_hash_shift,
882 pmap_bootstrap_alloc(PAGE_SIZE));
883 pm->pm_hashscratch = tte_hash_set_scratchpad_kernel(pm->pm_hash);
/* Mirror the PROM translations into the kernel TTE hash, one 8K page at a time. */
885 for (i = 0; i < translations_size; i++) {
886 KDPRINTF("om_size=%ld om_start=%lx om_tte=%lx\n",
887 translations[i].om_size, translations[i].om_start,
888 translations[i].om_tte);
890 if (translations[i].om_start < VM_MIN_PROM_ADDRESS ||
891 translations[i].om_start > VM_MAX_PROM_ADDRESS) {
892 KDPRINTF("skipping\n");
895 for (off = 0; off < translations[i].om_size; off += PAGE_SIZE) {
896 va = translations[i].om_start + off;
897 pa = TTE_GET_PA(translations[i].om_tte) + off;
898 tte_hash_insert(pm->pm_hash, va, pa | TTE_KERNEL | VTD_8K);
900 KDPRINTF("set om_size=%ld om_start=%lx om_tte=%lx\n",
901 translations[i].om_size, translations[i].om_start,
902 translations[i].om_tte);
/* Mirror thread0's kernel stack mappings into the hash as well. */
904 for (i = 0; i < KSTACK_PAGES; i++) {
905 pa = kstack0_phys + i * PAGE_SIZE;
906 va = kstack0 + i * PAGE_SIZE;
907 tte_hash_insert(pm->pm_hash, va, pa | TTE_KERNEL | VTD_8K);
910 * Add direct mappings to hash
914 /* hash only supports 8k pages */
/*
 * NOTE(review): the entries inserted here carry VTD_4M although the
 * comment above says the hash only supports 8k pages; the conditional
 * that guards this loop is not visible in this excerpt.
 */
915 for (pa = PAGE_SIZE_4M; pa < phys_avail[2]; pa += PAGE_SIZE_4M)
916 tte_hash_insert(pm->pm_hash, TLB_PHYS_TO_DIRECT(pa),
917 pa | TTE_KERNEL | VTD_4M);
922 printf("pmap_bootstrap done\n");
928 * Routine: pmap_change_wiring
929 * Function: Change the wiring attribute for a map/virtual-address
932 * The mapping must already exist in the pmap.
/*
 * Set or clear the wired attribute of the mapping for va in pmap.
 * The pmap's wired_count and the VTD_WIRED bit in the TTE are updated
 * only when the requested state differs from the current one.
 */
935 pmap_change_wiring(pmap_t pmap, vm_offset_t va, boolean_t wired)
/* Read the current wired state from the TTE. */
939 iswired = tte_get_virt_bit(pmap, va, VTD_WIRED);
941 if (wired && !iswired) {
942 pmap->pm_stats.wired_count++;
943 tte_set_virt_bit(pmap, va, VTD_WIRED);
944 } else if (!wired && iswired) {
945 pmap->pm_stats.wired_count--;
946 tte_clear_virt_bit(pmap, va, VTD_WIRED);
/*
 * Clear the modified state of page m by clearing VTD_W through
 * tte_clear_phys_bit().
 */
952 pmap_clear_modify(vm_page_t m)
954 KDPRINTF("pmap_clear_modify(0x%lx)\n", VM_PAGE_TO_PHYS(m));
955 tte_clear_phys_bit(m, VTD_W);
/*
 * Clear the referenced state of page m by clearing VTD_REF through
 * tte_clear_phys_bit().
 */
959 pmap_clear_reference(vm_page_t m)
961 KDPRINTF("pmap_clear_reference(0x%lx)\n", VM_PAGE_TO_PHYS(m));
962 tte_clear_phys_bit(m, VTD_REF);
/*
 * Copy managed mappings in [src_addr, src_addr + len) from src_pmap into
 * dst_pmap, one page at a time.  Destination entries are inserted at the
 * same virtual address as the source, with the modified, referenced and
 * wired bits stripped.
 */
966 pmap_copy(pmap_t dst_pmap, pmap_t src_pmap, vm_offset_t dst_addr,
967 vm_size_t len, vm_offset_t src_addr)
969 vm_offset_t addr, end_addr;
971 end_addr = src_addr + len;
973 * Don't let optional prefaulting of pages make us go
974 * way below the low water mark of free pages or way
975 * above high water mark of used pv entries.
977 if (cnt.v_free_count < cnt.v_free_reserved ||
978 pv_entry_count > pv_entry_high_water)
982 vm_page_lock_queues();
/*
 * NOTE(review): the two pmaps are compared by address here, presumably to
 * fix a lock acquisition order; the lock calls themselves are not visible
 * in this excerpt.
 */
983 if (dst_pmap < src_pmap) {
990 for (addr = src_addr; addr < end_addr; addr += PAGE_SIZE) {
994 tte_data = tte_hash_lookup(src_pmap->pm_hash, addr);
/* Only managed source mappings are copied, and only if dst has no entry yet. */
996 if ((tte_data & VTD_MANAGED) != 0) {
997 if (tte_hash_lookup(dst_pmap->pm_hash, addr) == 0) {
998 m = PHYS_TO_VM_PAGE(TTE_GET_PA(tte_data));
1000 tte_hash_insert(dst_pmap->pm_hash, addr, tte_data & ~(VTD_W|VTD_REF|VTD_WIRED));
1001 dst_pmap->pm_stats.resident_count++;
1002 pmap_insert_entry(dst_pmap, addr, m);
1006 vm_page_unlock_queues();
1007 PMAP_UNLOCK(src_pmap);
1008 PMAP_UNLOCK(dst_pmap);
/*
 * Copy the contents of physical page src to physical page dst.  Both
 * pages are addressed through the direct map and PAGE_SIZE bytes are
 * copied with novbcopy().
 */
1012 pmap_copy_page(vm_page_t src, vm_page_t dst)
1014 vm_paddr_t srcpa, dstpa;
1015 srcpa = VM_PAGE_TO_PHYS(src);
1016 dstpa = VM_PAGE_TO_PHYS(dst);
/* Argument order is (source, destination, length). */
1018 novbcopy((char *)TLB_PHYS_TO_DIRECT(srcpa), (char *)TLB_PHYS_TO_DIRECT(dstpa), PAGE_SIZE);
/*
 * Accounting helper for a newly established mapping of page m at va.
 * It bumps the pmap's wired count and, for pages that are neither
 * fictitious nor unmanaged, adds a pv entry and sets VTD_MANAGED in
 * *tte_data.
 */
1023 static __inline void
1024 pmap_add_tte(pmap_t pmap, vm_offset_t va, vm_page_t m, tte_t *tte_data, int wired)
/* NOTE(review): presumably guarded by a test of wired that is not visible here. */
1028 pmap->pm_stats.wired_count++;
1030 if ((m->flags & (PG_FICTITIOUS | PG_UNMANAGED)) == 0) {
1031 pmap_insert_entry(pmap, va, m);
1032 *tte_data |= VTD_MANAGED;
1037 * Map the given physical page at the specified virtual address in the
1038 * target pmap with the protection requested. If specified the page
1039 * will be wired down.
/*
 * Enter a mapping of page m at va in pmap with protection prot.
 *
 * Any existing TTE for va is first removed from the hash.  Three cases
 * are handled: a new mapping, a mapping whose physical page changed, and
 * a protection or wiring change on the same page.  The new TTE is then
 * inserted into the pmap's hash and TSB with TTE_MINFLAGS and VTD_REF set.
 * The page queues lock is taken and released inside this function.
 */
1042 pmap_enter(pmap_t pmap, vm_offset_t va, vm_page_t m, vm_prot_t prot,
1046 uint64_t tte_data, otte_data;
1050 if (pmap->pm_context)
1051 DPRINTF("pmap_enter(va=%lx, pa=0x%lx, prot=%x)\n", va,
1052 VM_PAGE_TO_PHYS(m), prot);
1056 vm_page_lock_queues();
/* Start the new TTE from the page's physical address; fetch and remove the old one. */
1059 tte_data = pa = VM_PAGE_TO_PHYS(m);
1060 otte_data = tte_hash_delete(pmap->pm_hash, va);
1061 opa = TTE_GET_PA(otte_data);
1065 * This is a new mapping
1067 pmap->pm_stats.resident_count++;
1068 pmap_add_tte(pmap, va, m, &tte_data, wired);
1070 } else if (pa != opa) {
1072 * Mapping has changed, handle validating new mapping.
1075 if (otte_data & VTD_WIRED)
1076 pmap->pm_stats.wired_count--;
/* Drop the pv entry of the page that was previously mapped here. */
1078 if (otte_data & VTD_MANAGED) {
1079 om = PHYS_TO_VM_PAGE(opa);
1080 pmap_remove_entry(pmap, om, va);
1083 pmap_add_tte(pmap, va, m, &tte_data, wired);
1085 } else /* (pa == opa) */ {
1087 * Mapping has not changed, must be protection or wiring change.
1091 * Wiring change, just update stats. We don't worry about
1092 * wiring PT pages as they remain resident as long as there
1093 * are valid mappings in them. Hence, if a user page is wired,
1094 * the PT page will be also.
1096 if (wired && ((otte_data & VTD_WIRED) == 0))
1097 pmap->pm_stats.wired_count++;
1098 else if (!wired && (otte_data & VTD_WIRED))
1099 pmap->pm_stats.wired_count--;
1102 * We might be turning off write access to the page,
1103 * so we go ahead and sense modify status.
1105 if (otte_data & VTD_MANAGED) {
1107 tte_data |= VTD_MANAGED;
1112 * Now validate mapping with desired protection/wiring.
/* Write permission is recorded as the software-writable bit VTD_SW_W. */
1114 if ((prot & VM_PROT_WRITE) != 0) {
1115 tte_data |= VTD_SW_W;
1116 vm_page_flag_set(m, PG_WRITEABLE);
1118 if ((prot & VM_PROT_EXECUTE) != 0)
1121 tte_data |= VTD_WIRED;
1122 if (pmap == kernel_pmap)
/*
 * If the TTE changed (ignoring the old modified/referenced bits) and the
 * old one was valid, propagate its referenced/modified state and
 * invalidate the stale translation.
 */
1126 if ((otte_data & ~(VTD_W|VTD_REF)) != tte_data) {
1127 if (otte_data & VTD_V) {
1128 if (otte_data & VTD_REF) {
1129 if (otte_data & VTD_MANAGED)
1130 vm_page_flag_set(om, PG_REFERENCED);
/*
 * NOTE(review): VTD_X is tested on opa and pa, which hold physical
 * addresses (from TTE_GET_PA and VM_PAGE_TO_PHYS), not TTE flag words.
 * Confirm whether otte_data and tte_data were intended.
 */
1131 if ((opa != pa) || ((opa & VTD_X) != (pa & VTD_X)))
1134 if (otte_data & VTD_W) {
1135 if (otte_data & VTD_MANAGED)
/* NOTE(review): same concern: VTD_SW_W is tested on pa rather than on tte_data. */
1137 if ((pa & VTD_SW_W) != 0)
1141 pmap_invalidate_page(pmap, va, TRUE);
1146 tte_hash_insert(pmap->pm_hash, va, tte_data|TTE_MINFLAGS|VTD_REF);
1148 * XXX this needs to be locked for the threaded / kernel case
1150 tsb_set_tte(&pmap->pm_tsb, va, tte_data|TTE_MINFLAGS|VTD_REF,
1153 if (tte_hash_needs_resize(pmap->pm_hash))
1154 pmap_tte_hash_resize(pmap);
1157 * 512 is an arbitrary number of tsb misses
/* The leading "0 &&" makes this condition always false, so the TSB is never resized here. */
1159 if (0 && pmap->pm_context != 0 && pmap->pm_tsb_miss_count > 512)
1160 pmap_tsb_resize(pmap);
1162 vm_page_unlock_queues();
1168 * Maps a sequence of resident pages belonging to the same object.
1169 * The sequence begins with the given page m_start. This page is
1170 * mapped at the given virtual address start. Each subsequent page is
1171 * mapped at a virtual address that is offset from start by the same
1172 * amount as the page is offset from m_start within the object. The
1173 * last page in the sequence is the page with the largest offset from
1174 * m_start that can be mapped at a virtual address less than the given
1175 * virtual address end. Not every virtual page between start and end
1176 * is mapped; only those for which a resident page exists with the
1177 * corresponding offset from m_start are mapped.
/*
 * Map a run of resident pages from one object, starting with m_start at
 * virtual address start.  Each page is entered with
 * pmap_enter_quick_locked() at start plus its page-index offset from
 * m_start, for as long as that offset is below the number of pages in
 * [start, end).  The object lock must be held by the caller.
 */
1180 pmap_enter_object(pmap_t pmap, vm_offset_t start, vm_offset_t end,
1181 vm_page_t m_start, vm_prot_t prot)
1184 vm_pindex_t diff, psize;
1186 VM_OBJECT_LOCK_ASSERT(m_start->object, MA_OWNED);
/* Number of pages covered by the virtual range. */
1187 psize = atop(end - start);
/* Follow the listq links until the end of the list or the page offset leaves the range. */
1190 while (m != NULL && (diff = m->pindex - m_start->pindex) < psize) {
1191 pmap_enter_quick_locked(pmap, start + ptoa(diff), m, prot);
1192 m = TAILQ_NEXT(m, listq);
/*
 * Wrapper that enters a single page mapping via pmap_enter_quick_locked().
 * NOTE(review): the lock and unlock calls around it are not visible in
 * this excerpt; the callee asserts that the pmap lock is held.
 */
1198 pmap_enter_quick(pmap_t pmap, vm_offset_t va, vm_page_t m, vm_prot_t prot)
1201 pmap_enter_quick_locked(pmap, va, m, prot);
/*
 * Enter a mapping of page m at va with the pmap lock already held
 * (asserted below).  The TTE is built from the page's physical address,
 * managed pages get a pv entry and VTD_MANAGED, the resident count is
 * incremented, and the TTE is inserted into the hash with TTE_MINFLAGS.
 */
1206 pmap_enter_quick_locked(pmap_t pmap, vm_offset_t va, vm_page_t m, vm_prot_t prot)
/* Debug trace for non-zero (user) contexts only. */
1211 if (pmap->pm_context)
1212 KDPRINTF("pmap_enter_quick(ctx=0x%lx va=%lx, pa=0x%lx prot=%x)\n",
1213 pmap->pm_context, va, VM_PAGE_TO_PHYS(m), prot);
1215 PMAP_LOCK_ASSERT(pmap, MA_OWNED);
/* An existing mapping for va is detected here; the action taken is not visible in this excerpt. */
1216 if (tte_hash_lookup(pmap->pm_hash, va))
1219 tte_data = VM_PAGE_TO_PHYS(m);
1221 * Enter on the PV list if part of our managed memory. Note that we
1222 * raise IPL while manipulating pv_table since pmap_enter can be
1223 * called at interrupt time.
1225 if ((m->flags & (PG_FICTITIOUS|PG_UNMANAGED)) == 0) {
1226 pmap_insert_entry(pmap, va, m);
1227 tte_data |= VTD_MANAGED;
1230 pmap->pm_stats.resident_count++;
1232 if ((prot & VM_PROT_EXECUTE) != 0)
1235 tte_hash_insert(pmap->pm_hash, va, tte_data | TTE_MINFLAGS);
1239 * Extract the physical page address associated with the given
1240 * map/virtual_address pair.
/*
 * Translate va through the pmap's TTE hash: the physical address is the
 * TTE's page address combined with the offset of va within that page,
 * where the page mask is derived from the TTE itself.
 * NOTE(review): pa is presumably the return value -- confirm.
 */
1243 pmap_extract(pmap_t pmap, vm_offset_t va)
1248 tte_data = tte_hash_lookup(pmap->pm_hash, va);
1249 pa = TTE_GET_PA(tte_data) | (va & TTE_GET_PAGE_MASK(tte_data));
1255 * Atomically extract and hold the physical page with the given
1256 * pmap and virtual address pair if that mapping permits the given
/*
 * Look up the page mapped at va, with the page queues locked.  The page is
 * resolved only when a TTE exists and either the TTE carries VTD_SW_W or
 * the caller did not ask for VM_PROT_WRITE.
 * NOTE(review): the hold on the page is presumably taken right after m is
 * resolved -- confirm.
 */
1260 pmap_extract_and_hold(pmap_t pmap, vm_offset_t va, vm_prot_t prot)
1266 vm_page_lock_queues();
1268 tte_data = tte_hash_lookup(pmap->pm_hash, va);
/* A zero TTE means no mapping; write requests need the software-write bit. */
1269 if (tte_data != 0 &&
1270 ((tte_data & VTD_SW_W) || (prot & VM_PROT_WRITE) == 0)) {
1271 m = PHYS_TO_VM_PAGE(TTE_GET_PA(tte_data));
1274 vm_page_unlock_queues();
/*
 * Allocate npages physically contiguous pages with the given alignment and
 * compute a direct-mapped pointer to them.
 */
1281 pmap_alloc_zeroed_contig_pages(int npages, uint64_t alignment)
/* Try each [phys_avail[i], phys_avail[i + 1]) range; a zero end terminates the table. */
1289 for (i = 0; phys_avail[i + 1] != 0; i += 2) {
1290 m = vm_phys_alloc_contig(npages, phys_avail[i],
1291 phys_avail[i + 1], alignment, (1UL<<34));
/* NOTE(review): the message implies the allocation is retried -- confirm. */
1296 printf("vm_phys_alloc_contig failed - waiting to retry\n");
/*
 * Visit every allocated page.
 * NOTE(review): pages lacking PG_ZERO are presumably zeroed here -- confirm.
 */
1301 for (i = 0, tm = m; i < npages; i++, tm++) {
1303 if ((tm->flags & PG_ZERO) == 0)
/* Direct-mapped address of the first page of the run. */
1306 ptr = (void *)TLB_PHYS_TO_DIRECT(VM_PAGE_TO_PHYS(m));
/*
 * Release npages pages whose direct-mapped address is ptr.  The visible
 * code converts ptr back to its vm_page and, for each page, atomically
 * decrements the global wired-page counter.
 * NOTE(review): the per-page free call is presumably also in this loop --
 * confirm.
 */
1312 pmap_free_contig_pages(void *ptr, int npages)
1317 m = PHYS_TO_VM_PAGE(TLB_DIRECT_TO_PHYS((vm_offset_t)ptr));
/* The vm_page structures of the run are walked as consecutive array elements. */
1318 for (i = 0; i < npages; i++, m++) {
1320 atomic_subtract_int(&cnt.v_wire_count, 1);
/*
 * Kernel address space growth hook taking the requested address.
 * NOTE(review): no body is visible for this function; confirm what, if
 * anything, it does on this platform.
 */
1326 pmap_growkernel(vm_offset_t addr)
/*
 * NOTE(review): the function header for these statements is not visible;
 * this is presumably the pmap module initialisation routine -- confirm.
 * The statements fill the context-number stack, initialise the context
 * spin lock, and create and size the pv entry zone.
 */
1335 /* allocate pv_entry zones */
1336 int shpgperproc = PMAP_SHPGPERPROC;
/* Slot n of the stack holds context number n, for 1 <= n < PMAP_CONTEXT_MAX. */
1338 for (ctx_stack_top = 1; ctx_stack_top < PMAP_CONTEXT_MAX; ctx_stack_top++)
1339 ctx_stack[ctx_stack_top] = ctx_stack_top;
1341 mtx_init(&pmap_ctx_lock, "ctx lock", NULL, MTX_SPIN);
1344 * Initialize the address space (zone) for the pv entries. Set a
1345 * high water mark so that the system can recover from excessive
1346 * numbers of pv entries.
1348 pvzone = uma_zcreate("PV ENTRY", sizeof(struct pv_entry), NULL, NULL,
1349 NULL, NULL, UMA_ALIGN_PTR, UMA_ZONE_VM | UMA_ZONE_NOFREE);
/* Both limits can be overridden by tunables; the default is computed first. */
1350 TUNABLE_INT_FETCH("vm.pmap.shpgperproc", &shpgperproc);
1351 pv_entry_max = shpgperproc * maxproc + cnt.v_page_count;
1352 TUNABLE_INT_FETCH("vm.pmap.pv_entries", &pv_entry_max);
/* High-water mark is 90% of the maximum (integer arithmetic). */
1353 pv_entry_high_water = 9 * (pv_entry_max / 10);
1354 uma_zone_set_obj(pvzone, &pvzone_obj, pv_entry_max);
1361 * Create a pv entry for page at pa for
/*
 * Record a new pv entry for the mapping of page m at va in pmap.  The
 * entry is obtained from get_pv_entry() and appended to both the pmap's
 * pv list and the page's pv list.  The pmap lock and the page queue mutex
 * must be held (asserted below).
 */
1365 pmap_insert_entry(pmap_t pmap, vm_offset_t va, vm_page_t m)
1369 KDPRINTF("pmap_insert_entry(va=0x%lx, pa=0x%lx)\n", va, VM_PAGE_TO_PHYS(m));
1370 pv = get_pv_entry(pmap);
1374 PMAP_LOCK_ASSERT(pmap, MA_OWNED);
1375 mtx_assert(&vm_page_queue_mtx, MA_OWNED);
1376 TAILQ_INSERT_TAIL(&pmap->pm_pvlist, pv, pv_plist);
1377 TAILQ_INSERT_TAIL(&m->md.pv_list, pv, pv_list);
/* Keep the per-page mapping count in step with the list. */
1378 m->md.pv_list_count++;
/* Set once the trap trace of an unresponsive CPU has been reported. */
1382 static int trap_trace_report_done;
/*
 * Send func, with arg1 and arg2, as an IPI to the other CPUs that may hold
 * TLB entries for pmap, and wait for every target to acknowledge.  Returns
 * the mask of all CPUs that were targeted.  Must be entered at pil 14
 * (checked below, panics otherwise).
 */
1387 pmap_ipi(pmap_t pmap, char *func, uint64_t arg1, uint64_t arg2)
1390 int i, cpu_count, retried;
1392 cpumask_t cpumask, active, curactive;
1393 cpumask_t active_total, ackmask;
1401 cpumask = PCPU_GET(cpumask);
1402 cpulist = PCPU_GET(cpulist);
1405 if (rdpr(pil) != 14)
1406 panic("pil %ld != 14", rdpr(pil));
1408 #ifndef CPUMASK_NOT_BEING_ERRONEOUSLY_CHANGED
1409 /* by definition cpumask should have curcpu's bit set */
/* NOTE(review): 1 << curcpu is an int shift; verify it is safe for the highest CPU id. */
1410 if (cpumask != (1 << curcpu))
1411 panic("cpumask(0x%x) != (1 << curcpu) (0x%x)\n",
1412 cpumask, (1 << curcpu));
/* NOTE(review): presumably nothing is sent when no other CPU is tlbactive -- confirm. */
1416 if ((active_total = (pmap->pm_tlbactive & ~cpumask)) == 0)
/*
 * A non-zero context targets only the CPUs in pm_tlbactive, excluding
 * this one.
 * NOTE(review): the other_cpus assignment below is presumably the
 * alternative (else) target set -- confirm.
 */
1419 if (pmap->pm_context != 0)
1420 active_total = active = (pmap->pm_tlbactive & ~cpumask);
1423 active_total = active = PCPU_GET(other_cpus);
/*
 * Expand the active mask into a list of CPU ids for cpu_ipi_selected(),
 * rebuilding the mask actually targeted in curactive.
 */
1430 for (i = curactive = cpu_count = 0, cpus = active; i < mp_ncpus && cpus; i++, cpus = (cpus>>1)) {
1431 if ((cpus & 0x1) == 0)
1434 curactive |= (1 << i);
1435 cpulist[cpu_count] = (uint16_t)i;
1440 cpu_ipi_selected(cpu_count, cpulist, (uint64_t)func, (uint64_t)arg1,
1441 (uint64_t)arg2, (uint64_t *)&ackmask);
/* Wait until every targeted CPU has set its bit in ackmask. */
1443 while (ackmask != curactive) {
/* Diagnostics for CPUs that were targeted but have not acknowledged. */
1451 printf("cpu with cpumask=0x%x appears to not be responding to ipis\n",
1452 curactive & ~ackmask);
/* Dump pcpu padding and the trap trace only the first time this happens. */
1455 if (!trap_trace_report_done) {
1456 trap_trace_report_done = 1;
1457 for (j = 0; j < MAXCPU; j++)
1458 if (((1 << j) & curactive & ~ackmask) != 0) {
1459 struct pcpu *pc = pcpu_find(j);
1460 printf("pcpu pad 0x%jx 0x%jx 0x%jx 0x%jx 0x%jx 0x%jx 0x%jx\n",
1461 pc->pad[0], pc->pad[1], pc->pad[2], pc->pad[3],
1462 pc->pad[4], pc->pad[5], pc->pad[6]);
1463 trap_trace_report(j);
1468 hv_cpu_state((uint64_t)ffs64(curactive & ~ackmask), &cpu_state);
1469 printf("cpu_state of %ld is %ld\n", ffs64(curactive & ~ackmask), cpu_state);
1471 printf("I'm going to send off another ipi just to confirm that it isn't a memory barrier bug\n"
1472 "and then I'm going to panic\n");
1478 panic(" ackmask=0x%x active=0x%x\n", ackmask, curactive);
/*
 * Accumulate the CPUs handled so far, then look for CPUs that became
 * tlbactive in the meantime and have not been targeted yet.
 */
1482 active_total |= curactive;
1483 if ((active = ((pmap->pm_tlbactive & all_cpus) & ~(active_total|cpumask))) != 0) {
1484 printf("pmap_ipi: retrying");
1488 return (active_total);
/*
 * Invalidate the translation for va in pmap: on this CPU with invlpg() and
 * on other CPUs through a tl_invlpg IPI.  When cleartsb is TRUE the TSB
 * entry for va is cleared first.
 */
1493 pmap_invalidate_page(pmap_t pmap, vm_offset_t va, int cleartsb)
1496 if (cleartsb == TRUE)
1497 tsb_clear_tte(&pmap->pm_tsb, va);
1499 DPRINTF("pmap_invalidate_page(va=0x%lx)\n", va);
1501 invlpg(va, pmap->pm_context);
/* Remote CPUs receive va and the context as the two IPI arguments. */
1503 pmap_ipi(pmap, (void *)tl_invlpg, (uint64_t)va, (uint64_t)pmap->pm_context);
/*
 * Invalidate the translations for the virtual range [sva, eva) of pmap on
 * this CPU and, through pmap_ipi(), on other CPUs.  When cleartsb is TRUE
 * the matching TSB entries are cleared as well.  sva must be below eva
 * (asserted below).
 */
1509 pmap_invalidate_range(pmap_t pmap, vm_offset_t sva, vm_offset_t eva, int cleartsb)
1511 vm_offset_t tva, invlrngva;
/* A range of exactly one page is handled by pmap_invalidate_page(). */
1516 if ((eva - sva) == PAGE_SIZE) {
1517 pmap_invalidate_page(pmap, sva, cleartsb);
1522 KASSERT(sva < eva, ("invalidating negative or zero range sva=0x%lx eva=0x%lx", sva, eva));
1524 if (cleartsb == TRUE)
1525 tsb_clear_range(&pmap->pm_tsb, sva, eva);
/*
 * Ranges shorter than 64 pages are flushed one page at a time; larger
 * ranges flush the whole context when the pmap has a non-zero one.  The
 * length has to be eva - sva: the operands are unsigned and sva < eva, so
 * the previous sva - eva wrapped to a huge value and this branch could
 * never be taken.
 */
1528 if ((eva - sva) < PAGE_SIZE*64) {
1529 for (tva = sva; tva < eva; tva += PAGE_SIZE_8K)
1530 invlpg(tva, pmap->pm_context);
1532 } else if (pmap->pm_context) {
1534 invlctx(pmap->pm_context);
/*
 * The IPI argument is sva OR'ed with the page count of the range.
 * NOTE(review): assumes sva is page aligned so the low bits are free --
 * confirm.
 */
1541 invlrngva = sva | ((eva - sva) >> PAGE_SHIFT);
1542 active = pmap_ipi(pmap, (void *)func, pmap->pm_context, invlrngva);
/*
 * CPUs that were flushed but are not currently running this pmap no
 * longer need to be tracked in pm_tlbactive.
 */
1543 active &= ~pmap->pm_active;
1544 atomic_clear_int(&pmap->pm_tlbactive, active);
/*
 * Invalidate every translation of a user pmap: clear its TSB, flush its
 * context on this CPU and send a tl_invlctx IPI to the others.  Must not
 * be called on kernel_pmap (asserted below).
 */
1550 pmap_invalidate_all(pmap_t pmap)
1553 KASSERT(pmap != kernel_pmap, ("invalidate_all called on kernel_pmap"));
1555 tsb_clear(&pmap->pm_tsb);
1558 invlctx(pmap->pm_context);
1560 pmap_ipi(pmap, tl_invlctx, pmap->pm_context, 0);
/* After a full flush only the CPUs running the pmap can hold entries. */
1561 pmap->pm_tlbactive = pmap->pm_active;
/*
 * Report whether page m is modified, as given by tte_get_phys_bit() for
 * the VTD_W bit.
 */
1567 pmap_is_modified(vm_page_t m)
1570 return (tte_get_phys_bit(m, VTD_W));
/*
 * A virtual address is prefaultable when the pmap's TTE hash has no entry
 * for it (the lookup returns 0).
 */
1575 pmap_is_prefaultable(pmap_t pmap, vm_offset_t va)
1577 return (tte_hash_lookup(pmap->pm_hash, va) == 0);
1581 * Extract the physical page address associated with the given kernel virtual
/*
 * Translate a kernel virtual address to a physical address.  Three sources
 * are tried in order, each only while pa is still 0: the nucleus mapping
 * table, the TSB, and the kernel pmap's TTE hash.
 * NOTE(review): pa is presumably initialised to 0 before these lookups and
 * returned afterwards -- confirm.
 */
1586 pmap_kextract(vm_offset_t va)
/*
 * Nucleus addresses are resolved through the per-4MB mapping table.  The
 * range is [KERNBASE, KERNBASE + nucleus_memory): KERNBASE itself is
 * offset 0 of the first nucleus mapping, so the lower bound is inclusive.
 */
1592 if (va >= KERNBASE && va < KERNBASE + nucleus_memory) {
1594 offset = va - KERNBASE;
1595 pa = nucleus_mappings[offset >> 22] | (va & PAGE_MASK_4M);
/* Fall back to the TSB, then to the kernel pmap's hash. */
1597 if ((pa == 0) && (tte_data = tsb_lookup_tte(va, 0)) != 0)
1598 pa = TTE_GET_PA(tte_data) | (va & TTE_GET_PAGE_MASK(tte_data));
1600 if ((pa == 0) && (tte_data = tte_hash_lookup(kernel_pmap->pm_hash, va)) != 0)
1601 pa = TTE_GET_PA(tte_data) | (va & TTE_GET_PAGE_MASK(tte_data));
1607 * Map a range of physical addresses into kernel virtual address space.
1609 * The value passed in *virt is a suggested virtual address for the mapping.
1610 * Architectures which can support a direct-mapped physical to virtual region
1611 * can return the appropriate address within that region, leaving '*virt'
/*
 * Return the direct-mapped virtual address of the physical address start.
 * virt, end and prot are not used by the visible code.
 */
1615 pmap_map(vm_offset_t *virt, vm_paddr_t start, vm_paddr_t end, int prot)
1617 return TLB_PHYS_TO_DIRECT(start);
/*
 * mincore support hook for the given pmap and address.
 * NOTE(review): no body is visible for this function; confirm what it
 * returns on this platform.
 */
1621 pmap_mincore(pmap_t pmap, vm_offset_t addr)
/*
 * Object pre-mapping hook.  The visible code only prints the function
 * name; no mappings are created here.
 */
1627 pmap_object_init_pt(pmap_t pmap, vm_offset_t addr, vm_object_t object,
1628 vm_pindex_t index, vm_size_t size)
1630 printf("pmap_object_init_pt\n");
1635 * Returns true if the pmap's pv is one of the first
1636 * 16 pvs linked to from this page. This count may
1637 * be changed upwards or downwards in the future; it
1638 * is only necessary that true be returned for a small
1639 * subset of pmaps for proper page aging.
/*
 * Search the pv list of page m for an entry that belongs to pmap.  The
 * page queue mutex must be held (asserted below).
 * NOTE(review): fictitious pages presumably yield FALSE at once, and the
 * scan is presumably bounded as the preceding comment describes -- confirm.
 */
1642 pmap_page_exists_quick(pmap_t pmap, vm_page_t m)
1647 if (m->flags & PG_FICTITIOUS)
1650 mtx_assert(&vm_page_queue_mtx, MA_OWNED);
1651 TAILQ_FOREACH(pv, &m->md.pv_list, pv_list) {
1652 if (pv->pv_pmap == pmap) {
1663 * Initialize a vm_page's machine-dependent fields.
/*
 * Set up the machine-dependent part of a vm_page: an empty pv list and a
 * mapping count of zero.
 */
1666 pmap_page_init(vm_page_t m)
1669 TAILQ_INIT(&m->md.pv_list);
1670 m->md.pv_list_count = 0;
1673 * Lower the permission for all mappings to a given page.
/*
 * Revoke write access to page m in all of its mappings by clearing both
 * the software-write and the write bits, then drop PG_WRITEABLE from the
 * page.  The page queue mutex must be held (asserted below).
 */
1676 pmap_remove_write(vm_page_t m)
/* NOTE(review): presumably returns early when the page is not writeable -- confirm. */
1678 if ((m->flags & PG_WRITEABLE) == 0)
1680 mtx_assert(&vm_page_queue_mtx, MA_OWNED);
1681 tte_clear_phys_bit(m, VTD_SW_W|VTD_W);
1682 vm_page_flag_clear(m, PG_WRITEABLE);
1685 * Initialize the pmap associated with process 0.
/*
 * Set up the pmap of process 0.  It uses context 0 and shares the kernel
 * pmap's TSB real address and TTE hash rather than getting its own.
 */
1688 pmap_pinit0(pmap_t pmap)
1690 PMAP_LOCK_INIT(pmap);
/* Marked active, and possibly TLB-resident, on every CPU. */
1691 pmap->pm_active = pmap->pm_tlbactive = ~0;
1692 pmap->pm_context = 0;
1693 pmap->pm_tsb_ra = kernel_pmap->pm_tsb_ra;
1694 pmap->pm_hash = kernel_pmap->pm_hash;
/* Becomes the current pmap of the executing CPU. */
1696 PCPU_SET(curpmap, pmap);
1698 TAILQ_INIT(&pmap->pm_pvlist);
1699 bzero(&pmap->pm_stats, sizeof pmap->pm_stats);
1703 * Initialize a preallocated and zeroed pmap structure, such as one in a
1704 * vmspace structure.
/*
 * Set up a user pmap: allocate a context number, create its TTE hash and
 * initial TSB, and reset counters, CPU masks, the pv list, the lock and
 * the statistics.
 */
1707 pmap_pinit(pmap_t pmap)
1711 pmap->pm_context = get_context();
/* Physical address of the TSB descriptor embedded in the pmap. */
1712 pmap->pm_tsb_ra = vtophys(&pmap->pm_tsb);
/* Hash and TSB creation run with the page queues locked. */
1714 vm_page_lock_queues();
1715 pmap->pm_hash = tte_hash_create(pmap->pm_context, &pmap->pm_hashscratch);
1716 tsb_init(&pmap->pm_tsb, &pmap->pm_tsbscratch, TSB_INIT_SHIFT);
1717 vm_page_unlock_queues();
1718 pmap->pm_tsb_miss_count = pmap->pm_tsb_cap_miss_count = 0;
1719 pmap->pm_active = pmap->pm_tlbactive = 0;
/* No earlier TSB generations exist yet. */
1720 for (i = 0; i < TSB_MAX_RESIZE; i++)
1721 pmap->pm_old_tsb_ra[i] = 0;
1723 TAILQ_INIT(&pmap->pm_pvlist);
1724 PMAP_LOCK_INIT(pmap);
1725 bzero(&pmap->pm_stats, sizeof pmap->pm_stats);
1729 * Set the physical protection on the specified range of this map as requested.
/*
 * Reduce the protection of the range [sva, eva) in pmap to prot.  Removing
 * read permission removes the mappings entirely; otherwise the write
 * and/or execute bits that prot does not grant are cleared in each TTE.
 */
1732 pmap_protect(pmap_t pmap, vm_offset_t sva, vm_offset_t eva, vm_prot_t prot)
1739 DPRINTF("pmap_protect(0x%lx, 0x%lx, %d)\n", sva, eva, prot);
/* Without read permission the mappings are removed outright. */
1741 if ((prot & VM_PROT_READ) == VM_PROT_NONE) {
1742 pmap_remove(pmap, sva, eva);
/* NOTE(review): with both write and execute granted there is presumably nothing to clear -- confirm. */
1746 if ((prot & (VM_PROT_WRITE|VM_PROT_EXECUTE)) ==
1747 (VM_PROT_WRITE|VM_PROT_EXECUTE))
1750 clearbits = anychanged = 0;
/* Collect the TTE bits that must be cleared. */
1752 if ((prot & VM_PROT_WRITE) == 0)
1753 clearbits |= (VTD_W|VTD_SW_W);
/* NOTE(review): presumably adds the execute bit to clearbits -- confirm. */
1754 if ((prot & VM_PROT_EXECUTE) == 0)
1757 vm_page_lock_queues();
1759 for (tva = sva; tva < eva; tva += PAGE_SIZE) {
/* otte_data is the TTE value from before the bits were cleared. */
1763 if ((otte_data = tte_hash_clear_bits(pmap->pm_hash, tva,
1767 * XXX technically we should do a shootdown if it
1768 * was referenced and was executable - but is not now
1770 if (!anychanged && (otte_data & VTD_W))
/* Managed pages get their reference state folded back into the vm_page. */
1773 if (otte_data & VTD_MANAGED) {
1776 if (otte_data & VTD_REF) {
1777 m = PHYS_TO_VM_PAGE(TTE_GET_PA(otte_data));
1778 vm_page_flag_set(m, PG_REFERENCED);
/* NOTE(review): a previously written page is presumably marked dirty here -- confirm. */
1780 if (otte_data & VTD_W) {
1781 m = PHYS_TO_VM_PAGE(TTE_GET_PA(otte_data));
1787 vm_page_unlock_queues();
/* NOTE(review): presumably only done when anychanged is set -- confirm. */
1789 pmap_invalidate_range(pmap, sva, eva, TRUE);
1794 * Map a list of wired pages into kernel virtual address space. This is
1795 * intended for temporary mappings which do not need page modification or
1796 * references recorded. Existing mappings in the region are overwritten.
/*
 * Enter count pages from the array m into the kernel pmap's TTE hash as
 * 8K kernel mappings.  The previous TTE values are OR'ed together in otte.
 * NOTE(review): va presumably starts at sva and advances one page per
 * iteration, together with m -- confirm.
 */
1799 pmap_qenter(vm_offset_t sva, vm_page_t *m, int count)
1806 while (count-- > 0) {
1807 otte |= tte_hash_update(kernel_pmap->pm_hash, va,
1808 VM_PAGE_TO_PHYS(*m) | TTE_KERNEL | VTD_8K);
/*
 * A flush is needed only if some replaced mapping had been referenced.
 * The TSB is not cleared (cleartsb is FALSE).
 */
1812 if ((otte & VTD_REF) != 0)
1813 pmap_invalidate_range(kernel_pmap, sva, va, FALSE);
1817 * Remove page mappings from kernel virtual address space. Intended for
1818 * temporary mappings entered by pmap_qenter.
/*
 * Delete count kernel mappings from the kernel pmap's TTE hash.  The
 * deleted TTE values are OR'ed together in otte.
 * NOTE(review): va presumably starts at sva and advances one page per
 * iteration -- confirm.
 */
1821 pmap_qremove(vm_offset_t sva, int count)
1829 while (count-- > 0) {
1830 otte |= tte_hash_delete(kernel_pmap->pm_hash, va);
/* Flush, clearing the TSB as well, only if some mapping had been referenced. */
1833 if ((otte & VTD_REF) != 0)
1834 pmap_invalidate_range(kernel_pmap, sva, va, TRUE);
1838 * Release any resources held by the given physical map.
1839 * Called when a pmap initialized by pmap_pinit is being released.
1840 * Should only be called if the map contains no valid mappings.
/*
 * Tear down a pmap that no longer has resident mappings (asserted below):
 * release its TSB, its TTE hash and its context number, then destroy its
 * lock.
 */
1843 pmap_release(pmap_t pmap)
1845 KASSERT(pmap->pm_stats.resident_count == 0,
1846 ("pmap_release: pmap resident count %ld != 0",
1847 pmap->pm_stats.resident_count));
1849 tsb_deinit(&pmap->pm_tsb);
1850 tte_hash_destroy(pmap->pm_hash);
1851 free_context(pmap->pm_context);
1852 PMAP_LOCK_DESTROY(pmap);
1856 * Remove the given range of addresses from the specified map.
/*
 * Remove all mappings in [start, end) from pmap: each page's TTE is
 * deleted from the hash and, when one existed, handed to
 * pmap_remove_tte().  The range is invalidated afterwards.
 */
1859 pmap_remove(pmap_t pmap, vm_offset_t start, vm_offset_t end)
1865 * Perform an unsynchronized read. This is, however, safe.
/* NOTE(review): presumably returns early when nothing is resident -- confirm. */
1867 if (pmap->pm_stats.resident_count == 0)
1870 DPRINTF("pmap_remove(start=0x%lx, end=0x%lx)\n",
1873 vm_page_lock_queues();
1875 for (tva = start; tva < end; tva += PAGE_SIZE) {
/* A zero return means no mapping existed at tva. */
1876 if ((tte_data = tte_hash_delete(pmap->pm_hash, tva)) == 0)
1878 pmap_remove_tte(pmap, tte_data, tva);
/* NOTE(review): presumably records that a TLB flush is required -- confirm. */
1879 if (tte_data & (VTD_REF|VTD_W))
1882 vm_page_unlock_queues();
/* NOTE(review): presumably conditional on the flag set in the loop -- confirm. */
1884 pmap_invalidate_range(pmap, start, end, TRUE);
1889 * Routine: pmap_remove_all
1891 * Removes this physical page from
1892 * all physical maps in which it resides.
1893 * Reflects back modify bits to the pager.
1896 * Original versions of this routine were very
1897 * inefficient because they iteratively called
1898 * pmap_remove (slow...)
/*
 * Remove every mapping of page m by draining its pv list.  For each entry
 * the owning pmap is locked, the TTE is deleted from that pmap's hash, the
 * statistics and page flags are updated, the translation is invalidated
 * and the pv entry is unlinked from both lists.  The page queue mutex must
 * be held (asserted below).
 */
1902 pmap_remove_all(vm_page_t m)
1906 DPRINTF("pmap_remove_all 0x%lx\n", VM_PAGE_TO_PHYS(m));
1908 mtx_assert(&vm_page_queue_mtx, MA_OWNED);
/* Always take the list head; each pass removes it, so the loop terminates. */
1909 while ((pv = TAILQ_FIRST(&m->md.pv_list)) != NULL) {
1910 PMAP_LOCK(pv->pv_pmap);
1911 pv->pv_pmap->pm_stats.resident_count--;
1913 tte_data = tte_hash_delete(pv->pv_pmap->pm_hash, pv->pv_va);
1915 if (tte_data & VTD_WIRED)
1916 pv->pv_pmap->pm_stats.wired_count--;
1917 if (tte_data & VTD_REF)
1918 vm_page_flag_set(m, PG_REFERENCED);
1921 * Update the vm_page_t clean and reference bits.
/* A hardware-written TTE must also have been software-writable. */
1923 if (tte_data & VTD_W) {
1924 KASSERT((tte_data & VTD_SW_W),
1925 ("pmap_remove_all: modified page not writable: va: %lx, tte: %lx",
1926 pv->pv_va, tte_data));
1930 pmap_invalidate_page(pv->pv_pmap, pv->pv_va, TRUE);
1931 TAILQ_REMOVE(&pv->pv_pmap->pm_pvlist, pv, pv_plist);
1932 TAILQ_REMOVE(&m->md.pv_list, pv, pv_list);
1933 m->md.pv_list_count--;
1934 PMAP_UNLOCK(pv->pv_pmap);
/* With no mappings left the page cannot be writeable through any pmap. */
1937 vm_page_flag_clear(m, PG_WRITEABLE);
/*
 * Find and unlink the pv entry for the mapping of page m at va in pmap.
 * The pmap lock and the page queue mutex must be held (asserted below).
 */
1941 pmap_remove_entry(pmap_t pmap, vm_page_t m, vm_offset_t va)
1944 if (pmap != kernel_pmap)
1945 DPRINTF("pmap_remove_entry(va=0x%lx, pa=0x%lx)\n", va, VM_PAGE_TO_PHYS(m));
1946 PMAP_LOCK_ASSERT(pmap, MA_OWNED);
1947 mtx_assert(&vm_page_queue_mtx, MA_OWNED);
/*
 * Search whichever list is expected to be shorter: the page's pv list,
 * matching pmap and va, or otherwise the pmap's own list, matching va.
 */
1948 if (m->md.pv_list_count < pmap->pm_stats.resident_count) {
1949 TAILQ_FOREACH(pv, &m->md.pv_list, pv_list) {
1950 if (pmap == pv->pv_pmap && va == pv->pv_va)
1954 TAILQ_FOREACH(pv, &pmap->pm_pvlist, pv_plist) {
1955 if (va == pv->pv_va)
1959 KASSERT(pv != NULL, ("pmap_remove_entry: pv not found va=0x%lx pa=0x%lx", va, VM_PAGE_TO_PHYS(m)));
1960 TAILQ_REMOVE(&m->md.pv_list, pv, pv_list);
1961 m->md.pv_list_count--;
/* The last mapping is gone, so the page is no longer writeable anywhere. */
1962 if (TAILQ_EMPTY(&m->md.pv_list))
1963 vm_page_flag_clear(m, PG_WRITEABLE);
1964 TAILQ_REMOVE(&pmap->pm_pvlist, pv, pv_plist);
/*
 * Remove every mapping recorded on the pmap's pv list, then reset the TTE
 * hash and the TSB and invalidate the whole context.
 */
1970 pmap_remove_pages(pmap_t pmap)
1977 DPRINTF("pmap_remove_pages(ctx=0x%lx)\n", pmap->pm_context);
1978 vm_page_lock_queues();
/* npv is fetched before pv is unlinked, so the walk survives the removal. */
1980 for (pv = TAILQ_FIRST(&pmap->pm_pvlist); pv; pv = npv) {
1981 tte_data = tte_hash_delete(pmap->pm_hash, pv->pv_va);
/* A pv entry without a TTE in the hash is reported as an inconsistency. */
1983 if (tte_data == 0) {
1984 printf("TTE IS ZERO @ VA %016lx\n", pv->pv_va);
/*
 * NOTE(review): the wired_count decrement follows an unconditional
 * panic(); if panic() does not return it is unreachable -- confirm.
 */
1987 if (tte_data & VTD_WIRED) {
1988 panic("wired page in process not handled correctly");
1989 pmap->pm_stats.wired_count--;
1991 m = PHYS_TO_VM_PAGE(TTE_GET_PA(tte_data));
1993 pmap->pm_stats.resident_count--;
/* NOTE(review): a written page is presumably marked dirty here -- confirm. */
1995 if (tte_data & VTD_W) {
1999 npv = TAILQ_NEXT(pv, pv_plist);
2000 TAILQ_REMOVE(&pmap->pm_pvlist, pv, pv_plist);
2002 m->md.pv_list_count--;
2003 TAILQ_REMOVE(&m->md.pv_list, pv, pv_list);
2004 if (TAILQ_EMPTY(&m->md.pv_list))
2005 vm_page_flag_clear(m, PG_WRITEABLE);
/* tte_hash_reset() returns the hash to use from now on. */
2009 pmap->pm_hash = tte_hash_reset(pmap->pm_hash, &pmap->pm_hashscratch);
2011 pmap_tsb_reset(pmap);
2013 vm_page_unlock_queues();
2014 pmap_invalidate_all(pmap);
/*
 * Return the pmap to its initial-size TSB.  Intermediate TSBs saved in
 * pm_old_tsb_ra[1..] are freed.  If an initial TSB was saved in slot 0, it
 * is reinstated and the current TSB is freed.
 */
2019 pmap_tsb_reset(pmap_t pmap)
/* Slot i holds a TSB of 1 << (TSB_INIT_SHIFT + i) pages; a zero slot ends the scan. */
2023 for (i = 1; i < TSB_MAX_RESIZE && pmap->pm_old_tsb_ra[i]; i++) {
2024 pmap_free_contig_pages((void *)TLB_PHYS_TO_DIRECT(pmap->pm_old_tsb_ra[i]),
2025 (1 << (TSB_INIT_SHIFT + i)));
2026 pmap->pm_old_tsb_ra[i] = 0;
2028 if (pmap->pm_old_tsb_ra[0] != 0) {
/* Capture the current TSB's address and size before the descriptor changes. */
2029 vm_paddr_t tsb_pa = pmap->pm_tsb.hti_ra;
2030 int size = tsb_size(&pmap->pm_tsb);
2031 pmap->pm_tsb.hti_ntte = (1 << (TSB_INIT_SHIFT + PAGE_SHIFT - TTE_SHIFT));
2032 pmap->pm_tsb.hti_ra = pmap->pm_old_tsb_ra[0];
2033 pmap_free_contig_pages((void *)TLB_PHYS_TO_DIRECT(tsb_pa), size);
/* The scratchpad value is the TSB address OR'ed with the size shift. */
2034 pmap->pm_tsbscratch = pmap->pm_tsb.hti_ra | (uint64_t)TSB_INIT_SHIFT;
2035 pmap->pm_old_tsb_ra[0] = 0;
/*
 * Scrub size bytes of physical memory at pa through the hypervisor.
 * hv_mem_scrub() reports how many bytes it handled, and that amount is
 * subtracted from the remaining size.
 * NOTE(review): presumably repeated, with pa advanced, until size is
 * exhausted -- confirm.
 */
2040 pmap_scrub_pages(vm_paddr_t pa, int64_t size)
2042 uint64_t bytes_zeroed;
2044 hv_mem_scrub(pa, size, &bytes_zeroed);
2046 size -= bytes_zeroed;
/*
 * Account for the removal of a mapping whose TTE (tte_data) has already
 * been taken out of the hash: update the wired and resident counts and,
 * for managed pages, propagate the reference bit and drop the pv entry.
 * The page queue mutex and the pmap lock must be held (asserted below).
 */
2051 pmap_remove_tte(pmap_t pmap, tte_t tte_data, vm_offset_t va)
2056 if (pmap != kernel_pmap)
2057 DPRINTF("pmap_remove_tte(va=0x%lx, pa=0x%lx)\n", va, TTE_GET_PA(tte_data));
2059 mtx_assert(&vm_page_queue_mtx, MA_OWNED);
2060 PMAP_LOCK_ASSERT(pmap, MA_OWNED);
2061 if (tte_data & VTD_WIRED)
2062 pmap->pm_stats.wired_count--;
2064 pmap->pm_stats.resident_count--;
/* Only managed mappings have a vm_page and a pv entry to update. */
2066 if (tte_data & VTD_MANAGED) {
2067 m = PHYS_TO_VM_PAGE(TTE_GET_PA(tte_data));
/* NOTE(review): a written page is presumably marked dirty here -- confirm. */
2068 if (tte_data & VTD_W) {
2071 if (tte_data & VTD_REF)
2072 vm_page_flag_set(m, PG_REFERENCED);
2073 pmap_remove_entry(pmap, m, va);
2077 /* resize the tsb if the number of capacity misses is greater than 1/4 of
/*
 * Double the size of the current pmap's TSB when capacity misses exceed
 * half of all TSB misses and the maximum size has not been reached, then
 * reset both miss counters.  Must be called on the current thread's pmap
 * (asserted below).
 * NOTE(review): the comment preceding this function says 1/4, while the
 * test below compares against miss_count >> 1 (one half) -- confirm which
 * threshold is intended.
 */
2081 pmap_tsb_resize(pmap_t pmap)
2083 uint32_t miss_count;
2084 uint32_t cap_miss_count;
2085 struct tsb_resize_info info;
2086 hv_tsb_info_t hvtsb;
2087 uint64_t tsbscratch;
2089 KASSERT(pmap == curthread_pmap, ("operating on non-current pmap"));
2090 miss_count = pmap->pm_tsb_miss_count;
2091 cap_miss_count = pmap->pm_tsb_cap_miss_count;
2092 int npages_shift = tsb_page_shift(pmap);
2094 if (npages_shift < (TSB_INIT_SHIFT + TSB_MAX_RESIZE) &&
2095 cap_miss_count > (miss_count >> 1)) {
2096 DPRINTF("resizing tsb for proc=%s pid=%d\n",
2097 curthread->td_proc->p_comm, curthread->td_proc->p_pid);
/* Remember the outgoing TSB so pmap_tsb_reset() can release it later. */
2098 pmap->pm_old_tsb_ra[npages_shift - TSB_INIT_SHIFT] = pmap->pm_tsb.hti_ra;
2100 /* double TSB size */
2101 tsb_init(&hvtsb, &tsbscratch, npages_shift + 1);
/*
 * NOTE(review): the two install sequences below are near duplicates and
 * are presumably alternative build-time variants (one notifying other
 * CPUs by IPI, one purely local) -- confirm.
 */
2105 bcopy(&hvtsb, &pmap->pm_tsb, sizeof(hv_tsb_info_t));
2106 pmap->pm_tsbscratch = tsb_set_scratchpad_user(&pmap->pm_tsb);
2108 if (hv_mmu_tsb_ctxnon0(1, pmap->pm_tsb_ra) != H_EOK)
2109 panic("failed to set TSB 0x%lx - context == %ld\n",
2110 pmap->pm_tsb_ra, pmap->pm_context);
/* Other CPUs receive the physical address of the on-stack info record. */
2111 info.tri_tsbscratch = pmap->pm_tsbscratch;
2112 info.tri_tsb_ra = pmap->pm_tsb_ra;
2113 pmap_ipi(pmap, tl_tsbupdate, pmap->pm_context, vtophys(&info));
2114 pmap->pm_tlbactive = pmap->pm_active;
2117 bcopy(&hvtsb, &pmap->pm_tsb, sizeof(hvtsb));
2118 if (hv_mmu_tsb_ctxnon0(1, pmap->pm_tsb_ra) != H_EOK)
2119 panic("failed to set TSB 0x%lx - context == %ld\n",
2120 pmap->pm_tsb_ra, pmap->pm_context);
2121 pmap->pm_tsbscratch = tsb_set_scratchpad_user(&pmap->pm_tsb);
/* Start a fresh measurement interval. */
2124 pmap->pm_tsb_miss_count = 0;
2125 pmap->pm_tsb_cap_miss_count = 0;
/*
 * Replace the pmap's TTE hash with the one returned by tte_hash_resize(),
 * notify other CPUs when the process has more than one thread, refresh
 * this CPU's hash scratchpad value and destroy the old hash.
 */
2129 pmap_tte_hash_resize(pmap_t pmap)
/* Keep the old hash so it can be destroyed once the new one is in use. */
2131 tte_hash_t old_th = pmap->pm_hash;
2133 pmap->pm_hash = tte_hash_resize(pmap->pm_hash);
/*
 * NOTE(review): the IPI passes pm_hashscratch before it is reassigned on
 * the following statement; confirm that tte_hash_resize() has already
 * updated it, otherwise remote CPUs receive the stale value.
 */
2135 if (curthread->td_proc->p_numthreads != 1)
2136 pmap_ipi(pmap, tl_ttehashupdate, pmap->pm_context, pmap->pm_hashscratch);
2138 pmap->pm_hashscratch = tte_hash_set_scratchpad_user(pmap->pm_hash, pmap->pm_context);
2140 tte_hash_destroy(old_th);
2144 * pmap_ts_referenced:
2146 * Return a count of reference bits for a page, clearing those bits.
2147 * It is not necessary for every reference bit to be cleared, but it
2148 * is necessary that 0 only be returned when there are truly no
2149 * reference bits set.
2151 * XXX: The exact number of bits to check and clear is a matter that
2152 * should be tested and standardized at some point in the future for
2153 * optimal aging of shared pages.
/*
 * Clear the reference bit in mappings of page m, invalidating each mapping
 * whose bit was set.  The page queue mutex must be held (asserted below).
 * NOTE(review): the count of cleared bits is presumably accumulated and
 * returned, and fictitious pages presumably yield 0 -- confirm.
 */
2157 pmap_ts_referenced(vm_page_t m)
2161 pv_entry_t pv, pvf, pvn;
2166 if (m->flags & PG_FICTITIOUS)
2169 mtx_assert(&vm_page_queue_mtx, MA_OWNED);
2170 if ((pv = TAILQ_FIRST(&m->md.pv_list)) != NULL) {
/* Fetch the successor first, because pv is about to be moved. */
2175 pvn = TAILQ_NEXT(pv, pv_list);
/* Rotate the entry to the tail so that later calls start with other mappings. */
2177 TAILQ_REMOVE(&m->md.pv_list, pv, pv_list);
2179 TAILQ_INSERT_TAIL(&m->md.pv_list, pv, pv_list);
/* otte_data is the TTE value from before VTD_REF was cleared. */
2183 otte_data = tte_hash_clear_bits(pmap->pm_hash, pv->pv_va, VTD_REF);
2184 if ((otte_data & VTD_REF) != 0) {
2185 pmap_invalidate_page(pmap, pv->pv_va, TRUE);
/*
 * Stop at the end of the list or on reaching pvf again.
 * NOTE(review): pvf is presumably the first entry visited -- confirm.
 */
2195 } while ((pv = pvn) != NULL && pv != pvf);
/*
 * Zero one full page through its direct-mapped address using hwblkclr().
 */
2201 pmap_zero_page(vm_page_t m)
2203 hwblkclr((void *)TLB_PHYS_TO_DIRECT(VM_PAGE_TO_PHYS(m)), PAGE_SIZE);
/*
 * Zero size bytes starting at byte offset off within page m.  A request
 * covering the whole page uses hwblkclr().
 * NOTE(review): the bzero() call is presumably the else branch for partial
 * ranges -- confirm.
 */
2207 pmap_zero_page_area(vm_page_t m, int off, int size)
2212 pa = VM_PAGE_TO_PHYS(m);
2213 va = TLB_PHYS_TO_DIRECT(pa);
2214 if (off == 0 && size == PAGE_SIZE)
2215 hwblkclr((void *)TLB_PHYS_TO_DIRECT(VM_PAGE_TO_PHYS(m)), PAGE_SIZE);
2217 bzero((char *)(va + off), size);
/*
 * Idle-time variant of page zeroing.  The visible code is the same single
 * hwblkclr() call over the page's direct-mapped address as pmap_zero_page().
 */
2222 pmap_zero_page_idle(vm_page_t m)
2224 hwblkclr((void *)TLB_PHYS_TO_DIRECT(VM_PAGE_TO_PHYS(m)), PAGE_SIZE);
/*
 * Panic with a message reporting a failed non-zero-context TSB setup.  The
 * message includes the pmap's context, the TSB real address passed in, the
 * pmap's TSB scratchpad value and the error code.
 */
2228 pmap_set_ctx_panic(uint64_t error, vm_paddr_t tsb_ra, pmap_t pmap)
2230 panic("setting ctxnon0 failed ctx=0x%lx hvtsb_ra=0x%lx tsbscratch=0x%lx error=0x%lx",
2231 pmap->pm_context, tsb_ra, pmap->pm_tsbscratch, error);