2 * Copyright (c) 2006 Kip Macy <kmacy@FreeBSD.org>
5 * Redistribution and use in source and binary forms, with or without
6 * modification, are permitted provided that the following conditions
8 * 1. Redistributions of source code must retain the above copyright
9 * notice, this list of conditions and the following disclaimer.
10 * 2. Redistributions in binary form must reproduce the above copyright
11 * notice, this list of conditions and the following disclaimer in the
12 * documentation and/or other materials provided with the distribution.
14 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
15 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
16 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
17 * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
18 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
19 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
20 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
21 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
22 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
23 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
28 #include <sys/cdefs.h>
29 __FBSDID("$FreeBSD$");
31 #include "opt_kstack_pages.h"
32 #include "opt_msgbuf.h"
34 #include "opt_trap_trace.h"
36 #include <sys/param.h>
37 #include <sys/kernel.h>
41 #include <sys/msgbuf.h>
42 #include <sys/mutex.h>
45 #include <sys/sched.h>
46 #include <sys/sysctl.h>
47 #include <sys/systm.h>
48 #include <sys/vmmeter.h>
50 #include <dev/ofw/openfirm.h>
53 #include <vm/vm_page.h>
54 #include <vm/vm_param.h>
55 #include <vm/vm_kern.h>
56 #include <vm/vm_map.h>
57 #include <vm/vm_object.h>
58 #include <vm/vm_extern.h>
59 #include <vm/vm_pageout.h>
60 #include <vm/vm_pager.h>
61 #include <vm/vm_phys.h>
64 #include <machine/cpu.h>
65 #include <machine/frame.h>
66 #include <machine/instr.h>
67 #include <machine/md_var.h>
68 #include <machine/metadata.h>
69 #include <machine/ofw_mem.h>
70 #include <machine/mmu.h>
71 #include <machine/smp.h>
72 #include <machine/tlb.h>
73 #include <machine/tte.h>
74 #include <machine/tte_hash.h>
75 #include <machine/pcb.h>
76 #include <machine/pstate.h>
77 #include <machine/tsb.h>
79 #include <machine/hypervisorvar.h>
80 #include <machine/hv_api.h>
83 void trap_trace_report(int);
89 #ifndef PMAP_SHPGPERPROC
90 #define PMAP_SHPGPERPROC 200
94 * Virtual and physical address of message buffer.
96 struct msgbuf *msgbufp;
97 vm_paddr_t msgbuf_phys;
100 * Map of physical memory regions.
102 vm_paddr_t phys_avail[128];
103 vm_paddr_t phys_avail_tmp[128];
104 static struct ofw_mem_region mra[128];
105 static struct ofw_map translations[128];
106 static int translations_size;
109 struct ofw_mem_region sparc64_memreg[128];
112 extern vm_paddr_t mmu_fault_status_area;
115 * First and last available kernel virtual addresses.
117 vm_offset_t virtual_avail;
118 vm_offset_t virtual_end;
119 vm_offset_t kernel_vm_end;
120 vm_offset_t vm_max_kernel_address;
122 #ifndef PMAP_SHPGPERPROC
123 #define PMAP_SHPGPERPROC 200
126 * Data for the pv entry allocation mechanism
128 static uma_zone_t pvzone;
129 static struct vm_object pvzone_obj;
130 static int pv_entry_count = 0, pv_entry_max = 0, pv_entry_high_water = 0;
132 static int pmap_debug_range = 1;
133 static int use_256M_pages = 1;
135 static struct mtx pmap_ctx_lock;
136 static uint16_t ctx_stack[PMAP_CONTEXT_MAX];
137 static int ctx_stack_top;
139 static int permanent_mappings = 0;
140 static uint64_t nucleus_memory;
141 static uint64_t nucleus_mappings[4];
145 struct pmap kernel_pmap_store;
147 hv_tsb_info_t kernel_td[MAX_TSB_INFO];
150 * This should be determined at boot time
151 * with tiny TLBS it doesn't make sense to try and selectively
152 * invalidate more than this
154 #define MAX_INVALIDATES 32
155 #define MAX_TSB_CLEARS 128
158 * Allocate physical memory for use in pmap_bootstrap.
160 static vm_paddr_t pmap_bootstrap_alloc(vm_size_t size);
163 * If a user pmap is processed with pmap_remove and the
164 * resident count drops to 0, there are no more pages to remove, so we
167 #define PMAP_REMOVE_DONE(pm) \
168 ((pm) != kernel_pmap && (pm)->pm_stats.resident_count == 0)
171 * Kernel MMU interface
173 #define curthread_pmap vmspace_pmap(curthread->td_proc->p_vmspace)
176 #define KDPRINTF if (pmap_debug) printf
178 if (curthread_pmap && (curthread_pmap->pm_context != 0) && ((PCPU_GET(cpumask) & curthread_pmap->pm_active) == 0)) \
179 panic("cpumask(0x%x) & active (0x%x) == 0 pid == %d\n", \
180 PCPU_GET(cpumask), curthread_pmap->pm_active, curthread->td_proc->p_pid); \
181 if (pmap_debug) printf
186 #define KDPRINTF(...)
190 static void free_pv_entry(pv_entry_t pv);
191 static pv_entry_t get_pv_entry(pmap_t locked_pmap);
193 static void pmap_insert_entry(pmap_t pmap, vm_offset_t va, vm_page_t m);
194 static void pmap_remove_entry(struct pmap *pmap, vm_page_t m, vm_offset_t va);
195 static void pmap_remove_tte(pmap_t pmap, tte_t tte_data, vm_offset_t va);
196 static void pmap_enter_quick_locked(pmap_t pmap, vm_offset_t va, vm_page_t m, vm_prot_t prot);
197 static void pmap_tsb_reset(pmap_t pmap);
198 static void pmap_tsb_resize(pmap_t pmap);
199 static void pmap_tte_hash_resize(pmap_t pmap);
201 void pmap_set_ctx_panic(uint64_t error, vm_paddr_t tsb_ra, pmap_t pmap);
203 struct tsb_resize_info {
204 uint64_t tri_tsbscratch;
209 * Quick sort callout for comparing memory regions.
211 static int mr_cmp(const void *a, const void *b);
212 static int om_cmp(const void *a, const void *b);
/*
 * mr_cmp: qsort comparison callback for struct ofw_mem_region entries,
 * keyed on mr_start.  pmap_bootstrap passes it to qsort to order mra[].
 *
 * NOTE(review): the assignments from a/b and the return statements are not
 * visible in this listing; presumably the result follows the usual qsort
 * negative/zero/positive convention -- confirm against the full file.
 */
214 mr_cmp(const void *a, const void *b)
216 const struct ofw_mem_region *mra;
217 const struct ofw_mem_region *mrb;
/* Compare the start addresses of the two regions. */
221 if (mra->mr_start < mrb->mr_start)
223 else if (mra->mr_start > mrb->mr_start)
/*
 * om_cmp: qsort comparison callback for struct ofw_map entries, keyed on
 * om_start.  pmap_bootstrap passes it to qsort to order translations[].
 *
 * NOTE(review): the assignments from a/b and the return statements are not
 * visible in this listing; presumably the result follows the usual qsort
 * negative/zero/positive convention -- confirm against the full file.
 */
229 om_cmp(const void *a, const void *b)
231 const struct ofw_map *oma;
232 const struct ofw_map *omb;
/* Compare the start addresses of the two translations. */
236 if (oma->om_start < omb->om_start)
238 else if (oma->om_start > omb->om_start)
/*
 * free_context: push the context number ctx back onto ctx_stack.
 *
 * The push itself is done under the pmap_ctx_lock spin mutex.
 *
 * NOTE(review): the bound on ctx_stack_top is asserted only after the
 * store has been performed and after the lock has been released, so the
 * assertion cannot prevent a write past the end of ctx_stack and it reads
 * ctx_stack_top unlocked -- confirm whether that is intended.
 */
245 free_context(uint16_t ctx)
247 mtx_lock_spin(&pmap_ctx_lock);
/* Store ctx at the current top and advance the top index. */
248 ctx_stack[ctx_stack_top++] = ctx;
249 mtx_unlock_spin(&pmap_ctx_lock);
251 KASSERT(ctx_stack_top < PMAP_CONTEXT_MAX,
252 ("context stack overrun - system error"));
/*
 * Pop a context number from ctx_stack.
 *
 * NOTE(review): the name line of this function is not visible in this
 * listing; presumably it is the allocation counterpart of free_context --
 * confirm.  The declaration of ctx and the return statement are also not
 * visible.
 *
 * The pop is done under the pmap_ctx_lock spin mutex.  The underrun check
 * runs after the pop and after the lock has been released.
 */
255 static __inline uint16_t
260 mtx_lock_spin(&pmap_ctx_lock);
/* Decrement the top index first, then read the entry it now points at. */
261 ctx = ctx_stack[--ctx_stack_top];
262 mtx_unlock_spin(&pmap_ctx_lock);
264 KASSERT(ctx_stack_top > 0,
265 ("context stack underrun - need to implement context stealing"));
/*
 * free_pv_entry: release a pv entry back to the pvzone UMA zone.
 *
 * NOTE(review): one or more lines of the body are missing from this
 * listing (presumably the pv_entry_count bookkeeping) -- confirm.
 */
271 free_pv_entry(pv_entry_t pv)
274 uma_zfree(pvzone, pv);
278 * get a new pv_entry, allocating a block from the system
/*
 * get_pv_entry: obtain a pv entry for use by locked_pmap.
 *
 * Preconditions asserted below: the lock of locked_pmap and
 * vm_page_queue_mtx are both owned by the caller.
 *
 * Visible behaviour:
 *  - try a non-sleeping allocation (M_NOWAIT) from pvzone and return it on
 *    success;
 *  - otherwise print a warning, rate limited through ratecheck to one
 *    message per printinterval (60 seconds), and walk the pages of the
 *    inactive queue, tearing down their mappings (tte_hash_delete,
 *    pmap_invalidate_page, removal from both pv lists) to reclaim entries;
 *  - if nothing was reclaimed, repeat with the active queue, and panic if
 *    that also fails.
 *
 * NOTE(review): several statements are missing from this listing (branch
 * bodies, the assignments of pmap and va, the goto/continue lines, the
 * counter updates); the comments describe only the visible lines.
 */
282 get_pv_entry(pmap_t locked_pmap)
284 static const struct timeval printinterval = { 60, 0 };
285 static struct timeval lastprint;
286 struct vpgqueues *vpq;
289 pv_entry_t allocated_pv, next_pv, pv;
293 PMAP_LOCK_ASSERT(locked_pmap, MA_OWNED);
294 mtx_assert(&vm_page_queue_mtx, MA_OWNED);
/* Fast path: non-sleeping allocation from the pv zone. */
295 allocated_pv = uma_zalloc(pvzone, M_NOWAIT);
296 if (allocated_pv != NULL) {
298 if (pv_entry_count > pv_entry_high_water)
301 return (allocated_pv);
305 * Reclaim pv entries: At first, destroy mappings to inactive
306 * pages. After that, if a pv entry is still needed, destroy
307 * mappings to active pages.
/* Warn at most once per printinterval. */
309 if (ratecheck(&lastprint, &printinterval))
310 printf("Approaching the limit on PV entries, "
311 "increase the vm.pmap.shpgperproc tunable.\n");
/* The first pass scans the inactive page queue. */
313 vpq = &vm_page_queues[PQ_INACTIVE];
315 TAILQ_FOREACH(m, &vpq->pl, pageq) {
/* Held or busy pages are tested here; the branch body is not visible. */
316 if (m->hold_count || m->busy)
318 TAILQ_FOREACH_SAFE(pv, &m->md.pv_list, pv_list, next_pv) {
321 /* Avoid deadlock and lock recursion. */
/* The lock order depends on the pmap addresses; other pmaps are only try-locked. */
322 if (pmap > locked_pmap)
324 else if (pmap != locked_pmap && !PMAP_TRYLOCK(pmap))
326 pmap->pm_stats.resident_count--;
/* Remove the mapping; the returned old tte supplies the status bits below. */
328 tte_data = tte_hash_delete(pmap->pm_hash, va);
330 KASSERT((tte_data & VTD_WIRED) == 0,
331 ("get_pv_entry: wired pte %#jx", (uintmax_t)tte_data));
/* Carry the referenced bit of the mapping over to the page. */
332 if (tte_data & VTD_REF)
333 vm_page_flag_set(m, PG_REFERENCED);
334 if (tte_data & VTD_W) {
335 KASSERT((tte_data & VTD_SW_W),
336 ("get_pv_entry: modified page not writable: va: %lx, tte: %lx",
341 pmap_invalidate_page(pmap, va, TRUE);
/* Unlink the pv entry from the per-pmap list and from the per-page list. */
342 TAILQ_REMOVE(&pmap->pm_pvlist, pv, pv_plist);
343 TAILQ_REMOVE(&m->md.pv_list, pv, pv_list);
/* With no mappings left the page can no longer be written through a mapping. */
344 if (TAILQ_EMPTY(&m->md.pv_list))
345 vm_page_flag_clear(m, PG_WRITEABLE);
346 m->md.pv_list_count--;
348 if (pmap != locked_pmap)
350 if (allocated_pv == NULL)
/* Nothing reclaimed: retry with the active queue, otherwise panic. */
356 if (allocated_pv == NULL) {
357 if (vpq == &vm_page_queues[PQ_INACTIVE]) {
358 vpq = &vm_page_queues[PQ_ACTIVE];
361 panic("get_pv_entry: increase the vm.pmap.shpgperproc tunable");
363 return (allocated_pv);
367 * Allocate a physical page of memory directly from the phys_avail map.
368 * Can only be called from pmap_bootstrap before avail start and end are
/*
 * pmap_bootstrap_alloc: carve size bytes (rounded up to a whole number of
 * pages) off the front of the first phys_avail range that is large enough.
 *
 * The chosen range start is advanced by the rounded size and the allocated
 * memory is passed to pmap_scrub_pages.  If no range is large enough the
 * function panics.
 *
 * NOTE(review): the assignment of pa, the skip statement for ranges that
 * are too small and the return statement are not visible in this listing.
 */
372 pmap_bootstrap_alloc(vm_size_t size)
377 size = round_page(size);
/* phys_avail holds start/end pairs; a zero end value terminates the scan. */
379 for (i = 0; phys_avail[i + 1] != 0; i += 2) {
/* Range too small for the request (branch body not visible). */
380 if (phys_avail[i + 1] - phys_avail[i] < size)
/* Consume the allocation from the front of the range. */
383 phys_avail[i] += size;
384 pmap_scrub_pages(pa, size);
/* No range could satisfy the request. */
387 panic("pmap_bootstrap_alloc");
391 * Activate a user pmap. The pmap must be activated before its address space
392 * can be accessed in any way.
/*
 * pmap_activate: make the pmap of the process owning td the current pmap
 * of this CPU.
 *
 * Visible steps: clear this CPU from the old pmap pm_active mask and set
 * it in the new pmap pm_active and pm_tlbactive masks, load the user hash
 * and TSB scratchpad values, reset the TSB miss counters, record the pmap
 * in the per-CPU curpmap, register the TSB with the hypervisor for a
 * non-zero context, and write the context number with stxa.
 *
 * NOTE(review): the listing shows both an atomic and a non-atomic update
 * of the active masks; presumably they are alternative preprocessor
 * branches whose directives are not visible -- confirm.  Braces are also
 * missing, so the exact nesting of the last statements cannot be told
 * from here.
 */
395 pmap_activate(struct thread *td)
397 pmap_t pmap, oldpmap;
401 pmap = vmspace_pmap(td->td_proc->p_vmspace);
402 oldpmap = PCPU_GET(curpmap);
/* Variant that updates the masks atomically with this CPU cpumask. */
404 atomic_clear_int(&oldpmap->pm_active, PCPU_GET(cpumask));
405 atomic_set_int(&pmap->pm_tlbactive, PCPU_GET(cpumask));
406 atomic_set_int(&pmap->pm_active, PCPU_GET(cpumask));
/* Variant that updates bit 0 of the masks with plain stores. */
408 oldpmap->pm_active &= ~1;
409 pmap->pm_active |= 1;
410 pmap->pm_tlbactive |= 1;
/* Load the user scratchpad values and remember what was written. */
413 pmap->pm_hashscratch = tte_hash_set_scratchpad_user(pmap->pm_hash, pmap->pm_context);
414 pmap->pm_tsbscratch = tsb_set_scratchpad_user(&pmap->pm_tsb);
415 pmap->pm_tsb_miss_count = pmap->pm_tsb_cap_miss_count = 0;
417 PCPU_SET(curpmap, pmap);
/* A non-zero context gets its single TSB registered with the hypervisor. */
418 if (pmap->pm_context != 0)
419 if ((err = hv_mmu_tsb_ctxnon0(1, pmap->pm_tsb_ra)) != H_EOK)
420 panic("failed to set TSB 0x%lx - context == %ld\n",
421 pmap->pm_tsb_ra, pmap->pm_context);
/* Store the context number at MMU_CID_S in the ASI_MMU_CONTEXTID space. */
422 stxa(MMU_CID_S, ASI_MMU_CONTEXTID, pmap->pm_context);
428 * Increase the starting virtual address of the given mapping if a
429 * different alignment might result in more superpage mappings.
/*
 * pmap_align_superpage: only the parameter list is visible in this
 * listing; no body statements are shown.
 *
 * NOTE(review): presumably the body is empty on this platform -- confirm
 * against the full file.
 */
432 pmap_align_superpage(vm_object_t object, vm_ooffset_t offset,
433 vm_offset_t *addr, vm_size_t size)
438 * Bootstrap the system enough to run with virtual memory.
/*
 * pmap_bootstrap: early MMU setup.
 *
 * NOTE(review): this listing omits many original lines (braces, short
 * statements, preprocessor directives); the notes here describe only the
 * statements that are visible.
 *
 * Visible steps, in order:
 *  - read and sort the /virtual-memory translations property; translations
 *    starting in [KERNBASE, KERNBASE + 3*PAGE_SIZE_4M] are installed as
 *    permanent 4M mappings through hv_mmu_map_perm_addr and recorded in
 *    nucleus_mappings and nucleus_memory;
 *  - read and sort the /memory available property, apply the hw.physmem
 *    and hw.physmemstart tunables, and build real_phys_avail and
 *    phys_avail;
 *  - size kernel virtual memory and set virtual_avail, virtual_end and
 *    kernel_vm_end;
 *  - allocate the kernel hash table, the 8K and 4M TSBs, the MMU fault
 *    status areas, the message buffer and the thread0 kernel stack with
 *    pmap_bootstrap_alloc;
 *  - enter PROM translations and kstack0 into the 8K TSB, hand the TSB
 *    descriptors to the hypervisor with hv_mmu_tsb_ctx0, and install
 *    direct mappings in the 4M TSB;
 *  - read the /memory reg property into sparc64_memreg, create the kernel
 *    tte hash and insert the PROM, kstack0 and direct mappings into it.
 *
 * ekva: rounded up to a 4M boundary to form the initial virtual_avail.
 */
441 pmap_bootstrap(vm_offset_t ekva)
445 vm_paddr_t pa, tsb_8k_pa, tsb_4m_pa, kernel_hash_pa, nucleus_memory_start;
446 vm_size_t physsz, virtsz, kernel_hash_shift;
447 ihandle_t pmem, vmem;
449 uint64_t tsb_8k_size, tsb_4m_size, error, physmem_tunable, physmemstart_tunable;
450 vm_paddr_t real_phys_avail[128], tmp_phys_avail[128], bounds;
453 if ((vmem = OF_finddevice("/virtual-memory")) == -1)
454 panic("pmap_bootstrap: finddevice /virtual-memory");
455 if ((sz = OF_getproplen(vmem, "translations")) == -1)
456 panic("pmap_bootstrap: getproplen translations");
/* The property must fit in the static translations[] array. */
457 if (sizeof(translations) < sz)
458 panic("pmap_bootstrap: translations too small");
459 bzero(translations, sz);
460 if (OF_getprop(vmem, "translations", translations, sz) == -1)
461 panic("pmap_bootstrap: getprop /virtual-memory/translations");
/* Convert the byte length into an entry count and keep it for later use. */
462 sz /= sizeof(*translations);
463 translations_size = sz;
464 nucleus_memory_start = 0;
465 CTR0(KTR_PMAP, "pmap_bootstrap: translations");
466 qsort(translations, sz, sizeof (*translations), om_cmp);
468 for (i = 0; i < sz; i++) {
469 KDPRINTF("om_size=%ld om_start=%lx om_tte=%lx\n",
470 translations[i].om_size, translations[i].om_start,
471 translations[i].om_tte);
/* Translations starting in the kernel nucleus window get permanent 4M mappings. */
472 if ((translations[i].om_start >= KERNBASE) &&
473 (translations[i].om_start <= KERNBASE + 3*PAGE_SIZE_4M)) {
474 for (j = 0; j < translations[i].om_size; j += PAGE_SIZE_4M) {
475 KDPRINTF("mapping permanent translation\n");
476 pa = TTE_GET_PA(translations[i].om_tte) + j;
477 va = translations[i].om_start + j;
478 error = hv_mmu_map_perm_addr(va, KCONTEXT,
479 pa | TTE_KERNEL | VTD_4M, MAP_ITLB | MAP_DTLB);
481 panic("map_perm_addr returned error=%ld", error);
/* Track the lowest physical address used by the nucleus. */
483 if ((nucleus_memory_start == 0) || (pa < nucleus_memory_start))
484 nucleus_memory_start = pa;
485 printf("nucleus_mappings[%d] = 0x%lx\n", permanent_mappings, pa);
486 nucleus_mappings[permanent_mappings++] = pa;
487 nucleus_memory += PAGE_SIZE_4M;
489 mp_add_nucleus_mapping(va, pa|TTE_KERNEL|VTD_4M);
496 * Find out what physical memory is available from the prom and
497 * initialize the phys_avail array. This must be done before
498 * pmap_bootstrap_alloc is called.
500 if ((pmem = OF_finddevice("/memory")) == -1)
501 panic("pmap_bootstrap: finddevice /memory");
502 if ((sz = OF_getproplen(pmem, "available")) == -1)
503 panic("pmap_bootstrap: getproplen /memory/available");
504 if (sizeof(vm_paddr_t)*128 < sz) /* FIXME */
505 panic("pmap_bootstrap: phys_avail too small");
506 if (sizeof(mra) < sz)
507 panic("pmap_bootstrap: mra too small");
509 if (OF_getprop(pmem, "available", mra, sz) == -1)
510 panic("pmap_bootstrap: getprop /memory/available");
513 CTR0(KTR_PMAP, "pmap_bootstrap: physical memory");
515 qsort(mra, sz, sizeof (*mra), mr_cmp);
516 physmemstart_tunable = physmem_tunable = physmem = physsz = 0;
518 if (TUNABLE_ULONG_FETCH("hw.physmemstart", &physmemstart_tunable)) {
519 KDPRINTF("desired physmemstart=0x%lx\n", physmemstart_tunable);
521 if (TUNABLE_ULONG_FETCH("hw.physmem", &physmem_tunable)) {
522 physmem = atop(physmem_tunable);
523 KDPRINTF("desired physmem=0x%lx\n", physmem_tunable);
/* With both tunables set, the memory limit becomes start plus size. */
525 if ((physmem_tunable != 0) && (physmemstart_tunable != 0))
526 physmem_tunable += physmemstart_tunable;
528 bzero(real_phys_avail, sizeof(real_phys_avail));
529 bzero(tmp_phys_avail, sizeof(tmp_phys_avail));
531 for (i = 0, j = 0; i < sz; i++) {
533 KDPRINTF("start=%#lx size=%#lx\n", mra[i].mr_start, mra[i].mr_size);
/* Regions smaller than one 4M page are tested here; the branch body is not visible. */
534 if (mra[i].mr_size < PAGE_SIZE_4M)
/* Trim the region so that both its start and its size are 4M aligned. */
537 if ((mra[i].mr_start & PAGE_MASK_4M) || (mra[i].mr_size & PAGE_MASK_4M)) {
538 uint64_t newstart, roundup;
539 newstart = ((mra[i].mr_start + (PAGE_MASK_4M)) & ~PAGE_MASK_4M);
540 roundup = newstart - mra[i].mr_start;
541 size = (mra[i].mr_size - roundup) & ~PAGE_MASK_4M;
542 mra[i].mr_start = newstart;
543 if (size < PAGE_SIZE_4M)
545 mra[i].mr_size = size;
547 real_phys_avail[j] = mra[i].mr_start;
/* Clip the region that crosses the configured memory limit. */
548 if (physmem_tunable != 0 && ((physsz + mra[i].mr_size) >= physmem_tunable)) {
549 mra[i].mr_size = physmem_tunable - physsz;
550 physsz = physmem_tunable;
551 real_phys_avail[j + 1] = mra[i].mr_start + mra[i].mr_size;
554 physsz += mra[i].mr_size;
555 real_phys_avail[j + 1] = mra[i].mr_start + mra[i].mr_size;
558 physmem = btoc(physsz - physmemstart_tunable);
561 * This is needed for versions of OFW that would allocate us memory
562 * and then forget to remove it from the available ranges ...
563 * as well as for compensating for the above move of nucleus pages
/* bounds starts at 4GB and is used to split ranges in the loop below. */
565 for (i = 0, j = 0, bounds = (1UL<<32); real_phys_avail[i] != 0; i += 2) {
566 vm_paddr_t start = real_phys_avail[i];
567 uint64_t end = real_phys_avail[i + 1];
568 CTR2(KTR_PMAP, "start=%#lx size=%#lx\n", start, end);
569 KDPRINTF("real_phys start=%#lx end=%#lx\n", start, end);
571 * Is kernel memory at the beginning of range?
573 if (nucleus_memory_start == start) {
574 start += nucleus_memory;
577 * Is kernel memory at the end of range?
579 if (nucleus_memory_start == (end - nucleus_memory))
580 end -= nucleus_memory;
582 if (physmemstart_tunable != 0 &&
583 (end < physmemstart_tunable))
586 if (physmemstart_tunable != 0 &&
587 ((start < physmemstart_tunable))) {
588 start = physmemstart_tunable;
592 * Is kernel memory in the middle somewhere?
594 if ((nucleus_memory_start > start) &&
595 (nucleus_memory_start < end)) {
596 phys_avail[j] = start;
597 phys_avail[j+1] = nucleus_memory_start;
598 start = nucleus_memory_start + nucleus_memory;
602 * Break phys_avail up on 4GB boundaries to try
603 * to work around PCI-e allocation bug
604 * we rely on the fact that kernel memory is allocated
605 * from the first 4GB of physical memory
607 while (bounds < start)
610 while (bounds < end) {
611 phys_avail[j] = start;
612 phys_avail[j + 1] = bounds;
617 phys_avail[j] = start;
618 phys_avail[j + 1] = end;
623 * Merge nucleus memory in to real_phys_avail
626 for (i = 0; real_phys_avail[i] != 0; i += 2) {
627 if (real_phys_avail[i] == nucleus_memory_start + nucleus_memory)
628 real_phys_avail[i] -= nucleus_memory;
630 if (real_phys_avail[i + 1] == nucleus_memory_start)
631 real_phys_avail[i + 1] += nucleus_memory;
/* Coalesce this range with the next one when they touch, then shift the rest down. */
633 if (real_phys_avail[i + 1] == real_phys_avail[i + 2]) {
634 real_phys_avail[i + 1] = real_phys_avail[i + 3];
635 for (k = i + 2; real_phys_avail[k] != 0; k += 2) {
636 real_phys_avail[k] = real_phys_avail[k + 2];
637 real_phys_avail[k + 1] = real_phys_avail[k + 3];
641 for (i = 0; phys_avail[i] != 0; i += 2)
642 if (pmap_debug_range || pmap_debug)
643 printf("phys_avail[%d]=0x%lx phys_avail[%d]=0x%lx\n",
644 i, phys_avail[i], i+1, phys_avail[i+1]);
647 * Shuffle the memory range containing the 256MB page with
648 * nucleus_memory to the beginning of the phys_avail array
649 * so that physical memory from that page is preferentially
652 for (j = 0; phys_avail[j] != 0; j += 2)
653 if (nucleus_memory_start < phys_avail[j])
656 * Don't shuffle unless we have a full 256M page in the range
657 * our kernel malloc appears to be horribly brittle
659 if ((phys_avail[j + 1] - phys_avail[j]) <
660 (PAGE_SIZE_256M - nucleus_memory))
663 for (i = j, k = 0; phys_avail[i] != 0; k++, i++)
664 tmp_phys_avail[k] = phys_avail[i];
665 for (i = 0; i < j; i++)
666 tmp_phys_avail[k + i] = phys_avail[i];
667 for (i = 0; i < 128; i++)
668 phys_avail[i] = tmp_phys_avail[i];
671 for (i = 0; real_phys_avail[i] != 0; i += 2)
672 if (pmap_debug_range || pmap_debug)
673 printf("real_phys_avail[%d]=0x%lx real_phys_avail[%d]=0x%lx\n",
674 i, real_phys_avail[i], i+1, real_phys_avail[i+1]);
676 for (i = 0; phys_avail[i] != 0; i += 2)
677 if (pmap_debug_range || pmap_debug)
678 printf("phys_avail[%d]=0x%lx phys_avail[%d]=0x%lx\n",
679 i, phys_avail[i], i+1, phys_avail[i+1]);
681 * Calculate the size of kernel virtual memory, and the size and mask
682 * for the kernel tsb.
684 virtsz = roundup(physsz, PAGE_SIZE_4M << (PAGE_SHIFT - TTE_SHIFT));
685 vm_max_kernel_address = VM_MIN_KERNEL_ADDRESS + virtsz;
688 * Set the start and end of kva. The kernel is loaded at the first
689 * available 4 meg super page, so round up to the end of the page.
691 virtual_avail = roundup2(ekva, PAGE_SIZE_4M);
692 virtual_end = vm_max_kernel_address;
693 kernel_vm_end = vm_max_kernel_address;
696 * Allocate and map a 4MB page for the kernel hashtable
700 kernel_hash_shift = 10; /* PAGE_SIZE_4M*2 */
702 kernel_hash_shift = 6; /* PAGE_SIZE_8K*64 */
705 kernel_hash_pa = pmap_bootstrap_alloc((1<<(kernel_hash_shift + PAGE_SHIFT)));
706 if (kernel_hash_pa & PAGE_MASK_4M)
707 panic("pmap_bootstrap: hashtable pa unaligned\n");
709 * Set up TSB descriptors for the hypervisor
713 tsb_8k_size = virtsz >> (PAGE_SHIFT - TTE_SHIFT);
715 /* avoid alignment complaints from the hypervisor */
716 tsb_8k_size = PAGE_SIZE_4M;
719 tsb_8k_pa = pmap_bootstrap_alloc(tsb_8k_size);
720 if (tsb_8k_pa & PAGE_MASK_4M)
721 panic("pmap_bootstrap: tsb unaligned\n");
722 KDPRINTF("tsb_8k_size is 0x%lx, tsb_8k_pa is 0x%lx\n", tsb_8k_size, tsb_8k_pa);
724 tsb_4m_size = (virtsz >> (PAGE_SHIFT_4M - TTE_SHIFT)) << 3;
725 tsb_4m_pa = pmap_bootstrap_alloc(tsb_4m_size);
727 kernel_td[TSB8K_INDEX].hti_idxpgsz = TTE8K;
728 kernel_td[TSB8K_INDEX].hti_assoc = 1;
729 kernel_td[TSB8K_INDEX].hti_ntte = (tsb_8k_size >> TTE_SHIFT);
730 kernel_td[TSB8K_INDEX].hti_ctx_index = 0;
731 kernel_td[TSB8K_INDEX].hti_pgszs = TSB8K;
732 kernel_td[TSB8K_INDEX].hti_rsvd = 0;
733 kernel_td[TSB8K_INDEX].hti_ra = tsb_8k_pa;
736 * Initialize kernel's private TSB from 8K page TSB
739 kernel_pmap->pm_tsb.hti_idxpgsz = TTE8K;
740 kernel_pmap->pm_tsb.hti_assoc = 1;
741 kernel_pmap->pm_tsb.hti_ntte = (tsb_8k_size >> TTE_SHIFT);
742 kernel_pmap->pm_tsb.hti_ctx_index = 0;
743 kernel_pmap->pm_tsb.hti_pgszs = TSB8K;
744 kernel_pmap->pm_tsb.hti_rsvd = 0;
745 kernel_pmap->pm_tsb.hti_ra = tsb_8k_pa;
747 kernel_pmap->pm_tsb_ra = vtophys((vm_offset_t)&kernel_pmap->pm_tsb);
748 tsb_set_scratchpad_kernel(&kernel_pmap->pm_tsb);
751 * Initialize kernel TSB for 4M pages
752 * currently (not by design) used for permanent mappings
756 KDPRINTF("tsb_4m_pa is 0x%lx tsb_4m_size is 0x%lx\n", tsb_4m_pa, tsb_4m_size);
757 kernel_td[TSB4M_INDEX].hti_idxpgsz = TTE4M;
758 kernel_td[TSB4M_INDEX].hti_assoc = 1;
759 kernel_td[TSB4M_INDEX].hti_ntte = (tsb_4m_size >> TTE_SHIFT);
760 kernel_td[TSB4M_INDEX].hti_ctx_index = 0;
761 kernel_td[TSB4M_INDEX].hti_pgszs = TSB4M|TSB256M;
762 kernel_td[TSB4M_INDEX].hti_rsvd = 0;
763 kernel_td[TSB4M_INDEX].hti_ra = tsb_4m_pa;
765 * allocate MMU fault status areas for all CPUS
767 mmu_fault_status_area = pmap_bootstrap_alloc(MMFSA_SIZE*MAXCPU);
770 * Allocate and map the message buffer.
772 msgbuf_phys = pmap_bootstrap_alloc(MSGBUF_SIZE);
773 msgbufp = (struct msgbuf *)TLB_PHYS_TO_DIRECT(msgbuf_phys);
776 * Allocate a kernel stack with guard page for thread0 and map it into
779 pa = pmap_bootstrap_alloc(KSTACK_PAGES*PAGE_SIZE);
781 virtual_avail += KSTACK_GUARD_PAGES * PAGE_SIZE;
782 kstack0 = virtual_avail;
783 virtual_avail += KSTACK_PAGES * PAGE_SIZE;
784 for (i = 0; i < KSTACK_PAGES; i++) {
785 pa = kstack0_phys + i * PAGE_SIZE;
786 va = kstack0 + i * PAGE_SIZE;
787 tsb_set_tte_real(&kernel_td[TSB8K_INDEX], va, va,
788 pa | TTE_KERNEL | VTD_8K, 0);
791 * Calculate the last available physical address.
793 for (i = 0; phys_avail[i + 2] != 0; i += 2)
794 KDPRINTF("phys_avail[%d]=0x%lx phys_avail[%d]=0x%lx\n",
795 i, phys_avail[i], i+1, phys_avail[i+1]);
796 KDPRINTF("phys_avail[%d]=0x%lx phys_avail[%d]=0x%lx\n",
797 i, phys_avail[i], i+1, phys_avail[i+1]);
799 Maxmem = sparc64_btop(phys_avail[i + 1]);
802 * Add the prom mappings to the kernel tsb.
/*
 * NOTE(review): sz was last assigned while handling the /memory available
 * property, while the translation count was saved in translations_size
 * (which the later hash loop uses).  Confirm which bound is intended here.
 */
804 for (i = 0; i < sz; i++) {
806 "translation: start=%#lx size=%#lx tte=%#lx",
807 translations[i].om_start, translations[i].om_size,
808 translations[i].om_tte);
809 KDPRINTF("om_size=%ld om_start=%lx om_tte=%lx\n",
810 translations[i].om_size, translations[i].om_start,
811 translations[i].om_tte);
813 if (translations[i].om_start < VM_MIN_PROM_ADDRESS ||
814 translations[i].om_start > VM_MAX_PROM_ADDRESS)
817 for (off = 0; off < translations[i].om_size;
819 va = translations[i].om_start + off;
820 pa = TTE_GET_PA(translations[i].om_tte) + off;
821 tsb_assert_invalid(&kernel_td[TSB8K_INDEX], va);
822 tsb_set_tte_real(&kernel_td[TSB8K_INDEX], va, va, pa |
823 TTE_KERNEL | VTD_8K, 0);
827 if ((error = hv_mmu_tsb_ctx0(MAX_TSB_INFO,
828 vtophys((vm_offset_t)kernel_td))) != H_EOK)
829 panic("failed to set ctx0 TSBs error: %ld", error);
832 mp_set_tsb_desc_ra(vtophys((vm_offset_t)&kernel_td));
835 * setup direct mappings
838 for (i = 0, pa = real_phys_avail[i]; pa != 0; i += 2, pa = real_phys_avail[i]) {
839 vm_paddr_t tag_pa = 0, next_pa = 0;
840 uint64_t size_bits = VTD_4M;
841 while (pa < real_phys_avail[i + 1]) {
842 if (use_256M_pages &&
843 (pa & PAGE_MASK_256M) == 0 &&
844 ((pa + PAGE_SIZE_256M) <= real_phys_avail[i + 1])) {
846 size_bits = VTD_256M;
847 next_pa = pa + PAGE_SIZE_256M;
848 } else if (next_pa <= pa) {
852 tsb_assert_invalid(&kernel_td[TSB4M_INDEX], TLB_PHYS_TO_DIRECT(pa));
853 tsb_set_tte_real(&kernel_td[TSB4M_INDEX], TLB_PHYS_TO_DIRECT(pa),
854 TLB_PHYS_TO_DIRECT(pa),
855 tag_pa | TTE_KERNEL | size_bits, 0);
861 * Get the available physical memory ranges from /memory/reg. These
862 * are only used for kernel dumps, but it may not be wise to do prom
863 * calls in that situation.
865 if ((sz = OF_getproplen(pmem, "reg")) == -1)
866 panic("pmap_bootstrap: getproplen /memory/reg");
867 if (sizeof(sparc64_memreg) < sz)
868 panic("pmap_bootstrap: sparc64_memreg too small");
869 if (OF_getprop(pmem, "reg", sparc64_memreg, sz) == -1)
870 panic("pmap_bootstrap: getprop /memory/reg");
871 sparc64_nmemreg = sz / sizeof(*sparc64_memreg);
875 pm->pm_tlbactive = ~0;
877 PMAP_LOCK_INIT(kernel_pmap);
879 TAILQ_INIT(&kernel_pmap->pm_pvlist);
882 * This could happen earlier - but I put it here to avoid
883 * attempts to do updates until they're legal
885 pm->pm_hash = tte_hash_kernel_create(TLB_PHYS_TO_DIRECT(kernel_hash_pa), kernel_hash_shift,
886 pmap_bootstrap_alloc(PAGE_SIZE));
887 pm->pm_hashscratch = tte_hash_set_scratchpad_kernel(pm->pm_hash);
889 for (i = 0; i < translations_size; i++) {
890 KDPRINTF("om_size=%ld om_start=%lx om_tte=%lx\n",
891 translations[i].om_size, translations[i].om_start,
892 translations[i].om_tte);
894 if (translations[i].om_start < VM_MIN_PROM_ADDRESS ||
895 translations[i].om_start > VM_MAX_PROM_ADDRESS) {
896 KDPRINTF("skipping\n");
899 for (off = 0; off < translations[i].om_size; off += PAGE_SIZE) {
900 va = translations[i].om_start + off;
901 pa = TTE_GET_PA(translations[i].om_tte) + off;
902 tte_hash_insert(pm->pm_hash, va, pa | TTE_KERNEL | VTD_8K);
904 KDPRINTF("set om_size=%ld om_start=%lx om_tte=%lx\n",
905 translations[i].om_size, translations[i].om_start,
906 translations[i].om_tte);
908 for (i = 0; i < KSTACK_PAGES; i++) {
909 pa = kstack0_phys + i * PAGE_SIZE;
910 va = kstack0 + i * PAGE_SIZE;
911 tte_hash_insert(pm->pm_hash, va, pa | TTE_KERNEL | VTD_8K);
914 * Add direct mappings to hash
918 /* hash only supports 8k pages */
919 for (pa = PAGE_SIZE_4M; pa < phys_avail[2]; pa += PAGE_SIZE_4M)
920 tte_hash_insert(pm->pm_hash, TLB_PHYS_TO_DIRECT(pa),
921 pa | TTE_KERNEL | VTD_4M);
926 printf("pmap_bootstrap done\n");
932 * Routine: pmap_change_wiring
933 * Function: Change the wiring attribute for a map/virtual-address
936 * The mapping must already exist in the pmap.
/*
 * pmap_change_wiring: set or clear the wired attribute of the mapping at
 * va in pmap.
 *
 * The current state is read from the VTD_WIRED bit of the mapping.  Only a
 * real transition changes anything: the pmap wired_count statistic is
 * adjusted and the VTD_WIRED bit is set or cleared to match.
 *
 * NOTE(review): locking statements, if any, are not visible in this
 * listing.
 */
939 pmap_change_wiring(pmap_t pmap, vm_offset_t va, boolean_t wired)
943 iswired = tte_get_virt_bit(pmap, va, VTD_WIRED);
/* Unwired to wired. */
945 if (wired && !iswired) {
946 pmap->pm_stats.wired_count++;
947 tte_set_virt_bit(pmap, va, VTD_WIRED);
/* Wired to unwired. */
948 } else if (!wired && iswired) {
949 pmap->pm_stats.wired_count--;
950 tte_clear_virt_bit(pmap, va, VTD_WIRED);
/*
 * pmap_clear_modify: clear the VTD_W bit for page m by calling
 * tte_clear_phys_bit.  A debug trace with the physical address of the page
 * is emitted through KDPRINTF first.
 */
956 pmap_clear_modify(vm_page_t m)
958 KDPRINTF("pmap_clear_modify(0x%lx)\n", VM_PAGE_TO_PHYS(m));
959 tte_clear_phys_bit(m, VTD_W);
/*
 * pmap_clear_reference: clear the VTD_REF bit for page m by calling
 * tte_clear_phys_bit.  A debug trace with the physical address of the page
 * is emitted through KDPRINTF first.
 */
963 pmap_clear_reference(vm_page_t m)
965 KDPRINTF("pmap_clear_reference(0x%lx)\n", VM_PAGE_TO_PHYS(m));
966 tte_clear_phys_bit(m, VTD_REF);
/*
 * pmap_copy: copy managed mappings for the range [src_addr, src_addr + len)
 * from src_pmap into dst_pmap.
 *
 * Visible behaviour:
 *  - the free page count and the pv entry count are tested against their
 *    limits before any work is done (the branch body is not visible);
 *  - the page queues are locked, and the order of the two pmaps is tested
 *    by address (the lock statements themselves are not visible);
 *  - for every page-sized step in the range, a source mapping carrying
 *    VTD_MANAGED is copied when dst_pmap has no mapping at that address;
 *    the copy has VTD_W, VTD_REF and VTD_WIRED cleared, and the
 *    destination resident count and pv list are updated;
 *  - the page queues and both pmaps are unlocked at the end.
 *
 * NOTE(review): dst_addr is not referenced by any visible line; the same
 * address is used for both pmaps.  Hidden lines may check dst_addr against
 * src_addr -- confirm against the full file.
 */
970 pmap_copy(pmap_t dst_pmap, pmap_t src_pmap, vm_offset_t dst_addr,
971 vm_size_t len, vm_offset_t src_addr)
973 vm_offset_t addr, end_addr;
975 end_addr = src_addr + len;
977 * Don't let optional prefaulting of pages make us go
978 * way below the low water mark of free pages or way
979 * above high water mark of used pv entries.
981 if (cnt.v_free_count < cnt.v_free_reserved ||
982 pv_entry_count > pv_entry_high_water)
986 vm_page_lock_queues();
987 if (dst_pmap < src_pmap) {
994 for (addr = src_addr; addr < end_addr; addr += PAGE_SIZE) {
998 tte_data = tte_hash_lookup(src_pmap->pm_hash, addr);
1000 if ((tte_data & VTD_MANAGED) != 0) {
1001 if (tte_hash_lookup(dst_pmap->pm_hash, addr) == 0) {
1002 m = PHYS_TO_VM_PAGE(TTE_GET_PA(tte_data));
1004 tte_hash_insert(dst_pmap->pm_hash, addr, tte_data & ~(VTD_W|VTD_REF|VTD_WIRED));
1005 dst_pmap->pm_stats.resident_count++;
1006 pmap_insert_entry(dst_pmap, addr, m);
1010 vm_page_unlock_queues();
1011 PMAP_UNLOCK(src_pmap);
1012 PMAP_UNLOCK(dst_pmap);
/*
 * pmap_copy_page: copy PAGE_SIZE bytes between the pages src and dst.
 *
 * Both pages are addressed through their direct mappings
 * (TLB_PHYS_TO_DIRECT of the physical address) and the copy is performed
 * by novbcopy.
 */
1016 pmap_copy_page(vm_page_t src, vm_page_t dst)
1018 vm_paddr_t srcpa, dstpa;
1019 srcpa = VM_PAGE_TO_PHYS(src);
1020 dstpa = VM_PAGE_TO_PHYS(dst);
1022 novbcopy((char *)TLB_PHYS_TO_DIRECT(srcpa), (char *)TLB_PHYS_TO_DIRECT(dstpa), PAGE_SIZE);
/*
 * pmap_add_tte: bookkeeping helper for entering page m at va in pmap.
 *
 * Visible behaviour: the pmap wired_count statistic is incremented
 * (the guarding condition is not visible; presumably it tests wired --
 * confirm), and for a page that is neither fictitious nor unmanaged a pv
 * entry is inserted and VTD_MANAGED is set in *tte_data.
 *
 * tte_data: in/out pointer to the tte value being built by the caller.
 */
1027 static __inline void
1028 pmap_add_tte(pmap_t pmap, vm_offset_t va, vm_page_t m, tte_t *tte_data, int wired)
1032 pmap->pm_stats.wired_count++;
/* Only managed pages are tracked on pv lists. */
1034 if ((m->flags & (PG_FICTITIOUS | PG_UNMANAGED)) == 0) {
1035 pmap_insert_entry(pmap, va, m);
1036 *tte_data |= VTD_MANAGED;
1041 * Map the given physical page at the specified virtual address in the
1042 * target pmap with the protection requested. If specified the page
1043 * will be wired down.
/*
 * pmap_enter: install a mapping of page m at va in pmap.
 *
 * Visible behaviour:
 *  - the page queues are locked for the duration;
 *  - any existing entry for va is removed from the tte hash first, and its
 *    old contents select one of three cases: a new mapping, a mapping
 *    whose physical address changed (old wiring and pv state are undone),
 *    or an unchanged physical address (only wiring statistics and the
 *    managed bit are carried over);
 *  - write permission adds VTD_SW_W and flags the page PG_WRITEABLE;
 *    wiring adds VTD_WIRED;
 *  - when the old entry was valid and differs from the new one (ignoring
 *    VTD_W and VTD_REF), status bits of the old entry are examined and the
 *    page mapping is invalidated with pmap_invalidate_page;
 *  - the new entry is inserted into the tte hash and the TSB with
 *    TTE_MINFLAGS and VTD_REF added, and the hash is resized if
 *    tte_hash_needs_resize reports that it is needed;
 *  - the TSB resize call is guarded by a condition that starts with a
 *    constant 0, so it is never taken as written.
 *
 * NOTE(review): many statements are missing from this listing (branch
 * conditions and bodies, lock and unlock of the pmap, several
 * assignments); the notes above describe only the visible lines.  The
 * access parameter is not referenced by any visible line.
 */
1046 pmap_enter(pmap_t pmap, vm_offset_t va, vm_prot_t access, vm_page_t m,
1047 vm_prot_t prot, boolean_t wired)
1050 uint64_t tte_data, otte_data;
1054 if (pmap->pm_context)
1055 DPRINTF("pmap_enter(va=%lx, pa=0x%lx, prot=%x)\n", va,
1056 VM_PAGE_TO_PHYS(m), prot);
1060 vm_page_lock_queues();
/* The new tte starts out as the bare physical address of the page. */
1063 tte_data = pa = VM_PAGE_TO_PHYS(m);
/* Remove any existing entry for va; its old value drives the cases below. */
1064 otte_data = tte_hash_delete(pmap->pm_hash, va);
1065 opa = TTE_GET_PA(otte_data);
1069 * This is a new mapping
1071 pmap->pm_stats.resident_count++;
1072 pmap_add_tte(pmap, va, m, &tte_data, wired);
1074 } else if (pa != opa) {
1076 * Mapping has changed, handle validating new mapping.
1079 if (otte_data & VTD_WIRED)
1080 pmap->pm_stats.wired_count--;
1082 if (otte_data & VTD_MANAGED) {
1083 om = PHYS_TO_VM_PAGE(opa);
1084 pmap_remove_entry(pmap, om, va);
1087 pmap_add_tte(pmap, va, m, &tte_data, wired);
1089 } else /* (pa == opa) */ {
1091 * Mapping has not changed, must be protection or wiring change.
1095 * Wiring change, just update stats. We don't worry about
1096 * wiring PT pages as they remain resident as long as there
1097 * are valid mappings in them. Hence, if a user page is wired,
1098 * the PT page will be also.
1100 if (wired && ((otte_data & VTD_WIRED) == 0))
1101 pmap->pm_stats.wired_count++;
1102 else if (!wired && (otte_data & VTD_WIRED))
1103 pmap->pm_stats.wired_count--;
1106 * We might be turning off write access to the page,
1107 * so we go ahead and sense modify status.
1109 if (otte_data & VTD_MANAGED) {
1111 tte_data |= VTD_MANAGED;
1116 * Now validate mapping with desired protection/wiring.
1118 if ((prot & VM_PROT_WRITE) != 0) {
1119 tte_data |= VTD_SW_W;
1120 vm_page_flag_set(m, PG_WRITEABLE);
1122 if ((prot & VM_PROT_EXECUTE) != 0)
1125 tte_data |= VTD_WIRED;
1126 if (pmap == kernel_pmap)
1130 if ((otte_data & ~(VTD_W|VTD_REF)) != tte_data) {
1131 if (otte_data & VTD_V) {
1132 if (otte_data & VTD_REF) {
1133 if (otte_data & VTD_MANAGED)
1134 vm_page_flag_set(om, PG_REFERENCED);
1135 if ((opa != pa) || ((opa & VTD_X) != (pa & VTD_X)))
1138 if (otte_data & VTD_W) {
1139 if (otte_data & VTD_MANAGED)
1141 if ((pa & VTD_SW_W) != 0)
1145 pmap_invalidate_page(pmap, va, TRUE);
1150 tte_hash_insert(pmap->pm_hash, va, tte_data|TTE_MINFLAGS|VTD_REF);
1152 * XXX this needs to be locked for the threaded / kernel case
1154 tsb_set_tte(&pmap->pm_tsb, va, tte_data|TTE_MINFLAGS|VTD_REF,
1157 if (tte_hash_needs_resize(pmap->pm_hash))
1158 pmap_tte_hash_resize(pmap);
1161 * 512 is an arbitrary number of tsb misses
1163 if (0 && pmap->pm_context != 0 && pmap->pm_tsb_miss_count > 512)
1164 pmap_tsb_resize(pmap);
1166 vm_page_unlock_queues();
1172 * Maps a sequence of resident pages belonging to the same object.
1173 * The sequence begins with the given page m_start. This page is
1174 * mapped at the given virtual address start. Each subsequent page is
1175 * mapped at a virtual address that is offset from start by the same
1176 * amount as the page is offset from m_start within the object. The
1177 * last page in the sequence is the page with the largest offset from
1178 * m_start that can be mapped at a virtual address less than the given
1179 * virtual address end. Not every virtual page between start and end
1180 * is mapped; only those for which a resident page exists with the
1181 * corresponding offset from m_start are mapped.
/*
 * pmap_enter_object: map a run of resident pages of one object, starting
 * with m_start at virtual address start and stopping before end.
 *
 * The lock of the object owning m_start must be held (asserted below).
 * Each page reached through the listq links whose index offset from
 * m_start is below the page count of [start, end) is entered with
 * pmap_enter_quick_locked at start plus that offset.
 *
 * NOTE(review): the initial assignment of m and any pmap locking are not
 * visible in this listing.
 */
1184 pmap_enter_object(pmap_t pmap, vm_offset_t start, vm_offset_t end,
1185 vm_page_t m_start, vm_prot_t prot)
1188 vm_pindex_t diff, psize;
1190 VM_OBJECT_LOCK_ASSERT(m_start->object, MA_OWNED);
/* Number of pages covered by the virtual range. */
1191 psize = atop(end - start);
/* diff is the page index offset of m from m_start within the object. */
1194 while (m != NULL && (diff = m->pindex - m_start->pindex) < psize) {
1195 pmap_enter_quick_locked(pmap, start + ptoa(diff), m, prot);
1196 m = TAILQ_NEXT(m, listq);
/*
 * pmap_enter_quick: enter page m at va in pmap by delegating to
 * pmap_enter_quick_locked with the same arguments.
 *
 * NOTE(review): pmap_enter_quick_locked asserts that the pmap lock is
 * owned; presumably the lock and unlock statements surround the call on
 * lines that are missing from this listing -- confirm.
 */
1202 pmap_enter_quick(pmap_t pmap, vm_offset_t va, vm_page_t m, vm_prot_t prot)
1205 pmap_enter_quick_locked(pmap, va, m, prot);
/*
 * pmap_enter_quick_locked: enter page m at va in a pmap whose lock is
 * already owned by the caller (asserted below).
 *
 * Visible behaviour: an existing mapping at va is tested for (the branch
 * body is not visible; presumably the function returns early -- confirm),
 * the tte is built from the physical address of the page, a pv entry and
 * VTD_MANAGED are added for pages that are neither fictitious nor
 * unmanaged, the resident count is incremented, and the entry is inserted
 * into the tte hash with TTE_MINFLAGS added.
 *
 * NOTE(review): of the prot bits only VM_PROT_EXECUTE is tested in the
 * visible lines, and the body of that test is not shown.
 */
1210 pmap_enter_quick_locked(pmap_t pmap, vm_offset_t va, vm_page_t m, vm_prot_t prot)
1215 if (pmap->pm_context)
1216 KDPRINTF("pmap_enter_quick(ctx=0x%lx va=%lx, pa=0x%lx prot=%x)\n",
1217 pmap->pm_context, va, VM_PAGE_TO_PHYS(m), prot);
1219 PMAP_LOCK_ASSERT(pmap, MA_OWNED);
/* Test for an existing mapping at va. */
1220 if (tte_hash_lookup(pmap->pm_hash, va))
1223 tte_data = VM_PAGE_TO_PHYS(m);
1225 * Enter on the PV list if part of our managed memory. Note that we
1226 * raise IPL while manipulating pv_table since pmap_enter can be
1227 * called at interrupt time.
1229 if ((m->flags & (PG_FICTITIOUS|PG_UNMANAGED)) == 0) {
1230 pmap_insert_entry(pmap, va, m);
1231 tte_data |= VTD_MANAGED;
1234 pmap->pm_stats.resident_count++;
1236 if ((prot & VM_PROT_EXECUTE) != 0)
1239 tte_hash_insert(pmap->pm_hash, va, tte_data | TTE_MINFLAGS);
1243 * Extract the physical page address associated with the given
1244 * map/virtual_address pair.
/*
 * Looks up va in the pmap's TTE hash and builds the physical address from
 * the TTE's physical page address plus the bits of va selected by
 * TTE_GET_PAGE_MASK().
 * NOTE(review): no test for a zero (missing) TTE is visible here -- confirm
 * how an unmapped va is handled.
 */
1247 pmap_extract(pmap_t pmap, vm_offset_t va)
1252 tte_data = tte_hash_lookup(pmap->pm_hash, va);
1253 pa = TTE_GET_PA(tte_data) | (va & TTE_GET_PAGE_MASK(tte_data));
1259 * Atomically extract and hold the physical page with the given
1260 * pmap and virtual address pair if that mapping permits the given
/*
 * Looks up va while holding the page queues lock.  A page is selected only
 * when a TTE exists and either the TTE has VTD_SW_W set or the caller did
 * not request VM_PROT_WRITE.  The hold on the page and the return statement
 * are on lines elided from this listing.
 */
1264 pmap_extract_and_hold(pmap_t pmap, vm_offset_t va, vm_prot_t prot)
1270 vm_page_lock_queues();
1272 tte_data = tte_hash_lookup(pmap->pm_hash, va);
/* Write requests are honoured only for software-writable mappings. */
1273 if (tte_data != 0 &&
1274 ((tte_data & VTD_SW_W) || (prot & VM_PROT_WRITE) == 0)) {
1275 m = PHYS_TO_VM_PAGE(TTE_GET_PA(tte_data));
1278 vm_page_unlock_queues();
/*
 * Obtains npages physically contiguous pages with the requested alignment
 * and computes their address in the direct-mapped region (ptr).  Each
 * phys_avail[] range is offered to vm_phys_alloc_contig() in turn; on
 * failure a message is printed and, per that message, the allocation is
 * retried (the wait itself is on elided lines).
 */
1285 pmap_alloc_zeroed_contig_pages(int npages, uint64_t alignment)
/* phys_avail[] holds (start, end) pairs terminated by a zero end entry. */
1293 for (i = 0; phys_avail[i + 1] != 0; i += 2) {
1294 m = vm_phys_alloc_contig(npages, phys_avail[i],
1295 phys_avail[i + 1], alignment, (1UL<<34));
1300 printf("vm_phys_alloc_contig failed - waiting to retry\n");
1305 for (i = 0, tm = m; i < npages; i++, tm++) {
/*
 * NOTE(review): the action for pages lacking PG_ZERO is elided; given the
 * function name it presumably zeroes the page -- confirm.
 */
1307 if ((tm->flags & PG_ZERO) == 0)
1310 ptr = (void *)TLB_PHYS_TO_DIRECT(VM_PAGE_TO_PHYS(m));
/*
 * Releases npages pages starting at the direct-mapped address ptr.  For each
 * page the global wired-page counter cnt.v_wire_count is decremented
 * atomically; the remaining per-page release steps are elided from this
 * listing.
 */
1316 pmap_free_contig_pages(void *ptr, int npages)
/* Convert the direct-map address back to its first vm_page. */
1321 m = PHYS_TO_VM_PAGE(TLB_DIRECT_TO_PHYS((vm_offset_t)ptr));
1322 for (i = 0; i < npages; i++, m++) {
1324 atomic_subtract_int(&cnt.v_wire_count, 1);
1330 pmap_growkernel(vm_offset_t addr)
/*
 * NOTE(review): the function header for the lines below is not visible in
 * this listing.  They are presumably the body of a separate initialisation
 * routine (pmap_init?) rather than of pmap_growkernel() -- confirm.
 *
 * Visible work: fill the context stack, create the context spin mutex, and
 * create and size the pv entry zone.
 */
1339 /* allocate pv_entry zones */
1340 int shpgperproc = PMAP_SHPGPERPROC;
/* Slot n of the context stack initially holds context number n (from 1). */
1342 for (ctx_stack_top = 1; ctx_stack_top < PMAP_CONTEXT_MAX; ctx_stack_top++)
1343 ctx_stack[ctx_stack_top] = ctx_stack_top;
1345 mtx_init(&pmap_ctx_lock, "ctx lock", NULL, MTX_SPIN);
1348 * Initialize the address space (zone) for the pv entries. Set a
1349 * high water mark so that the system can recover from excessive
1350 * numbers of pv entries.
1352 pvzone = uma_zcreate("PV ENTRY", sizeof(struct pv_entry), NULL, NULL,
1353 NULL, NULL, UMA_ALIGN_PTR, UMA_ZONE_VM | UMA_ZONE_NOFREE);
/* Both the per-process share and the final limit can be overridden by tunables. */
1354 TUNABLE_INT_FETCH("vm.pmap.shpgperproc", &shpgperproc);
1355 pv_entry_max = shpgperproc * maxproc + cnt.v_page_count;
1356 TUNABLE_INT_FETCH("vm.pmap.pv_entries", &pv_entry_max);
/* High-water mark is 90% of the maximum. */
1357 pv_entry_high_water = 9 * (pv_entry_max / 10);
1358 uma_zone_set_obj(pvzone, &pvzone_obj, pv_entry_max);
1365 * Create a pv entry for page at pa for
/*
 * Obtains a pv entry from get_pv_entry() and appends it to both the pmap's
 * pv list and the page's pv list, incrementing the page's pv count.  Both
 * the pmap lock and the page queues mutex must be held (asserted).  The
 * lines that fill in the entry's fields are elided from this listing.
 */
1369 pmap_insert_entry(pmap_t pmap, vm_offset_t va, vm_page_t m)
1373 KDPRINTF("pmap_insert_entry(va=0x%lx, pa=0x%lx)\n", va, VM_PAGE_TO_PHYS(m));
1374 pv = get_pv_entry(pmap);
1378 PMAP_LOCK_ASSERT(pmap, MA_OWNED);
1379 mtx_assert(&vm_page_queue_mtx, MA_OWNED);
1380 TAILQ_INSERT_TAIL(&pmap->pm_pvlist, pv, pv_plist);
1381 TAILQ_INSERT_TAIL(&m->md.pv_list, pv, pv_list);
1382 m->md.pv_list_count++;
/* Set once the trap trace has been dumped so the report is printed only once. */
1386 static int trap_trace_report_done;
/*
 * Delivers the cross-call func(arg1, arg2) to other CPUs on behalf of pmap
 * via cpu_ipi_selected() and spins until every targeted CPU has
 * acknowledged.  Returns active_total, the accumulated mask of CPUs that
 * were targeted.  Panics unless the processor interrupt level is 14.
 *
 * NOTE(review): the masks are built with int-typed shifts (1 << i); this
 * assumes CPU ids stay below the width of int -- confirm against MAXCPU.
 */
1391 pmap_ipi(pmap_t pmap, char *func, uint64_t arg1, uint64_t arg2)
1394 int i, cpu_count, retried;
1396 cpumask_t cpumask, active, curactive;
1397 cpumask_t active_total, ackmask;
1405 cpumask = PCPU_GET(cpumask);
1406 cpulist = PCPU_GET(cpulist);
1409 if (rdpr(pil) != 14)
1410 panic("pil %ld != 14", rdpr(pil));
1412 #ifndef CPUMASK_NOT_BEING_ERRONEOUSLY_CHANGED
1413 /* by definition cpumask should have curcpu's bit set */
1414 if (cpumask != (1 << curcpu))
1415 panic("cpumask(0x%x) != (1 << curcpu) (0x%x)\n",
1416 cpumask, (1 << curcpu));
/*
 * No other CPU has this pmap in pm_tlbactive.  NOTE(review): the action
 * taken is on an elided line, presumably skipping the IPI -- confirm.
 */
1420 if ((active_total = (pmap->pm_tlbactive & ~cpumask)) == 0)
/*
 * Non-zero contexts target only CPUs recorded in pm_tlbactive.
 * NOTE(review): an else before the second assignment is presumably elided;
 * as listed it would always overwrite the first -- confirm.
 */
1423 if (pmap->pm_context != 0)
1424 active_total = active = (pmap->pm_tlbactive & ~cpumask);
1427 active_total = active = PCPU_GET(other_cpus);
/* Turn the bitmask into a CPU id list; curactive records the bits targeted. */
1434 for (i = curactive = cpu_count = 0, cpus = active; i < mp_ncpus && cpus; i++, cpus = (cpus>>1)) {
1435 if ((cpus & 0x1) == 0)
1438 curactive |= (1 << i);
1439 cpulist[cpu_count] = (uint16_t)i;
1444 cpu_ipi_selected(cpu_count, cpulist, (uint64_t)func, (uint64_t)arg1,
1445 (uint64_t)arg2, (uint64_t *)&ackmask);
/*
 * Busy-wait until every targeted CPU has acknowledged.  The diagnostics
 * below report CPUs that have not; the test that triggers them is on
 * elided lines.
 */
1447 while (ackmask != curactive) {
1455 printf("cpu with cpumask=0x%x appears to not be responding to ipis\n",
1456 curactive & ~ackmask);
1459 if (!trap_trace_report_done) {
1460 trap_trace_report_done = 1;
1461 for (j = 0; j < MAXCPU; j++)
1462 if (((1 << j) & curactive & ~ackmask) != 0) {
1463 struct pcpu *pc = pcpu_find(j);
1464 printf("pcpu pad 0x%jx 0x%jx 0x%jx 0x%jx 0x%jx 0x%jx 0x%jx\n",
1465 pc->pad[0], pc->pad[1], pc->pad[2], pc->pad[3],
1466 pc->pad[4], pc->pad[5], pc->pad[6]);
1467 trap_trace_report(j);
1472 hv_cpu_state((uint64_t)ffs64(curactive & ~ackmask), &cpu_state);
1473 printf("cpu_state of %ld is %ld\n", ffs64(curactive & ~ackmask), cpu_state);
1475 printf("I'm going to send off another ipi just to confirm that it isn't a memory barrier bug\n"
1476 "and then I'm going to panic\n");
1482 panic(" ackmask=0x%x active=0x%x\n", ackmask, curactive);
/* Accumulate the CPUs handled, then look for CPUs that became active meanwhile. */
1486 active_total |= curactive;
1487 if ((active = ((pmap->pm_tlbactive & all_cpus) & ~(active_total|cpumask))) != 0) {
1488 printf("pmap_ipi: retrying");
1492 return (active_total);
/*
 * Invalidates the translation for va in this pmap's context.  When cleartsb
 * is TRUE the TSB entry is cleared first.  The local TLB entry is flushed
 * with invlpg() and other CPUs are asked to do the same through pmap_ipi()
 * with tl_invlpg.
 */
1497 pmap_invalidate_page(pmap_t pmap, vm_offset_t va, int cleartsb)
1500 if (cleartsb == TRUE)
1501 tsb_clear_tte(&pmap->pm_tsb, va);
1503 DPRINTF("pmap_invalidate_page(va=0x%lx)\n", va);
1505 invlpg(va, pmap->pm_context);
1507 pmap_ipi(pmap, (void *)tl_invlpg, (uint64_t)va, (uint64_t)pmap->pm_context);
/*
 * Invalidates the translations for [sva, eva) in this pmap's context on the
 * local CPU and, through pmap_ipi(), on other CPUs.
 *
 * A single-page range is handed to pmap_invalidate_page().  When cleartsb is
 * TRUE the matching TSB entries are cleared first.  Ranges shorter than 64
 * pages are flushed page by page with invlpg(); longer ranges in a non-zero
 * context flush the whole context with invlctx() (the context-0 case is on
 * lines elided from this listing).  CPUs returned by pmap_ipi() that are
 * not in pm_active are then cleared from pm_tlbactive.
 */
1513 pmap_invalidate_range(pmap_t pmap, vm_offset_t sva, vm_offset_t eva, int cleartsb)
1515 vm_offset_t tva, invlrngva;
1520 if ((eva - sva) == PAGE_SIZE) {
1521 pmap_invalidate_page(pmap, sva, cleartsb);
1526 KASSERT(sva < eva, ("invalidating negative or zero range sva=0x%lx eva=0x%lx", sva, eva));
1528 if (cleartsb == TRUE)
1529 tsb_clear_range(&pmap->pm_tsb, sva, eva);
/*
 * The length must be computed as eva - sva.  vm_offset_t is unsigned and
 * sva < eva is asserted above, so sva - eva wraps to a huge value and the
 * per-page path below would never be taken.
 */
1532 if ((eva - sva) < PAGE_SIZE*64) {
1533 for (tva = sva; tva < eva; tva += PAGE_SIZE_8K)
1534 invlpg(tva, pmap->pm_context);
1536 } else if (pmap->pm_context) {
1538 invlctx(pmap->pm_context);
/* Pack the start address and the page count into a single IPI argument. */
1545 invlrngva = sva | ((eva - sva) >> PAGE_SHIFT);
1546 active = pmap_ipi(pmap, (void *)func, pmap->pm_context, invlrngva);
/* CPUs that were flushed but no longer run this pmap need no further IPIs. */
1547 active &= ~pmap->pm_active;
1548 atomic_clear_int(&pmap->pm_tlbactive, active);
/*
 * Flushes every translation belonging to a pmap: clears its TSB,
 * invalidates the context locally with invlctx(), sends tl_invlctx to other
 * CPUs through pmap_ipi(), and finally resets pm_tlbactive to pm_active.
 * Must not be called on kernel_pmap (asserted).
 */
1554 pmap_invalidate_all(pmap_t pmap)
1557 KASSERT(pmap != kernel_pmap, ("invalidate_all called on kernel_pmap"));
1559 tsb_clear(&pmap->pm_tsb);
1562 invlctx(pmap->pm_context);
1564 pmap_ipi(pmap, tl_invlctx, pmap->pm_context, 0);
1565 pmap->pm_tlbactive = pmap->pm_active;
/*
 * Reports whether page m is modified, as the result of
 * tte_get_phys_bit(m, VTD_W).
 */
1571 pmap_is_modified(vm_page_t m)
1574 return (tte_get_phys_bit(m, VTD_W));
/*
 * Returns true when the pmap's TTE hash holds no entry for va, i.e. the
 * lookup returns 0.
 */
1579 pmap_is_prefaultable(pmap_t pmap, vm_offset_t va)
1581 return (tte_hash_lookup(pmap->pm_hash, va) == 0);
1585 * Extract the physical page address associated with the given kernel virtual
/*
 * Translates a kernel virtual address by trying three sources in order:
 * the nucleus_mappings[] table, indexed in 4MB units, for addresses inside
 * the nucleus range; the TSB via tsb_lookup_tte(va, 0); and the kernel
 * pmap's TTE hash.  The later sources are consulted only while pa is 0.
 * NOTE(review): the first test uses va > KERNBASE, so va == KERNBASE skips
 * the nucleus table and relies on the later lookups -- confirm intended.
 */
1590 pmap_kextract(vm_offset_t va)
1596 if (va > KERNBASE && va < KERNBASE + nucleus_memory) {
1598 offset = va - KERNBASE;
/* offset >> 22 selects the 4MB nucleus mapping that contains va. */
1599 pa = nucleus_mappings[offset >> 22] | (va & PAGE_MASK_4M);
1601 if ((pa == 0) && (tte_data = tsb_lookup_tte(va, 0)) != 0)
1602 pa = TTE_GET_PA(tte_data) | (va & TTE_GET_PAGE_MASK(tte_data));
1604 if ((pa == 0) && (tte_data = tte_hash_lookup(kernel_pmap->pm_hash, va)) != 0)
1605 pa = TTE_GET_PA(tte_data) | (va & TTE_GET_PAGE_MASK(tte_data));
1611 * Map a range of physical addresses into kernel virtual address space.
1613 * The value passed in *virt is a suggested virtual address for the mapping.
1614 * Architectures which can support a direct-mapped physical to virtual region
1615 * can return the appropriate address within that region, leaving '*virt'
/*
 * Returns the direct-mapped virtual address of the physical address start.
 * On the visible lines virt, end and prot are not used.
 */
1619 pmap_map(vm_offset_t *virt, vm_paddr_t start, vm_paddr_t end, int prot)
1621 return TLB_PHYS_TO_DIRECT(start);
1625 pmap_mincore(pmap_t pmap, vm_offset_t addr)
/*
 * On the visible lines this routine only prints its own name; none of its
 * arguments are used.  Any further work would be on elided lines.
 */
1631 pmap_object_init_pt(pmap_t pmap, vm_offset_t addr, vm_object_t object,
1632 vm_pindex_t index, vm_size_t size)
1634 printf("pmap_object_init_pt\n");
1639 * Returns true if the pmap's pv is one of the first
1640 * 16 pvs linked to from this page. This count may
1641 * be changed upwards or downwards in the future; it
1642 * is only necessary that true be returned for a small
1643 * subset of pmaps for proper page aging.
/*
 * Scans the pv list of page m for an entry that belongs to pmap.  The page
 * queues mutex must be held (asserted).  Fictitious pages are tested for
 * first; the result for them, the loop limit and the return statements are
 * on lines elided from this listing.
 */
1646 pmap_page_exists_quick(pmap_t pmap, vm_page_t m)
1651 if (m->flags & PG_FICTITIOUS)
1654 mtx_assert(&vm_page_queue_mtx, MA_OWNED);
1655 TAILQ_FOREACH(pv, &m->md.pv_list, pv_list) {
1656 if (pv->pv_pmap == pmap) {
1667 * Initialize a vm_page's machine-dependent fields.
/*
 * Resets the machine-dependent part of page m: an empty pv list and a pv
 * count of zero.
 */
1670 pmap_page_init(vm_page_t m)
1673 TAILQ_INIT(&m->md.pv_list);
1674 m->md.pv_list_count = 0;
1678 * Return the number of managed mappings to the given physical page
/*
 * Walks the pv list of page m, looks up the TTE of each mapping and tests
 * it for VTD_WIRED.  The page queues mutex must be held (asserted).
 * NOTE(review): the counter update, the assignment of the local pmap and
 * the return are on elided lines; presumably the count of wired mappings is
 * returned, with fictitious pages handled up front -- confirm.
 */
1682 pmap_page_wired_mappings(vm_page_t m)
1690 if ((m->flags & PG_FICTITIOUS) != 0)
1692 mtx_assert(&vm_page_queue_mtx, MA_OWNED);
1693 TAILQ_FOREACH(pv, &m->md.pv_list, pv_list) {
1696 tte_data = tte_hash_lookup(pmap->pm_hash, pv->pv_va);
1697 if ((tte_data & VTD_WIRED) != 0)
1705 * Lower the permission for all mappings to a given page.
/*
 * Removes write permission from every mapping of page m by clearing
 * VTD_SW_W and VTD_W through tte_clear_phys_bit(), then clears the page's
 * PG_WRITEABLE flag.  The page queues mutex must be held (asserted).
 * Pages without PG_WRITEABLE are tested for first; the action for them is
 * elided (presumably an early return).
 */
1708 pmap_remove_write(vm_page_t m)
1710 if ((m->flags & PG_WRITEABLE) == 0)
1712 mtx_assert(&vm_page_queue_mtx, MA_OWNED);
1713 tte_clear_phys_bit(m, VTD_SW_W|VTD_W);
1714 vm_page_flag_clear(m, PG_WRITEABLE);
1717 * Initialize the pmap associated with process 0.
/*
 * Sets up the given pmap with context 0, sharing the kernel pmap's TSB real
 * address and TTE hash.  The pmap is marked active on every CPU (all bits
 * set), made the current pmap of this CPU, and its pv list and statistics
 * are cleared.
 */
1720 pmap_pinit0(pmap_t pmap)
1722 PMAP_LOCK_INIT(pmap);
1723 pmap->pm_active = pmap->pm_tlbactive = ~0;
1724 pmap->pm_context = 0;
1725 pmap->pm_tsb_ra = kernel_pmap->pm_tsb_ra;
1726 pmap->pm_hash = kernel_pmap->pm_hash;
1728 PCPU_SET(curpmap, pmap);
1730 TAILQ_INIT(&pmap->pm_pvlist);
1731 bzero(&pmap->pm_stats, sizeof pmap->pm_stats);
1735 * Initialize a preallocated and zeroed pmap structure, such as one in a
1736 * vmspace structure.
/*
 * Prepares a pmap for use: takes a context number from get_context(),
 * records the physical address of its pm_tsb descriptor, creates its TTE
 * hash and initial TSB while holding the page queues lock, and clears the
 * miss counters, the CPU activity masks, the old-TSB slots, the pv list and
 * the statistics.
 */
1739 pmap_pinit(pmap_t pmap)
1743 pmap->pm_context = get_context();
1744 pmap->pm_tsb_ra = vtophys(&pmap->pm_tsb);
1746 vm_page_lock_queues();
1747 pmap->pm_hash = tte_hash_create(pmap->pm_context, &pmap->pm_hashscratch);
1748 tsb_init(&pmap->pm_tsb, &pmap->pm_tsbscratch, TSB_INIT_SHIFT);
1749 vm_page_unlock_queues();
1750 pmap->pm_tsb_miss_count = pmap->pm_tsb_cap_miss_count = 0;
1751 pmap->pm_active = pmap->pm_tlbactive = 0;
/* No previous TSBs are remembered yet. */
1752 for (i = 0; i < TSB_MAX_RESIZE; i++)
1753 pmap->pm_old_tsb_ra[i] = 0;
1755 TAILQ_INIT(&pmap->pm_pvlist);
1756 PMAP_LOCK_INIT(pmap);
1757 bzero(&pmap->pm_stats, sizeof pmap->pm_stats);
1762 * Set the physical protection on the specified range of this map as requested.
/*
 * Lowers the protection of [sva, eva).  If read permission is not requested
 * the range is removed with pmap_remove().  Otherwise the bits to clear are
 * collected (VTD_W and VTD_SW_W when write is not requested) and cleared
 * page by page with tte_hash_clear_bits() under the page queues lock.  For
 * managed mappings that were referenced, PG_REFERENCED is set on the page.
 * The range is finally invalidated with the TSB cleared.
 * NOTE(review): the actions for the "write and execute both requested"
 * test, for the missing-execute test, for modified pages, and any
 * condition on the final invalidation are on elided lines -- confirm.
 */
1765 pmap_protect(pmap_t pmap, vm_offset_t sva, vm_offset_t eva, vm_prot_t prot)
1772 DPRINTF("pmap_protect(0x%lx, 0x%lx, %d)\n", sva, eva, prot);
1774 if ((prot & VM_PROT_READ) == VM_PROT_NONE) {
1775 pmap_remove(pmap, sva, eva);
1779 if ((prot & (VM_PROT_WRITE|VM_PROT_EXECUTE)) ==
1780 (VM_PROT_WRITE|VM_PROT_EXECUTE))
1783 clearbits = anychanged = 0;
1785 if ((prot & VM_PROT_WRITE) == 0)
1786 clearbits |= (VTD_W|VTD_SW_W);
1787 if ((prot & VM_PROT_EXECUTE) == 0)
1790 vm_page_lock_queues();
1792 for (tva = sva; tva < eva; tva += PAGE_SIZE) {
1796 if ((otte_data = tte_hash_clear_bits(pmap->pm_hash, tva,
1800 * XXX technically we should do a shootdown if it
1801 * was referenced and was executable - but is not now
1803 if (!anychanged && (otte_data & VTD_W))
1806 if (otte_data & VTD_MANAGED) {
1809 if (otte_data & VTD_REF) {
1810 m = PHYS_TO_VM_PAGE(TTE_GET_PA(otte_data));
1811 vm_page_flag_set(m, PG_REFERENCED);
1813 if (otte_data & VTD_W) {
1814 m = PHYS_TO_VM_PAGE(TTE_GET_PA(otte_data));
1820 vm_page_unlock_queues();
1822 pmap_invalidate_range(pmap, sva, eva, TRUE);
1827 * Map a list of wired pages into kernel virtual address space. This is
1828 * intended for temporary mappings which do not need page modification or
1829 * references recorded. Existing mappings in the region are overwritten.
/*
 * Enters count kernel mappings starting at sva, one 8K TTE per page of the
 * array m, using tte_hash_update() on the kernel pmap's hash.  The previous
 * TTEs are OR-ed together; if any of them had VTD_REF set, [sva, va) is
 * invalidated without clearing the TSB.  The stepping of va and m is on
 * lines elided from this listing.
 */
1832 pmap_qenter(vm_offset_t sva, vm_page_t *m, int count)
1839 while (count-- > 0) {
1840 otte |= tte_hash_update(kernel_pmap->pm_hash, va,
1841 VM_PAGE_TO_PHYS(*m) | TTE_KERNEL | VTD_8K);
/* Only a previously referenced mapping can be cached in a TLB. */
1845 if ((otte & VTD_REF) != 0)
1846 pmap_invalidate_range(kernel_pmap, sva, va, FALSE);
1850 * Remove page mappings from kernel virtual address space. Intended for
1851 * temporary mappings entered by pmap_qenter.
/*
 * Deletes count kernel mappings starting at sva from the kernel pmap's TTE
 * hash.  The deleted TTEs are OR-ed together; if any of them had VTD_REF
 * set, [sva, va) is invalidated with the TSB cleared.  The stepping of va
 * is on lines elided from this listing.
 */
1854 pmap_qremove(vm_offset_t sva, int count)
1862 while (count-- > 0) {
1863 otte |= tte_hash_delete(kernel_pmap->pm_hash, va);
1866 if ((otte & VTD_REF) != 0)
1867 pmap_invalidate_range(kernel_pmap, sva, va, TRUE);
1871 * Release any resources held by the given physical map.
1872 * Called when a pmap initialized by pmap_pinit is being released.
1873 * Should only be called if the map contains no valid mappings.
/*
 * Tears down a pmap: asserts that no resident mappings remain, then
 * releases the TSB, destroys the TTE hash, returns the context number with
 * free_context() and destroys the pmap lock.
 */
1876 pmap_release(pmap_t pmap)
1878 KASSERT(pmap->pm_stats.resident_count == 0,
1879 ("pmap_release: pmap resident count %ld != 0",
1880 pmap->pm_stats.resident_count));
1882 tsb_deinit(&pmap->pm_tsb);
1883 tte_hash_destroy(pmap->pm_hash);
1884 free_context(pmap->pm_context);
1885 PMAP_LOCK_DESTROY(pmap);
1889 * Remove the given range of addresses from the specified map.
/*
 * Removes the mappings in [start, end).  Under the page queues lock each
 * page-sized step deletes the TTE from the hash and, when one existed,
 * passes it to pmap_remove_tte().  The range is then invalidated with the
 * TSB cleared.
 * NOTE(review): the actions for the empty-pmap test and for TTEs with
 * VTD_REF or VTD_W, and any condition on the final invalidation, are on
 * elided lines -- confirm.
 */
1892 pmap_remove(pmap_t pmap, vm_offset_t start, vm_offset_t end)
1898 * Perform an unsynchronized read. This is, however, safe.
1900 if (pmap->pm_stats.resident_count == 0)
1903 DPRINTF("pmap_remove(start=0x%lx, end=0x%lx)\n",
1906 vm_page_lock_queues();
1908 for (tva = start; tva < end; tva += PAGE_SIZE) {
/* A zero result means nothing was mapped at tva. */
1909 if ((tte_data = tte_hash_delete(pmap->pm_hash, tva)) == 0)
1911 pmap_remove_tte(pmap, tte_data, tva);
1912 if (tte_data & (VTD_REF|VTD_W))
1915 vm_page_unlock_queues();
1917 pmap_invalidate_range(pmap, start, end, TRUE);
1922 * Routine: pmap_remove_all
1924 * Removes this physical page from
1925 * all physical maps in which it resides.
1926 * Reflects back modify bits to the pager.
1929 * Original versions of this routine were very
1930 * inefficient because they iteratively called
1931 * pmap_remove (slow...)
/*
 * Removes every mapping of page m.  The page queues mutex must be held
 * (asserted).  For each pv entry, with the owning pmap locked, the TTE is
 * deleted, the resident and wired counts are adjusted, PG_REFERENCED is set
 * if the TTE was referenced, the translation is invalidated and the pv
 * entry is unlinked from both lists.  PG_WRITEABLE is cleared at the end.
 * Handling of a modified page beyond the assertion, and the release of the
 * pv entry, are on lines elided from this listing.
 */
1935 pmap_remove_all(vm_page_t m)
1939 DPRINTF("pmap_remove_all 0x%lx\n", VM_PAGE_TO_PHYS(m));
1941 mtx_assert(&vm_page_queue_mtx, MA_OWNED);
/* Always take the list head; each pass unlinks the entry it processed. */
1942 while ((pv = TAILQ_FIRST(&m->md.pv_list)) != NULL) {
1943 PMAP_LOCK(pv->pv_pmap);
1944 pv->pv_pmap->pm_stats.resident_count--;
1946 tte_data = tte_hash_delete(pv->pv_pmap->pm_hash, pv->pv_va);
1948 if (tte_data & VTD_WIRED)
1949 pv->pv_pmap->pm_stats.wired_count--;
1950 if (tte_data & VTD_REF)
1951 vm_page_flag_set(m, PG_REFERENCED);
1954 * Update the vm_page_t clean and reference bits.
1956 if (tte_data & VTD_W) {
1957 KASSERT((tte_data & VTD_SW_W),
1958 ("pmap_remove_all: modified page not writable: va: %lx, tte: %lx",
1959 pv->pv_va, tte_data));
1963 pmap_invalidate_page(pv->pv_pmap, pv->pv_va, TRUE);
1964 TAILQ_REMOVE(&pv->pv_pmap->pm_pvlist, pv, pv_plist);
1965 TAILQ_REMOVE(&m->md.pv_list, pv, pv_list);
1966 m->md.pv_list_count--;
1967 PMAP_UNLOCK(pv->pv_pmap);
1970 vm_page_flag_clear(m, PG_WRITEABLE);
/*
 * Finds and unlinks the pv entry that records the mapping of page m at va
 * in pmap.  Both the pmap lock and the page queues mutex must be held
 * (asserted).  Two searches are visible: the page's pv list, matching on
 * pmap and va, and the pmap's pv list, matching on va alone.  The first is
 * guarded by the page's pv count being below the pmap's resident count.
 * NOTE(review): the else joining the two searches and the loop exits are
 * presumably elided -- confirm.
 * PG_WRITEABLE is cleared when the page has no mappings left.
 */
1974 pmap_remove_entry(pmap_t pmap, vm_page_t m, vm_offset_t va)
1977 if (pmap != kernel_pmap)
1978 DPRINTF("pmap_remove_entry(va=0x%lx, pa=0x%lx)\n", va, VM_PAGE_TO_PHYS(m));
1979 PMAP_LOCK_ASSERT(pmap, MA_OWNED);
1980 mtx_assert(&vm_page_queue_mtx, MA_OWNED);
1981 if (m->md.pv_list_count < pmap->pm_stats.resident_count) {
1982 TAILQ_FOREACH(pv, &m->md.pv_list, pv_list) {
1983 if (pmap == pv->pv_pmap && va == pv->pv_va)
1987 TAILQ_FOREACH(pv, &pmap->pm_pvlist, pv_plist) {
1988 if (va == pv->pv_va)
1992 KASSERT(pv != NULL, ("pmap_remove_entry: pv not found va=0x%lx pa=0x%lx", va, VM_PAGE_TO_PHYS(m)));
1993 TAILQ_REMOVE(&m->md.pv_list, pv, pv_list);
1994 m->md.pv_list_count--;
1995 if (TAILQ_EMPTY(&m->md.pv_list))
1996 vm_page_flag_clear(m, PG_WRITEABLE);
1997 TAILQ_REMOVE(&pmap->pm_pvlist, pv, pv_plist);
/*
 * Removes every mapping recorded on the pmap's pv list.  Under the page
 * queues lock each pv entry's TTE is deleted from the hash, the resident
 * count is decremented and the entry is unlinked from both the pmap's and
 * the page's pv lists (clearing PG_WRITEABLE when the page has no mappings
 * left).  Afterwards the TTE hash and the TSB are reset and the whole
 * context is invalidated.  Handling of modified pages and the release of
 * the pv entry are on lines elided from this listing.
 */
2003 pmap_remove_pages(pmap_t pmap)
2010 DPRINTF("pmap_remove_pages(ctx=0x%lx)\n", pmap->pm_context);
2011 vm_page_lock_queues();
/* npv is fetched before pv is unlinked so the walk can continue. */
2013 for (pv = TAILQ_FIRST(&pmap->pm_pvlist); pv; pv = npv) {
2014 tte_data = tte_hash_delete(pmap->pm_hash, pv->pv_va);
2016 if (tte_data == 0) {
2017 printf("TTE IS ZERO @ VA %016lx\n", pv->pv_va);
2020 if (tte_data & VTD_WIRED) {
/* NOTE(review): the statement after panic() cannot be reached. */
2021 panic("wired page in process not handled correctly");
2022 pmap->pm_stats.wired_count--;
2024 m = PHYS_TO_VM_PAGE(TTE_GET_PA(tte_data));
2026 pmap->pm_stats.resident_count--;
2028 if (tte_data & VTD_W) {
2032 npv = TAILQ_NEXT(pv, pv_plist);
2033 TAILQ_REMOVE(&pmap->pm_pvlist, pv, pv_plist);
2035 m->md.pv_list_count--;
2036 TAILQ_REMOVE(&m->md.pv_list, pv, pv_list);
2037 if (TAILQ_EMPTY(&m->md.pv_list))
2038 vm_page_flag_clear(m, PG_WRITEABLE);
2042 pmap->pm_hash = tte_hash_reset(pmap->pm_hash, &pmap->pm_hashscratch);
2044 pmap_tsb_reset(pmap);
2046 vm_page_unlock_queues();
2047 pmap_invalidate_all(pmap);
/*
 * Returns the pmap to its initial-size TSB.  Remembered TSBs in slots 1 and
 * up are freed; slot i is freed as 1 << (TSB_INIT_SHIFT + i) pages.  If
 * slot 0 is set, the current TSB is freed and the TSB recorded in slot 0 is
 * reinstated with the initial entry count and scratchpad value.
 */
2052 pmap_tsb_reset(pmap_t pmap)
/* The scan stops at the first empty slot. */
2056 for (i = 1; i < TSB_MAX_RESIZE && pmap->pm_old_tsb_ra[i]; i++) {
2057 pmap_free_contig_pages((void *)TLB_PHYS_TO_DIRECT(pmap->pm_old_tsb_ra[i]),
2058 (1 << (TSB_INIT_SHIFT + i)));
2059 pmap->pm_old_tsb_ra[i] = 0;
2061 if (pmap->pm_old_tsb_ra[0] != 0) {
/* Capture the current TSB's address and size before overwriting them. */
2062 vm_paddr_t tsb_pa = pmap->pm_tsb.hti_ra;
2063 int size = tsb_size(&pmap->pm_tsb);
2064 pmap->pm_tsb.hti_ntte = (1 << (TSB_INIT_SHIFT + PAGE_SHIFT - TTE_SHIFT));
2065 pmap->pm_tsb.hti_ra = pmap->pm_old_tsb_ra[0];
2066 pmap_free_contig_pages((void *)TLB_PHYS_TO_DIRECT(tsb_pa), size);
2067 pmap->pm_tsbscratch = pmap->pm_tsb.hti_ra | (uint64_t)TSB_INIT_SHIFT;
2068 pmap->pm_old_tsb_ra[0] = 0;
/*
 * Scrubs physical memory through hv_mem_scrub(), which reports how many
 * bytes it processed in bytes_zeroed; that amount is subtracted from size.
 * NOTE(review): the enclosing loop and the matching advance of pa are
 * presumably on elided lines -- confirm.
 */
2073 pmap_scrub_pages(vm_paddr_t pa, int64_t size)
2075 uint64_t bytes_zeroed;
2077 hv_mem_scrub(pa, size, &bytes_zeroed);
2079 size -= bytes_zeroed;
/*
 * Performs the bookkeeping for a TTE that has already been taken out of the
 * hash: decrements the wired count (if VTD_WIRED) and the resident count,
 * and for managed mappings sets PG_REFERENCED when the TTE was referenced
 * and removes the pv entry.  Both the page queues mutex and the pmap lock
 * must be held (asserted).  The action for a modified (VTD_W) managed page
 * is on an elided line.
 */
2084 pmap_remove_tte(pmap_t pmap, tte_t tte_data, vm_offset_t va)
2089 if (pmap != kernel_pmap)
2090 DPRINTF("pmap_remove_tte(va=0x%lx, pa=0x%lx)\n", va, TTE_GET_PA(tte_data));
2092 mtx_assert(&vm_page_queue_mtx, MA_OWNED);
2093 PMAP_LOCK_ASSERT(pmap, MA_OWNED);
2094 if (tte_data & VTD_WIRED)
2095 pmap->pm_stats.wired_count--;
2097 pmap->pm_stats.resident_count--;
2099 if (tte_data & VTD_MANAGED) {
2100 m = PHYS_TO_VM_PAGE(TTE_GET_PA(tte_data));
2101 if (tte_data & VTD_W) {
2104 if (tte_data & VTD_REF)
2105 vm_page_flag_set(m, PG_REFERENCED);
2106 pmap_remove_entry(pmap, m, va);
2110 /* resize the tsb if the number of capacity misses is greater than 1/2 of
/*
 * Doubles the TSB of the current thread's pmap when the TSB is still below
 * its maximum size and the capacity-miss count exceeds half of the total
 * miss count (miss_count >> 1).  The old TSB's real address is remembered
 * in pm_old_tsb_ra[], indexed by its size step.  A new TSB is initialised,
 * installed with hv_mmu_tsb_ctxnon0(), and the scratchpad value is
 * refreshed.  Both miss counters are reset.
 * NOTE(review): two install sequences are visible, one of which also
 * notifies other CPUs with tl_tsbupdate; they are presumably alternatives
 * selected by elided preprocessor lines -- confirm.
 */
2114 pmap_tsb_resize(pmap_t pmap)
2116 uint32_t miss_count;
2117 uint32_t cap_miss_count;
2118 struct tsb_resize_info info;
2119 hv_tsb_info_t hvtsb;
2120 uint64_t tsbscratch;
2122 KASSERT(pmap == curthread_pmap, ("operating on non-current pmap"));
2123 miss_count = pmap->pm_tsb_miss_count;
2124 cap_miss_count = pmap->pm_tsb_cap_miss_count;
2125 int npages_shift = tsb_page_shift(pmap);
2127 if (npages_shift < (TSB_INIT_SHIFT + TSB_MAX_RESIZE) &&
2128 cap_miss_count > (miss_count >> 1)) {
2129 DPRINTF("resizing tsb for proc=%s pid=%d\n",
2130 curthread->td_proc->p_comm, curthread->td_proc->p_pid);
/* Slot index is the number of doublings the outgoing TSB had undergone. */
2131 pmap->pm_old_tsb_ra[npages_shift - TSB_INIT_SHIFT] = pmap->pm_tsb.hti_ra;
2133 /* double TSB size */
2134 tsb_init(&hvtsb, &tsbscratch, npages_shift + 1);
2138 bcopy(&hvtsb, &pmap->pm_tsb, sizeof(hv_tsb_info_t));
2139 pmap->pm_tsbscratch = tsb_set_scratchpad_user(&pmap->pm_tsb);
2141 if (hv_mmu_tsb_ctxnon0(1, pmap->pm_tsb_ra) != H_EOK)
2142 panic("failed to set TSB 0x%lx - context == %ld\n",
2143 pmap->pm_tsb_ra, pmap->pm_context);
/* info lives on this stack; its physical address is handed to the other CPUs. */
2144 info.tri_tsbscratch = pmap->pm_tsbscratch;
2145 info.tri_tsb_ra = pmap->pm_tsb_ra;
2146 pmap_ipi(pmap, tl_tsbupdate, pmap->pm_context, vtophys(&info));
2147 pmap->pm_tlbactive = pmap->pm_active;
2150 bcopy(&hvtsb, &pmap->pm_tsb, sizeof(hvtsb));
2151 if (hv_mmu_tsb_ctxnon0(1, pmap->pm_tsb_ra) != H_EOK)
2152 panic("failed to set TSB 0x%lx - context == %ld\n",
2153 pmap->pm_tsb_ra, pmap->pm_context);
2154 pmap->pm_tsbscratch = tsb_set_scratchpad_user(&pmap->pm_tsb);
2157 pmap->pm_tsb_miss_count = 0;
2158 pmap->pm_tsb_cap_miss_count = 0;
/*
 * Replaces the pmap's TTE hash with the one returned by tte_hash_resize(),
 * notifies other CPUs with tl_ttehashupdate when the current process has
 * more than one thread, refreshes the hash scratchpad value and destroys
 * the old hash.
 * NOTE(review): the IPI passes pm_hashscratch before the following line
 * recomputes it; confirm that tte_hash_resize() has already made that value
 * current.
 */
2162 pmap_tte_hash_resize(pmap_t pmap)
2164 tte_hash_t old_th = pmap->pm_hash;
2166 pmap->pm_hash = tte_hash_resize(pmap->pm_hash);
2168 if (curthread->td_proc->p_numthreads != 1)
2169 pmap_ipi(pmap, tl_ttehashupdate, pmap->pm_context, pmap->pm_hashscratch);
2171 pmap->pm_hashscratch = tte_hash_set_scratchpad_user(pmap->pm_hash, pmap->pm_context);
2173 tte_hash_destroy(old_th);
2177 * pmap_ts_referenced:
2179 * Return a count of reference bits for a page, clearing those bits.
2180 * It is not necessary for every reference bit to be cleared, but it
2181 * is necessary that 0 only be returned when there are truly no
2182 * reference bits set.
2184 * XXX: The exact number of bits to check and clear is a matter that
2185 * should be tested and standardized at some point in the future for
2186 * optimal aging of shared pages.
/*
 * Walks the pv list of page m, clearing VTD_REF in each mapping's TTE with
 * tte_hash_clear_bits() and invalidating the translation whenever the bit
 * had been set.  Each visited entry is moved to the tail of the page's pv
 * list.  The walk ends when the list is exhausted or wraps to pvf.  The
 * page queues mutex must be held (asserted).
 * NOTE(review): the result for fictitious pages, the assignment of pvf and
 * pmap, the reference counter and any early exit are on elided lines --
 * confirm.
 */
2190 pmap_ts_referenced(vm_page_t m)
2194 pv_entry_t pv, pvf, pvn;
2199 if (m->flags & PG_FICTITIOUS)
2202 mtx_assert(&vm_page_queue_mtx, MA_OWNED);
2203 if ((pv = TAILQ_FIRST(&m->md.pv_list)) != NULL) {
/* The successor is fetched before pv is moved to the tail. */
2208 pvn = TAILQ_NEXT(pv, pv_list);
2210 TAILQ_REMOVE(&m->md.pv_list, pv, pv_list);
2212 TAILQ_INSERT_TAIL(&m->md.pv_list, pv, pv_list);
2216 otte_data = tte_hash_clear_bits(pmap->pm_hash, pv->pv_va, VTD_REF);
2217 if ((otte_data & VTD_REF) != 0) {
2218 pmap_invalidate_page(pmap, pv->pv_va, TRUE);
2228 } while ((pv = pvn) != NULL && pv != pvf);
/*
 * Clears the PAGE_SIZE bytes of page m with hwblkclr(), addressing the page
 * through the direct-mapped region.
 */
2234 pmap_zero_page(vm_page_t m)
2236 hwblkclr((void *)TLB_PHYS_TO_DIRECT(VM_PAGE_TO_PHYS(m)), PAGE_SIZE);
/*
 * Zeroes size bytes of page m starting at byte offset off.  A request that
 * covers the whole page uses hwblkclr(); the bzero() call handles a partial
 * area at va + off.
 * NOTE(review): an else before the bzero() call is presumably elided; as
 * listed, bzero() would also run for the whole-page case -- confirm.
 */
2240 pmap_zero_page_area(vm_page_t m, int off, int size)
2245 pa = VM_PAGE_TO_PHYS(m);
2246 va = TLB_PHYS_TO_DIRECT(pa);
2247 if (off == 0 && size == PAGE_SIZE)
2248 hwblkclr((void *)TLB_PHYS_TO_DIRECT(VM_PAGE_TO_PHYS(m)), PAGE_SIZE);
2250 bzero((char *)(va + off), size);
/*
 * Clears the PAGE_SIZE bytes of page m with hwblkclr() through the
 * direct-mapped region; the visible statement is the same one used by
 * pmap_zero_page().
 */
2255 pmap_zero_page_idle(vm_page_t m)
2257 hwblkclr((void *)TLB_PHYS_TO_DIRECT(VM_PAGE_TO_PHYS(m)), PAGE_SIZE);
/*
 * Panics with a message that reports the pmap's context number, the TSB
 * real address passed in, the pmap's TSB scratchpad value and the error
 * code.
 */
2261 pmap_set_ctx_panic(uint64_t error, vm_paddr_t tsb_ra, pmap_t pmap)
2263 panic("setting ctxnon0 failed ctx=0x%lx hvtsb_ra=0x%lx tsbscratch=0x%lx error=0x%lx",
2264 pmap->pm_context, tsb_ra, pmap->pm_tsbscratch, error);