]> CyberLeo.Net >> Repos - FreeBSD/releng/10.0.git/blob - sys/sparc64/sparc64/pmap.c
- Copy stable/10 (r259064) to releng/10.0 as part of the
[FreeBSD/releng/10.0.git] / sys / sparc64 / sparc64 / pmap.c
1 /*-
2  * Copyright (c) 1991 Regents of the University of California.
3  * All rights reserved.
4  * Copyright (c) 1994 John S. Dyson
5  * All rights reserved.
6  * Copyright (c) 1994 David Greenman
7  * All rights reserved.
8  *
9  * This code is derived from software contributed to Berkeley by
10  * the Systems Programming Group of the University of Utah Computer
11  * Science Department and William Jolitz of UUNET Technologies Inc.
12  *
13  * Redistribution and use in source and binary forms, with or without
14  * modification, are permitted provided that the following conditions
15  * are met:
16  * 1. Redistributions of source code must retain the above copyright
17  *    notice, this list of conditions and the following disclaimer.
18  * 2. Redistributions in binary form must reproduce the above copyright
19  *    notice, this list of conditions and the following disclaimer in the
20  *    documentation and/or other materials provided with the distribution.
21  * 4. Neither the name of the University nor the names of its contributors
22  *    may be used to endorse or promote products derived from this software
23  *    without specific prior written permission.
24  *
25  * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
26  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
27  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
28  * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
29  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
30  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
31  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
32  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
33  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
34  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
35  * SUCH DAMAGE.
36  *
37  *      from:   @(#)pmap.c      7.7 (Berkeley)  5/12/91
38  */
39
40 #include <sys/cdefs.h>
41 __FBSDID("$FreeBSD$");
42
43 /*
44  * Manages physical address maps.
45  *
46  * Since the information managed by this module is also stored by the
47  * logical address mapping module, this module may throw away valid virtual
48  * to physical mappings at almost any time.  However, invalidations of
49  * mappings must be done as requested.
50  *
51  * In order to cope with hardware architectures which make virtual to
52  * physical map invalidates expensive, this module may delay invalidate
53  * reduced protection operations until such time as they are actually
54  * necessary.  This module is given full information as to which processors
55  * are currently using which maps, and to when physical maps must be made
56  * correct.
57  */
58
59 #include "opt_kstack_pages.h"
60 #include "opt_pmap.h"
61
62 #include <sys/param.h>
63 #include <sys/kernel.h>
64 #include <sys/ktr.h>
65 #include <sys/lock.h>
66 #include <sys/msgbuf.h>
67 #include <sys/mutex.h>
68 #include <sys/proc.h>
69 #include <sys/rwlock.h>
70 #include <sys/smp.h>
71 #include <sys/sysctl.h>
72 #include <sys/systm.h>
73 #include <sys/vmmeter.h>
74
75 #include <dev/ofw/openfirm.h>
76
77 #include <vm/vm.h>
78 #include <vm/vm_param.h>
79 #include <vm/vm_kern.h>
80 #include <vm/vm_page.h>
81 #include <vm/vm_map.h>
82 #include <vm/vm_object.h>
83 #include <vm/vm_extern.h>
84 #include <vm/vm_pageout.h>
85 #include <vm/vm_pager.h>
86 #include <vm/vm_phys.h>
87
88 #include <machine/cache.h>
89 #include <machine/frame.h>
90 #include <machine/instr.h>
91 #include <machine/md_var.h>
92 #include <machine/metadata.h>
93 #include <machine/ofw_mem.h>
94 #include <machine/smp.h>
95 #include <machine/tlb.h>
96 #include <machine/tte.h>
97 #include <machine/tsb.h>
98 #include <machine/ver.h>
99
100 /*
101  * Virtual address of message buffer
102  */
103 struct msgbuf *msgbufp;
104
105 /*
106  * Map of physical memory regions
107  */
108 vm_paddr_t phys_avail[128];
109 static struct ofw_mem_region mra[128];
110 struct ofw_mem_region sparc64_memreg[128];
111 int sparc64_nmemreg;
112 static struct ofw_map translations[128];
113 static int translations_size;
114
115 static vm_offset_t pmap_idle_map;
116 static vm_offset_t pmap_temp_map_1;
117 static vm_offset_t pmap_temp_map_2;
118
119 /*
120  * First and last available kernel virtual addresses
121  */
122 vm_offset_t virtual_avail;
123 vm_offset_t virtual_end;
124 vm_offset_t kernel_vm_end;
125
126 vm_offset_t vm_max_kernel_address;
127
128 /*
129  * Kernel pmap
130  */
131 struct pmap kernel_pmap_store;
132
133 struct rwlock_padalign tte_list_global_lock;
134
135 /*
136  * Allocate physical memory for use in pmap_bootstrap.
137  */
138 static vm_paddr_t pmap_bootstrap_alloc(vm_size_t size, uint32_t colors);
139
140 static void pmap_bootstrap_set_tte(struct tte *tp, u_long vpn, u_long data);
141 static void pmap_cache_remove(vm_page_t m, vm_offset_t va);
142 static int pmap_protect_tte(struct pmap *pm1, struct pmap *pm2,
143     struct tte *tp, vm_offset_t va);
144
145 /*
146  * Map the given physical page at the specified virtual address in the
147  * target pmap with the protection requested.  If specified the page
148  * will be wired down.
149  *
150  * The page queues and pmap must be locked.
151  */
152 static void pmap_enter_locked(pmap_t pm, vm_offset_t va, vm_page_t m,
153     vm_prot_t prot, boolean_t wired);
154
155 extern int tl1_dmmu_miss_direct_patch_tsb_phys_1[];
156 extern int tl1_dmmu_miss_direct_patch_tsb_phys_end_1[];
157 extern int tl1_dmmu_miss_patch_asi_1[];
158 extern int tl1_dmmu_miss_patch_quad_ldd_1[];
159 extern int tl1_dmmu_miss_patch_tsb_1[];
160 extern int tl1_dmmu_miss_patch_tsb_2[];
161 extern int tl1_dmmu_miss_patch_tsb_mask_1[];
162 extern int tl1_dmmu_miss_patch_tsb_mask_2[];
163 extern int tl1_dmmu_prot_patch_asi_1[];
164 extern int tl1_dmmu_prot_patch_quad_ldd_1[];
165 extern int tl1_dmmu_prot_patch_tsb_1[];
166 extern int tl1_dmmu_prot_patch_tsb_2[];
167 extern int tl1_dmmu_prot_patch_tsb_mask_1[];
168 extern int tl1_dmmu_prot_patch_tsb_mask_2[];
169 extern int tl1_immu_miss_patch_asi_1[];
170 extern int tl1_immu_miss_patch_quad_ldd_1[];
171 extern int tl1_immu_miss_patch_tsb_1[];
172 extern int tl1_immu_miss_patch_tsb_2[];
173 extern int tl1_immu_miss_patch_tsb_mask_1[];
174 extern int tl1_immu_miss_patch_tsb_mask_2[];
175
176 /*
177  * If a user pmap is processed with pmap_remove and the resident count
178  * drops to 0, there are no more pages to remove, so we need not
179  * continue.
180  */
181 #define PMAP_REMOVE_DONE(pm) \
182         ((pm) != kernel_pmap && (pm)->pm_stats.resident_count == 0)
183
184 /*
185  * The threshold (in bytes) above which tsb_foreach() is used in pmap_remove()
186  * and pmap_protect() instead of trying each virtual address.
187  */
188 #define PMAP_TSB_THRESH ((TSB_SIZE / 2) * PAGE_SIZE)
189
190 SYSCTL_NODE(_debug, OID_AUTO, pmap_stats, CTLFLAG_RD, 0, "");
191
192 PMAP_STATS_VAR(pmap_nenter);
193 PMAP_STATS_VAR(pmap_nenter_update);
194 PMAP_STATS_VAR(pmap_nenter_replace);
195 PMAP_STATS_VAR(pmap_nenter_new);
196 PMAP_STATS_VAR(pmap_nkenter);
197 PMAP_STATS_VAR(pmap_nkenter_oc);
198 PMAP_STATS_VAR(pmap_nkenter_stupid);
199 PMAP_STATS_VAR(pmap_nkremove);
200 PMAP_STATS_VAR(pmap_nqenter);
201 PMAP_STATS_VAR(pmap_nqremove);
202 PMAP_STATS_VAR(pmap_ncache_enter);
203 PMAP_STATS_VAR(pmap_ncache_enter_c);
204 PMAP_STATS_VAR(pmap_ncache_enter_oc);
205 PMAP_STATS_VAR(pmap_ncache_enter_cc);
206 PMAP_STATS_VAR(pmap_ncache_enter_coc);
207 PMAP_STATS_VAR(pmap_ncache_enter_nc);
208 PMAP_STATS_VAR(pmap_ncache_enter_cnc);
209 PMAP_STATS_VAR(pmap_ncache_remove);
210 PMAP_STATS_VAR(pmap_ncache_remove_c);
211 PMAP_STATS_VAR(pmap_ncache_remove_oc);
212 PMAP_STATS_VAR(pmap_ncache_remove_cc);
213 PMAP_STATS_VAR(pmap_ncache_remove_coc);
214 PMAP_STATS_VAR(pmap_ncache_remove_nc);
215 PMAP_STATS_VAR(pmap_nzero_page);
216 PMAP_STATS_VAR(pmap_nzero_page_c);
217 PMAP_STATS_VAR(pmap_nzero_page_oc);
218 PMAP_STATS_VAR(pmap_nzero_page_nc);
219 PMAP_STATS_VAR(pmap_nzero_page_area);
220 PMAP_STATS_VAR(pmap_nzero_page_area_c);
221 PMAP_STATS_VAR(pmap_nzero_page_area_oc);
222 PMAP_STATS_VAR(pmap_nzero_page_area_nc);
223 PMAP_STATS_VAR(pmap_nzero_page_idle);
224 PMAP_STATS_VAR(pmap_nzero_page_idle_c);
225 PMAP_STATS_VAR(pmap_nzero_page_idle_oc);
226 PMAP_STATS_VAR(pmap_nzero_page_idle_nc);
227 PMAP_STATS_VAR(pmap_ncopy_page);
228 PMAP_STATS_VAR(pmap_ncopy_page_c);
229 PMAP_STATS_VAR(pmap_ncopy_page_oc);
230 PMAP_STATS_VAR(pmap_ncopy_page_nc);
231 PMAP_STATS_VAR(pmap_ncopy_page_dc);
232 PMAP_STATS_VAR(pmap_ncopy_page_doc);
233 PMAP_STATS_VAR(pmap_ncopy_page_sc);
234 PMAP_STATS_VAR(pmap_ncopy_page_soc);
235
236 PMAP_STATS_VAR(pmap_nnew_thread);
237 PMAP_STATS_VAR(pmap_nnew_thread_oc);
238
239 static inline u_long dtlb_get_data(u_int tlb, u_int slot);
240
241 /*
242  * Quick sort callout for comparing memory regions
243  */
244 static int mr_cmp(const void *a, const void *b);
245 static int om_cmp(const void *a, const void *b);
246
247 static int
248 mr_cmp(const void *a, const void *b)
249 {
250         const struct ofw_mem_region *mra;
251         const struct ofw_mem_region *mrb;
252
253         mra = a;
254         mrb = b;
255         if (mra->mr_start < mrb->mr_start)
256                 return (-1);
257         else if (mra->mr_start > mrb->mr_start)
258                 return (1);
259         else
260                 return (0);
261 }
262
263 static int
264 om_cmp(const void *a, const void *b)
265 {
266         const struct ofw_map *oma;
267         const struct ofw_map *omb;
268
269         oma = a;
270         omb = b;
271         if (oma->om_start < omb->om_start)
272                 return (-1);
273         else if (oma->om_start > omb->om_start)
274                 return (1);
275         else
276                 return (0);
277 }
278
279 static inline u_long
280 dtlb_get_data(u_int tlb, u_int slot)
281 {
282         u_long data;
283         register_t s;
284
285         slot = TLB_DAR_SLOT(tlb, slot);
286         /*
287          * We read ASI_DTLB_DATA_ACCESS_REG twice back-to-back in order to
288          * work around errata of USIII and beyond.
289          */
290         s = intr_disable();
291         (void)ldxa(slot, ASI_DTLB_DATA_ACCESS_REG);
292         data = ldxa(slot, ASI_DTLB_DATA_ACCESS_REG);
293         intr_restore(s);
294         return (data);
295 }
296
297 /*
298  * Bootstrap the system enough to run with virtual memory.
299  */
300 void
301 pmap_bootstrap(u_int cpu_impl)
302 {
303         struct pmap *pm;
304         struct tte *tp;
305         vm_offset_t off;
306         vm_offset_t va;
307         vm_paddr_t pa;
308         vm_size_t physsz;
309         vm_size_t virtsz;
310         u_long data;
311         u_long vpn;
312         phandle_t pmem;
313         phandle_t vmem;
314         u_int dtlb_slots_avail;
315         int i;
316         int j;
317         int sz;
318         uint32_t asi;
319         uint32_t colors;
320         uint32_t ldd;
321
322         /*
323          * Set the kernel context.
324          */
325         pmap_set_kctx();
326
327         colors = dcache_color_ignore != 0 ? 1 : DCACHE_COLORS;
328
329         /*
330          * Find out what physical memory is available from the PROM and
331          * initialize the phys_avail array.  This must be done before
332          * pmap_bootstrap_alloc is called.
333          */
334         if ((pmem = OF_finddevice("/memory")) == -1)
335                 OF_panic("%s: finddevice /memory", __func__);
336         if ((sz = OF_getproplen(pmem, "available")) == -1)
337                 OF_panic("%s: getproplen /memory/available", __func__);
338         if (sizeof(phys_avail) < sz)
339                 OF_panic("%s: phys_avail too small", __func__);
340         if (sizeof(mra) < sz)
341                 OF_panic("%s: mra too small", __func__);
342         bzero(mra, sz);
343         if (OF_getprop(pmem, "available", mra, sz) == -1)
344                 OF_panic("%s: getprop /memory/available", __func__);
345         sz /= sizeof(*mra);
346         CTR0(KTR_PMAP, "pmap_bootstrap: physical memory");
347         qsort(mra, sz, sizeof (*mra), mr_cmp);
348         physsz = 0;
349         getenv_quad("hw.physmem", &physmem);
350         physmem = btoc(physmem);
351         for (i = 0, j = 0; i < sz; i++, j += 2) {
352                 CTR2(KTR_PMAP, "start=%#lx size=%#lx", mra[i].mr_start,
353                     mra[i].mr_size);
354                 if (physmem != 0 && btoc(physsz + mra[i].mr_size) >= physmem) {
355                         if (btoc(physsz) < physmem) {
356                                 phys_avail[j] = mra[i].mr_start;
357                                 phys_avail[j + 1] = mra[i].mr_start +
358                                     (ctob(physmem) - physsz);
359                                 physsz = ctob(physmem);
360                         }
361                         break;
362                 }
363                 phys_avail[j] = mra[i].mr_start;
364                 phys_avail[j + 1] = mra[i].mr_start + mra[i].mr_size;
365                 physsz += mra[i].mr_size;
366         }
367         physmem = btoc(physsz);
368
369         /*
370          * Calculate the size of kernel virtual memory, and the size and mask
371          * for the kernel TSB based on the phsyical memory size but limited
372          * by the amount of dTLB slots available for locked entries if we have
373          * to lock the TSB in the TLB (given that for spitfire-class CPUs all
374          * of the dt64 slots can hold locked entries but there is no large
375          * dTLB for unlocked ones, we don't use more than half of it for the
376          * TSB).
377          * Note that for reasons unknown OpenSolaris doesn't take advantage of
378          * ASI_ATOMIC_QUAD_LDD_PHYS on UltraSPARC-III.  However, given that no
379          * public documentation is available for these, the latter just might
380          * not support it, yet.
381          */
382         if (cpu_impl == CPU_IMPL_SPARC64V ||
383             cpu_impl >= CPU_IMPL_ULTRASPARCIIIp) {
384                 tsb_kernel_ldd_phys = 1;
385                 virtsz = roundup(5 / 3 * physsz, PAGE_SIZE_4M <<
386                     (PAGE_SHIFT - TTE_SHIFT));
387         } else {
388                 dtlb_slots_avail = 0;
389                 for (i = 0; i < dtlb_slots; i++) {
390                         data = dtlb_get_data(cpu_impl ==
391                             CPU_IMPL_ULTRASPARCIII ? TLB_DAR_T16 :
392                             TLB_DAR_T32, i);
393                         if ((data & (TD_V | TD_L)) != (TD_V | TD_L))
394                                 dtlb_slots_avail++;
395                 }
396 #ifdef SMP
397                 dtlb_slots_avail -= PCPU_PAGES;
398 #endif
399                 if (cpu_impl >= CPU_IMPL_ULTRASPARCI &&
400                     cpu_impl < CPU_IMPL_ULTRASPARCIII)
401                         dtlb_slots_avail /= 2;
402                 virtsz = roundup(physsz, PAGE_SIZE_4M <<
403                     (PAGE_SHIFT - TTE_SHIFT));
404                 virtsz = MIN(virtsz, (dtlb_slots_avail * PAGE_SIZE_4M) <<
405                     (PAGE_SHIFT - TTE_SHIFT));
406         }
407         vm_max_kernel_address = VM_MIN_KERNEL_ADDRESS + virtsz;
408         tsb_kernel_size = virtsz >> (PAGE_SHIFT - TTE_SHIFT);
409         tsb_kernel_mask = (tsb_kernel_size >> TTE_SHIFT) - 1;
410
411         /*
412          * Allocate the kernel TSB and lock it in the TLB if necessary.
413          */
414         pa = pmap_bootstrap_alloc(tsb_kernel_size, colors);
415         if (pa & PAGE_MASK_4M)
416                 OF_panic("%s: TSB unaligned", __func__);
417         tsb_kernel_phys = pa;
418         if (tsb_kernel_ldd_phys == 0) {
419                 tsb_kernel =
420                     (struct tte *)(VM_MIN_KERNEL_ADDRESS - tsb_kernel_size);
421                 pmap_map_tsb();
422                 bzero(tsb_kernel, tsb_kernel_size);
423         } else {
424                 tsb_kernel =
425                     (struct tte *)TLB_PHYS_TO_DIRECT(tsb_kernel_phys);
426                 aszero(ASI_PHYS_USE_EC, tsb_kernel_phys, tsb_kernel_size);
427         }
428
429         /*
430          * Allocate and map the dynamic per-CPU area for the BSP.
431          */
432         pa = pmap_bootstrap_alloc(DPCPU_SIZE, colors);
433         dpcpu0 = (void *)TLB_PHYS_TO_DIRECT(pa);
434
435         /*
436          * Allocate and map the message buffer.
437          */
438         pa = pmap_bootstrap_alloc(msgbufsize, colors);
439         msgbufp = (struct msgbuf *)TLB_PHYS_TO_DIRECT(pa);
440
441         /*
442          * Patch the TSB addresses and mask as well as the ASIs used to load
443          * it into the trap table.
444          */
445
446 #define LDDA_R_I_R(rd, imm_asi, rs1, rs2)                               \
447         (EIF_OP(IOP_LDST) | EIF_F3_RD(rd) | EIF_F3_OP3(INS3_LDDA) |     \
448             EIF_F3_RS1(rs1) | EIF_F3_I(0) | EIF_F3_IMM_ASI(imm_asi) |   \
449             EIF_F3_RS2(rs2))
450 #define OR_R_I_R(rd, imm13, rs1)                                        \
451         (EIF_OP(IOP_MISC) | EIF_F3_RD(rd) | EIF_F3_OP3(INS2_OR) |       \
452             EIF_F3_RS1(rs1) | EIF_F3_I(1) | EIF_IMM(imm13, 13))
453 #define SETHI(rd, imm22)                                                \
454         (EIF_OP(IOP_FORM2) | EIF_F2_RD(rd) | EIF_F2_OP2(INS0_SETHI) |   \
455             EIF_IMM((imm22) >> 10, 22))
456 #define WR_R_I(rd, imm13, rs1)                                          \
457         (EIF_OP(IOP_MISC) | EIF_F3_RD(rd) | EIF_F3_OP3(INS2_WR) |       \
458             EIF_F3_RS1(rs1) | EIF_F3_I(1) | EIF_IMM(imm13, 13))
459
460 #define PATCH_ASI(addr, asi) do {                                       \
461         if (addr[0] != WR_R_I(IF_F3_RD(addr[0]), 0x0,                   \
462             IF_F3_RS1(addr[0])))                                        \
463                 OF_panic("%s: patched instructions have changed",       \
464                     __func__);                                          \
465         addr[0] |= EIF_IMM((asi), 13);                                  \
466         flush(addr);                                                    \
467 } while (0)
468
469 #define PATCH_LDD(addr, asi) do {                                       \
470         if (addr[0] != LDDA_R_I_R(IF_F3_RD(addr[0]), 0x0,               \
471             IF_F3_RS1(addr[0]), IF_F3_RS2(addr[0])))                    \
472                 OF_panic("%s: patched instructions have changed",       \
473                     __func__);                                          \
474         addr[0] |= EIF_F3_IMM_ASI(asi);                                 \
475         flush(addr);                                                    \
476 } while (0)
477
478 #define PATCH_TSB(addr, val) do {                                       \
479         if (addr[0] != SETHI(IF_F2_RD(addr[0]), 0x0) ||                 \
480             addr[1] != OR_R_I_R(IF_F3_RD(addr[1]), 0x0,                 \
481             IF_F3_RS1(addr[1])) ||                                      \
482             addr[3] != SETHI(IF_F2_RD(addr[3]), 0x0))                   \
483                 OF_panic("%s: patched instructions have changed",       \
484                     __func__);                                          \
485         addr[0] |= EIF_IMM((val) >> 42, 22);                            \
486         addr[1] |= EIF_IMM((val) >> 32, 10);                            \
487         addr[3] |= EIF_IMM((val) >> 10, 22);                            \
488         flush(addr);                                                    \
489         flush(addr + 1);                                                \
490         flush(addr + 3);                                                \
491 } while (0)
492
493 #define PATCH_TSB_MASK(addr, val) do {                                  \
494         if (addr[0] != SETHI(IF_F2_RD(addr[0]), 0x0) ||                 \
495             addr[1] != OR_R_I_R(IF_F3_RD(addr[1]), 0x0,                 \
496             IF_F3_RS1(addr[1])))                                        \
497                 OF_panic("%s: patched instructions have changed",       \
498                     __func__);                                          \
499         addr[0] |= EIF_IMM((val) >> 10, 22);                            \
500         addr[1] |= EIF_IMM((val), 10);                                  \
501         flush(addr);                                                    \
502         flush(addr + 1);                                                \
503 } while (0)
504
505         if (tsb_kernel_ldd_phys == 0) {
506                 asi = ASI_N;
507                 ldd = ASI_NUCLEUS_QUAD_LDD;
508                 off = (vm_offset_t)tsb_kernel;
509         } else {
510                 asi = ASI_PHYS_USE_EC;
511                 ldd = ASI_ATOMIC_QUAD_LDD_PHYS;
512                 off = (vm_offset_t)tsb_kernel_phys;
513         }
514         PATCH_TSB(tl1_dmmu_miss_direct_patch_tsb_phys_1, tsb_kernel_phys);
515         PATCH_TSB(tl1_dmmu_miss_direct_patch_tsb_phys_end_1,
516             tsb_kernel_phys + tsb_kernel_size - 1);
517         PATCH_ASI(tl1_dmmu_miss_patch_asi_1, asi);
518         PATCH_LDD(tl1_dmmu_miss_patch_quad_ldd_1, ldd);
519         PATCH_TSB(tl1_dmmu_miss_patch_tsb_1, off);
520         PATCH_TSB(tl1_dmmu_miss_patch_tsb_2, off);
521         PATCH_TSB_MASK(tl1_dmmu_miss_patch_tsb_mask_1, tsb_kernel_mask);
522         PATCH_TSB_MASK(tl1_dmmu_miss_patch_tsb_mask_2, tsb_kernel_mask);
523         PATCH_ASI(tl1_dmmu_prot_patch_asi_1, asi);
524         PATCH_LDD(tl1_dmmu_prot_patch_quad_ldd_1, ldd);
525         PATCH_TSB(tl1_dmmu_prot_patch_tsb_1, off);
526         PATCH_TSB(tl1_dmmu_prot_patch_tsb_2, off);
527         PATCH_TSB_MASK(tl1_dmmu_prot_patch_tsb_mask_1, tsb_kernel_mask);
528         PATCH_TSB_MASK(tl1_dmmu_prot_patch_tsb_mask_2, tsb_kernel_mask);
529         PATCH_ASI(tl1_immu_miss_patch_asi_1, asi);
530         PATCH_LDD(tl1_immu_miss_patch_quad_ldd_1, ldd);
531         PATCH_TSB(tl1_immu_miss_patch_tsb_1, off);
532         PATCH_TSB(tl1_immu_miss_patch_tsb_2, off);
533         PATCH_TSB_MASK(tl1_immu_miss_patch_tsb_mask_1, tsb_kernel_mask);
534         PATCH_TSB_MASK(tl1_immu_miss_patch_tsb_mask_2, tsb_kernel_mask);
535
536         /*
537          * Enter fake 8k pages for the 4MB kernel pages, so that
538          * pmap_kextract() will work for them.
539          */
540         for (i = 0; i < kernel_tlb_slots; i++) {
541                 pa = kernel_tlbs[i].te_pa;
542                 va = kernel_tlbs[i].te_va;
543                 for (off = 0; off < PAGE_SIZE_4M; off += PAGE_SIZE) {
544                         tp = tsb_kvtotte(va + off);
545                         vpn = TV_VPN(va + off, TS_8K);
546                         data = TD_V | TD_8K | TD_PA(pa + off) | TD_REF |
547                             TD_SW | TD_CP | TD_CV | TD_P | TD_W;
548                         pmap_bootstrap_set_tte(tp, vpn, data);
549                 }
550         }
551
552         /*
553          * Set the start and end of KVA.  The kernel is loaded starting
554          * at the first available 4MB super page, so we advance to the
555          * end of the last one used for it.
556          */
557         virtual_avail = KERNBASE + kernel_tlb_slots * PAGE_SIZE_4M;
558         virtual_end = vm_max_kernel_address;
559         kernel_vm_end = vm_max_kernel_address;
560
561         /*
562          * Allocate kva space for temporary mappings.
563          */
564         pmap_idle_map = virtual_avail;
565         virtual_avail += PAGE_SIZE * colors;
566         pmap_temp_map_1 = virtual_avail;
567         virtual_avail += PAGE_SIZE * colors;
568         pmap_temp_map_2 = virtual_avail;
569         virtual_avail += PAGE_SIZE * colors;
570
571         /*
572          * Allocate a kernel stack with guard page for thread0 and map it
573          * into the kernel TSB.  We must ensure that the virtual address is
574          * colored properly for corresponding CPUs, since we're allocating
575          * from phys_avail so the memory won't have an associated vm_page_t.
576          */
577         pa = pmap_bootstrap_alloc(KSTACK_PAGES * PAGE_SIZE, colors);
578         kstack0_phys = pa;
579         virtual_avail += roundup(KSTACK_GUARD_PAGES, colors) * PAGE_SIZE;
580         kstack0 = virtual_avail;
581         virtual_avail += roundup(KSTACK_PAGES, colors) * PAGE_SIZE;
582         if (dcache_color_ignore == 0)
583                 KASSERT(DCACHE_COLOR(kstack0) == DCACHE_COLOR(kstack0_phys),
584                     ("pmap_bootstrap: kstack0 miscolored"));
585         for (i = 0; i < KSTACK_PAGES; i++) {
586                 pa = kstack0_phys + i * PAGE_SIZE;
587                 va = kstack0 + i * PAGE_SIZE;
588                 tp = tsb_kvtotte(va);
589                 vpn = TV_VPN(va, TS_8K);
590                 data = TD_V | TD_8K | TD_PA(pa) | TD_REF | TD_SW | TD_CP |
591                     TD_CV | TD_P | TD_W;
592                 pmap_bootstrap_set_tte(tp, vpn, data);
593         }
594
595         /*
596          * Calculate the last available physical address.
597          */
598         for (i = 0; phys_avail[i + 2] != 0; i += 2)
599                 ;
600         Maxmem = sparc64_btop(phys_avail[i + 1]);
601
602         /*
603          * Add the PROM mappings to the kernel TSB.
604          */
605         if ((vmem = OF_finddevice("/virtual-memory")) == -1)
606                 OF_panic("%s: finddevice /virtual-memory", __func__);
607         if ((sz = OF_getproplen(vmem, "translations")) == -1)
608                 OF_panic("%s: getproplen translations", __func__);
609         if (sizeof(translations) < sz)
610                 OF_panic("%s: translations too small", __func__);
611         bzero(translations, sz);
612         if (OF_getprop(vmem, "translations", translations, sz) == -1)
613                 OF_panic("%s: getprop /virtual-memory/translations",
614                     __func__);
615         sz /= sizeof(*translations);
616         translations_size = sz;
617         CTR0(KTR_PMAP, "pmap_bootstrap: translations");
618         qsort(translations, sz, sizeof (*translations), om_cmp);
619         for (i = 0; i < sz; i++) {
620                 CTR3(KTR_PMAP,
621                     "translation: start=%#lx size=%#lx tte=%#lx",
622                     translations[i].om_start, translations[i].om_size,
623                     translations[i].om_tte);
624                 if ((translations[i].om_tte & TD_V) == 0)
625                         continue;
626                 if (translations[i].om_start < VM_MIN_PROM_ADDRESS ||
627                     translations[i].om_start > VM_MAX_PROM_ADDRESS)
628                         continue;
629                 for (off = 0; off < translations[i].om_size;
630                     off += PAGE_SIZE) {
631                         va = translations[i].om_start + off;
632                         tp = tsb_kvtotte(va);
633                         vpn = TV_VPN(va, TS_8K);
634                         data = ((translations[i].om_tte &
635                             ~((TD_SOFT2_MASK << TD_SOFT2_SHIFT) |
636                             (cpu_impl >= CPU_IMPL_ULTRASPARCI &&
637                             cpu_impl < CPU_IMPL_ULTRASPARCIII ?
638                             (TD_DIAG_SF_MASK << TD_DIAG_SF_SHIFT) :
639                             (TD_RSVD_CH_MASK << TD_RSVD_CH_SHIFT)) |
640                             (TD_SOFT_MASK << TD_SOFT_SHIFT))) | TD_EXEC) +
641                             off;
642                         pmap_bootstrap_set_tte(tp, vpn, data);
643                 }
644         }
645
646         /*
647          * Get the available physical memory ranges from /memory/reg.  These
648          * are only used for kernel dumps, but it may not be wise to do PROM
649          * calls in that situation.
650          */
651         if ((sz = OF_getproplen(pmem, "reg")) == -1)
652                 OF_panic("%s: getproplen /memory/reg", __func__);
653         if (sizeof(sparc64_memreg) < sz)
654                 OF_panic("%s: sparc64_memreg too small", __func__);
655         if (OF_getprop(pmem, "reg", sparc64_memreg, sz) == -1)
656                 OF_panic("%s: getprop /memory/reg", __func__);
657         sparc64_nmemreg = sz / sizeof(*sparc64_memreg);
658
659         /*
660          * Initialize the kernel pmap (which is statically allocated).
661          */
662         pm = kernel_pmap;
663         PMAP_LOCK_INIT(pm);
664         for (i = 0; i < MAXCPU; i++)
665                 pm->pm_context[i] = TLB_CTX_KERNEL;
666         CPU_FILL(&pm->pm_active);
667
668         /*
669          * Initialize the global tte list lock, which is more commonly
670          * known as the pmap pv global lock.
671          */
672         rw_init(&tte_list_global_lock, "pmap pv global");
673
674         /*
675          * Flush all non-locked TLB entries possibly left over by the
676          * firmware.
677          */
678         tlb_flush_nonlocked();
679 }
680
681 /*
682  * Map the 4MB kernel TSB pages.
683  */
684 void
685 pmap_map_tsb(void)
686 {
687         vm_offset_t va;
688         vm_paddr_t pa;
689         u_long data;
690         int i;
691
692         for (i = 0; i < tsb_kernel_size; i += PAGE_SIZE_4M) {
693                 va = (vm_offset_t)tsb_kernel + i;
694                 pa = tsb_kernel_phys + i;
695                 data = TD_V | TD_4M | TD_PA(pa) | TD_L | TD_CP | TD_CV |
696                     TD_P | TD_W;
697                 stxa(AA_DMMU_TAR, ASI_DMMU, TLB_TAR_VA(va) |
698                     TLB_TAR_CTX(TLB_CTX_KERNEL));
699                 stxa_sync(0, ASI_DTLB_DATA_IN_REG, data);
700         }
701 }
702
703 /*
704  * Set the secondary context to be the kernel context (needed for FP block
705  * operations in the kernel).
706  */
707 void
708 pmap_set_kctx(void)
709 {
710
711         stxa(AA_DMMU_SCXR, ASI_DMMU, (ldxa(AA_DMMU_SCXR, ASI_DMMU) &
712             TLB_CXR_PGSZ_MASK) | TLB_CTX_KERNEL);
713         flush(KERNBASE);
714 }
715
716 /*
717  * Allocate a physical page of memory directly from the phys_avail map.
718  * Can only be called from pmap_bootstrap before avail start and end are
719  * calculated.
720  */
721 static vm_paddr_t
722 pmap_bootstrap_alloc(vm_size_t size, uint32_t colors)
723 {
724         vm_paddr_t pa;
725         int i;
726
727         size = roundup(size, PAGE_SIZE * colors);
728         for (i = 0; phys_avail[i + 1] != 0; i += 2) {
729                 if (phys_avail[i + 1] - phys_avail[i] < size)
730                         continue;
731                 pa = phys_avail[i];
732                 phys_avail[i] += size;
733                 return (pa);
734         }
735         OF_panic("%s: no suitable region found", __func__);
736 }
737
738 /*
739  * Set a TTE.  This function is intended as a helper when tsb_kernel is
740  * direct-mapped but we haven't taken over the trap table, yet, as it's the
741  * case when we are taking advantage of ASI_ATOMIC_QUAD_LDD_PHYS to access
742  * the kernel TSB.
743  */
744 void
745 pmap_bootstrap_set_tte(struct tte *tp, u_long vpn, u_long data)
746 {
747
748         if (tsb_kernel_ldd_phys == 0) {
749                 tp->tte_vpn = vpn;
750                 tp->tte_data = data;
751         } else {
752                 stxa((vm_paddr_t)tp + offsetof(struct tte, tte_vpn),
753                     ASI_PHYS_USE_EC, vpn);
754                 stxa((vm_paddr_t)tp + offsetof(struct tte, tte_data),
755                     ASI_PHYS_USE_EC, data);
756         }
757 }
758
759 /*
760  * Initialize a vm_page's machine-dependent fields.
761  */
762 void
763 pmap_page_init(vm_page_t m)
764 {
765
766         TAILQ_INIT(&m->md.tte_list);
767         m->md.color = DCACHE_COLOR(VM_PAGE_TO_PHYS(m));
768         m->md.pmap = NULL;
769 }
770
771 /*
772  * Initialize the pmap module.
773  */
774 void
775 pmap_init(void)
776 {
777         vm_offset_t addr;
778         vm_size_t size;
779         int result;
780         int i;
781
782         for (i = 0; i < translations_size; i++) {
783                 addr = translations[i].om_start;
784                 size = translations[i].om_size;
785                 if ((translations[i].om_tte & TD_V) == 0)
786                         continue;
787                 if (addr < VM_MIN_PROM_ADDRESS || addr > VM_MAX_PROM_ADDRESS)
788                         continue;
789                 result = vm_map_find(kernel_map, NULL, 0, &addr, size, 0,
790                     VMFS_NO_SPACE, VM_PROT_ALL, VM_PROT_ALL, MAP_NOFAULT);
791                 if (result != KERN_SUCCESS || addr != translations[i].om_start)
792                         panic("pmap_init: vm_map_find");
793         }
794 }
795
796 /*
797  * Extract the physical page address associated with the given
798  * map/virtual_address pair.
799  */
800 vm_paddr_t
801 pmap_extract(pmap_t pm, vm_offset_t va)
802 {
803         struct tte *tp;
804         vm_paddr_t pa;
805
806         if (pm == kernel_pmap)
807                 return (pmap_kextract(va));
808         PMAP_LOCK(pm);
809         tp = tsb_tte_lookup(pm, va);
810         if (tp == NULL)
811                 pa = 0;
812         else
813                 pa = TTE_GET_PA(tp) | (va & TTE_GET_PAGE_MASK(tp));
814         PMAP_UNLOCK(pm);
815         return (pa);
816 }
817
/*
 * Atomically extract and hold the physical page with the given
 * pmap and virtual address pair if that mapping permits the given
 * protection.
 *
 * Returns the held page, or NULL if no page was held.  The pmap lock is
 * taken for the duration of the lookup and released before returning.
 */
vm_page_t
pmap_extract_and_hold(pmap_t pm, vm_offset_t va, vm_prot_t prot)
{
        struct tte *tp;
        vm_page_t m;
        vm_paddr_t pa;

        m = NULL;
        pa = 0;
        PMAP_LOCK(pm);
retry:
        if (pm == kernel_pmap) {
                if (va >= VM_MIN_DIRECT_ADDRESS) {
                        /*
                         * Direct-mapped kernel address: the physical address
                         * is computed from va, no TTE is consulted and prot
                         * is not checked.  Leaving tp NULL skips the TTE
                         * path below.  The result of the relock attempt is
                         * deliberately ignored here.
                         */
                        tp = NULL;
                        m = PHYS_TO_VM_PAGE(TLB_DIRECT_TO_PHYS(va));
                        (void)vm_page_pa_tryrelock(pm, TLB_DIRECT_TO_PHYS(va),
                            &pa);
                        vm_page_hold(m);
                } else {
                        /* Kernel TSB lookup; an invalid TTE counts as none. */
                        tp = tsb_kvtotte(va);
                        if ((tp->tte_data & TD_V) == 0)
                                tp = NULL;
                }
        } else
                tp = tsb_tte_lookup(pm, va);
        /*
         * Hold the page only if the TTE has TD_SW set or the caller did not
         * ask for write access.
         */
        if (tp != NULL && ((tp->tte_data & TD_SW) ||
            (prot & VM_PROT_WRITE) == 0)) {
                /* A non-zero return means the lookup has to be redone. */
                if (vm_page_pa_tryrelock(pm, TTE_GET_PA(tp), &pa))
                        goto retry;
                m = PHYS_TO_VM_PAGE(TTE_GET_PA(tp));
                vm_page_hold(m);
        }
        PA_UNLOCK_COND(pa);
        PMAP_UNLOCK(pm);
        return (m);
}
859
860 /*
861  * Extract the physical page address associated with the given kernel virtual
862  * address.
863  */
864 vm_paddr_t
865 pmap_kextract(vm_offset_t va)
866 {
867         struct tte *tp;
868
869         if (va >= VM_MIN_DIRECT_ADDRESS)
870                 return (TLB_DIRECT_TO_PHYS(va));
871         tp = tsb_kvtotte(va);
872         if ((tp->tte_data & TD_V) == 0)
873                 return (0);
874         return (TTE_GET_PA(tp) | (va & TTE_GET_PAGE_MASK(tp)));
875 }
876
/*
 * Account for a new mapping of page m at virtual address va in the page's
 * per-color mapping counters and decide whether the mapping may be entered
 * cacheable.
 *
 * Returns 1 if the mapping is cacheable and 0 if it is not.  When
 * dcache_color_ignore is non-zero, 1 is returned without any bookkeeping.
 * In the non-cacheable transition, TD_CV is cleared in every TTE on the
 * page's TTE list and those mappings are demapped from the TLB.
 *
 * The caller must hold tte_list_global_lock write-locked (asserted), and m
 * must not be a fictitious page.
 */
int
pmap_cache_enter(vm_page_t m, vm_offset_t va)
{
        struct tte *tp;
        int color;

        rw_assert(&tte_list_global_lock, RA_WLOCKED);
        KASSERT((m->flags & PG_FICTITIOUS) == 0,
            ("pmap_cache_enter: fake page"));
        PMAP_STATS_INC(pmap_ncache_enter);

        if (dcache_color_ignore != 0)
                return (1);

        /*
         * Find the color for this virtual address and note the added mapping.
         */
        color = DCACHE_COLOR(va);
        m->md.colors[color]++;

        /*
         * If all existing mappings have the same color, the mapping is
         * cacheable.
         */
        if (m->md.color == color) {
                KASSERT(m->md.colors[DCACHE_OTHER_COLOR(color)] == 0,
                    ("pmap_cache_enter: cacheable, mappings of other color"));
                if (m->md.color == DCACHE_COLOR(VM_PAGE_TO_PHYS(m)))
                        PMAP_STATS_INC(pmap_ncache_enter_c);
                else
                        PMAP_STATS_INC(pmap_ncache_enter_oc);
                return (1);
        }

        /*
         * If there are no mappings of the other color, and the page still has
         * the wrong color, this must be a new mapping.  Change the color to
         * match the new mapping, which is cacheable.  We must flush the page
         * from the cache now.
         */
        if (m->md.colors[DCACHE_OTHER_COLOR(color)] == 0) {
                KASSERT(m->md.colors[color] == 1,
                    ("pmap_cache_enter: changing color, not new mapping"));
                dcache_page_inval(VM_PAGE_TO_PHYS(m));
                m->md.color = color;
                if (m->md.color == DCACHE_COLOR(VM_PAGE_TO_PHYS(m)))
                        PMAP_STATS_INC(pmap_ncache_enter_cc);
                else
                        PMAP_STATS_INC(pmap_ncache_enter_coc);
                return (1);
        }

        /*
         * If the mapping is already non-cacheable, just return.
         */
        if (m->md.color == -1) {
                PMAP_STATS_INC(pmap_ncache_enter_nc);
                return (0);
        }

        PMAP_STATS_INC(pmap_ncache_enter_cnc);

        /*
         * Mark all mappings as uncacheable, flush any lines with the other
         * color out of the dcache, and set the color to none (-1).
         */
        TAILQ_FOREACH(tp, &m->md.tte_list, tte_link) {
                atomic_clear_long(&tp->tte_data, TD_CV);
                tlb_page_demap(TTE_GET_PMAP(tp), TTE_GET_VA(tp));
        }
        dcache_page_inval(VM_PAGE_TO_PHYS(m));
        m->md.color = -1;
        return (0);
}
951
/*
 * Account for the removal of the mapping of page m at virtual address va in
 * the page's per-color mapping counters.  If the page was non-cacheable and
 * the last mapping of va's color is gone, TD_CV is set again in every TTE on
 * the page's TTE list, those mappings are demapped from the TLB, and the
 * page takes on the other color.
 *
 * Does nothing beyond statistics when dcache_color_ignore is non-zero.  The
 * caller must hold tte_list_global_lock write-locked (asserted), and m must
 * not be a fictitious page.
 */
static void
pmap_cache_remove(vm_page_t m, vm_offset_t va)
{
        struct tte *tp;
        int color;

        rw_assert(&tte_list_global_lock, RA_WLOCKED);
        CTR3(KTR_PMAP, "pmap_cache_remove: m=%p va=%#lx c=%d", m, va,
            m->md.colors[DCACHE_COLOR(va)]);
        KASSERT((m->flags & PG_FICTITIOUS) == 0,
            ("pmap_cache_remove: fake page"));
        PMAP_STATS_INC(pmap_ncache_remove);

        if (dcache_color_ignore != 0)
                return;

        KASSERT(m->md.colors[DCACHE_COLOR(va)] > 0,
            ("pmap_cache_remove: no mappings %d <= 0",
            m->md.colors[DCACHE_COLOR(va)]));

        /*
         * Find the color for this virtual address and note the removal of
         * the mapping.
         */
        color = DCACHE_COLOR(va);
        m->md.colors[color]--;

        /*
         * If the page is cacheable, just return and keep the same color, even
         * if there are no longer any mappings.
         */
        if (m->md.color != -1) {
                if (m->md.color == DCACHE_COLOR(VM_PAGE_TO_PHYS(m)))
                        PMAP_STATS_INC(pmap_ncache_remove_c);
                else
                        PMAP_STATS_INC(pmap_ncache_remove_oc);
                return;
        }

        KASSERT(m->md.colors[DCACHE_OTHER_COLOR(color)] != 0,
            ("pmap_cache_remove: uncacheable, no mappings of other color"));

        /*
         * If the page is not cacheable (color is -1), and the number of
         * mappings for this color is not zero, just return.  There are
         * mappings of the other color still, so remain non-cacheable.
         */
        if (m->md.colors[color] != 0) {
                PMAP_STATS_INC(pmap_ncache_remove_nc);
                return;
        }

        /*
         * The number of mappings for this color is now zero.  Recache the
         * other colored mappings, and change the page color to the other
         * color.  There should be no lines in the data cache for this page,
         * so flushing should not be needed.
         */
        TAILQ_FOREACH(tp, &m->md.tte_list, tte_link) {
                atomic_set_long(&tp->tte_data, TD_CV);
                tlb_page_demap(TTE_GET_PMAP(tp), TTE_GET_VA(tp));
        }
        m->md.color = DCACHE_OTHER_COLOR(color);

        if (m->md.color == DCACHE_COLOR(VM_PAGE_TO_PHYS(m)))
                PMAP_STATS_INC(pmap_ncache_remove_cc);
        else
                PMAP_STATS_INC(pmap_ncache_remove_coc);
}
1021
/*
 * Map a wired page into kernel virtual address space.
 *
 * The caller must hold tte_list_global_lock write-locked (asserted).  The
 * TLB entry for va itself is not demapped here; pmap_qenter() demaps the
 * whole range after entering all pages.
 */
void
pmap_kenter(vm_offset_t va, vm_page_t m)
{
        vm_offset_t ova;
        struct tte *tp;
        vm_page_t om;
        u_long data;

        rw_assert(&tte_list_global_lock, RA_WLOCKED);
        PMAP_STATS_INC(pmap_nkenter);
        tp = tsb_kvtotte(va);
        CTR4(KTR_PMAP, "pmap_kenter: va=%#lx pa=%#lx tp=%p data=%#lx",
            va, VM_PAGE_TO_PHYS(m), tp, tp->tte_data);
        /* Trace and count mappings whose VA color differs from the PA's. */
        if (DCACHE_COLOR(VM_PAGE_TO_PHYS(m)) != DCACHE_COLOR(va)) {
                CTR5(KTR_SPARE2,
        "pmap_kenter: off color va=%#lx pa=%#lx o=%p ot=%d pi=%#lx",
                    va, VM_PAGE_TO_PHYS(m), m->object,
                    m->object ? m->object->type : -1,
                    m->pindex);
                PMAP_STATS_INC(pmap_nkenter_oc);
        }
        /*
         * The TSB slot already holds a valid mapping.  Re-entering the very
         * same page at the same address is a no-op; otherwise unhook the old
         * TTE from its page, update that page's color accounting, and demap
         * the old address if it differs from the new one.
         */
        if ((tp->tte_data & TD_V) != 0) {
                om = PHYS_TO_VM_PAGE(TTE_GET_PA(tp));
                ova = TTE_GET_VA(tp);
                if (m == om && va == ova) {
                        PMAP_STATS_INC(pmap_nkenter_stupid);
                        return;
                }
                TAILQ_REMOVE(&om->md.tte_list, tp, tte_link);
                pmap_cache_remove(om, ova);
                if (va != ova)
                        tlb_page_demap(kernel_pmap, ova);
        }
        data = TD_V | TD_8K | VM_PAGE_TO_PHYS(m) | TD_REF | TD_SW | TD_CP |
            TD_P | TD_W;
        /* TD_CV is only set when the color accounting allows caching. */
        if (pmap_cache_enter(m, va) != 0)
                data |= TD_CV;
        tp->tte_vpn = TV_VPN(va, TS_8K);
        tp->tte_data = data;
        TAILQ_INSERT_TAIL(&m->md.tte_list, tp, tte_link);
}
1066
1067 /*
1068  * Map a wired page into kernel virtual address space.  This additionally
1069  * takes a flag argument which is or'ed to the TTE data.  This is used by
1070  * sparc64_bus_mem_map().
1071  * NOTE: if the mapping is non-cacheable, it's the caller's responsibility
1072  * to flush entries that might still be in the cache, if applicable.
1073  */
1074 void
1075 pmap_kenter_flags(vm_offset_t va, vm_paddr_t pa, u_long flags)
1076 {
1077         struct tte *tp;
1078
1079         tp = tsb_kvtotte(va);
1080         CTR4(KTR_PMAP, "pmap_kenter_flags: va=%#lx pa=%#lx tp=%p data=%#lx",
1081             va, pa, tp, tp->tte_data);
1082         tp->tte_vpn = TV_VPN(va, TS_8K);
1083         tp->tte_data = TD_V | TD_8K | TD_PA(pa) | TD_REF | TD_P | flags;
1084 }
1085
1086 /*
1087  * Remove a wired page from kernel virtual address space.
1088  */
1089 void
1090 pmap_kremove(vm_offset_t va)
1091 {
1092         struct tte *tp;
1093         vm_page_t m;
1094
1095         rw_assert(&tte_list_global_lock, RA_WLOCKED);
1096         PMAP_STATS_INC(pmap_nkremove);
1097         tp = tsb_kvtotte(va);
1098         CTR3(KTR_PMAP, "pmap_kremove: va=%#lx tp=%p data=%#lx", va, tp,
1099             tp->tte_data);
1100         if ((tp->tte_data & TD_V) == 0)
1101                 return;
1102         m = PHYS_TO_VM_PAGE(TTE_GET_PA(tp));
1103         TAILQ_REMOVE(&m->md.tte_list, tp, tte_link);
1104         pmap_cache_remove(m, va);
1105         TTE_ZERO(tp);
1106 }
1107
1108 /*
1109  * Inverse of pmap_kenter_flags, used by bus_space_unmap().
1110  */
1111 void
1112 pmap_kremove_flags(vm_offset_t va)
1113 {
1114         struct tte *tp;
1115
1116         tp = tsb_kvtotte(va);
1117         CTR3(KTR_PMAP, "pmap_kremove_flags: va=%#lx tp=%p data=%#lx", va, tp,
1118             tp->tte_data);
1119         TTE_ZERO(tp);
1120 }
1121
1122 /*
1123  * Map a range of physical addresses into kernel virtual address space.
1124  *
1125  * The value passed in *virt is a suggested virtual address for the mapping.
1126  * Architectures which can support a direct-mapped physical to virtual region
1127  * can return the appropriate address within that region, leaving '*virt'
1128  * unchanged.
1129  */
1130 vm_offset_t
1131 pmap_map(vm_offset_t *virt, vm_paddr_t start, vm_paddr_t end, int prot)
1132 {
1133
1134         return (TLB_PHYS_TO_DIRECT(start));
1135 }
1136
1137 /*
1138  * Map a list of wired pages into kernel virtual address space.  This is
1139  * intended for temporary mappings which do not need page modification or
1140  * references recorded.  Existing mappings in the region are overwritten.
1141  */
1142 void
1143 pmap_qenter(vm_offset_t sva, vm_page_t *m, int count)
1144 {
1145         vm_offset_t va;
1146
1147         PMAP_STATS_INC(pmap_nqenter);
1148         va = sva;
1149         rw_wlock(&tte_list_global_lock);
1150         while (count-- > 0) {
1151                 pmap_kenter(va, *m);
1152                 va += PAGE_SIZE;
1153                 m++;
1154         }
1155         rw_wunlock(&tte_list_global_lock);
1156         tlb_range_demap(kernel_pmap, sva, va);
1157 }
1158
1159 /*
1160  * Remove page mappings from kernel virtual address space.  Intended for
1161  * temporary mappings entered by pmap_qenter.
1162  */
1163 void
1164 pmap_qremove(vm_offset_t sva, int count)
1165 {
1166         vm_offset_t va;
1167
1168         PMAP_STATS_INC(pmap_nqremove);
1169         va = sva;
1170         rw_wlock(&tte_list_global_lock);
1171         while (count-- > 0) {
1172                 pmap_kremove(va);
1173                 va += PAGE_SIZE;
1174         }
1175         rw_wunlock(&tte_list_global_lock);
1176         tlb_range_demap(kernel_pmap, sva, va);
1177 }
1178
1179 /*
1180  * Initialize the pmap associated with process 0.
1181  */
1182 void
1183 pmap_pinit0(pmap_t pm)
1184 {
1185         int i;
1186
1187         PMAP_LOCK_INIT(pm);
1188         for (i = 0; i < MAXCPU; i++)
1189                 pm->pm_context[i] = TLB_CTX_KERNEL;
1190         CPU_ZERO(&pm->pm_active);
1191         pm->pm_tsb = NULL;
1192         pm->pm_tsb_obj = NULL;
1193         bzero(&pm->pm_stats, sizeof(pm->pm_stats));
1194 }
1195
1196 /*
1197  * Initialize a preallocated and zeroed pmap structure, such as one in a
1198  * vmspace structure.
1199  */
1200 int
1201 pmap_pinit(pmap_t pm)
1202 {
1203         vm_page_t ma[TSB_PAGES];
1204         vm_page_t m;
1205         int i;
1206
1207         /*
1208          * Allocate KVA space for the TSB.
1209          */
1210         if (pm->pm_tsb == NULL) {
1211                 pm->pm_tsb = (struct tte *)kva_alloc(TSB_BSIZE);
1212                 if (pm->pm_tsb == NULL) {
1213                         PMAP_LOCK_DESTROY(pm);
1214                         return (0);
1215                 }
1216         }
1217
1218         /*
1219          * Allocate an object for it.
1220          */
1221         if (pm->pm_tsb_obj == NULL)
1222                 pm->pm_tsb_obj = vm_object_allocate(OBJT_PHYS, TSB_PAGES);
1223
1224         for (i = 0; i < MAXCPU; i++)
1225                 pm->pm_context[i] = -1;
1226         CPU_ZERO(&pm->pm_active);
1227
1228         VM_OBJECT_WLOCK(pm->pm_tsb_obj);
1229         for (i = 0; i < TSB_PAGES; i++) {
1230                 m = vm_page_grab(pm->pm_tsb_obj, i, VM_ALLOC_NOBUSY |
1231                     VM_ALLOC_WIRED | VM_ALLOC_ZERO);
1232                 m->valid = VM_PAGE_BITS_ALL;
1233                 m->md.pmap = pm;
1234                 ma[i] = m;
1235         }
1236         VM_OBJECT_WUNLOCK(pm->pm_tsb_obj);
1237         pmap_qenter((vm_offset_t)pm->pm_tsb, ma, TSB_PAGES);
1238
1239         bzero(&pm->pm_stats, sizeof(pm->pm_stats));
1240         return (1);
1241 }
1242
/*
 * Release any resources held by the given physical map.
 * Called when a pmap initialized by pmap_pinit is being released.
 * Should only be called if the map contains no valid mappings.
 *
 * Any per-CPU pmap pointer still referring to pm is cleared, the TSB pages
 * are unmapped from kernel virtual address space, and every page of the TSB
 * object is unwired and freed.  The TSB KVA and the TSB object themselves
 * are not released here.
 */
void
pmap_release(pmap_t pm)
{
        vm_object_t obj;
        vm_page_t m;
#ifdef SMP
        struct pcpu *pc;
#endif

        CTR2(KTR_PMAP, "pmap_release: ctx=%#x tsb=%p",
            pm->pm_context[curcpu], pm->pm_tsb);
        KASSERT(pmap_resident_count(pm) == 0,
            ("pmap_release: resident pages %ld != 0",
            pmap_resident_count(pm)));

        /*
         * After the pmap was freed, it might be reallocated to a new process.
         * When switching, this might lead us to wrongly assume that we need
         * not switch contexts because old and new pmap pointer are equal.
         * Therefore, make sure that this pmap is not referenced by any PCPU
         * pointer any more.  This could happen in two cases:
         * - A process that referenced the pmap is currently exiting on a CPU.
         *   However, it is guaranteed to not switch in any more after setting
         *   its state to PRS_ZOMBIE.
         * - A process that referenced this pmap ran on a CPU, but we switched
         *   to a kernel thread, leaving the pmap pointer unchanged.
         */
#ifdef SMP
        /* Clear pc_pmap on every CPU whose pointer still equals pm. */
        sched_pin();
        STAILQ_FOREACH(pc, &cpuhead, pc_allcpu)
                atomic_cmpset_rel_ptr((uintptr_t *)&pc->pc_pmap,
                    (uintptr_t)pm, (uintptr_t)NULL);
        sched_unpin();
#else
        critical_enter();
        if (PCPU_GET(pmap) == pm)
                PCPU_SET(pmap, NULL);
        critical_exit();
#endif

        pmap_qremove((vm_offset_t)pm->pm_tsb, TSB_PAGES);
        obj = pm->pm_tsb_obj;
        VM_OBJECT_WLOCK(obj);
        KASSERT(obj->ref_count == 1, ("pmap_release: tsbobj ref count != 1"));
        /* Unwire and free every page that backed the TSB. */
        while (!TAILQ_EMPTY(&obj->memq)) {
                m = TAILQ_FIRST(&obj->memq);
                m->md.pmap = NULL;
                m->wire_count--;
                atomic_subtract_int(&cnt.v_wire_count, 1);
                vm_page_free_zero(m);
        }
        VM_OBJECT_WUNLOCK(obj);
}
1301
/*
 * Grow the number of kernel page table entries.  Unneeded on this
 * platform, so any call is treated as a fatal error; addr is not used.
 */
void
pmap_growkernel(vm_offset_t addr)
{

        panic("pmap_growkernel: can't grow kernel");
}
1311
/*
 * Remove the mapping described by TTE tp, which maps va in pmap pm.
 *
 * Unless the TTE is marked TD_FAKE, it is unlinked from its page, the wired
 * and resident counts of pm are adjusted, modified/referenced state is
 * passed on to the vm_page for TD_PV mappings, and the page's color
 * accounting is updated.  The TTE is zeroed in any case.  pm2 is not used;
 * NOTE(review): it presumably exists to match the callback signature
 * expected by tsb_foreach() -- confirm.
 *
 * Returns 0 if PMAP_REMOVE_DONE(pm) holds afterwards, which callers use to
 * stop iterating, and 1 otherwise.  The caller must hold
 * tte_list_global_lock write-locked (asserted).
 */
int
pmap_remove_tte(struct pmap *pm, struct pmap *pm2, struct tte *tp,
    vm_offset_t va)
{
        vm_page_t m;
        u_long data;

        rw_assert(&tte_list_global_lock, RA_WLOCKED);
        /* Snapshot and clear the data word in a single atomic operation. */
        data = atomic_readandclear_long(&tp->tte_data);
        if ((data & TD_FAKE) == 0) {
                m = PHYS_TO_VM_PAGE(TD_PA(data));
                TAILQ_REMOVE(&m->md.tte_list, tp, tte_link);
                if ((data & TD_WIRED) != 0)
                        pm->pm_stats.wired_count--;
                if ((data & TD_PV) != 0) {
                        if ((data & TD_W) != 0)
                                vm_page_dirty(m);
                        if ((data & TD_REF) != 0)
                                vm_page_aflag_set(m, PGA_REFERENCED);
                        /* That was the last mapping of the page. */
                        if (TAILQ_EMPTY(&m->md.tte_list))
                                vm_page_aflag_clear(m, PGA_WRITEABLE);
                        pm->pm_stats.resident_count--;
                }
                pmap_cache_remove(m, va);
        }
        TTE_ZERO(tp);
        if (PMAP_REMOVE_DONE(pm))
                return (0);
        return (1);
}
1342
1343 /*
1344  * Remove the given range of addresses from the specified map.
1345  */
1346 void
1347 pmap_remove(pmap_t pm, vm_offset_t start, vm_offset_t end)
1348 {
1349         struct tte *tp;
1350         vm_offset_t va;
1351
1352         CTR3(KTR_PMAP, "pmap_remove: ctx=%#lx start=%#lx end=%#lx",
1353             pm->pm_context[curcpu], start, end);
1354         if (PMAP_REMOVE_DONE(pm))
1355                 return;
1356         rw_wlock(&tte_list_global_lock);
1357         PMAP_LOCK(pm);
1358         if (end - start > PMAP_TSB_THRESH) {
1359                 tsb_foreach(pm, NULL, start, end, pmap_remove_tte);
1360                 tlb_context_demap(pm);
1361         } else {
1362                 for (va = start; va < end; va += PAGE_SIZE)
1363                         if ((tp = tsb_tte_lookup(pm, va)) != NULL &&
1364                             !pmap_remove_tte(pm, NULL, tp, va))
1365                                 break;
1366                 tlb_range_demap(pm, start, end - 1);
1367         }
1368         PMAP_UNLOCK(pm);
1369         rw_wunlock(&tte_list_global_lock);
1370 }
1371
/*
 * Remove the mappings of the managed page m that are found on its TTE list;
 * TTEs without TD_PV are skipped.  For each mapping removed, the owning
 * pmap is locked, its wired and resident counts are adjusted,
 * referenced/modified state is passed on to the vm_page, the mapping is
 * demapped from the TLB and the TTE is zeroed.  PGA_WRITEABLE is cleared on
 * the page afterwards.
 */
void
pmap_remove_all(vm_page_t m)
{
        struct pmap *pm;
        struct tte *tpn;
        struct tte *tp;
        vm_offset_t va;

        KASSERT((m->oflags & VPO_UNMANAGED) == 0,
            ("pmap_remove_all: page %p is not managed", m));
        rw_wlock(&tte_list_global_lock);
        /* Fetch the successor first; tp may be unlinked from the list. */
        for (tp = TAILQ_FIRST(&m->md.tte_list); tp != NULL; tp = tpn) {
                tpn = TAILQ_NEXT(tp, tte_link);
                if ((tp->tte_data & TD_PV) == 0)
                        continue;
                pm = TTE_GET_PMAP(tp);
                va = TTE_GET_VA(tp);
                PMAP_LOCK(pm);
                if ((tp->tte_data & TD_WIRED) != 0)
                        pm->pm_stats.wired_count--;
                if ((tp->tte_data & TD_REF) != 0)
                        vm_page_aflag_set(m, PGA_REFERENCED);
                if ((tp->tte_data & TD_W) != 0)
                        vm_page_dirty(m);
                /* Invalidate the TTE before demapping it from the TLB. */
                tp->tte_data &= ~TD_V;
                tlb_page_demap(pm, va);
                TAILQ_REMOVE(&m->md.tte_list, tp, tte_link);
                pm->pm_stats.resident_count--;
                pmap_cache_remove(m, va);
                TTE_ZERO(tp);
                PMAP_UNLOCK(pm);
        }
        vm_page_aflag_clear(m, PGA_WRITEABLE);
        rw_wunlock(&tte_list_global_lock);
}
1407
1408 static int
1409 pmap_protect_tte(struct pmap *pm, struct pmap *pm2, struct tte *tp,
1410     vm_offset_t va)
1411 {
1412         u_long data;
1413         vm_page_t m;
1414
1415         PMAP_LOCK_ASSERT(pm, MA_OWNED);
1416         data = atomic_clear_long(&tp->tte_data, TD_SW | TD_W);
1417         if ((data & (TD_PV | TD_W)) == (TD_PV | TD_W)) {
1418                 m = PHYS_TO_VM_PAGE(TD_PA(data));
1419                 vm_page_dirty(m);
1420         }
1421         return (1);
1422 }
1423
1424 /*
1425  * Set the physical protection on the specified range of this map as requested.
1426  */
1427 void
1428 pmap_protect(pmap_t pm, vm_offset_t sva, vm_offset_t eva, vm_prot_t prot)
1429 {
1430         vm_offset_t va;
1431         struct tte *tp;
1432
1433         CTR4(KTR_PMAP, "pmap_protect: ctx=%#lx sva=%#lx eva=%#lx prot=%#lx",
1434             pm->pm_context[curcpu], sva, eva, prot);
1435
1436         if ((prot & VM_PROT_READ) == VM_PROT_NONE) {
1437                 pmap_remove(pm, sva, eva);
1438                 return;
1439         }
1440
1441         if (prot & VM_PROT_WRITE)
1442                 return;
1443
1444         PMAP_LOCK(pm);
1445         if (eva - sva > PMAP_TSB_THRESH) {
1446                 tsb_foreach(pm, NULL, sva, eva, pmap_protect_tte);
1447                 tlb_context_demap(pm);
1448         } else {
1449                 for (va = sva; va < eva; va += PAGE_SIZE)
1450                         if ((tp = tsb_tte_lookup(pm, va)) != NULL)
1451                                 pmap_protect_tte(pm, NULL, tp, va);
1452                 tlb_range_demap(pm, sva, eva - 1);
1453         }
1454         PMAP_UNLOCK(pm);
1455 }
1456
/*
 * Map the given physical page at the specified virtual address in the
 * target pmap with the protection requested.  If specified the page
 * will be wired down.
 *
 * This is a locking wrapper around pmap_enter_locked(): it takes
 * tte_list_global_lock for writing and the pmap lock around the call.  The
 * access argument is not used.
 */
void
pmap_enter(pmap_t pm, vm_offset_t va, vm_prot_t access, vm_page_t m,
    vm_prot_t prot, boolean_t wired)
{

        rw_wlock(&tte_list_global_lock);
        PMAP_LOCK(pm);
        pmap_enter_locked(pm, va, m, prot, wired);
        rw_wunlock(&tte_list_global_lock);
        PMAP_UNLOCK(pm);
}
1473
/*
 * Map the given physical page at the specified virtual address in the
 * target pmap with the protection requested.  If specified the page
 * will be wired down.
 *
 * tte_list_global_lock must be write-locked and the pmap lock must be
 * owned; both are asserted.
 */
static void
pmap_enter_locked(pmap_t pm, vm_offset_t va, vm_page_t m, vm_prot_t prot,
    boolean_t wired)
{
        struct tte *tp;
        vm_paddr_t pa;
        vm_page_t real;
        u_long data;

        rw_assert(&tte_list_global_lock, RA_WLOCKED);
        PMAP_LOCK_ASSERT(pm, MA_OWNED);
        if ((m->oflags & VPO_UNMANAGED) == 0 && !vm_page_xbusied(m))
                VM_OBJECT_ASSERT_LOCKED(m->object);
        PMAP_STATS_INC(pmap_nenter);
        pa = VM_PAGE_TO_PHYS(m);

        /*
         * If this is a fake page from the device_pager, but it covers actual
         * physical memory, convert to the real backing page.
         */
        if ((m->flags & PG_FICTITIOUS) != 0) {
                real = vm_phys_paddr_to_vm_page(pa);
                if (real != NULL)
                        m = real;
        }

        CTR6(KTR_PMAP,
            "pmap_enter_locked: ctx=%p m=%p va=%#lx pa=%#lx prot=%#x wired=%d",
            pm->pm_context[curcpu], m, va, pa, prot, wired);

        /*
         * If there is an existing mapping, and the physical address has not
         * changed, must be protection or wiring change.
         */
        if ((tp = tsb_tte_lookup(pm, va)) != NULL && TTE_GET_PA(tp) == pa) {
                CTR0(KTR_PMAP, "pmap_enter_locked: update");
                PMAP_STATS_INC(pmap_nenter_update);

                /*
                 * Wiring change, just update stats.
                 */
                if (wired) {
                        if ((tp->tte_data & TD_WIRED) == 0) {
                                tp->tte_data |= TD_WIRED;
                                pm->pm_stats.wired_count++;
                        }
                } else {
                        if ((tp->tte_data & TD_WIRED) != 0) {
                                tp->tte_data &= ~TD_WIRED;
                                pm->pm_stats.wired_count--;
                        }
                }

                /*
                 * Save the old bits and clear the ones we're interested in.
                 */
                data = tp->tte_data;
                tp->tte_data &= ~(TD_EXEC | TD_SW | TD_W);

                /*
                 * If we're turning off write permissions, sense modify status.
                 */
                if ((prot & VM_PROT_WRITE) != 0) {
                        tp->tte_data |= TD_SW;
                        if (wired)
                                tp->tte_data |= TD_W;
                        if ((m->oflags & VPO_UNMANAGED) == 0)
                                vm_page_aflag_set(m, PGA_WRITEABLE);
                } else if ((data & TD_W) != 0)
                        vm_page_dirty(m);

                /*
                 * If we're turning on execute permissions, flush the icache.
                 */
                if ((prot & VM_PROT_EXECUTE) != 0) {
                        if ((data & TD_EXEC) == 0)
                                icache_page_inval(pa);
                        tp->tte_data |= TD_EXEC;
                }

                /*
                 * Delete the old mapping.
                 */
                tlb_page_demap(pm, TTE_GET_VA(tp));
        } else {
                /*
                 * If there is an existing mapping, but it's for a different
                 * physical address, delete the old mapping.
                 */
                if (tp != NULL) {
                        CTR0(KTR_PMAP, "pmap_enter_locked: replace");
                        PMAP_STATS_INC(pmap_nenter_replace);
                        pmap_remove_tte(pm, NULL, tp, va);
                        tlb_page_demap(pm, va);
                } else {
                        CTR0(KTR_PMAP, "pmap_enter_locked: new");
                        PMAP_STATS_INC(pmap_nenter_new);
                }

                /*
                 * Now set up the data and install the new mapping.
                 */
                data = TD_V | TD_8K | TD_PA(pa);
                if (pm == kernel_pmap)
                        data |= TD_P;
                if ((prot & VM_PROT_WRITE) != 0) {
                        data |= TD_SW;
                        if ((m->oflags & VPO_UNMANAGED) == 0)
                                vm_page_aflag_set(m, PGA_WRITEABLE);
                }
                if (prot & VM_PROT_EXECUTE) {
                        data |= TD_EXEC;
                        icache_page_inval(pa);
                }

                /*
                 * If it's wired, update stats.  We also don't need reference
                 * or modify tracking for wired mappings, so set the bits now.
                 */
                if (wired) {
                        pm->pm_stats.wired_count++;
                        data |= TD_REF | TD_WIRED;
                        if ((prot & VM_PROT_WRITE) != 0)
                                data |= TD_W;
                }

                tsb_tte_enter(pm, m, va, TS_8K, data);
        }
}
1610
1611 /*
1612  * Maps a sequence of resident pages belonging to the same object.
1613  * The sequence begins with the given page m_start.  This page is
1614  * mapped at the given virtual address start.  Each subsequent page is
1615  * mapped at a virtual address that is offset from start by the same
1616  * amount as the page is offset from m_start within the object.  The
1617  * last page in the sequence is the page with the largest offset from
1618  * m_start that can be mapped at a virtual address less than the given
1619  * virtual address end.  Not every virtual page between start and end
1620  * is mapped; only those for which a resident page exists with the
1621  * corresponding offset from m_start are mapped.
1622  */
1623 void
1624 pmap_enter_object(pmap_t pm, vm_offset_t start, vm_offset_t end,
1625     vm_page_t m_start, vm_prot_t prot)
1626 {
1627         vm_page_t m;
1628         vm_pindex_t diff, psize;
1629
1630         VM_OBJECT_ASSERT_LOCKED(m_start->object);
1631
1632         psize = atop(end - start);
1633         m = m_start;
1634         rw_wlock(&tte_list_global_lock);
1635         PMAP_LOCK(pm);
1636         while (m != NULL && (diff = m->pindex - m_start->pindex) < psize) {
1637                 pmap_enter_locked(pm, start + ptoa(diff), m, prot &
1638                     (VM_PROT_READ | VM_PROT_EXECUTE), FALSE);
1639                 m = TAILQ_NEXT(m, listq);
1640         }
1641         rw_wunlock(&tte_list_global_lock);
1642         PMAP_UNLOCK(pm);
1643 }
1644
1645 void
1646 pmap_enter_quick(pmap_t pm, vm_offset_t va, vm_page_t m, vm_prot_t prot)
1647 {
1648
1649         rw_wlock(&tte_list_global_lock);
1650         PMAP_LOCK(pm);
1651         pmap_enter_locked(pm, va, m, prot & (VM_PROT_READ | VM_PROT_EXECUTE),
1652             FALSE);
1653         rw_wunlock(&tte_list_global_lock);
1654         PMAP_UNLOCK(pm);
1655 }
1656
1657 void
1658 pmap_object_init_pt(pmap_t pm, vm_offset_t addr, vm_object_t object,
1659     vm_pindex_t pindex, vm_size_t size)
1660 {
1661
1662         VM_OBJECT_ASSERT_WLOCKED(object);
1663         KASSERT(object->type == OBJT_DEVICE || object->type == OBJT_SG,
1664             ("pmap_object_init_pt: non-device object"));
1665 }
1666
1667 /*
1668  * Change the wiring attribute for a map/virtual-address pair.
1669  * The mapping must already exist in the pmap.
1670  */
1671 void
1672 pmap_change_wiring(pmap_t pm, vm_offset_t va, boolean_t wired)
1673 {
1674         struct tte *tp;
1675         u_long data;
1676
1677         PMAP_LOCK(pm);
1678         if ((tp = tsb_tte_lookup(pm, va)) != NULL) {
1679                 if (wired) {
1680                         data = atomic_set_long(&tp->tte_data, TD_WIRED);
1681                         if ((data & TD_WIRED) == 0)
1682                                 pm->pm_stats.wired_count++;
1683                 } else {
1684                         data = atomic_clear_long(&tp->tte_data, TD_WIRED);
1685                         if ((data & TD_WIRED) != 0)
1686                                 pm->pm_stats.wired_count--;
1687                 }
1688         }
1689         PMAP_UNLOCK(pm);
1690 }
1691
1692 static int
1693 pmap_copy_tte(pmap_t src_pmap, pmap_t dst_pmap, struct tte *tp,
1694     vm_offset_t va)
1695 {
1696         vm_page_t m;
1697         u_long data;
1698
1699         if ((tp->tte_data & TD_FAKE) != 0)
1700                 return (1);
1701         if (tsb_tte_lookup(dst_pmap, va) == NULL) {
1702                 data = tp->tte_data &
1703                     ~(TD_PV | TD_REF | TD_SW | TD_CV | TD_W);
1704                 m = PHYS_TO_VM_PAGE(TTE_GET_PA(tp));
1705                 tsb_tte_enter(dst_pmap, m, va, TS_8K, data);
1706         }
1707         return (1);
1708 }
1709
1710 void
1711 pmap_copy(pmap_t dst_pmap, pmap_t src_pmap, vm_offset_t dst_addr,
1712     vm_size_t len, vm_offset_t src_addr)
1713 {
1714         struct tte *tp;
1715         vm_offset_t va;
1716
1717         if (dst_addr != src_addr)
1718                 return;
1719         rw_wlock(&tte_list_global_lock);
1720         if (dst_pmap < src_pmap) {
1721                 PMAP_LOCK(dst_pmap);
1722                 PMAP_LOCK(src_pmap);
1723         } else {
1724                 PMAP_LOCK(src_pmap);
1725                 PMAP_LOCK(dst_pmap);
1726         }
1727         if (len > PMAP_TSB_THRESH) {
1728                 tsb_foreach(src_pmap, dst_pmap, src_addr, src_addr + len,
1729                     pmap_copy_tte);
1730                 tlb_context_demap(dst_pmap);
1731         } else {
1732                 for (va = src_addr; va < src_addr + len; va += PAGE_SIZE)
1733                         if ((tp = tsb_tte_lookup(src_pmap, va)) != NULL)
1734                                 pmap_copy_tte(src_pmap, dst_pmap, tp, va);
1735                 tlb_range_demap(dst_pmap, src_addr, src_addr + len - 1);
1736         }
1737         rw_wunlock(&tte_list_global_lock);
1738         PMAP_UNLOCK(src_pmap);
1739         PMAP_UNLOCK(dst_pmap);
1740 }
1741
1742 void
1743 pmap_zero_page(vm_page_t m)
1744 {
1745         struct tte *tp;
1746         vm_offset_t va;
1747         vm_paddr_t pa;
1748
1749         KASSERT((m->flags & PG_FICTITIOUS) == 0,
1750             ("pmap_zero_page: fake page"));
1751         PMAP_STATS_INC(pmap_nzero_page);
1752         pa = VM_PAGE_TO_PHYS(m);
1753         if (dcache_color_ignore != 0 || m->md.color == DCACHE_COLOR(pa)) {
1754                 PMAP_STATS_INC(pmap_nzero_page_c);
1755                 va = TLB_PHYS_TO_DIRECT(pa);
1756                 cpu_block_zero((void *)va, PAGE_SIZE);
1757         } else if (m->md.color == -1) {
1758                 PMAP_STATS_INC(pmap_nzero_page_nc);
1759                 aszero(ASI_PHYS_USE_EC, pa, PAGE_SIZE);
1760         } else {
1761                 PMAP_STATS_INC(pmap_nzero_page_oc);
1762                 PMAP_LOCK(kernel_pmap);
1763                 va = pmap_temp_map_1 + (m->md.color * PAGE_SIZE);
1764                 tp = tsb_kvtotte(va);
1765                 tp->tte_data = TD_V | TD_8K | TD_PA(pa) | TD_CP | TD_CV | TD_W;
1766                 tp->tte_vpn = TV_VPN(va, TS_8K);
1767                 cpu_block_zero((void *)va, PAGE_SIZE);
1768                 tlb_page_demap(kernel_pmap, va);
1769                 PMAP_UNLOCK(kernel_pmap);
1770         }
1771 }
1772
1773 void
1774 pmap_zero_page_area(vm_page_t m, int off, int size)
1775 {
1776         struct tte *tp;
1777         vm_offset_t va;
1778         vm_paddr_t pa;
1779
1780         KASSERT((m->flags & PG_FICTITIOUS) == 0,
1781             ("pmap_zero_page_area: fake page"));
1782         KASSERT(off + size <= PAGE_SIZE, ("pmap_zero_page_area: bad off/size"));
1783         PMAP_STATS_INC(pmap_nzero_page_area);
1784         pa = VM_PAGE_TO_PHYS(m);
1785         if (dcache_color_ignore != 0 || m->md.color == DCACHE_COLOR(pa)) {
1786                 PMAP_STATS_INC(pmap_nzero_page_area_c);
1787                 va = TLB_PHYS_TO_DIRECT(pa);
1788                 bzero((void *)(va + off), size);
1789         } else if (m->md.color == -1) {
1790                 PMAP_STATS_INC(pmap_nzero_page_area_nc);
1791                 aszero(ASI_PHYS_USE_EC, pa + off, size);
1792         } else {
1793                 PMAP_STATS_INC(pmap_nzero_page_area_oc);
1794                 PMAP_LOCK(kernel_pmap);
1795                 va = pmap_temp_map_1 + (m->md.color * PAGE_SIZE);
1796                 tp = tsb_kvtotte(va);
1797                 tp->tte_data = TD_V | TD_8K | TD_PA(pa) | TD_CP | TD_CV | TD_W;
1798                 tp->tte_vpn = TV_VPN(va, TS_8K);
1799                 bzero((void *)(va + off), size);
1800                 tlb_page_demap(kernel_pmap, va);
1801                 PMAP_UNLOCK(kernel_pmap);
1802         }
1803 }
1804
1805 void
1806 pmap_zero_page_idle(vm_page_t m)
1807 {
1808         struct tte *tp;
1809         vm_offset_t va;
1810         vm_paddr_t pa;
1811
1812         KASSERT((m->flags & PG_FICTITIOUS) == 0,
1813             ("pmap_zero_page_idle: fake page"));
1814         PMAP_STATS_INC(pmap_nzero_page_idle);
1815         pa = VM_PAGE_TO_PHYS(m);
1816         if (dcache_color_ignore != 0 || m->md.color == DCACHE_COLOR(pa)) {
1817                 PMAP_STATS_INC(pmap_nzero_page_idle_c);
1818                 va = TLB_PHYS_TO_DIRECT(pa);
1819                 cpu_block_zero((void *)va, PAGE_SIZE);
1820         } else if (m->md.color == -1) {
1821                 PMAP_STATS_INC(pmap_nzero_page_idle_nc);
1822                 aszero(ASI_PHYS_USE_EC, pa, PAGE_SIZE);
1823         } else {
1824                 PMAP_STATS_INC(pmap_nzero_page_idle_oc);
1825                 va = pmap_idle_map + (m->md.color * PAGE_SIZE);
1826                 tp = tsb_kvtotte(va);
1827                 tp->tte_data = TD_V | TD_8K | TD_PA(pa) | TD_CP | TD_CV | TD_W;
1828                 tp->tte_vpn = TV_VPN(va, TS_8K);
1829                 cpu_block_zero((void *)va, PAGE_SIZE);
1830                 tlb_page_demap(kernel_pmap, va);
1831         }
1832 }
1833
1834 void
1835 pmap_copy_page(vm_page_t msrc, vm_page_t mdst)
1836 {
1837         vm_offset_t vdst;
1838         vm_offset_t vsrc;
1839         vm_paddr_t pdst;
1840         vm_paddr_t psrc;
1841         struct tte *tp;
1842
1843         KASSERT((mdst->flags & PG_FICTITIOUS) == 0,
1844             ("pmap_copy_page: fake dst page"));
1845         KASSERT((msrc->flags & PG_FICTITIOUS) == 0,
1846             ("pmap_copy_page: fake src page"));
1847         PMAP_STATS_INC(pmap_ncopy_page);
1848         pdst = VM_PAGE_TO_PHYS(mdst);
1849         psrc = VM_PAGE_TO_PHYS(msrc);
1850         if (dcache_color_ignore != 0 ||
1851             (msrc->md.color == DCACHE_COLOR(psrc) &&
1852             mdst->md.color == DCACHE_COLOR(pdst))) {
1853                 PMAP_STATS_INC(pmap_ncopy_page_c);
1854                 vdst = TLB_PHYS_TO_DIRECT(pdst);
1855                 vsrc = TLB_PHYS_TO_DIRECT(psrc);
1856                 cpu_block_copy((void *)vsrc, (void *)vdst, PAGE_SIZE);
1857         } else if (msrc->md.color == -1 && mdst->md.color == -1) {
1858                 PMAP_STATS_INC(pmap_ncopy_page_nc);
1859                 ascopy(ASI_PHYS_USE_EC, psrc, pdst, PAGE_SIZE);
1860         } else if (msrc->md.color == -1) {
1861                 if (mdst->md.color == DCACHE_COLOR(pdst)) {
1862                         PMAP_STATS_INC(pmap_ncopy_page_dc);
1863                         vdst = TLB_PHYS_TO_DIRECT(pdst);
1864                         ascopyfrom(ASI_PHYS_USE_EC, psrc, (void *)vdst,
1865                             PAGE_SIZE);
1866                 } else {
1867                         PMAP_STATS_INC(pmap_ncopy_page_doc);
1868                         PMAP_LOCK(kernel_pmap);
1869                         vdst = pmap_temp_map_1 + (mdst->md.color * PAGE_SIZE);
1870                         tp = tsb_kvtotte(vdst);
1871                         tp->tte_data =
1872                             TD_V | TD_8K | TD_PA(pdst) | TD_CP | TD_CV | TD_W;
1873                         tp->tte_vpn = TV_VPN(vdst, TS_8K);
1874                         ascopyfrom(ASI_PHYS_USE_EC, psrc, (void *)vdst,
1875                             PAGE_SIZE);
1876                         tlb_page_demap(kernel_pmap, vdst);
1877                         PMAP_UNLOCK(kernel_pmap);
1878                 }
1879         } else if (mdst->md.color == -1) {
1880                 if (msrc->md.color == DCACHE_COLOR(psrc)) {
1881                         PMAP_STATS_INC(pmap_ncopy_page_sc);
1882                         vsrc = TLB_PHYS_TO_DIRECT(psrc);
1883                         ascopyto((void *)vsrc, ASI_PHYS_USE_EC, pdst,
1884                             PAGE_SIZE);
1885                 } else {
1886                         PMAP_STATS_INC(pmap_ncopy_page_soc);
1887                         PMAP_LOCK(kernel_pmap);
1888                         vsrc = pmap_temp_map_1 + (msrc->md.color * PAGE_SIZE);
1889                         tp = tsb_kvtotte(vsrc);
1890                         tp->tte_data =
1891                             TD_V | TD_8K | TD_PA(psrc) | TD_CP | TD_CV | TD_W;
1892                         tp->tte_vpn = TV_VPN(vsrc, TS_8K);
1893                         ascopyto((void *)vsrc, ASI_PHYS_USE_EC, pdst,
1894                             PAGE_SIZE);
1895                         tlb_page_demap(kernel_pmap, vsrc);
1896                         PMAP_UNLOCK(kernel_pmap);
1897                 }
1898         } else {
1899                 PMAP_STATS_INC(pmap_ncopy_page_oc);
1900                 PMAP_LOCK(kernel_pmap);
1901                 vdst = pmap_temp_map_1 + (mdst->md.color * PAGE_SIZE);
1902                 tp = tsb_kvtotte(vdst);
1903                 tp->tte_data =
1904                     TD_V | TD_8K | TD_PA(pdst) | TD_CP | TD_CV | TD_W;
1905                 tp->tte_vpn = TV_VPN(vdst, TS_8K);
1906                 vsrc = pmap_temp_map_2 + (msrc->md.color * PAGE_SIZE);
1907                 tp = tsb_kvtotte(vsrc);
1908                 tp->tte_data =
1909                     TD_V | TD_8K | TD_PA(psrc) | TD_CP | TD_CV | TD_W;
1910                 tp->tte_vpn = TV_VPN(vsrc, TS_8K);
1911                 cpu_block_copy((void *)vsrc, (void *)vdst, PAGE_SIZE);
1912                 tlb_page_demap(kernel_pmap, vdst);
1913                 tlb_page_demap(kernel_pmap, vsrc);
1914                 PMAP_UNLOCK(kernel_pmap);
1915         }
1916 }
1917
1918 int unmapped_buf_allowed;
1919
1920 void
1921 pmap_copy_pages(vm_page_t ma[], vm_offset_t a_offset, vm_page_t mb[],
1922     vm_offset_t b_offset, int xfersize)
1923 {
1924
1925         panic("pmap_copy_pages: not implemented");
1926 }
1927
1928 /*
1929  * Returns true if the pmap's pv is one of the first
1930  * 16 pvs linked to from this page.  This count may
1931  * be changed upwards or downwards in the future; it
1932  * is only necessary that true be returned for a small
1933  * subset of pmaps for proper page aging.
1934  */
1935 boolean_t
1936 pmap_page_exists_quick(pmap_t pm, vm_page_t m)
1937 {
1938         struct tte *tp;
1939         int loops;
1940         boolean_t rv;
1941
1942         KASSERT((m->oflags & VPO_UNMANAGED) == 0,
1943             ("pmap_page_exists_quick: page %p is not managed", m));
1944         loops = 0;
1945         rv = FALSE;
1946         rw_wlock(&tte_list_global_lock);
1947         TAILQ_FOREACH(tp, &m->md.tte_list, tte_link) {
1948                 if ((tp->tte_data & TD_PV) == 0)
1949                         continue;
1950                 if (TTE_GET_PMAP(tp) == pm) {
1951                         rv = TRUE;
1952                         break;
1953                 }
1954                 if (++loops >= 16)
1955                         break;
1956         }
1957         rw_wunlock(&tte_list_global_lock);
1958         return (rv);
1959 }
1960
1961 /*
1962  * Return the number of managed mappings to the given physical page
1963  * that are wired.
1964  */
1965 int
1966 pmap_page_wired_mappings(vm_page_t m)
1967 {
1968         struct tte *tp;
1969         int count;
1970
1971         count = 0;
1972         if ((m->oflags & VPO_UNMANAGED) != 0)
1973                 return (count);
1974         rw_wlock(&tte_list_global_lock);
1975         TAILQ_FOREACH(tp, &m->md.tte_list, tte_link)
1976                 if ((tp->tte_data & (TD_PV | TD_WIRED)) == (TD_PV | TD_WIRED))
1977                         count++;
1978         rw_wunlock(&tte_list_global_lock);
1979         return (count);
1980 }
1981
1982 /*
1983  * Remove all pages from specified address space, this aids process exit
1984  * speeds.  This is much faster than pmap_remove in the case of running down
1985  * an entire address space.  Only works for the current pmap.
1986  */
1987 void
1988 pmap_remove_pages(pmap_t pm)
1989 {
1990
1991 }
1992
1993 /*
1994  * Returns TRUE if the given page has a managed mapping.
1995  */
1996 boolean_t
1997 pmap_page_is_mapped(vm_page_t m)
1998 {
1999         struct tte *tp;
2000         boolean_t rv;
2001
2002         rv = FALSE;
2003         if ((m->oflags & VPO_UNMANAGED) != 0)
2004                 return (rv);
2005         rw_wlock(&tte_list_global_lock);
2006         TAILQ_FOREACH(tp, &m->md.tte_list, tte_link)
2007                 if ((tp->tte_data & TD_PV) != 0) {
2008                         rv = TRUE;
2009                         break;
2010                 }
2011         rw_wunlock(&tte_list_global_lock);
2012         return (rv);
2013 }
2014
2015 /*
2016  * Return a count of reference bits for a page, clearing those bits.
2017  * It is not necessary for every reference bit to be cleared, but it
2018  * is necessary that 0 only be returned when there are truly no
2019  * reference bits set.
2020  *
2021  * XXX: The exact number of bits to check and clear is a matter that
2022  * should be tested and standardized at some point in the future for
2023  * optimal aging of shared pages.
2024  */
2025 int
2026 pmap_ts_referenced(vm_page_t m)
2027 {
2028         struct tte *tpf;
2029         struct tte *tpn;
2030         struct tte *tp;
2031         u_long data;
2032         int count;
2033
2034         KASSERT((m->oflags & VPO_UNMANAGED) == 0,
2035             ("pmap_ts_referenced: page %p is not managed", m));
2036         count = 0;
2037         rw_wlock(&tte_list_global_lock);
2038         if ((tp = TAILQ_FIRST(&m->md.tte_list)) != NULL) {
2039                 tpf = tp;
2040                 do {
2041                         tpn = TAILQ_NEXT(tp, tte_link);
2042                         TAILQ_REMOVE(&m->md.tte_list, tp, tte_link);
2043                         TAILQ_INSERT_TAIL(&m->md.tte_list, tp, tte_link);
2044                         if ((tp->tte_data & TD_PV) == 0)
2045                                 continue;
2046                         data = atomic_clear_long(&tp->tte_data, TD_REF);
2047                         if ((data & TD_REF) != 0 && ++count > 4)
2048                                 break;
2049                 } while ((tp = tpn) != NULL && tp != tpf);
2050         }
2051         rw_wunlock(&tte_list_global_lock);
2052         return (count);
2053 }
2054
2055 boolean_t
2056 pmap_is_modified(vm_page_t m)
2057 {
2058         struct tte *tp;
2059         boolean_t rv;
2060
2061         KASSERT((m->oflags & VPO_UNMANAGED) == 0,
2062             ("pmap_is_modified: page %p is not managed", m));
2063         rv = FALSE;
2064
2065         /*
2066          * If the page is not exclusive busied, then PGA_WRITEABLE cannot be
2067          * concurrently set while the object is locked.  Thus, if PGA_WRITEABLE
2068          * is clear, no TTEs can have TD_W set.
2069          */
2070         VM_OBJECT_ASSERT_WLOCKED(m->object);
2071         if (!vm_page_xbusied(m) && (m->aflags & PGA_WRITEABLE) == 0)
2072                 return (rv);
2073         rw_wlock(&tte_list_global_lock);
2074         TAILQ_FOREACH(tp, &m->md.tte_list, tte_link) {
2075                 if ((tp->tte_data & TD_PV) == 0)
2076                         continue;
2077                 if ((tp->tte_data & TD_W) != 0) {
2078                         rv = TRUE;
2079                         break;
2080                 }
2081         }
2082         rw_wunlock(&tte_list_global_lock);
2083         return (rv);
2084 }
2085
2086 /*
2087  *      pmap_is_prefaultable:
2088  *
2089  *      Return whether or not the specified virtual address is elgible
2090  *      for prefault.
2091  */
2092 boolean_t
2093 pmap_is_prefaultable(pmap_t pmap, vm_offset_t addr)
2094 {
2095         boolean_t rv;
2096
2097         PMAP_LOCK(pmap);
2098         rv = tsb_tte_lookup(pmap, addr) == NULL;
2099         PMAP_UNLOCK(pmap);
2100         return (rv);
2101 }
2102
2103 /*
2104  * Return whether or not the specified physical page was referenced
2105  * in any physical maps.
2106  */
2107 boolean_t
2108 pmap_is_referenced(vm_page_t m)
2109 {
2110         struct tte *tp;
2111         boolean_t rv;
2112
2113         KASSERT((m->oflags & VPO_UNMANAGED) == 0,
2114             ("pmap_is_referenced: page %p is not managed", m));
2115         rv = FALSE;
2116         rw_wlock(&tte_list_global_lock);
2117         TAILQ_FOREACH(tp, &m->md.tte_list, tte_link) {
2118                 if ((tp->tte_data & TD_PV) == 0)
2119                         continue;
2120                 if ((tp->tte_data & TD_REF) != 0) {
2121                         rv = TRUE;
2122                         break;
2123                 }
2124         }
2125         rw_wunlock(&tte_list_global_lock);
2126         return (rv);
2127 }
2128
2129 /*
2130  * This function is advisory.
2131  */
2132 void
2133 pmap_advise(pmap_t pmap, vm_offset_t sva, vm_offset_t eva, int advice)
2134 {
2135 }
2136
2137 void
2138 pmap_clear_modify(vm_page_t m)
2139 {
2140         struct tte *tp;
2141         u_long data;
2142
2143         KASSERT((m->oflags & VPO_UNMANAGED) == 0,
2144             ("pmap_clear_modify: page %p is not managed", m));
2145         VM_OBJECT_ASSERT_WLOCKED(m->object);
2146         KASSERT(!vm_page_xbusied(m),
2147             ("pmap_clear_modify: page %p is exclusive busied", m));
2148
2149         /*
2150          * If the page is not PGA_WRITEABLE, then no TTEs can have TD_W set.
2151          * If the object containing the page is locked and the page is not
2152          * exclusive busied, then PGA_WRITEABLE cannot be concurrently set.
2153          */
2154         if ((m->aflags & PGA_WRITEABLE) == 0)
2155                 return;
2156         rw_wlock(&tte_list_global_lock);
2157         TAILQ_FOREACH(tp, &m->md.tte_list, tte_link) {
2158                 if ((tp->tte_data & TD_PV) == 0)
2159                         continue;
2160                 data = atomic_clear_long(&tp->tte_data, TD_W);
2161                 if ((data & TD_W) != 0)
2162                         tlb_page_demap(TTE_GET_PMAP(tp), TTE_GET_VA(tp));
2163         }
2164         rw_wunlock(&tte_list_global_lock);
2165 }
2166
2167 void
2168 pmap_remove_write(vm_page_t m)
2169 {
2170         struct tte *tp;
2171         u_long data;
2172
2173         KASSERT((m->oflags & VPO_UNMANAGED) == 0,
2174             ("pmap_remove_write: page %p is not managed", m));
2175
2176         /*
2177          * If the page is not exclusive busied, then PGA_WRITEABLE cannot be
2178          * set by another thread while the object is locked.  Thus,
2179          * if PGA_WRITEABLE is clear, no page table entries need updating.
2180          */
2181         VM_OBJECT_ASSERT_WLOCKED(m->object);
2182         if (!vm_page_xbusied(m) && (m->aflags & PGA_WRITEABLE) == 0)
2183                 return;
2184         rw_wlock(&tte_list_global_lock);
2185         TAILQ_FOREACH(tp, &m->md.tte_list, tte_link) {
2186                 if ((tp->tte_data & TD_PV) == 0)
2187                         continue;
2188                 data = atomic_clear_long(&tp->tte_data, TD_SW | TD_W);
2189                 if ((data & TD_W) != 0) {
2190                         vm_page_dirty(m);
2191                         tlb_page_demap(TTE_GET_PMAP(tp), TTE_GET_VA(tp));
2192                 }
2193         }
2194         vm_page_aflag_clear(m, PGA_WRITEABLE);
2195         rw_wunlock(&tte_list_global_lock);
2196 }
2197
2198 int
2199 pmap_mincore(pmap_t pm, vm_offset_t addr, vm_paddr_t *locked_pa)
2200 {
2201
2202         /* TODO; */
2203         return (0);
2204 }
2205
2206 /*
2207  * Activate a user pmap.  The pmap must be activated before its address space
2208  * can be accessed in any way.
2209  */
2210 void
2211 pmap_activate(struct thread *td)
2212 {
2213         struct vmspace *vm;
2214         struct pmap *pm;
2215         int context;
2216
2217         critical_enter();
2218         vm = td->td_proc->p_vmspace;
2219         pm = vmspace_pmap(vm);
2220
2221         context = PCPU_GET(tlb_ctx);
2222         if (context == PCPU_GET(tlb_ctx_max)) {
2223                 tlb_flush_user();
2224                 context = PCPU_GET(tlb_ctx_min);
2225         }
2226         PCPU_SET(tlb_ctx, context + 1);
2227
2228         pm->pm_context[curcpu] = context;
2229 #ifdef SMP
2230         CPU_SET_ATOMIC(PCPU_GET(cpuid), &pm->pm_active);
2231         atomic_store_acq_ptr((uintptr_t *)PCPU_PTR(pmap), (uintptr_t)pm);
2232 #else
2233         CPU_SET(PCPU_GET(cpuid), &pm->pm_active);
2234         PCPU_SET(pmap, pm);
2235 #endif
2236
2237         stxa(AA_DMMU_TSB, ASI_DMMU, pm->pm_tsb);
2238         stxa(AA_IMMU_TSB, ASI_IMMU, pm->pm_tsb);
2239         stxa(AA_DMMU_PCXR, ASI_DMMU, (ldxa(AA_DMMU_PCXR, ASI_DMMU) &
2240             TLB_CXR_PGSZ_MASK) | context);
2241         flush(KERNBASE);
2242         critical_exit();
2243 }
2244
2245 void
2246 pmap_sync_icache(pmap_t pm, vm_offset_t va, vm_size_t sz)
2247 {
2248
2249 }
2250
2251 /*
2252  * Increase the starting virtual address of the given mapping if a
2253  * different alignment might result in more superpage mappings.
2254  */
2255 void
2256 pmap_align_superpage(vm_object_t object, vm_ooffset_t offset,
2257     vm_offset_t *addr, vm_size_t size)
2258 {
2259
2260 }