FreeBSD stable/8: sys/sparc64/sparc64/pmap.c (MFC: r216803, r217058, r217514, r218457)
1 /*-
2  * Copyright (c) 1991 Regents of the University of California.
3  * All rights reserved.
4  * Copyright (c) 1994 John S. Dyson
5  * All rights reserved.
6  * Copyright (c) 1994 David Greenman
7  * All rights reserved.
8  *
9  * This code is derived from software contributed to Berkeley by
10  * the Systems Programming Group of the University of Utah Computer
11  * Science Department and William Jolitz of UUNET Technologies Inc.
12  *
13  * Redistribution and use in source and binary forms, with or without
14  * modification, are permitted provided that the following conditions
15  * are met:
16  * 1. Redistributions of source code must retain the above copyright
17  *    notice, this list of conditions and the following disclaimer.
18  * 2. Redistributions in binary form must reproduce the above copyright
19  *    notice, this list of conditions and the following disclaimer in the
20  *    documentation and/or other materials provided with the distribution.
21  * 4. Neither the name of the University nor the names of its contributors
22  *    may be used to endorse or promote products derived from this software
23  *    without specific prior written permission.
24  *
25  * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
26  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
27  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
28  * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
29  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
30  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
31  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
32  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
33  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
34  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
35  * SUCH DAMAGE.
36  *
37  *      from:   @(#)pmap.c      7.7 (Berkeley)  5/12/91
38  */
39
40 #include <sys/cdefs.h>
41 __FBSDID("$FreeBSD$");
42
43 /*
44  * Manages physical address maps.
45  *
46  * In addition to hardware address maps, this module is called upon to
47  * provide software-use-only maps which may or may not be stored in the
48  * same form as hardware maps.  These pseudo-maps are used to store
49  * intermediate results from copy operations to and from address spaces.
50  *
51  * Since the information managed by this module is also stored by the
52  * logical address mapping module, this module may throw away valid virtual
53  * to physical mappings at almost any time.  However, invalidations of
54  * mappings must be done as requested.
55  *
56  * In order to cope with hardware architectures which make virtual to
57  * physical map invalidates expensive, this module may delay invalidate or
58  * reduced protection operations until such time as they are actually
59  * necessary.  This module is given full information as to which processors
60  * are currently using which maps, and to when physical maps must be made
61  * correct.
62  */
63
64 #include "opt_kstack_pages.h"
65 #include "opt_pmap.h"
66
67 #include <sys/param.h>
68 #include <sys/kernel.h>
69 #include <sys/ktr.h>
70 #include <sys/lock.h>
71 #include <sys/msgbuf.h>
72 #include <sys/mutex.h>
73 #include <sys/proc.h>
74 #include <sys/smp.h>
75 #include <sys/sysctl.h>
76 #include <sys/systm.h>
77 #include <sys/vmmeter.h>
78
79 #include <dev/ofw/openfirm.h>
80
81 #include <vm/vm.h>
82 #include <vm/vm_param.h>
83 #include <vm/vm_kern.h>
84 #include <vm/vm_page.h>
85 #include <vm/vm_map.h>
86 #include <vm/vm_object.h>
87 #include <vm/vm_extern.h>
88 #include <vm/vm_pageout.h>
89 #include <vm/vm_pager.h>
90
91 #include <machine/cache.h>
92 #include <machine/frame.h>
93 #include <machine/instr.h>
94 #include <machine/md_var.h>
95 #include <machine/metadata.h>
96 #include <machine/ofw_mem.h>
97 #include <machine/smp.h>
98 #include <machine/tlb.h>
99 #include <machine/tte.h>
100 #include <machine/tsb.h>
101 #include <machine/ver.h>
102
103 #define PMAP_DEBUG
104
105 #ifndef PMAP_SHPGPERPROC
106 #define PMAP_SHPGPERPROC        200
107 #endif
108
109 /* XXX */
110 #include "opt_sched.h"
111 #ifndef SCHED_4BSD
112 #error "sparc64 only works with SCHED_4BSD which uses a global scheduler lock."
113 #endif
114 extern struct mtx sched_lock;
115
116 /*
117  * Virtual address of message buffer
118  */
119 struct msgbuf *msgbufp;
120
121 /*
122  * Map of physical memory regions
123  */
124 vm_paddr_t phys_avail[128];
125 static struct ofw_mem_region mra[128];
126 struct ofw_mem_region sparc64_memreg[128];
127 int sparc64_nmemreg;
128 static struct ofw_map translations[128];
129 static int translations_size;
130
131 static vm_offset_t pmap_idle_map;
132 static vm_offset_t pmap_temp_map_1;
133 static vm_offset_t pmap_temp_map_2;
134
135 /*
136  * First and last available kernel virtual addresses
137  */
138 vm_offset_t virtual_avail;
139 vm_offset_t virtual_end;
140 vm_offset_t kernel_vm_end;
141
142 vm_offset_t vm_max_kernel_address;
143
144 /*
145  * Kernel pmap
146  */
147 struct pmap kernel_pmap_store;
148
149 /*
150  * Allocate physical memory for use in pmap_bootstrap.
151  */
152 static vm_paddr_t pmap_bootstrap_alloc(vm_size_t size, uint32_t colors);
153
154 static void pmap_bootstrap_set_tte(struct tte *tp, u_long vpn, u_long data);
155
156 /*
157  * Map the given physical page at the specified virtual address in the
158  * target pmap with the protection requested.  If specified the page
159  * will be wired down.
160  *
161  * The page queues and pmap must be locked.
162  */
163 static void pmap_enter_locked(pmap_t pm, vm_offset_t va, vm_page_t m,
164     vm_prot_t prot, boolean_t wired);
165
166 extern int tl1_dmmu_miss_direct_patch_tsb_phys_1[];
167 extern int tl1_dmmu_miss_direct_patch_tsb_phys_end_1[];
168 extern int tl1_dmmu_miss_patch_asi_1[];
169 extern int tl1_dmmu_miss_patch_quad_ldd_1[];
170 extern int tl1_dmmu_miss_patch_tsb_1[];
171 extern int tl1_dmmu_miss_patch_tsb_2[];
172 extern int tl1_dmmu_miss_patch_tsb_mask_1[];
173 extern int tl1_dmmu_miss_patch_tsb_mask_2[];
174 extern int tl1_dmmu_prot_patch_asi_1[];
175 extern int tl1_dmmu_prot_patch_quad_ldd_1[];
176 extern int tl1_dmmu_prot_patch_tsb_1[];
177 extern int tl1_dmmu_prot_patch_tsb_2[];
178 extern int tl1_dmmu_prot_patch_tsb_mask_1[];
179 extern int tl1_dmmu_prot_patch_tsb_mask_2[];
180 extern int tl1_immu_miss_patch_asi_1[];
181 extern int tl1_immu_miss_patch_quad_ldd_1[];
182 extern int tl1_immu_miss_patch_tsb_1[];
183 extern int tl1_immu_miss_patch_tsb_2[];
184 extern int tl1_immu_miss_patch_tsb_mask_1[];
185 extern int tl1_immu_miss_patch_tsb_mask_2[];
186
187 /*
188  * If user pmap is processed with pmap_remove and the
189  * resident count drops to 0, there are no more pages to remove, so we
190  * need not continue.
191  */
192 #define PMAP_REMOVE_DONE(pm) \
193         ((pm) != kernel_pmap && (pm)->pm_stats.resident_count == 0)
194
195 /*
196  * The threshold (in bytes) above which tsb_foreach() is used in pmap_remove()
197  * and pmap_protect() instead of trying each virtual address.
198  */
199 #define PMAP_TSB_THRESH ((TSB_SIZE / 2) * PAGE_SIZE)
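
/*
 * Note: this trades one full TSB walk against per-page lookups.
 * tsb_foreach() visits every TSB entry exactly once, while the per-VA
 * loops in pmap_remove() and pmap_protect() pay one tsb_tte_lookup()
 * per page in the range; once a range spans more pages than half the
 * TSB has entries (assuming TSB_SIZE counts entries), the full walk
 * is expected to be cheaper.
 */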
200
201 SYSCTL_NODE(_debug, OID_AUTO, pmap_stats, CTLFLAG_RD, 0, "");
202
203 PMAP_STATS_VAR(pmap_nenter);
204 PMAP_STATS_VAR(pmap_nenter_update);
205 PMAP_STATS_VAR(pmap_nenter_replace);
206 PMAP_STATS_VAR(pmap_nenter_new);
207 PMAP_STATS_VAR(pmap_nkenter);
208 PMAP_STATS_VAR(pmap_nkenter_oc);
209 PMAP_STATS_VAR(pmap_nkenter_stupid);
210 PMAP_STATS_VAR(pmap_nkremove);
211 PMAP_STATS_VAR(pmap_nqenter);
212 PMAP_STATS_VAR(pmap_nqremove);
213 PMAP_STATS_VAR(pmap_ncache_enter);
214 PMAP_STATS_VAR(pmap_ncache_enter_c);
215 PMAP_STATS_VAR(pmap_ncache_enter_oc);
216 PMAP_STATS_VAR(pmap_ncache_enter_cc);
217 PMAP_STATS_VAR(pmap_ncache_enter_coc);
218 PMAP_STATS_VAR(pmap_ncache_enter_nc);
219 PMAP_STATS_VAR(pmap_ncache_enter_cnc);
220 PMAP_STATS_VAR(pmap_ncache_remove);
221 PMAP_STATS_VAR(pmap_ncache_remove_c);
222 PMAP_STATS_VAR(pmap_ncache_remove_oc);
223 PMAP_STATS_VAR(pmap_ncache_remove_cc);
224 PMAP_STATS_VAR(pmap_ncache_remove_coc);
225 PMAP_STATS_VAR(pmap_ncache_remove_nc);
226 PMAP_STATS_VAR(pmap_nzero_page);
227 PMAP_STATS_VAR(pmap_nzero_page_c);
228 PMAP_STATS_VAR(pmap_nzero_page_oc);
229 PMAP_STATS_VAR(pmap_nzero_page_nc);
230 PMAP_STATS_VAR(pmap_nzero_page_area);
231 PMAP_STATS_VAR(pmap_nzero_page_area_c);
232 PMAP_STATS_VAR(pmap_nzero_page_area_oc);
233 PMAP_STATS_VAR(pmap_nzero_page_area_nc);
234 PMAP_STATS_VAR(pmap_nzero_page_idle);
235 PMAP_STATS_VAR(pmap_nzero_page_idle_c);
236 PMAP_STATS_VAR(pmap_nzero_page_idle_oc);
237 PMAP_STATS_VAR(pmap_nzero_page_idle_nc);
238 PMAP_STATS_VAR(pmap_ncopy_page);
239 PMAP_STATS_VAR(pmap_ncopy_page_c);
240 PMAP_STATS_VAR(pmap_ncopy_page_oc);
241 PMAP_STATS_VAR(pmap_ncopy_page_nc);
242 PMAP_STATS_VAR(pmap_ncopy_page_dc);
243 PMAP_STATS_VAR(pmap_ncopy_page_doc);
244 PMAP_STATS_VAR(pmap_ncopy_page_sc);
245 PMAP_STATS_VAR(pmap_ncopy_page_soc);
246
247 PMAP_STATS_VAR(pmap_nnew_thread);
248 PMAP_STATS_VAR(pmap_nnew_thread_oc);
249
250 static inline u_long dtlb_get_data(u_int slot);
251
252 /*
253  * Quick sort callout for comparing memory regions
254  */
255 static int mr_cmp(const void *a, const void *b);
256 static int om_cmp(const void *a, const void *b);
257
258 static int
259 mr_cmp(const void *a, const void *b)
260 {
261         const struct ofw_mem_region *mra;
262         const struct ofw_mem_region *mrb;
263
264         mra = a;
265         mrb = b;
266         if (mra->mr_start < mrb->mr_start)
267                 return (-1);
268         else if (mra->mr_start > mrb->mr_start)
269                 return (1);
270         else
271                 return (0);
272 }
273
274 static int
275 om_cmp(const void *a, const void *b)
276 {
277         const struct ofw_map *oma;
278         const struct ofw_map *omb;
279
280         oma = a;
281         omb = b;
282         if (oma->om_start < omb->om_start)
283                 return (-1);
284         else if (oma->om_start > omb->om_start)
285                 return (1);
286         else
287                 return (0);
288 }
289
290 static inline u_long
291 dtlb_get_data(u_int slot)
292 {
293
294         /*
295          * We read ASI_DTLB_DATA_ACCESS_REG twice in order to work
296          * around errata of USIII and beyond.
297          */
298         (void)ldxa(TLB_DAR_SLOT(slot), ASI_DTLB_DATA_ACCESS_REG);
299         return (ldxa(TLB_DAR_SLOT(slot), ASI_DTLB_DATA_ACCESS_REG));
300 }
301
302 /*
303  * Bootstrap the system enough to run with virtual memory.
304  */
305 void
306 pmap_bootstrap(u_int cpu_impl)
307 {
308         struct pmap *pm;
309         struct tte *tp;
310         vm_offset_t off;
311         vm_offset_t va;
312         vm_paddr_t pa;
313         vm_size_t physsz;
314         vm_size_t virtsz;
315         u_long data;
316         u_long vpn;
317         phandle_t pmem;
318         phandle_t vmem;
319         u_int dtlb_slots_avail;
320         int i;
321         int j;
322         int sz;
323         uint32_t asi;
324         uint32_t colors;
325         uint32_t ldd;
326
327         /*
328          * Set the kernel context.
329          */
330         pmap_set_kctx();
331
332         colors = dcache_color_ignore != 0 ? 1 : DCACHE_COLORS;
333
334         /*
335          * Find out what physical memory is available from the PROM and
336          * initialize the phys_avail array.  This must be done before
337          * pmap_bootstrap_alloc is called.
338          */
339         if ((pmem = OF_finddevice("/memory")) == -1)
340                 panic("pmap_bootstrap: finddevice /memory");
341         if ((sz = OF_getproplen(pmem, "available")) == -1)
342                 panic("pmap_bootstrap: getproplen /memory/available");
343         if (sizeof(phys_avail) < sz)
344                 panic("pmap_bootstrap: phys_avail too small");
345         if (sizeof(mra) < sz)
346                 panic("pmap_bootstrap: mra too small");
347         bzero(mra, sz);
348         if (OF_getprop(pmem, "available", mra, sz) == -1)
349                 panic("pmap_bootstrap: getprop /memory/available");
350         sz /= sizeof(*mra);
351         CTR0(KTR_PMAP, "pmap_bootstrap: physical memory");
352         qsort(mra, sz, sizeof (*mra), mr_cmp);
353         physsz = 0;
354         getenv_quad("hw.physmem", &physmem);
355         physmem = btoc(physmem);
356         for (i = 0, j = 0; i < sz; i++, j += 2) {
357                 CTR2(KTR_PMAP, "start=%#lx size=%#lx", mra[i].mr_start,
358                     mra[i].mr_size);
359                 if (physmem != 0 && btoc(physsz + mra[i].mr_size) >= physmem) {
360                         if (btoc(physsz) < physmem) {
361                                 phys_avail[j] = mra[i].mr_start;
362                                 phys_avail[j + 1] = mra[i].mr_start +
363                                     (ctob(physmem) - physsz);
364                                 physsz = ctob(physmem);
365                         }
366                         break;
367                 }
368                 phys_avail[j] = mra[i].mr_start;
369                 phys_avail[j + 1] = mra[i].mr_start + mra[i].mr_size;
370                 physsz += mra[i].mr_size;
371         }
372         physmem = btoc(physsz);
373
374         /*
375          * Calculate the size of kernel virtual memory, and the size and mask
376  * for the kernel TSB based on the physical memory size but limited
377          * by the amount of dTLB slots available for locked entries if we have
378          * to lock the TSB in the TLB (given that for spitfire-class CPUs all
379          * of the dt64 slots can hold locked entries but there is no large
380          * dTLB for unlocked ones, we don't use more than half of it for the
381          * TSB).
382          * Note that for reasons unknown OpenSolaris doesn't take advantage of
383          * ASI_ATOMIC_QUAD_LDD_PHYS on UltraSPARC-III.  However, given that no
384          * public documentation is available for these, the latter just might
385          * not support it, yet.
386          */
387         virtsz = roundup(physsz, PAGE_SIZE_4M << (PAGE_SHIFT - TTE_SHIFT));
388         if (cpu_impl == CPU_IMPL_SPARC64V ||
389             cpu_impl >= CPU_IMPL_ULTRASPARCIIIp)
390                 tsb_kernel_ldd_phys = 1;
391         else {
392                 dtlb_slots_avail = 0;
393                 for (i = 0; i < dtlb_slots; i++) {
394                         data = dtlb_get_data(i);
395                         if ((data & (TD_V | TD_L)) != (TD_V | TD_L))
396                                 dtlb_slots_avail++;
397                 }
398 #ifdef SMP
399                 dtlb_slots_avail -= PCPU_PAGES;
400 #endif
401                 if (cpu_impl >= CPU_IMPL_ULTRASPARCI &&
402                     cpu_impl < CPU_IMPL_ULTRASPARCIII)
403                         dtlb_slots_avail /= 2;
404                 virtsz = MIN(virtsz, (dtlb_slots_avail * PAGE_SIZE_4M) <<
405                     (PAGE_SHIFT - TTE_SHIFT));
406         }
407         vm_max_kernel_address = VM_MIN_KERNEL_ADDRESS + virtsz;
408         tsb_kernel_size = virtsz >> (PAGE_SHIFT - TTE_SHIFT);
409         tsb_kernel_mask = (tsb_kernel_size >> TTE_SHIFT) - 1;
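
        /*
         * Note on the arithmetic above: the roundup granule for virtsz is
         * chosen so that tsb_kernel_size works out to a whole number of
         * 4MB pages, matching the alignment check and the 4MB mappings
         * set up by pmap_map_tsb() below.  Assuming the usual sparc64
         * values of 8KB base pages (PAGE_SHIFT 13) and 16-byte TTEs
         * (TTE_SHIFT 4), each byte of TSB covers 512 bytes of KVA, so
         * for example 4GB of virtsz yields an 8MB kernel TSB.
         */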
410
411         /*
412          * Allocate the kernel TSB and lock it in the TLB if necessary.
413          */
414         pa = pmap_bootstrap_alloc(tsb_kernel_size, colors);
415         if (pa & PAGE_MASK_4M)
416                 panic("pmap_bootstrap: TSB unaligned\n");
417         tsb_kernel_phys = pa;
418         if (tsb_kernel_ldd_phys == 0) {
419                 tsb_kernel =
420                     (struct tte *)(VM_MIN_KERNEL_ADDRESS - tsb_kernel_size);
421                 pmap_map_tsb();
422                 bzero(tsb_kernel, tsb_kernel_size);
423         } else {
424                 tsb_kernel =
425                     (struct tte *)TLB_PHYS_TO_DIRECT(tsb_kernel_phys);
426                 aszero(ASI_PHYS_USE_EC, tsb_kernel_phys, tsb_kernel_size);
427         }
428
429         /*
430          * Allocate and map the dynamic per-CPU area for the BSP.
431          */
432         pa = pmap_bootstrap_alloc(DPCPU_SIZE, colors);
433         dpcpu0 = (void *)TLB_PHYS_TO_DIRECT(pa);
434
435         /*
436          * Allocate and map the message buffer.
437          */
438         pa = pmap_bootstrap_alloc(msgbufsize, colors);
439         msgbufp = (struct msgbuf *)TLB_PHYS_TO_DIRECT(pa);
440
441         /*
442          * Patch the TSB addresses and mask as well as the ASIs used to load
443          * it into the trap table.
444          */
445
446 #define LDDA_R_I_R(rd, imm_asi, rs1, rs2)                               \
447         (EIF_OP(IOP_LDST) | EIF_F3_RD(rd) | EIF_F3_OP3(INS3_LDDA) |     \
448             EIF_F3_RS1(rs1) | EIF_F3_I(0) | EIF_F3_IMM_ASI(imm_asi) |   \
449             EIF_F3_RS2(rs2))
450 #define OR_R_I_R(rd, imm13, rs1)                                        \
451         (EIF_OP(IOP_MISC) | EIF_F3_RD(rd) | EIF_F3_OP3(INS2_OR) |       \
452             EIF_F3_RS1(rs1) | EIF_F3_I(1) | EIF_IMM(imm13, 13))
453 #define SETHI(rd, imm22)                                                \
454         (EIF_OP(IOP_FORM2) | EIF_F2_RD(rd) | EIF_F2_OP2(INS0_SETHI) |   \
455             EIF_IMM((imm22) >> 10, 22))
456 #define WR_R_I(rd, imm13, rs1)                                          \
457         (EIF_OP(IOP_MISC) | EIF_F3_RD(rd) | EIF_F3_OP3(INS2_WR) |       \
458             EIF_F3_RS1(rs1) | EIF_F3_I(1) | EIF_IMM(imm13, 13))
459
460 #define PATCH_ASI(addr, asi) do {                                       \
461         if (addr[0] != WR_R_I(IF_F3_RD(addr[0]), 0x0,                   \
462             IF_F3_RS1(addr[0])))                                        \
463                 panic("%s: patched instructions have changed",          \
464                     __func__);                                          \
465         addr[0] |= EIF_IMM((asi), 13);                                  \
466         flush(addr);                                                    \
467 } while (0)
468
469 #define PATCH_LDD(addr, asi) do {                                       \
470         if (addr[0] != LDDA_R_I_R(IF_F3_RD(addr[0]), 0x0,               \
471             IF_F3_RS1(addr[0]), IF_F3_RS2(addr[0])))                    \
472                 panic("%s: patched instructions have changed",          \
473                     __func__);                                          \
474         addr[0] |= EIF_F3_IMM_ASI(asi);                                 \
475         flush(addr);                                                    \
476 } while (0)
477
478 #define PATCH_TSB(addr, val) do {                                       \
479         if (addr[0] != SETHI(IF_F2_RD(addr[0]), 0x0) ||                 \
480             addr[1] != OR_R_I_R(IF_F3_RD(addr[1]), 0x0,                 \
481             IF_F3_RS1(addr[1])) ||                                      \
482             addr[3] != SETHI(IF_F2_RD(addr[3]), 0x0))                   \
483                 panic("%s: patched instructions have changed",          \
484                     __func__);                                          \
485         addr[0] |= EIF_IMM((val) >> 42, 22);                            \
486         addr[1] |= EIF_IMM((val) >> 32, 10);                            \
487         addr[3] |= EIF_IMM((val) >> 10, 22);                            \
488         flush(addr);                                                    \
489         flush(addr + 1);                                                \
490         flush(addr + 3);                                                \
491 } while (0)
492
493 #define PATCH_TSB_MASK(addr, val) do {                                  \
494         if (addr[0] != SETHI(IF_F2_RD(addr[0]), 0x0) ||                 \
495             addr[1] != OR_R_I_R(IF_F3_RD(addr[1]), 0x0,                 \
496             IF_F3_RS1(addr[1])))                                        \
497                 panic("%s: patched instructions have changed",          \
498                     __func__);                                          \
499         addr[0] |= EIF_IMM((val) >> 10, 22);                            \
500         addr[1] |= EIF_IMM((val), 10);                                  \
501         flush(addr);                                                    \
502         flush(addr + 1);                                                \
503 } while (0)
504
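        /*
         * Note: the TLB miss handlers are assembled with zero
         * placeholders for the wr that sets the ASI, for the immediate
         * ASI of the quad ldda and for the sethi/or pairs that build the
         * TSB address and mask.  The macros above first verify that the
         * placeholder instructions still have the expected encodings
         * (panicking otherwise), then OR the real immediates in and
         * flush the affected I-cache lines.  This lets the same handlers
         * use either the virtual, TLB-locked TSB or the physically
         * addressed one, selected once at boot below.
         */
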
505         if (tsb_kernel_ldd_phys == 0) {
506                 asi = ASI_N;
507                 ldd = ASI_NUCLEUS_QUAD_LDD;
508                 off = (vm_offset_t)tsb_kernel;
509         } else {
510                 asi = ASI_PHYS_USE_EC;
511                 ldd = ASI_ATOMIC_QUAD_LDD_PHYS;
512                 off = (vm_offset_t)tsb_kernel_phys;
513         }
514         PATCH_TSB(tl1_dmmu_miss_direct_patch_tsb_phys_1, tsb_kernel_phys);
515         PATCH_TSB(tl1_dmmu_miss_direct_patch_tsb_phys_end_1,
516             tsb_kernel_phys + tsb_kernel_size - 1);
517         PATCH_ASI(tl1_dmmu_miss_patch_asi_1, asi);
518         PATCH_LDD(tl1_dmmu_miss_patch_quad_ldd_1, ldd);
519         PATCH_TSB(tl1_dmmu_miss_patch_tsb_1, off);
520         PATCH_TSB(tl1_dmmu_miss_patch_tsb_2, off);
521         PATCH_TSB_MASK(tl1_dmmu_miss_patch_tsb_mask_1, tsb_kernel_mask);
522         PATCH_TSB_MASK(tl1_dmmu_miss_patch_tsb_mask_2, tsb_kernel_mask);
523         PATCH_ASI(tl1_dmmu_prot_patch_asi_1, asi);
524         PATCH_LDD(tl1_dmmu_prot_patch_quad_ldd_1, ldd);
525         PATCH_TSB(tl1_dmmu_prot_patch_tsb_1, off);
526         PATCH_TSB(tl1_dmmu_prot_patch_tsb_2, off);
527         PATCH_TSB_MASK(tl1_dmmu_prot_patch_tsb_mask_1, tsb_kernel_mask);
528         PATCH_TSB_MASK(tl1_dmmu_prot_patch_tsb_mask_2, tsb_kernel_mask);
529         PATCH_ASI(tl1_immu_miss_patch_asi_1, asi);
530         PATCH_LDD(tl1_immu_miss_patch_quad_ldd_1, ldd);
531         PATCH_TSB(tl1_immu_miss_patch_tsb_1, off);
532         PATCH_TSB(tl1_immu_miss_patch_tsb_2, off);
533         PATCH_TSB_MASK(tl1_immu_miss_patch_tsb_mask_1, tsb_kernel_mask);
534         PATCH_TSB_MASK(tl1_immu_miss_patch_tsb_mask_2, tsb_kernel_mask);
535
536         /*
537          * Enter fake 8k pages for the 4MB kernel pages, so that
538          * pmap_kextract() will work for them.
539          */
540         for (i = 0; i < kernel_tlb_slots; i++) {
541                 pa = kernel_tlbs[i].te_pa;
542                 va = kernel_tlbs[i].te_va;
543                 for (off = 0; off < PAGE_SIZE_4M; off += PAGE_SIZE) {
544                         tp = tsb_kvtotte(va + off);
545                         vpn = TV_VPN(va + off, TS_8K);
546                         data = TD_V | TD_8K | TD_PA(pa + off) | TD_REF |
547                             TD_SW | TD_CP | TD_CV | TD_P | TD_W;
548                         pmap_bootstrap_set_tte(tp, vpn, data);
549                 }
550         }
551
552         /*
553          * Set the start and end of KVA.  The kernel is loaded starting
554          * at the first available 4MB super page, so we advance to the
555          * end of the last one used for it.
556          */
557         virtual_avail = KERNBASE + kernel_tlb_slots * PAGE_SIZE_4M;
558         virtual_end = vm_max_kernel_address;
559         kernel_vm_end = vm_max_kernel_address;
560
561         /*
562          * Allocate kva space for temporary mappings.
563          */
564         pmap_idle_map = virtual_avail;
565         virtual_avail += PAGE_SIZE * colors;
566         pmap_temp_map_1 = virtual_avail;
567         virtual_avail += PAGE_SIZE * colors;
568         pmap_temp_map_2 = virtual_avail;
569         virtual_avail += PAGE_SIZE * colors;
570
571         /*
572          * Allocate a kernel stack with guard page for thread0 and map it
573          * into the kernel TSB.  We must ensure that the virtual address is
574          * colored properly for corresponding CPUs, since we're allocating
575          * from phys_avail so the memory won't have an associated vm_page_t.
576          */
577         pa = pmap_bootstrap_alloc(KSTACK_PAGES * PAGE_SIZE, colors);
578         kstack0_phys = pa;
579         virtual_avail += roundup(KSTACK_GUARD_PAGES, colors) * PAGE_SIZE;
580         kstack0 = virtual_avail;
581         virtual_avail += roundup(KSTACK_PAGES, colors) * PAGE_SIZE;
582         if (dcache_color_ignore == 0)
583                 KASSERT(DCACHE_COLOR(kstack0) == DCACHE_COLOR(kstack0_phys),
584                     ("pmap_bootstrap: kstack0 miscolored"));
585         for (i = 0; i < KSTACK_PAGES; i++) {
586                 pa = kstack0_phys + i * PAGE_SIZE;
587                 va = kstack0 + i * PAGE_SIZE;
588                 tp = tsb_kvtotte(va);
589                 vpn = TV_VPN(va, TS_8K);
590                 data = TD_V | TD_8K | TD_PA(pa) | TD_REF | TD_SW | TD_CP |
591                     TD_CV | TD_P | TD_W;
592                 pmap_bootstrap_set_tte(tp, vpn, data);
593         }
594
595         /*
596          * Calculate the last available physical address.
597          */
598         for (i = 0; phys_avail[i + 2] != 0; i += 2)
599                 ;
600         Maxmem = sparc64_btop(phys_avail[i + 1]);
601
602         /*
603          * Add the PROM mappings to the kernel TSB.
604          */
605         if ((vmem = OF_finddevice("/virtual-memory")) == -1)
606                 panic("pmap_bootstrap: finddevice /virtual-memory");
607         if ((sz = OF_getproplen(vmem, "translations")) == -1)
608                 panic("pmap_bootstrap: getproplen translations");
609         if (sizeof(translations) < sz)
610                 panic("pmap_bootstrap: translations too small");
611         bzero(translations, sz);
612         if (OF_getprop(vmem, "translations", translations, sz) == -1)
613                 panic("pmap_bootstrap: getprop /virtual-memory/translations");
614         sz /= sizeof(*translations);
615         translations_size = sz;
616         CTR0(KTR_PMAP, "pmap_bootstrap: translations");
617         qsort(translations, sz, sizeof (*translations), om_cmp);
618         for (i = 0; i < sz; i++) {
619                 CTR3(KTR_PMAP,
620                     "translation: start=%#lx size=%#lx tte=%#lx",
621                     translations[i].om_start, translations[i].om_size,
622                     translations[i].om_tte);
623                 if ((translations[i].om_tte & TD_V) == 0)
624                         continue;
625                 if (translations[i].om_start < VM_MIN_PROM_ADDRESS ||
626                     translations[i].om_start > VM_MAX_PROM_ADDRESS)
627                         continue;
628                 for (off = 0; off < translations[i].om_size;
629                     off += PAGE_SIZE) {
630                         va = translations[i].om_start + off;
631                         tp = tsb_kvtotte(va);
632                         vpn = TV_VPN(va, TS_8K);
633                         data = ((translations[i].om_tte &
634                             ~((TD_SOFT2_MASK << TD_SOFT2_SHIFT) |
635                             (cpu_impl >= CPU_IMPL_ULTRASPARCI &&
636                             cpu_impl < CPU_IMPL_ULTRASPARCIII ?
637                             (TD_DIAG_SF_MASK << TD_DIAG_SF_SHIFT) :
638                             (TD_RSVD_CH_MASK << TD_RSVD_CH_SHIFT)) |
639                             (TD_SOFT_MASK << TD_SOFT_SHIFT))) | TD_EXEC) +
640                             off;
641                         pmap_bootstrap_set_tte(tp, vpn, data);
642                 }
643         }
644
645         /*
646          * Get the available physical memory ranges from /memory/reg.  These
647          * are only used for kernel dumps, but it may not be wise to do PROM
648          * calls in that situation.
649          */
650         if ((sz = OF_getproplen(pmem, "reg")) == -1)
651                 panic("pmap_bootstrap: getproplen /memory/reg");
652         if (sizeof(sparc64_memreg) < sz)
653                 panic("pmap_bootstrap: sparc64_memreg too small");
654         if (OF_getprop(pmem, "reg", sparc64_memreg, sz) == -1)
655                 panic("pmap_bootstrap: getprop /memory/reg");
656         sparc64_nmemreg = sz / sizeof(*sparc64_memreg);
657
658         /*
659          * Initialize the kernel pmap (which is statically allocated).
660          * NOTE: PMAP_LOCK_INIT() is needed as part of the initialization
661          * but sparc64 start up is not ready to initialize mutexes yet.
662          * It is called in machdep.c.
663          */
664         pm = kernel_pmap;
665         for (i = 0; i < MAXCPU; i++)
666                 pm->pm_context[i] = TLB_CTX_KERNEL;
667         pm->pm_active = ~0;
668
669         /*
670          * Flush all non-locked TLB entries possibly left over by the
671          * firmware.
672          */
673         tlb_flush_nonlocked();
674 }
675
676 /*
677  * Map the 4MB kernel TSB pages.
678  */
679 void
680 pmap_map_tsb(void)
681 {
682         vm_offset_t va;
683         vm_paddr_t pa;
684         u_long data;
685         int i;
686
687         for (i = 0; i < tsb_kernel_size; i += PAGE_SIZE_4M) {
688                 va = (vm_offset_t)tsb_kernel + i;
689                 pa = tsb_kernel_phys + i;
690                 data = TD_V | TD_4M | TD_PA(pa) | TD_L | TD_CP | TD_CV |
691                     TD_P | TD_W;
692                 stxa(AA_DMMU_TAR, ASI_DMMU, TLB_TAR_VA(va) |
693                     TLB_TAR_CTX(TLB_CTX_KERNEL));
694                 stxa_sync(0, ASI_DTLB_DATA_IN_REG, data);
695         }
696 }
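
/*
 * Note: pmap_map_tsb() is only used when the kernel TSB cannot be read
 * with ASI_ATOMIC_QUAD_LDD_PHYS.  In that case the TSB sits just below
 * VM_MIN_KERNEL_ADDRESS and is locked into the dTLB (TD_L) using 4MB
 * entries, so the miss handlers can access it without first having to
 * resolve a TLB miss on the TSB itself.
 */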
697
698 /*
699  * Set the secondary context to be the kernel context (needed for FP block
700  * operations in the kernel).
701  */
702 void
703 pmap_set_kctx(void)
704 {
705
706         stxa(AA_DMMU_SCXR, ASI_DMMU, (ldxa(AA_DMMU_SCXR, ASI_DMMU) &
707             TLB_CXR_PGSZ_MASK) | TLB_CTX_KERNEL);
708         flush(KERNBASE);
709 }
710
711 /*
712  * Allocate a physical page of memory directly from the phys_avail map.
713  * Can only be called from pmap_bootstrap before avail start and end are
714  * calculated.
715  */
716 static vm_paddr_t
717 pmap_bootstrap_alloc(vm_size_t size, uint32_t colors)
718 {
719         vm_paddr_t pa;
720         int i;
721
722         size = roundup(size, PAGE_SIZE * colors);
723         for (i = 0; phys_avail[i + 1] != 0; i += 2) {
724                 if (phys_avail[i + 1] - phys_avail[i] < size)
725                         continue;
726                 pa = phys_avail[i];
727                 phys_avail[i] += size;
728                 return (pa);
729         }
730         panic("pmap_bootstrap_alloc");
731 }
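
/*
 * Note: sizes are rounded up to a multiple of PAGE_SIZE * colors so that
 * successive bootstrap allocations keep a consistent data cache color
 * relative to the start of the phys_avail range they are carved from.
 * pmap_bootstrap() relies on being able to reason about the colors of
 * this memory by hand (see the kstack0 KASSERT), since anything taken
 * from phys_avail has no vm_page_t to record its color.
 */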
732
733 /*
734  * Set a TTE.  This function is intended as a helper when tsb_kernel is
735  * direct-mapped but we haven't taken over the trap table yet, as is the
736  * case when we are taking advantage of ASI_ATOMIC_QUAD_LDD_PHYS to access
737  * the kernel TSB.
738  */
739 void
740 pmap_bootstrap_set_tte(struct tte *tp, u_long vpn, u_long data)
741 {
742
743         if (tsb_kernel_ldd_phys == 0) {
744                 tp->tte_vpn = vpn;
745                 tp->tte_data = data;
746         } else {
747                 stxa((vm_paddr_t)tp + offsetof(struct tte, tte_vpn),
748                     ASI_PHYS_USE_EC, vpn);
749                 stxa((vm_paddr_t)tp + offsetof(struct tte, tte_data),
750                     ASI_PHYS_USE_EC, data);
751         }
752 }
753
754 /*
755  * Initialize a vm_page's machine-dependent fields.
756  */
757 void
758 pmap_page_init(vm_page_t m)
759 {
760
761         TAILQ_INIT(&m->md.tte_list);
762         m->md.color = DCACHE_COLOR(VM_PAGE_TO_PHYS(m));
763         m->md.flags = 0;
764         m->md.pmap = NULL;
765 }
766
767 /*
768  * Initialize the pmap module.
769  */
770 void
771 pmap_init(void)
772 {
773         vm_offset_t addr;
774         vm_size_t size;
775         int result;
776         int i;
777
778         for (i = 0; i < translations_size; i++) {
779                 addr = translations[i].om_start;
780                 size = translations[i].om_size;
781                 if ((translations[i].om_tte & TD_V) == 0)
782                         continue;
783                 if (addr < VM_MIN_PROM_ADDRESS || addr > VM_MAX_PROM_ADDRESS)
784                         continue;
785                 result = vm_map_find(kernel_map, NULL, 0, &addr, size,
786                     VMFS_NO_SPACE, VM_PROT_ALL, VM_PROT_ALL, MAP_NOFAULT);
787                 if (result != KERN_SUCCESS || addr != translations[i].om_start)
788                         panic("pmap_init: vm_map_find");
789         }
790 }
791
792 /*
793  * Extract the physical page address associated with the given
794  * map/virtual_address pair.
795  */
796 vm_paddr_t
797 pmap_extract(pmap_t pm, vm_offset_t va)
798 {
799         struct tte *tp;
800         vm_paddr_t pa;
801
802         if (pm == kernel_pmap)
803                 return (pmap_kextract(va));
804         PMAP_LOCK(pm);
805         tp = tsb_tte_lookup(pm, va);
806         if (tp == NULL)
807                 pa = 0;
808         else
809                 pa = TTE_GET_PA(tp) | (va & TTE_GET_PAGE_MASK(tp));
810         PMAP_UNLOCK(pm);
811         return (pa);
812 }
813
814 /*
815  * Atomically extract and hold the physical page with the given
816  * pmap and virtual address pair if that mapping permits the given
817  * protection.
818  */
819 vm_page_t
820 pmap_extract_and_hold(pmap_t pm, vm_offset_t va, vm_prot_t prot)
821 {
822         struct tte *tp;
823         vm_page_t m;
824
825         m = NULL;
826         vm_page_lock_queues();
827         if (pm == kernel_pmap) {
828                 if (va >= VM_MIN_DIRECT_ADDRESS) {
829                         tp = NULL;
830                         m = PHYS_TO_VM_PAGE(TLB_DIRECT_TO_PHYS(va));
831                         vm_page_hold(m);
832                 } else {
833                         tp = tsb_kvtotte(va);
834                         if ((tp->tte_data & TD_V) == 0)
835                                 tp = NULL;
836                 }
837         } else {
838                 PMAP_LOCK(pm);
839                 tp = tsb_tte_lookup(pm, va);
840         }
841         if (tp != NULL && ((tp->tte_data & TD_SW) ||
842             (prot & VM_PROT_WRITE) == 0)) {
843                 m = PHYS_TO_VM_PAGE(TTE_GET_PA(tp));
844                 vm_page_hold(m);
845         }
846         vm_page_unlock_queues();
847         if (pm != kernel_pmap)
848                 PMAP_UNLOCK(pm);
849         return (m);
850 }
851
852 /*
853  * Extract the physical page address associated with the given kernel virtual
854  * address.
855  */
856 vm_paddr_t
857 pmap_kextract(vm_offset_t va)
858 {
859         struct tte *tp;
860
861         if (va >= VM_MIN_DIRECT_ADDRESS)
862                 return (TLB_DIRECT_TO_PHYS(va));
863         tp = tsb_kvtotte(va);
864         if ((tp->tte_data & TD_V) == 0)
865                 return (0);
866         return (TTE_GET_PA(tp) | (va & TTE_GET_PAGE_MASK(tp)));
867 }
868
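/*
 * Note on the cache color tracking below: the UltraSPARC data cache is
 * virtually indexed and physically tagged, so a physical page may only
 * be cached while all of its virtual mappings fall on the same color
 * (two colors, assuming the usual 16KB direct-mapped D-cache and 8KB
 * pages of spitfire-class CPUs).  pmap_cache_enter() counts mappings per
 * color in the page's md area; as soon as both colors are in use, every
 * mapping has TD_CV cleared and m->md.color is set to -1, until
 * pmap_cache_remove() observes that one color has drained again.
 */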
869 int
870 pmap_cache_enter(vm_page_t m, vm_offset_t va)
871 {
872         struct tte *tp;
873         int color;
874
875         mtx_assert(&vm_page_queue_mtx, MA_OWNED);
876         KASSERT((m->flags & PG_FICTITIOUS) == 0,
877             ("pmap_cache_enter: fake page"));
878         PMAP_STATS_INC(pmap_ncache_enter);
879
880         if (dcache_color_ignore != 0)
881                 return (1);
882
883         /*
884          * Find the color for this virtual address and note the added mapping.
885          */
886         color = DCACHE_COLOR(va);
887         m->md.colors[color]++;
888
889         /*
890          * If all existing mappings have the same color, the mapping is
891          * cacheable.
892          */
893         if (m->md.color == color) {
894                 KASSERT(m->md.colors[DCACHE_OTHER_COLOR(color)] == 0,
895                     ("pmap_cache_enter: cacheable, mappings of other color"));
896                 if (m->md.color == DCACHE_COLOR(VM_PAGE_TO_PHYS(m)))
897                         PMAP_STATS_INC(pmap_ncache_enter_c);
898                 else
899                         PMAP_STATS_INC(pmap_ncache_enter_oc);
900                 return (1);
901         }
902
903         /*
904          * If there are no mappings of the other color, and the page still has
905          * the wrong color, this must be a new mapping.  Change the color to
906          * match the new mapping, which is cacheable.  We must flush the page
907          * from the cache now.
908          */
909         if (m->md.colors[DCACHE_OTHER_COLOR(color)] == 0) {
910                 KASSERT(m->md.colors[color] == 1,
911                     ("pmap_cache_enter: changing color, not new mapping"));
912                 dcache_page_inval(VM_PAGE_TO_PHYS(m));
913                 m->md.color = color;
914                 if (m->md.color == DCACHE_COLOR(VM_PAGE_TO_PHYS(m)))
915                         PMAP_STATS_INC(pmap_ncache_enter_cc);
916                 else
917                         PMAP_STATS_INC(pmap_ncache_enter_coc);
918                 return (1);
919         }
920
921         /*
922          * If the mapping is already non-cacheable, just return.
923          */
924         if (m->md.color == -1) {
925                 PMAP_STATS_INC(pmap_ncache_enter_nc);
926                 return (0);
927         }
928
929         PMAP_STATS_INC(pmap_ncache_enter_cnc);
930
931         /*
932          * Mark all mappings as uncacheable, flush any lines with the other
933          * color out of the dcache, and set the color to none (-1).
934          */
935         TAILQ_FOREACH(tp, &m->md.tte_list, tte_link) {
936                 atomic_clear_long(&tp->tte_data, TD_CV);
937                 tlb_page_demap(TTE_GET_PMAP(tp), TTE_GET_VA(tp));
938         }
939         dcache_page_inval(VM_PAGE_TO_PHYS(m));
940         m->md.color = -1;
941         return (0);
942 }
943
944 void
945 pmap_cache_remove(vm_page_t m, vm_offset_t va)
946 {
947         struct tte *tp;
948         int color;
949
950         mtx_assert(&vm_page_queue_mtx, MA_OWNED);
951         CTR3(KTR_PMAP, "pmap_cache_remove: m=%p va=%#lx c=%d", m, va,
952             m->md.colors[DCACHE_COLOR(va)]);
953         KASSERT((m->flags & PG_FICTITIOUS) == 0,
954             ("pmap_cache_remove: fake page"));
955         PMAP_STATS_INC(pmap_ncache_remove);
956
957         if (dcache_color_ignore != 0)
958                 return;
959
960         KASSERT(m->md.colors[DCACHE_COLOR(va)] > 0,
961             ("pmap_cache_remove: no mappings %d <= 0",
962             m->md.colors[DCACHE_COLOR(va)]));
963
964         /*
965          * Find the color for this virtual address and note the removal of
966          * the mapping.
967          */
968         color = DCACHE_COLOR(va);
969         m->md.colors[color]--;
970
971         /*
972          * If the page is cacheable, just return and keep the same color, even
973          * if there are no longer any mappings.
974          */
975         if (m->md.color != -1) {
976                 if (m->md.color == DCACHE_COLOR(VM_PAGE_TO_PHYS(m)))
977                         PMAP_STATS_INC(pmap_ncache_remove_c);
978                 else
979                         PMAP_STATS_INC(pmap_ncache_remove_oc);
980                 return;
981         }
982
983         KASSERT(m->md.colors[DCACHE_OTHER_COLOR(color)] != 0,
984             ("pmap_cache_remove: uncacheable, no mappings of other color"));
985
986         /*
987          * If the page is not cacheable (color is -1), and the number of
988          * mappings for this color is not zero, just return.  There are
989          * mappings of the other color still, so remain non-cacheable.
990          */
991         if (m->md.colors[color] != 0) {
992                 PMAP_STATS_INC(pmap_ncache_remove_nc);
993                 return;
994         }
995
996         /*
997          * The number of mappings for this color is now zero.  Recache the
998          * other colored mappings, and change the page color to the other
999          * color.  There should be no lines in the data cache for this page,
1000          * so flushing should not be needed.
1001          */
1002         TAILQ_FOREACH(tp, &m->md.tte_list, tte_link) {
1003                 atomic_set_long(&tp->tte_data, TD_CV);
1004                 tlb_page_demap(TTE_GET_PMAP(tp), TTE_GET_VA(tp));
1005         }
1006         m->md.color = DCACHE_OTHER_COLOR(color);
1007
1008         if (m->md.color == DCACHE_COLOR(VM_PAGE_TO_PHYS(m)))
1009                 PMAP_STATS_INC(pmap_ncache_remove_cc);
1010         else
1011                 PMAP_STATS_INC(pmap_ncache_remove_coc);
1012 }
1013
1014 /*
1015  * Map a wired page into kernel virtual address space.
1016  */
1017 void
1018 pmap_kenter(vm_offset_t va, vm_page_t m)
1019 {
1020         vm_offset_t ova;
1021         struct tte *tp;
1022         vm_page_t om;
1023         u_long data;
1024
1025         mtx_assert(&vm_page_queue_mtx, MA_OWNED);
1026         PMAP_STATS_INC(pmap_nkenter);
1027         tp = tsb_kvtotte(va);
1028         CTR4(KTR_PMAP, "pmap_kenter: va=%#lx pa=%#lx tp=%p data=%#lx",
1029             va, VM_PAGE_TO_PHYS(m), tp, tp->tte_data);
1030         if (DCACHE_COLOR(VM_PAGE_TO_PHYS(m)) != DCACHE_COLOR(va)) {
1031                 CTR5(KTR_CT2,
1032         "pmap_kenter: off color va=%#lx pa=%#lx o=%p ot=%d pi=%#lx",
1033                     va, VM_PAGE_TO_PHYS(m), m->object,
1034                     m->object ? m->object->type : -1,
1035                     m->pindex);
1036                 PMAP_STATS_INC(pmap_nkenter_oc);
1037         }
1038         if ((tp->tte_data & TD_V) != 0) {
1039                 om = PHYS_TO_VM_PAGE(TTE_GET_PA(tp));
1040                 ova = TTE_GET_VA(tp);
1041                 if (m == om && va == ova) {
1042                         PMAP_STATS_INC(pmap_nkenter_stupid);
1043                         return;
1044                 }
1045                 TAILQ_REMOVE(&om->md.tte_list, tp, tte_link);
1046                 pmap_cache_remove(om, ova);
1047                 if (va != ova)
1048                         tlb_page_demap(kernel_pmap, ova);
1049         }
1050         data = TD_V | TD_8K | VM_PAGE_TO_PHYS(m) | TD_REF | TD_SW | TD_CP |
1051             TD_P | TD_W;
1052         if (pmap_cache_enter(m, va) != 0)
1053                 data |= TD_CV;
1054         tp->tte_vpn = TV_VPN(va, TS_8K);
1055         tp->tte_data = data;
1056         TAILQ_INSERT_TAIL(&m->md.tte_list, tp, tte_link);
1057 }
1058
1059 /*
1060  * Map a wired page into kernel virtual address space.  This additionally
1061  * takes a flag argument which is or'ed to the TTE data.  This is used by
1062  * sparc64_bus_mem_map().
1063  * NOTE: if the mapping is non-cacheable, it's the caller's responsibility
1064  * to flush entries that might still be in the cache, if applicable.
1065  */
1066 void
1067 pmap_kenter_flags(vm_offset_t va, vm_paddr_t pa, u_long flags)
1068 {
1069         struct tte *tp;
1070
1071         tp = tsb_kvtotte(va);
1072         CTR4(KTR_PMAP, "pmap_kenter_flags: va=%#lx pa=%#lx tp=%p data=%#lx",
1073             va, pa, tp, tp->tte_data);
1074         tp->tte_vpn = TV_VPN(va, TS_8K);
1075         tp->tte_data = TD_V | TD_8K | TD_PA(pa) | TD_REF | TD_P | flags;
1076 }
1077
1078 /*
1079  * Remove a wired page from kernel virtual address space.
1080  */
1081 void
1082 pmap_kremove(vm_offset_t va)
1083 {
1084         struct tte *tp;
1085         vm_page_t m;
1086
1087         mtx_assert(&vm_page_queue_mtx, MA_OWNED);
1088         PMAP_STATS_INC(pmap_nkremove);
1089         tp = tsb_kvtotte(va);
1090         CTR3(KTR_PMAP, "pmap_kremove: va=%#lx tp=%p data=%#lx", va, tp,
1091             tp->tte_data);
1092         if ((tp->tte_data & TD_V) == 0)
1093                 return;
1094         m = PHYS_TO_VM_PAGE(TTE_GET_PA(tp));
1095         TAILQ_REMOVE(&m->md.tte_list, tp, tte_link);
1096         pmap_cache_remove(m, va);
1097         TTE_ZERO(tp);
1098 }
1099
1100 /*
1101  * Inverse of pmap_kenter_flags, used by bus_space_unmap().
1102  */
1103 void
1104 pmap_kremove_flags(vm_offset_t va)
1105 {
1106         struct tte *tp;
1107
1108         tp = tsb_kvtotte(va);
1109         CTR3(KTR_PMAP, "pmap_kremove_flags: va=%#lx tp=%p data=%#lx", va, tp,
1110             tp->tte_data);
1111         TTE_ZERO(tp);
1112 }
1113
1114 /*
1115  * Map a range of physical addresses into kernel virtual address space.
1116  *
1117  * The value passed in *virt is a suggested virtual address for the mapping.
1118  * Architectures which can support a direct-mapped physical to virtual region
1119  * can return the appropriate address within that region, leaving '*virt'
1120  * unchanged.
1121  */
1122 vm_offset_t
1123 pmap_map(vm_offset_t *virt, vm_paddr_t start, vm_paddr_t end, int prot)
1124 {
1125
1126         return (TLB_PHYS_TO_DIRECT(start));
1127 }
1128
1129 /*
1130  * Map a list of wired pages into kernel virtual address space.  This is
1131  * intended for temporary mappings which do not need page modification or
1132  * references recorded.  Existing mappings in the region are overwritten.
1133  */
1134 void
1135 pmap_qenter(vm_offset_t sva, vm_page_t *m, int count)
1136 {
1137         vm_offset_t va;
1138         int locked;
1139
1140         PMAP_STATS_INC(pmap_nqenter);
1141         va = sva;
1142         if (!(locked = mtx_owned(&vm_page_queue_mtx)))
1143                 vm_page_lock_queues();
1144         while (count-- > 0) {
1145                 pmap_kenter(va, *m);
1146                 va += PAGE_SIZE;
1147                 m++;
1148         }
1149         if (!locked)
1150                 vm_page_unlock_queues();
1151         tlb_range_demap(kernel_pmap, sva, va);
1152 }
1153
1154 /*
1155  * Remove page mappings from kernel virtual address space.  Intended for
1156  * temporary mappings entered by pmap_qenter.
1157  */
1158 void
1159 pmap_qremove(vm_offset_t sva, int count)
1160 {
1161         vm_offset_t va;
1162         int locked;
1163
1164         PMAP_STATS_INC(pmap_nqremove);
1165         va = sva;
1166         if (!(locked = mtx_owned(&vm_page_queue_mtx)))
1167                 vm_page_lock_queues();
1168         while (count-- > 0) {
1169                 pmap_kremove(va);
1170                 va += PAGE_SIZE;
1171         }
1172         if (!locked)
1173                 vm_page_unlock_queues();
1174         tlb_range_demap(kernel_pmap, sva, va);
1175 }
1176
1177 /*
1178  * Initialize the pmap associated with process 0.
1179  */
1180 void
1181 pmap_pinit0(pmap_t pm)
1182 {
1183         int i;
1184
1185         PMAP_LOCK_INIT(pm);
1186         for (i = 0; i < MAXCPU; i++)
1187                 pm->pm_context[i] = TLB_CTX_KERNEL;
1188         pm->pm_active = 0;
1189         pm->pm_tsb = NULL;
1190         pm->pm_tsb_obj = NULL;
1191         bzero(&pm->pm_stats, sizeof(pm->pm_stats));
1192 }
1193
1194 /*
1195  * Initialize a preallocated and zeroed pmap structure, such as one in a
1196  * vmspace structure.
1197  */
1198 int
1199 pmap_pinit(pmap_t pm)
1200 {
1201         vm_page_t ma[TSB_PAGES];
1202         vm_page_t m;
1203         int i;
1204
1205         PMAP_LOCK_INIT(pm);
1206
1207         /*
1208          * Allocate KVA space for the TSB.
1209          */
1210         if (pm->pm_tsb == NULL) {
1211                 pm->pm_tsb = (struct tte *)kmem_alloc_nofault(kernel_map,
1212                     TSB_BSIZE);
1213                 if (pm->pm_tsb == NULL) {
1214                         PMAP_LOCK_DESTROY(pm);
1215                         return (0);
1216                 }
1217         }
1218
1219         /*
1220          * Allocate an object for it.
1221          */
1222         if (pm->pm_tsb_obj == NULL)
1223                 pm->pm_tsb_obj = vm_object_allocate(OBJT_DEFAULT, TSB_PAGES);
1224
1225         mtx_lock_spin(&sched_lock);
1226         for (i = 0; i < MAXCPU; i++)
1227                 pm->pm_context[i] = -1;
1228         pm->pm_active = 0;
1229         mtx_unlock_spin(&sched_lock);
1230
1231         VM_OBJECT_LOCK(pm->pm_tsb_obj);
1232         for (i = 0; i < TSB_PAGES; i++) {
1233                 m = vm_page_grab(pm->pm_tsb_obj, i, VM_ALLOC_NOBUSY |
1234                     VM_ALLOC_RETRY | VM_ALLOC_WIRED | VM_ALLOC_ZERO);
1235                 m->valid = VM_PAGE_BITS_ALL;
1236                 m->md.pmap = pm;
1237                 ma[i] = m;
1238         }
1239         VM_OBJECT_UNLOCK(pm->pm_tsb_obj);
1240         pmap_qenter((vm_offset_t)pm->pm_tsb, ma, TSB_PAGES);
1241
1242         bzero(&pm->pm_stats, sizeof(pm->pm_stats));
1243         return (1);
1244 }
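
/*
 * Note: pm_tsb and pm_tsb_obj survive pmap_release(), so the KVA window
 * and the backing VM object are only set up the first time a given pmap
 * structure goes through pmap_pinit(); on reuse only the wired, zeroed
 * TSB pages are reallocated and re-entered with pmap_qenter().
 */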
1245
1246 /*
1247  * Release any resources held by the given physical map.
1248  * Called when a pmap initialized by pmap_pinit is being released.
1249  * Should only be called if the map contains no valid mappings.
1250  */
1251 void
1252 pmap_release(pmap_t pm)
1253 {
1254         vm_object_t obj;
1255         vm_page_t m;
1256         struct pcpu *pc;
1257
1258         CTR2(KTR_PMAP, "pmap_release: ctx=%#x tsb=%p",
1259             pm->pm_context[curcpu], pm->pm_tsb);
1260         KASSERT(pmap_resident_count(pm) == 0,
1261             ("pmap_release: resident pages %ld != 0",
1262             pmap_resident_count(pm)));
1263
1264         /*
1265          * After the pmap was freed, it might be reallocated to a new process.
1266          * When switching, this might lead us to wrongly assume that we need
1267          * not switch contexts because old and new pmap pointer are equal.
1268          * Therefore, make sure that this pmap is not referenced by any PCPU
1269          * pointer any more.  This could happen in two cases:
1270          * - A process that referenced the pmap is currently exiting on a CPU.
1271          *   However, it is guaranteed to not switch in any more after setting
1272          *   its state to PRS_ZOMBIE.
1273          * - A process that referenced this pmap ran on a CPU, but we switched
1274          *   to a kernel thread, leaving the pmap pointer unchanged.
1275          */
1276         mtx_lock_spin(&sched_lock);
1277         SLIST_FOREACH(pc, &cpuhead, pc_allcpu)
1278                 if (pc->pc_pmap == pm)
1279                         pc->pc_pmap = NULL;
1280         mtx_unlock_spin(&sched_lock);
1281
1282         obj = pm->pm_tsb_obj;
1283         VM_OBJECT_LOCK(obj);
1284         KASSERT(obj->ref_count == 1, ("pmap_release: tsbobj ref count != 1"));
1285         while (!TAILQ_EMPTY(&obj->memq)) {
1286                 m = TAILQ_FIRST(&obj->memq);
1287                 vm_page_lock_queues();
1288                 if (vm_page_sleep_if_busy(m, FALSE, "pmaprl"))
1289                         continue;
1290                 KASSERT(m->hold_count == 0,
1291                     ("pmap_release: freeing held tsb page"));
1292                 m->md.pmap = NULL;
1293                 m->wire_count--;
1294                 atomic_subtract_int(&cnt.v_wire_count, 1);
1295                 vm_page_free_zero(m);
1296                 vm_page_unlock_queues();
1297         }
1298         VM_OBJECT_UNLOCK(obj);
1299         pmap_qremove((vm_offset_t)pm->pm_tsb, TSB_PAGES);
1300         PMAP_LOCK_DESTROY(pm);
1301 }
1302
1303 /*
1304  * Grow the number of kernel page table entries.  Unneeded.
1305  */
1306 void
1307 pmap_growkernel(vm_offset_t addr)
1308 {
1309
1310         panic("pmap_growkernel: can't grow kernel");
1311 }
1312
1313 int
1314 pmap_remove_tte(struct pmap *pm, struct pmap *pm2, struct tte *tp,
1315     vm_offset_t va)
1316 {
1317         vm_page_t m;
1318         u_long data;
1319
1320         mtx_assert(&vm_page_queue_mtx, MA_OWNED);
1321         data = atomic_readandclear_long(&tp->tte_data);
1322         if ((data & TD_FAKE) == 0) {
1323                 m = PHYS_TO_VM_PAGE(TD_PA(data));
1324                 TAILQ_REMOVE(&m->md.tte_list, tp, tte_link);
1325                 if ((data & TD_WIRED) != 0)
1326                         pm->pm_stats.wired_count--;
1327                 if ((data & TD_PV) != 0) {
1328                         if ((data & TD_W) != 0)
1329                                 vm_page_dirty(m);
1330                         if ((data & TD_REF) != 0)
1331                                 vm_page_flag_set(m, PG_REFERENCED);
1332                         if (TAILQ_EMPTY(&m->md.tte_list))
1333                                 vm_page_flag_clear(m, PG_WRITEABLE);
1334                         pm->pm_stats.resident_count--;
1335                 }
1336                 pmap_cache_remove(m, va);
1337         }
1338         TTE_ZERO(tp);
1339         if (PMAP_REMOVE_DONE(pm))
1340                 return (0);
1341         return (1);
1342 }
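
/*
 * Note: the return value follows the callback convention of tsb_foreach()
 * and of the per-page loop in pmap_remove() below: returning 0 stops the
 * walk early (here, once PMAP_REMOVE_DONE() reports that no resident
 * pages are left), while returning 1 continues, as pmap_protect_tte()
 * always does.
 */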
1343
1344 /*
1345  * Remove the given range of addresses from the specified map.
1346  */
1347 void
1348 pmap_remove(pmap_t pm, vm_offset_t start, vm_offset_t end)
1349 {
1350         struct tte *tp;
1351         vm_offset_t va;
1352
1353         CTR3(KTR_PMAP, "pmap_remove: ctx=%#lx start=%#lx end=%#lx",
1354             pm->pm_context[curcpu], start, end);
1355         if (PMAP_REMOVE_DONE(pm))
1356                 return;
1357         vm_page_lock_queues();
1358         PMAP_LOCK(pm);
1359         if (end - start > PMAP_TSB_THRESH) {
1360                 tsb_foreach(pm, NULL, start, end, pmap_remove_tte);
1361                 tlb_context_demap(pm);
1362         } else {
1363                 for (va = start; va < end; va += PAGE_SIZE)
1364                         if ((tp = tsb_tte_lookup(pm, va)) != NULL &&
1365                             !pmap_remove_tte(pm, NULL, tp, va))
1366                                 break;
1367                 tlb_range_demap(pm, start, end - 1);
1368         }
1369         PMAP_UNLOCK(pm);
1370         vm_page_unlock_queues();
1371 }
1372
1373 void
1374 pmap_remove_all(vm_page_t m)
1375 {
1376         struct pmap *pm;
1377         struct tte *tpn;
1378         struct tte *tp;
1379         vm_offset_t va;
1380
1381         mtx_assert(&vm_page_queue_mtx, MA_OWNED);
1382         for (tp = TAILQ_FIRST(&m->md.tte_list); tp != NULL; tp = tpn) {
1383                 tpn = TAILQ_NEXT(tp, tte_link);
1384                 if ((tp->tte_data & TD_PV) == 0)
1385                         continue;
1386                 pm = TTE_GET_PMAP(tp);
1387                 va = TTE_GET_VA(tp);
1388                 PMAP_LOCK(pm);
1389                 if ((tp->tte_data & TD_WIRED) != 0)
1390                         pm->pm_stats.wired_count--;
1391                 if ((tp->tte_data & TD_REF) != 0)
1392                         vm_page_flag_set(m, PG_REFERENCED);
1393                 if ((tp->tte_data & TD_W) != 0)
1394                         vm_page_dirty(m);
1395                 tp->tte_data &= ~TD_V;
1396                 tlb_page_demap(pm, va);
1397                 TAILQ_REMOVE(&m->md.tte_list, tp, tte_link);
1398                 pm->pm_stats.resident_count--;
1399                 pmap_cache_remove(m, va);
1400                 TTE_ZERO(tp);
1401                 PMAP_UNLOCK(pm);
1402         }
1403         vm_page_flag_clear(m, PG_WRITEABLE);
1404 }
1405
1406 int
1407 pmap_protect_tte(struct pmap *pm, struct pmap *pm2, struct tte *tp,
1408     vm_offset_t va)
1409 {
1410         u_long data;
1411         vm_page_t m;
1412
1413         data = atomic_clear_long(&tp->tte_data, TD_REF | TD_SW | TD_W);
1414         if ((data & TD_PV) != 0) {
1415                 m = PHYS_TO_VM_PAGE(TD_PA(data));
1416                 if ((data & TD_REF) != 0)
1417                         vm_page_flag_set(m, PG_REFERENCED);
1418                 if ((data & TD_W) != 0)
1419                         vm_page_dirty(m);
1420         }
1421         return (1);
1422 }
1423
1424 /*
1425  * Set the physical protection on the specified range of this map as requested.
1426  */
1427 void
1428 pmap_protect(pmap_t pm, vm_offset_t sva, vm_offset_t eva, vm_prot_t prot)
1429 {
1430         vm_offset_t va;
1431         struct tte *tp;
1432
1433         CTR4(KTR_PMAP, "pmap_protect: ctx=%#lx sva=%#lx eva=%#lx prot=%#lx",
1434             pm->pm_context[curcpu], sva, eva, prot);
1435
1436         if ((prot & VM_PROT_READ) == VM_PROT_NONE) {
1437                 pmap_remove(pm, sva, eva);
1438                 return;
1439         }
1440
1441         if (prot & VM_PROT_WRITE)
1442                 return;
1443
1444         vm_page_lock_queues();
1445         PMAP_LOCK(pm);
1446         if (eva - sva > PMAP_TSB_THRESH) {
1447                 tsb_foreach(pm, NULL, sva, eva, pmap_protect_tte);
1448                 tlb_context_demap(pm);
1449         } else {
1450                 for (va = sva; va < eva; va += PAGE_SIZE)
1451                         if ((tp = tsb_tte_lookup(pm, va)) != NULL)
1452                                 pmap_protect_tte(pm, NULL, tp, va);
1453                 tlb_range_demap(pm, sva, eva - 1);
1454         }
1455         PMAP_UNLOCK(pm);
1456         vm_page_unlock_queues();
1457 }
1458
1459 /*
1460  * Map the given physical page at the specified virtual address in the
1461  * target pmap with the protection requested.  If specified, the page
1462  * will be wired down.
1463  */
1464 void
1465 pmap_enter(pmap_t pm, vm_offset_t va, vm_prot_t access, vm_page_t m,
1466     vm_prot_t prot, boolean_t wired)
1467 {
1468
1469         vm_page_lock_queues();
1470         PMAP_LOCK(pm);
1471         pmap_enter_locked(pm, va, m, prot, wired);
1472         vm_page_unlock_queues();
1473         PMAP_UNLOCK(pm);
1474 }
1475
1476 /*
1477  * Map the given physical page at the specified virtual address in the
1478  * target pmap with the protection requested.  If specified, the page
1479  * will be wired down.
1480  *
1481  * The page queues and pmap must be locked.
1482  */
1483 static void
1484 pmap_enter_locked(pmap_t pm, vm_offset_t va, vm_page_t m, vm_prot_t prot,
1485     boolean_t wired)
1486 {
1487         struct tte *tp;
1488         vm_paddr_t pa;
1489         u_long data;
1490         int i;
1491
1492         mtx_assert(&vm_page_queue_mtx, MA_OWNED);
1493         PMAP_LOCK_ASSERT(pm, MA_OWNED);
1494         PMAP_STATS_INC(pmap_nenter);
1495         pa = VM_PAGE_TO_PHYS(m);
1496
1497         /*
1498          * If this is a fake page from the device_pager, but it covers actual
1499          * physical memory, convert to the real backing page.
1500          */
1501         if ((m->flags & PG_FICTITIOUS) != 0) {
1502                 for (i = 0; phys_avail[i + 1] != 0; i += 2) {
1503                         if (pa >= phys_avail[i] && pa <= phys_avail[i + 1]) {
1504                                 m = PHYS_TO_VM_PAGE(pa);
1505                                 break;
1506                         }
1507                 }
1508         }
1509
1510         CTR6(KTR_PMAP,
1511             "pmap_enter_locked: ctx=%p m=%p va=%#lx pa=%#lx prot=%#x wired=%d",
1512             pm->pm_context[curcpu], m, va, pa, prot, wired);
1513
1514         /*
1515          * If there is an existing mapping, and the physical address has not
1516  * changed, it must be a protection or wiring change.
1517          */
1518         if ((tp = tsb_tte_lookup(pm, va)) != NULL && TTE_GET_PA(tp) == pa) {
1519                 CTR0(KTR_PMAP, "pmap_enter_locked: update");
1520                 PMAP_STATS_INC(pmap_nenter_update);
1521
1522                 /*
1523                  * Wiring change, just update stats.
1524                  */
1525                 if (wired) {
1526                         if ((tp->tte_data & TD_WIRED) == 0) {
1527                                 tp->tte_data |= TD_WIRED;
1528                                 pm->pm_stats.wired_count++;
1529                         }
1530                 } else {
1531                         if ((tp->tte_data & TD_WIRED) != 0) {
1532                                 tp->tte_data &= ~TD_WIRED;
1533                                 pm->pm_stats.wired_count--;
1534                         }
1535                 }
1536
1537                 /*
1538                  * Save the old bits and clear the ones we're interested in.
1539                  */
1540                 data = tp->tte_data;
1541                 tp->tte_data &= ~(TD_EXEC | TD_SW | TD_W);
1542
1543                 /*
1544          * Grant write access if requested; otherwise sense modify status.
1545                  */
1546                 if ((prot & VM_PROT_WRITE) != 0) {
1547                         tp->tte_data |= TD_SW;
1548                         if (wired)
1549                                 tp->tte_data |= TD_W;
1550                         vm_page_flag_set(m, PG_WRITEABLE);
1551                 } else if ((data & TD_W) != 0)
1552                         vm_page_dirty(m);
1553
1554                 /*
1555                  * If we're turning on execute permissions, flush the icache.
1556                  */
1557                 if ((prot & VM_PROT_EXECUTE) != 0) {
1558                         if ((data & TD_EXEC) == 0)
1559                                 icache_page_inval(pa);
1560                         tp->tte_data |= TD_EXEC;
1561                 }
1562
1563                 /*
1564                  * Delete the old mapping.
1565                  */
1566                 tlb_page_demap(pm, TTE_GET_VA(tp));
1567         } else {
1568                 /*
1569                  * If there is an existing mapping, but its for a different
1570                  * If there is an existing mapping, but it's for a different
1571                  * physical address, delete the old mapping.
1572                 if (tp != NULL) {
1573                         CTR0(KTR_PMAP, "pmap_enter_locked: replace");
1574                         PMAP_STATS_INC(pmap_nenter_replace);
1575                         pmap_remove_tte(pm, NULL, tp, va);
1576                         tlb_page_demap(pm, va);
1577                 } else {
1578                         CTR0(KTR_PMAP, "pmap_enter_locked: new");
1579                         PMAP_STATS_INC(pmap_nenter_new);
1580                 }
1581
1582                 /*
1583                  * Now set up the data and install the new mapping.
1584                  */
1585                 data = TD_V | TD_8K | TD_PA(pa);
1586                 if (pm == kernel_pmap)
1587                         data |= TD_P;
1588                 if ((prot & VM_PROT_WRITE) != 0) {
1589                         data |= TD_SW;
1590                         vm_page_flag_set(m, PG_WRITEABLE);
1591                 }
1592                 if (prot & VM_PROT_EXECUTE) {
1593                         data |= TD_EXEC;
1594                         icache_page_inval(pa);
1595                 }
1596
1597                 /*
1598          * If it's wired, update stats.  We also don't need reference or
1599                  * modify tracking for wired mappings, so set the bits now.
1600                  */
1601                 if (wired) {
1602                         pm->pm_stats.wired_count++;
1603                         data |= TD_REF | TD_WIRED;
1604                         if ((prot & VM_PROT_WRITE) != 0)
1605                                 data |= TD_W;
1606                 }
1607
1608                 tsb_tte_enter(pm, m, va, TS_8K, data);
1609         }
1610 }
1611
1612 /*
1613  * Maps a sequence of resident pages belonging to the same object.
1614  * The sequence begins with the given page m_start.  This page is
1615  * mapped at the given virtual address start.  Each subsequent page is
1616  * mapped at a virtual address that is offset from start by the same
1617  * amount as the page is offset from m_start within the object.  The
1618  * last page in the sequence is the page with the largest offset from
1619  * m_start that can be mapped at a virtual address less than the given
1620  * virtual address end.  Not every virtual page between start and end
1621  * is mapped; only those for which a resident page exists with the
1622  * corresponding offset from m_start are mapped.
1623  */
1624 void
1625 pmap_enter_object(pmap_t pm, vm_offset_t start, vm_offset_t end,
1626     vm_page_t m_start, vm_prot_t prot)
1627 {
1628         vm_page_t m;
1629         vm_pindex_t diff, psize;
1630
1631         psize = atop(end - start);
1632         m = m_start;
1633         PMAP_LOCK(pm);
1634         while (m != NULL && (diff = m->pindex - m_start->pindex) < psize) {
1635                 pmap_enter_locked(pm, start + ptoa(diff), m, prot &
1636                     (VM_PROT_READ | VM_PROT_EXECUTE), FALSE);
1637                 m = TAILQ_NEXT(m, listq);
1638         }
1639         PMAP_UNLOCK(pm);
1640 }
1641
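/*
 * Enter a single, unwired mapping with at most read and execute
 * permission.  The caller must hold the page queues lock.
 */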
1642 void
1643 pmap_enter_quick(pmap_t pm, vm_offset_t va, vm_page_t m, vm_prot_t prot)
1644 {
1645
1646         PMAP_LOCK(pm);
1647         pmap_enter_locked(pm, va, m, prot & (VM_PROT_READ | VM_PROT_EXECUTE),
1648             FALSE);
1649         PMAP_UNLOCK(pm);
1650 }
1651
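/*
 * Pre-populate the pmap for a device-backed object.  Nothing is preloaded
 * here; only the object type is asserted.
 */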
1652 void
1653 pmap_object_init_pt(pmap_t pm, vm_offset_t addr, vm_object_t object,
1654     vm_pindex_t pindex, vm_size_t size)
1655 {
1656
1657         VM_OBJECT_LOCK_ASSERT(object, MA_OWNED);
1658         KASSERT(object->type == OBJT_DEVICE || object->type == OBJT_SG,
1659             ("pmap_object_init_pt: non-device object"));
1660 }
1661
1662 /*
1663  * Change the wiring attribute for a map/virtual-address pair.
1664  * The mapping must already exist in the pmap.
1665  */
1666 void
1667 pmap_change_wiring(pmap_t pm, vm_offset_t va, boolean_t wired)
1668 {
1669         struct tte *tp;
1670         u_long data;
1671
1672         PMAP_LOCK(pm);
1673         if ((tp = tsb_tte_lookup(pm, va)) != NULL) {
1674                 if (wired) {
1675                         data = atomic_set_long(&tp->tte_data, TD_WIRED);
1676                         if ((data & TD_WIRED) == 0)
1677                                 pm->pm_stats.wired_count++;
1678                 } else {
1679                         data = atomic_clear_long(&tp->tte_data, TD_WIRED);
1680                         if ((data & TD_WIRED) != 0)
1681                                 pm->pm_stats.wired_count--;
1682                 }
1683         }
1684         PMAP_UNLOCK(pm);
1685 }
1686
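/*
 * tsb_foreach() callback for pmap_copy(): duplicate a source TTE into the
 * destination pmap at the same virtual address, with the TD_PV, TD_REF,
 * TD_SW, TD_CV and TD_W bits cleared.  Fake mappings are skipped.
 * Returns 1 so that the traversal continues.
 */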
1687 static int
1688 pmap_copy_tte(pmap_t src_pmap, pmap_t dst_pmap, struct tte *tp,
1689     vm_offset_t va)
1690 {
1691         vm_page_t m;
1692         u_long data;
1693
1694         if ((tp->tte_data & TD_FAKE) != 0)
1695                 return (1);
1696         if (tsb_tte_lookup(dst_pmap, va) == NULL) {
1697                 data = tp->tte_data &
1698                     ~(TD_PV | TD_REF | TD_SW | TD_CV | TD_W);
1699                 m = PHYS_TO_VM_PAGE(TTE_GET_PA(tp));
1700                 tsb_tte_enter(dst_pmap, m, va, TS_8K, data);
1701         }
1702         return (1);
1703 }
1704
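/*
 * Copy the mappings for the given range from the source pmap to the
 * destination pmap.  Only identity copies (dst_addr == src_addr) are
 * handled; the pmap locks are taken in a consistent order to avoid
 * deadlock.
 */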
1705 void
1706 pmap_copy(pmap_t dst_pmap, pmap_t src_pmap, vm_offset_t dst_addr,
1707     vm_size_t len, vm_offset_t src_addr)
1708 {
1709         struct tte *tp;
1710         vm_offset_t va;
1711
1712         if (dst_addr != src_addr)
1713                 return;
1714         vm_page_lock_queues();
1715         if (dst_pmap < src_pmap) {
1716                 PMAP_LOCK(dst_pmap);
1717                 PMAP_LOCK(src_pmap);
1718         } else {
1719                 PMAP_LOCK(src_pmap);
1720                 PMAP_LOCK(dst_pmap);
1721         }
1722         if (len > PMAP_TSB_THRESH) {
1723                 tsb_foreach(src_pmap, dst_pmap, src_addr, src_addr + len,
1724                     pmap_copy_tte);
1725                 tlb_context_demap(dst_pmap);
1726         } else {
1727                 for (va = src_addr; va < src_addr + len; va += PAGE_SIZE)
1728                         if ((tp = tsb_tte_lookup(src_pmap, va)) != NULL)
1729                                 pmap_copy_tte(src_pmap, dst_pmap, tp, va);
1730                 tlb_range_demap(dst_pmap, src_addr, src_addr + len - 1);
1731         }
1732         vm_page_unlock_queues();
1733         PMAP_UNLOCK(src_pmap);
1734         PMAP_UNLOCK(dst_pmap);
1735 }
1736
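/*
 * Zero a physical page.  If cache coloring can be ignored or the page's
 * virtual color matches its physical color, use the direct map; if the
 * page has no color (-1), use a physical ASI access; otherwise set up a
 * temporary kernel mapping of the matching color.
 */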
1737 void
1738 pmap_zero_page(vm_page_t m)
1739 {
1740         struct tte *tp;
1741         vm_offset_t va;
1742         vm_paddr_t pa;
1743
1744         KASSERT((m->flags & PG_FICTITIOUS) == 0,
1745             ("pmap_zero_page: fake page"));
1746         PMAP_STATS_INC(pmap_nzero_page);
1747         pa = VM_PAGE_TO_PHYS(m);
1748         if (dcache_color_ignore != 0 || m->md.color == DCACHE_COLOR(pa)) {
1749                 PMAP_STATS_INC(pmap_nzero_page_c);
1750                 va = TLB_PHYS_TO_DIRECT(pa);
1751                 cpu_block_zero((void *)va, PAGE_SIZE);
1752         } else if (m->md.color == -1) {
1753                 PMAP_STATS_INC(pmap_nzero_page_nc);
1754                 aszero(ASI_PHYS_USE_EC, pa, PAGE_SIZE);
1755         } else {
1756                 PMAP_STATS_INC(pmap_nzero_page_oc);
1757                 PMAP_LOCK(kernel_pmap);
1758                 va = pmap_temp_map_1 + (m->md.color * PAGE_SIZE);
1759                 tp = tsb_kvtotte(va);
1760                 tp->tte_data = TD_V | TD_8K | TD_PA(pa) | TD_CP | TD_CV | TD_W;
1761                 tp->tte_vpn = TV_VPN(va, TS_8K);
1762                 cpu_block_zero((void *)va, PAGE_SIZE);
1763                 tlb_page_demap(kernel_pmap, va);
1764                 PMAP_UNLOCK(kernel_pmap);
1765         }
1766 }
1767
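/*
 * Zero a sub-range of a physical page, using the same cache color
 * handling as pmap_zero_page().
 */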
1768 void
1769 pmap_zero_page_area(vm_page_t m, int off, int size)
1770 {
1771         struct tte *tp;
1772         vm_offset_t va;
1773         vm_paddr_t pa;
1774
1775         KASSERT((m->flags & PG_FICTITIOUS) == 0,
1776             ("pmap_zero_page_area: fake page"));
1777         KASSERT(off + size <= PAGE_SIZE, ("pmap_zero_page_area: bad off/size"));
1778         PMAP_STATS_INC(pmap_nzero_page_area);
1779         pa = VM_PAGE_TO_PHYS(m);
1780         if (dcache_color_ignore != 0 || m->md.color == DCACHE_COLOR(pa)) {
1781                 PMAP_STATS_INC(pmap_nzero_page_area_c);
1782                 va = TLB_PHYS_TO_DIRECT(pa);
1783                 bzero((void *)(va + off), size);
1784         } else if (m->md.color == -1) {
1785                 PMAP_STATS_INC(pmap_nzero_page_area_nc);
1786                 aszero(ASI_PHYS_USE_EC, pa + off, size);
1787         } else {
1788                 PMAP_STATS_INC(pmap_nzero_page_area_oc);
1789                 PMAP_LOCK(kernel_pmap);
1790                 va = pmap_temp_map_1 + (m->md.color * PAGE_SIZE);
1791                 tp = tsb_kvtotte(va);
1792                 tp->tte_data = TD_V | TD_8K | TD_PA(pa) | TD_CP | TD_CV | TD_W;
1793                 tp->tte_vpn = TV_VPN(va, TS_8K);
1794                 bzero((void *)(va + off), size);
1795                 tlb_page_demap(kernel_pmap, va);
1796                 PMAP_UNLOCK(kernel_pmap);
1797         }
1798 }
1799
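/*
 * A variant of pmap_zero_page() that uses the idle map and does not take
 * the kernel pmap lock.
 */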
1800 void
1801 pmap_zero_page_idle(vm_page_t m)
1802 {
1803         struct tte *tp;
1804         vm_offset_t va;
1805         vm_paddr_t pa;
1806
1807         KASSERT((m->flags & PG_FICTITIOUS) == 0,
1808             ("pmap_zero_page_idle: fake page"));
1809         PMAP_STATS_INC(pmap_nzero_page_idle);
1810         pa = VM_PAGE_TO_PHYS(m);
1811         if (dcache_color_ignore != 0 || m->md.color == DCACHE_COLOR(pa)) {
1812                 PMAP_STATS_INC(pmap_nzero_page_idle_c);
1813                 va = TLB_PHYS_TO_DIRECT(pa);
1814                 cpu_block_zero((void *)va, PAGE_SIZE);
1815         } else if (m->md.color == -1) {
1816                 PMAP_STATS_INC(pmap_nzero_page_idle_nc);
1817                 aszero(ASI_PHYS_USE_EC, pa, PAGE_SIZE);
1818         } else {
1819                 PMAP_STATS_INC(pmap_nzero_page_idle_oc);
1820                 va = pmap_idle_map + (m->md.color * PAGE_SIZE);
1821                 tp = tsb_kvtotte(va);
1822                 tp->tte_data = TD_V | TD_8K | TD_PA(pa) | TD_CP | TD_CV | TD_W;
1823                 tp->tte_vpn = TV_VPN(va, TS_8K);
1824                 cpu_block_zero((void *)va, PAGE_SIZE);
1825                 tlb_page_demap(kernel_pmap, va);
1826         }
1827 }
1828
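/*
 * Copy one physical page to another, choosing between the direct map,
 * physical ASI accesses and temporary kernel mappings for each side
 * depending on the cache colors of the source and destination pages.
 */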
1829 void
1830 pmap_copy_page(vm_page_t msrc, vm_page_t mdst)
1831 {
1832         vm_offset_t vdst;
1833         vm_offset_t vsrc;
1834         vm_paddr_t pdst;
1835         vm_paddr_t psrc;
1836         struct tte *tp;
1837
1838         KASSERT((mdst->flags & PG_FICTITIOUS) == 0,
1839             ("pmap_copy_page: fake dst page"));
1840         KASSERT((msrc->flags & PG_FICTITIOUS) == 0,
1841             ("pmap_copy_page: fake src page"));
1842         PMAP_STATS_INC(pmap_ncopy_page);
1843         pdst = VM_PAGE_TO_PHYS(mdst);
1844         psrc = VM_PAGE_TO_PHYS(msrc);
1845         if (dcache_color_ignore != 0 ||
1846             (msrc->md.color == DCACHE_COLOR(psrc) &&
1847             mdst->md.color == DCACHE_COLOR(pdst))) {
1848                 PMAP_STATS_INC(pmap_ncopy_page_c);
1849                 vdst = TLB_PHYS_TO_DIRECT(pdst);
1850                 vsrc = TLB_PHYS_TO_DIRECT(psrc);
1851                 cpu_block_copy((void *)vsrc, (void *)vdst, PAGE_SIZE);
1852         } else if (msrc->md.color == -1 && mdst->md.color == -1) {
1853                 PMAP_STATS_INC(pmap_ncopy_page_nc);
1854                 ascopy(ASI_PHYS_USE_EC, psrc, pdst, PAGE_SIZE);
1855         } else if (msrc->md.color == -1) {
1856                 if (mdst->md.color == DCACHE_COLOR(pdst)) {
1857                         PMAP_STATS_INC(pmap_ncopy_page_dc);
1858                         vdst = TLB_PHYS_TO_DIRECT(pdst);
1859                         ascopyfrom(ASI_PHYS_USE_EC, psrc, (void *)vdst,
1860                             PAGE_SIZE);
1861                 } else {
1862                         PMAP_STATS_INC(pmap_ncopy_page_doc);
1863                         PMAP_LOCK(kernel_pmap);
1864                         vdst = pmap_temp_map_1 + (mdst->md.color * PAGE_SIZE);
1865                         tp = tsb_kvtotte(vdst);
1866                         tp->tte_data =
1867                             TD_V | TD_8K | TD_PA(pdst) | TD_CP | TD_CV | TD_W;
1868                         tp->tte_vpn = TV_VPN(vdst, TS_8K);
1869                         ascopyfrom(ASI_PHYS_USE_EC, psrc, (void *)vdst,
1870                             PAGE_SIZE);
1871                         tlb_page_demap(kernel_pmap, vdst);
1872                         PMAP_UNLOCK(kernel_pmap);
1873                 }
1874         } else if (mdst->md.color == -1) {
1875                 if (msrc->md.color == DCACHE_COLOR(psrc)) {
1876                         PMAP_STATS_INC(pmap_ncopy_page_sc);
1877                         vsrc = TLB_PHYS_TO_DIRECT(psrc);
1878                         ascopyto((void *)vsrc, ASI_PHYS_USE_EC, pdst,
1879                             PAGE_SIZE);
1880                 } else {
1881                         PMAP_STATS_INC(pmap_ncopy_page_soc);
1882                         PMAP_LOCK(kernel_pmap);
1883                         vsrc = pmap_temp_map_1 + (msrc->md.color * PAGE_SIZE);
1884                         tp = tsb_kvtotte(vsrc);
1885                         tp->tte_data =
1886                             TD_V | TD_8K | TD_PA(psrc) | TD_CP | TD_CV | TD_W;
1887                         tp->tte_vpn = TV_VPN(vsrc, TS_8K);
1888                         ascopyto((void *)vsrc, ASI_PHYS_USE_EC, pdst,
1889                             PAGE_SIZE);
1890                         tlb_page_demap(kernel_pmap, vsrc);
1891                         PMAP_UNLOCK(kernel_pmap);
1892                 }
1893         } else {
1894                 PMAP_STATS_INC(pmap_ncopy_page_oc);
1895                 PMAP_LOCK(kernel_pmap);
1896                 vdst = pmap_temp_map_1 + (mdst->md.color * PAGE_SIZE);
1897                 tp = tsb_kvtotte(vdst);
1898                 tp->tte_data =
1899                     TD_V | TD_8K | TD_PA(pdst) | TD_CP | TD_CV | TD_W;
1900                 tp->tte_vpn = TV_VPN(vdst, TS_8K);
1901                 vsrc = pmap_temp_map_2 + (msrc->md.color * PAGE_SIZE);
1902                 tp = tsb_kvtotte(vsrc);
1903                 tp->tte_data =
1904                     TD_V | TD_8K | TD_PA(psrc) | TD_CP | TD_CV | TD_W;
1905                 tp->tte_vpn = TV_VPN(vsrc, TS_8K);
1906                 cpu_block_copy((void *)vsrc, (void *)vdst, PAGE_SIZE);
1907                 tlb_page_demap(kernel_pmap, vdst);
1908                 tlb_page_demap(kernel_pmap, vsrc);
1909                 PMAP_UNLOCK(kernel_pmap);
1910         }
1911 }
1912
1913 /*
1914  * Returns true if the pmap's pv is one of the first
1915  * 16 pvs linked to from this page.  This count may
1916  * be changed upwards or downwards in the future; it
1917  * is only necessary that true be returned for a small
1918  * subset of pmaps for proper page aging.
1919  */
1920 boolean_t
1921 pmap_page_exists_quick(pmap_t pm, vm_page_t m)
1922 {
1923         struct tte *tp;
1924         int loops;
1925
1926         mtx_assert(&vm_page_queue_mtx, MA_OWNED);
1927         if ((m->flags & (PG_FICTITIOUS | PG_UNMANAGED)) != 0)
1928                 return (FALSE);
1929         loops = 0;
1930         TAILQ_FOREACH(tp, &m->md.tte_list, tte_link) {
1931                 if ((tp->tte_data & TD_PV) == 0)
1932                         continue;
1933                 if (TTE_GET_PMAP(tp) == pm)
1934                         return (TRUE);
1935                 if (++loops >= 16)
1936                         break;
1937         }
1938         return (FALSE);
1939 }
1940
1941 /*
1942  * Return the number of managed mappings to the given physical page
1943  * that are wired.
1944  */
1945 int
1946 pmap_page_wired_mappings(vm_page_t m)
1947 {
1948         struct tte *tp;
1949         int count;
1950
1951         count = 0;
1952         if ((m->flags & PG_FICTITIOUS) != 0)
1953                 return (count);
1954         mtx_assert(&vm_page_queue_mtx, MA_OWNED);
1955         TAILQ_FOREACH(tp, &m->md.tte_list, tte_link)
1956                 if ((tp->tte_data & (TD_PV | TD_WIRED)) == (TD_PV | TD_WIRED))
1957                         count++;
1958         return (count);
1959 }
1960
1961 /*
1962  * Remove all pages from the specified address space; this aids process exit
1963  * speeds.  This is much faster than pmap_remove in the case of running down
1964  * an entire address space.  Only works for the current pmap.
1965  */
1966 void
1967 pmap_remove_pages(pmap_t pm)
1968 {
1969
1970 }
1971
1972 /*
1973  * Returns TRUE if the given page has a managed mapping.
1974  */
1975 boolean_t
1976 pmap_page_is_mapped(vm_page_t m)
1977 {
1978         struct tte *tp;
1979
1980         if ((m->flags & (PG_FICTITIOUS | PG_UNMANAGED)) != 0)
1981                 return (FALSE);
1982         mtx_assert(&vm_page_queue_mtx, MA_OWNED);
1983         TAILQ_FOREACH(tp, &m->md.tte_list, tte_link)
1984                 if ((tp->tte_data & TD_PV) != 0)
1985                         return (TRUE);
1986         return (FALSE);
1987 }
1988
1989 /*
1990  * Return a count of reference bits for a page, clearing those bits.
1991  * It is not necessary for every reference bit to be cleared, but it
1992  * is necessary that 0 only be returned when there are truly no
1993  * reference bits set.
1994  *
1995  * XXX: The exact number of bits to check and clear is a matter that
1996  * should be tested and standardized at some point in the future for
1997  * optimal aging of shared pages.
1998  */
1999 int
2000 pmap_ts_referenced(vm_page_t m)
2001 {
2002         struct tte *tpf;
2003         struct tte *tpn;
2004         struct tte *tp;
2005         u_long data;
2006         int count;
2007
2008         mtx_assert(&vm_page_queue_mtx, MA_OWNED);
2009         if ((m->flags & (PG_FICTITIOUS | PG_UNMANAGED)) != 0)
2010                 return (0);
2011         count = 0;
2012         if ((tp = TAILQ_FIRST(&m->md.tte_list)) != NULL) {
2013                 tpf = tp;
2014                 do {
2015                         tpn = TAILQ_NEXT(tp, tte_link);
2016                         TAILQ_REMOVE(&m->md.tte_list, tp, tte_link);
2017                         TAILQ_INSERT_TAIL(&m->md.tte_list, tp, tte_link);
2018                         if ((tp->tte_data & TD_PV) == 0)
2019                                 continue;
2020                         data = atomic_clear_long(&tp->tte_data, TD_REF);
2021                         if ((data & TD_REF) != 0 && ++count > 4)
2022                                 break;
2023                 } while ((tp = tpn) != NULL && tp != tpf);
2024         }
2025         return (count);
2026 }
2027
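/*
 * Return TRUE if any managed mapping of the page has the modified (TD_W)
 * bit set, FALSE otherwise.
 */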
2028 boolean_t
2029 pmap_is_modified(vm_page_t m)
2030 {
2031         struct tte *tp;
2032
2033         mtx_assert(&vm_page_queue_mtx, MA_OWNED);
2034         if ((m->flags & (PG_FICTITIOUS | PG_UNMANAGED)) != 0)
2035                 return (FALSE);
2036         TAILQ_FOREACH(tp, &m->md.tte_list, tte_link) {
2037                 if ((tp->tte_data & TD_PV) == 0)
2038                         continue;
2039                 if ((tp->tte_data & TD_W) != 0)
2040                         return (TRUE);
2041         }
2042         return (FALSE);
2043 }
2044
2045 /*
2046  *      pmap_is_prefaultable:
2047  *
2048  *      Return whether or not the specified virtual address is eligible
2049  *      for prefault.
2050  */
2051 boolean_t
2052 pmap_is_prefaultable(pmap_t pmap, vm_offset_t addr)
2053 {
2054         boolean_t rv;
2055
2056         PMAP_LOCK(pmap);
2057         rv = tsb_tte_lookup(pmap, addr) == NULL;
2058         PMAP_UNLOCK(pmap);
2059         return (rv);
2060 }
2061
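/*
 * Clear the modified (TD_W) bit in every managed mapping of the page,
 * demapping the TLB entries of those that had it set.
 */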
2062 void
2063 pmap_clear_modify(vm_page_t m)
2064 {
2065         struct tte *tp;
2066         u_long data;
2067
2068         mtx_assert(&vm_page_queue_mtx, MA_OWNED);
2069         if ((m->flags & (PG_FICTITIOUS | PG_UNMANAGED)) != 0)
2070                 return;
2071         TAILQ_FOREACH(tp, &m->md.tte_list, tte_link) {
2072                 if ((tp->tte_data & TD_PV) == 0)
2073                         continue;
2074                 data = atomic_clear_long(&tp->tte_data, TD_W);
2075                 if ((data & TD_W) != 0)
2076                         tlb_page_demap(TTE_GET_PMAP(tp), TTE_GET_VA(tp));
2077         }
2078 }
2079
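/*
 * Clear the referenced (TD_REF) bit in every managed mapping of the page,
 * demapping the TLB entries of those that had it set.
 */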
2080 void
2081 pmap_clear_reference(vm_page_t m)
2082 {
2083         struct tte *tp;
2084         u_long data;
2085
2086         mtx_assert(&vm_page_queue_mtx, MA_OWNED);
2087         if ((m->flags & (PG_FICTITIOUS | PG_UNMANAGED)) != 0)
2088                 return;
2089         TAILQ_FOREACH(tp, &m->md.tte_list, tte_link) {
2090                 if ((tp->tte_data & TD_PV) == 0)
2091                         continue;
2092                 data = atomic_clear_long(&tp->tte_data, TD_REF);
2093                 if ((data & TD_REF) != 0)
2094                         tlb_page_demap(TTE_GET_PMAP(tp), TTE_GET_VA(tp));
2095         }
2096 }
2097
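/*
 * Revoke write access to the page: clear the software and hardware write
 * bits in every managed mapping, dirty the page if it was modified, and
 * finally clear PG_WRITEABLE.
 */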
2098 void
2099 pmap_remove_write(vm_page_t m)
2100 {
2101         struct tte *tp;
2102         u_long data;
2103
2104         mtx_assert(&vm_page_queue_mtx, MA_OWNED);
2105         if ((m->flags & (PG_FICTITIOUS | PG_UNMANAGED)) != 0 ||
2106             (m->flags & PG_WRITEABLE) == 0)
2107                 return;
2108         TAILQ_FOREACH(tp, &m->md.tte_list, tte_link) {
2109                 if ((tp->tte_data & TD_PV) == 0)
2110                         continue;
2111                 data = atomic_clear_long(&tp->tte_data, TD_SW | TD_W);
2112                 if ((data & TD_W) != 0) {
2113                         vm_page_dirty(m);
2114                         tlb_page_demap(TTE_GET_PMAP(tp), TTE_GET_VA(tp));
2115                 }
2116         }
2117         vm_page_flag_clear(m, PG_WRITEABLE);
2118 }
2119
2120 int
2121 pmap_mincore(pmap_t pm, vm_offset_t addr)
2122 {
2123
2124         /* TODO; */
2125         return (0);
2126 }
2127
2128 /*
2129  * Activate a user pmap.  The pmap must be activated before its address space
2130  * can be accessed in any way.
2131  */
2132 void
2133 pmap_activate(struct thread *td)
2134 {
2135         struct vmspace *vm;
2136         struct pmap *pm;
2137         int context;
2138
2139         vm = td->td_proc->p_vmspace;
2140         pm = vmspace_pmap(vm);
2141
2142         mtx_lock_spin(&sched_lock);
2143
2144         context = PCPU_GET(tlb_ctx);
2145         if (context == PCPU_GET(tlb_ctx_max)) {
2146                 tlb_flush_user();
2147                 context = PCPU_GET(tlb_ctx_min);
2148         }
2149         PCPU_SET(tlb_ctx, context + 1);
2150
2151         pm->pm_context[curcpu] = context;
2152         pm->pm_active |= PCPU_GET(cpumask);
2153         PCPU_SET(pmap, pm);
2154
2155         stxa(AA_DMMU_TSB, ASI_DMMU, pm->pm_tsb);
2156         stxa(AA_IMMU_TSB, ASI_IMMU, pm->pm_tsb);
2157         stxa(AA_DMMU_PCXR, ASI_DMMU, (ldxa(AA_DMMU_PCXR, ASI_DMMU) &
2158             TLB_CXR_PGSZ_MASK) | context);
2159         flush(KERNBASE);
2160
2161         mtx_unlock_spin(&sched_lock);
2162 }
2163
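/*
 * Synchronize the instruction cache for the given range.  Nothing is done
 * here; executable mappings already have the page's I-cache invalidated
 * in pmap_enter_locked().
 */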
2164 void
2165 pmap_sync_icache(pmap_t pm, vm_offset_t va, vm_size_t sz)
2166 {
2167
2168 }
2169
2170 /*
2171  * Increase the starting virtual address of the given mapping if a
2172  * different alignment might result in more superpage mappings.
2173  */
2174 void
2175 pmap_align_superpage(vm_object_t object, vm_ooffset_t offset,
2176     vm_offset_t *addr, vm_size_t size)
2177 {
2178
2179 }