1 /*-
2  * Copyright (c) 1991 Regents of the University of California.
3  * All rights reserved.
4  * Copyright (c) 1994 John S. Dyson
5  * All rights reserved.
6  * Copyright (c) 1994 David Greenman
7  * All rights reserved.
8  *
9  * This code is derived from software contributed to Berkeley by
10  * the Systems Programming Group of the University of Utah Computer
11  * Science Department and William Jolitz of UUNET Technologies Inc.
12  *
13  * Redistribution and use in source and binary forms, with or without
14  * modification, are permitted provided that the following conditions
15  * are met:
16  * 1. Redistributions of source code must retain the above copyright
17  *    notice, this list of conditions and the following disclaimer.
18  * 2. Redistributions in binary form must reproduce the above copyright
19  *    notice, this list of conditions and the following disclaimer in the
20  *    documentation and/or other materials provided with the distribution.
21  * 4. Neither the name of the University nor the names of its contributors
22  *    may be used to endorse or promote products derived from this software
23  *    without specific prior written permission.
24  *
25  * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
26  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
27  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
28  * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
29  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
30  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
31  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
32  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
33  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
34  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
35  * SUCH DAMAGE.
36  *
37  *      from:   @(#)pmap.c      7.7 (Berkeley)  5/12/91
38  */
39
40 #include <sys/cdefs.h>
41 __FBSDID("$FreeBSD$");
42
43 /*
44  * Manages physical address maps.
45  *
46  * In addition to hardware address maps, this module is called upon to
47  * provide software-use-only maps which may or may not be stored in the
48  * same form as hardware maps.  These pseudo-maps are used to store
49  * intermediate results from copy operations to and from address spaces.
50  *
51  * Since the information managed by this module is also stored by the
52  * logical address mapping module, this module may throw away valid virtual
53  * to physical mappings at almost any time.  However, invalidations of
54  * mappings must be done as requested.
55  *
56  * In order to cope with hardware architectures which make virtual to
57  * physical map invalidates expensive, this module may delay invalidate
58  * or reduced protection operations until such time as they are actually
59  * necessary.  This module is given full information as to which processors
60  * are currently using which maps, and to when physical maps must be made
61  * correct.
62  */
63
64 #include "opt_kstack_pages.h"
65 #include "opt_pmap.h"
66
67 #include <sys/param.h>
68 #include <sys/kernel.h>
69 #include <sys/ktr.h>
70 #include <sys/lock.h>
71 #include <sys/msgbuf.h>
72 #include <sys/mutex.h>
73 #include <sys/proc.h>
74 #include <sys/smp.h>
75 #include <sys/sysctl.h>
76 #include <sys/systm.h>
77 #include <sys/vmmeter.h>
78
79 #include <dev/ofw/openfirm.h>
80
81 #include <vm/vm.h>
82 #include <vm/vm_param.h>
83 #include <vm/vm_kern.h>
84 #include <vm/vm_page.h>
85 #include <vm/vm_map.h>
86 #include <vm/vm_object.h>
87 #include <vm/vm_extern.h>
88 #include <vm/vm_pageout.h>
89 #include <vm/vm_pager.h>
90
91 #include <machine/cache.h>
92 #include <machine/frame.h>
93 #include <machine/instr.h>
94 #include <machine/md_var.h>
95 #include <machine/metadata.h>
96 #include <machine/ofw_mem.h>
97 #include <machine/smp.h>
98 #include <machine/tlb.h>
99 #include <machine/tte.h>
100 #include <machine/tsb.h>
101 #include <machine/ver.h>
102
103 #define PMAP_DEBUG
104
105 #ifndef PMAP_SHPGPERPROC
106 #define PMAP_SHPGPERPROC        200
107 #endif
108
109 /* XXX */
110 #include "opt_sched.h"
111 #ifndef SCHED_4BSD
112 #error "sparc64 only works with SCHED_4BSD which uses a global scheduler lock."
113 #endif
114 extern struct mtx sched_lock;
115
116 /*
117  * Virtual address of message buffer
118  */
119 struct msgbuf *msgbufp;
120
121 /*
122  * Map of physical memory regions
123  */
124 vm_paddr_t phys_avail[128];
125 static struct ofw_mem_region mra[128];
126 struct ofw_mem_region sparc64_memreg[128];
127 int sparc64_nmemreg;
128 static struct ofw_map translations[128];
129 static int translations_size;
130
131 static vm_offset_t pmap_idle_map;
132 static vm_offset_t pmap_temp_map_1;
133 static vm_offset_t pmap_temp_map_2;
134
135 /*
136  * First and last available kernel virtual addresses
137  */
138 vm_offset_t virtual_avail;
139 vm_offset_t virtual_end;
140 vm_offset_t kernel_vm_end;
141
142 vm_offset_t vm_max_kernel_address;
143
144 /*
145  * Kernel pmap
146  */
147 struct pmap kernel_pmap_store;
148
149 /*
150  * Allocate physical memory for use in pmap_bootstrap.
151  */
152 static vm_paddr_t pmap_bootstrap_alloc(vm_size_t size, uint32_t colors);
153
154 static void pmap_bootstrap_set_tte(struct tte *tp, u_long vpn, u_long data);
155 static void pmap_cache_remove(vm_page_t m, vm_offset_t va);
156 static int pmap_protect_tte(struct pmap *pm1, struct pmap *pm2,
157     struct tte *tp, vm_offset_t va);
158
159 /*
160  * Map the given physical page at the specified virtual address in the
161  * target pmap with the protection requested.  If specified the page
162  * will be wired down.
163  *
164  * The page queues and pmap must be locked.
165  */
166 static void pmap_enter_locked(pmap_t pm, vm_offset_t va, vm_page_t m,
167     vm_prot_t prot, boolean_t wired);
168
169 extern int tl1_dmmu_miss_direct_patch_tsb_phys_1[];
170 extern int tl1_dmmu_miss_direct_patch_tsb_phys_end_1[];
171 extern int tl1_dmmu_miss_patch_asi_1[];
172 extern int tl1_dmmu_miss_patch_quad_ldd_1[];
173 extern int tl1_dmmu_miss_patch_tsb_1[];
174 extern int tl1_dmmu_miss_patch_tsb_2[];
175 extern int tl1_dmmu_miss_patch_tsb_mask_1[];
176 extern int tl1_dmmu_miss_patch_tsb_mask_2[];
177 extern int tl1_dmmu_prot_patch_asi_1[];
178 extern int tl1_dmmu_prot_patch_quad_ldd_1[];
179 extern int tl1_dmmu_prot_patch_tsb_1[];
180 extern int tl1_dmmu_prot_patch_tsb_2[];
181 extern int tl1_dmmu_prot_patch_tsb_mask_1[];
182 extern int tl1_dmmu_prot_patch_tsb_mask_2[];
183 extern int tl1_immu_miss_patch_asi_1[];
184 extern int tl1_immu_miss_patch_quad_ldd_1[];
185 extern int tl1_immu_miss_patch_tsb_1[];
186 extern int tl1_immu_miss_patch_tsb_2[];
187 extern int tl1_immu_miss_patch_tsb_mask_1[];
188 extern int tl1_immu_miss_patch_tsb_mask_2[];
189
190 /*
191  * If user pmap is processed with pmap_remove and the
192  * resident count drops to 0, there are no more pages to remove, so we
193  * need not continue.
194  */
195 #define PMAP_REMOVE_DONE(pm) \
196         ((pm) != kernel_pmap && (pm)->pm_stats.resident_count == 0)
197
198 /*
199  * The threshold (in bytes) above which tsb_foreach() is used in pmap_remove()
200  * and pmap_protect() instead of trying each virtual address.
201  */
202 #define PMAP_TSB_THRESH ((TSB_SIZE / 2) * PAGE_SIZE)
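/*
 * (Roughly: each TSB entry maps one base page, so a range larger than this
 * threshold would touch more entries than half the TSB holds, at which point
 * a single tsb_foreach() sweep is expected to be cheaper than a per-page
 * lookup.)
 */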
203
204 SYSCTL_NODE(_debug, OID_AUTO, pmap_stats, CTLFLAG_RD, 0, "");
205
206 PMAP_STATS_VAR(pmap_nenter);
207 PMAP_STATS_VAR(pmap_nenter_update);
208 PMAP_STATS_VAR(pmap_nenter_replace);
209 PMAP_STATS_VAR(pmap_nenter_new);
210 PMAP_STATS_VAR(pmap_nkenter);
211 PMAP_STATS_VAR(pmap_nkenter_oc);
212 PMAP_STATS_VAR(pmap_nkenter_stupid);
213 PMAP_STATS_VAR(pmap_nkremove);
214 PMAP_STATS_VAR(pmap_nqenter);
215 PMAP_STATS_VAR(pmap_nqremove);
216 PMAP_STATS_VAR(pmap_ncache_enter);
217 PMAP_STATS_VAR(pmap_ncache_enter_c);
218 PMAP_STATS_VAR(pmap_ncache_enter_oc);
219 PMAP_STATS_VAR(pmap_ncache_enter_cc);
220 PMAP_STATS_VAR(pmap_ncache_enter_coc);
221 PMAP_STATS_VAR(pmap_ncache_enter_nc);
222 PMAP_STATS_VAR(pmap_ncache_enter_cnc);
223 PMAP_STATS_VAR(pmap_ncache_remove);
224 PMAP_STATS_VAR(pmap_ncache_remove_c);
225 PMAP_STATS_VAR(pmap_ncache_remove_oc);
226 PMAP_STATS_VAR(pmap_ncache_remove_cc);
227 PMAP_STATS_VAR(pmap_ncache_remove_coc);
228 PMAP_STATS_VAR(pmap_ncache_remove_nc);
229 PMAP_STATS_VAR(pmap_nzero_page);
230 PMAP_STATS_VAR(pmap_nzero_page_c);
231 PMAP_STATS_VAR(pmap_nzero_page_oc);
232 PMAP_STATS_VAR(pmap_nzero_page_nc);
233 PMAP_STATS_VAR(pmap_nzero_page_area);
234 PMAP_STATS_VAR(pmap_nzero_page_area_c);
235 PMAP_STATS_VAR(pmap_nzero_page_area_oc);
236 PMAP_STATS_VAR(pmap_nzero_page_area_nc);
237 PMAP_STATS_VAR(pmap_nzero_page_idle);
238 PMAP_STATS_VAR(pmap_nzero_page_idle_c);
239 PMAP_STATS_VAR(pmap_nzero_page_idle_oc);
240 PMAP_STATS_VAR(pmap_nzero_page_idle_nc);
241 PMAP_STATS_VAR(pmap_ncopy_page);
242 PMAP_STATS_VAR(pmap_ncopy_page_c);
243 PMAP_STATS_VAR(pmap_ncopy_page_oc);
244 PMAP_STATS_VAR(pmap_ncopy_page_nc);
245 PMAP_STATS_VAR(pmap_ncopy_page_dc);
246 PMAP_STATS_VAR(pmap_ncopy_page_doc);
247 PMAP_STATS_VAR(pmap_ncopy_page_sc);
248 PMAP_STATS_VAR(pmap_ncopy_page_soc);
249
250 PMAP_STATS_VAR(pmap_nnew_thread);
251 PMAP_STATS_VAR(pmap_nnew_thread_oc);
252
253 static inline u_long dtlb_get_data(u_int tlb, u_int slot);
254
255 /*
256  * Quick sort callout for comparing memory regions
257  */
258 static int mr_cmp(const void *a, const void *b);
259 static int om_cmp(const void *a, const void *b);
260
261 static int
262 mr_cmp(const void *a, const void *b)
263 {
264         const struct ofw_mem_region *mra;
265         const struct ofw_mem_region *mrb;
266
267         mra = a;
268         mrb = b;
269         if (mra->mr_start < mrb->mr_start)
270                 return (-1);
271         else if (mra->mr_start > mrb->mr_start)
272                 return (1);
273         else
274                 return (0);
275 }
276
277 static int
278 om_cmp(const void *a, const void *b)
279 {
280         const struct ofw_map *oma;
281         const struct ofw_map *omb;
282
283         oma = a;
284         omb = b;
285         if (oma->om_start < omb->om_start)
286                 return (-1);
287         else if (oma->om_start > omb->om_start)
288                 return (1);
289         else
290                 return (0);
291 }
292
293 static inline u_long
294 dtlb_get_data(u_int tlb, u_int slot)
295 {
296         u_long data;
297         register_t s;
298
299         slot = TLB_DAR_SLOT(tlb, slot);
300         /*
301          * We read ASI_DTLB_DATA_ACCESS_REG twice back-to-back in order to
302          * work around errata of USIII and beyond.
303          */
304         s = intr_disable();
305         (void)ldxa(slot, ASI_DTLB_DATA_ACCESS_REG);
306         data = ldxa(slot, ASI_DTLB_DATA_ACCESS_REG);
307         intr_restore(s);
308         return (data);
309 }
310
311 /*
312  * Bootstrap the system enough to run with virtual memory.
313  */
314 void
315 pmap_bootstrap(u_int cpu_impl)
316 {
317         struct pmap *pm;
318         struct tte *tp;
319         vm_offset_t off;
320         vm_offset_t va;
321         vm_paddr_t pa;
322         vm_size_t physsz;
323         vm_size_t virtsz;
324         u_long data;
325         u_long vpn;
326         phandle_t pmem;
327         phandle_t vmem;
328         u_int dtlb_slots_avail;
329         int i;
330         int j;
331         int sz;
332         uint32_t asi;
333         uint32_t colors;
334         uint32_t ldd;
335
336         /*
337          * Set the kernel context.
338          */
339         pmap_set_kctx();
340
341         colors = dcache_color_ignore != 0 ? 1 : DCACHE_COLORS;
342
343         /*
344          * Find out what physical memory is available from the PROM and
345          * initialize the phys_avail array.  This must be done before
346          * pmap_bootstrap_alloc is called.
347          */
348         if ((pmem = OF_finddevice("/memory")) == -1)
349                 panic("pmap_bootstrap: finddevice /memory");
350         if ((sz = OF_getproplen(pmem, "available")) == -1)
351                 panic("pmap_bootstrap: getproplen /memory/available");
352         if (sizeof(phys_avail) < sz)
353                 panic("pmap_bootstrap: phys_avail too small");
354         if (sizeof(mra) < sz)
355                 panic("pmap_bootstrap: mra too small");
356         bzero(mra, sz);
357         if (OF_getprop(pmem, "available", mra, sz) == -1)
358                 panic("pmap_bootstrap: getprop /memory/available");
359         sz /= sizeof(*mra);
360         CTR0(KTR_PMAP, "pmap_bootstrap: physical memory");
361         qsort(mra, sz, sizeof (*mra), mr_cmp);
362         physsz = 0;
363         getenv_quad("hw.physmem", &physmem);
364         physmem = btoc(physmem);
365         for (i = 0, j = 0; i < sz; i++, j += 2) {
366                 CTR2(KTR_PMAP, "start=%#lx size=%#lx", mra[i].mr_start,
367                     mra[i].mr_size);
368                 if (physmem != 0 && btoc(physsz + mra[i].mr_size) >= physmem) {
369                         if (btoc(physsz) < physmem) {
370                                 phys_avail[j] = mra[i].mr_start;
371                                 phys_avail[j + 1] = mra[i].mr_start +
372                                     (ctob(physmem) - physsz);
373                                 physsz = ctob(physmem);
374                         }
375                         break;
376                 }
377                 phys_avail[j] = mra[i].mr_start;
378                 phys_avail[j + 1] = mra[i].mr_start + mra[i].mr_size;
379                 physsz += mra[i].mr_size;
380         }
381         physmem = btoc(physsz);
382
383         /*
384          * Calculate the size of kernel virtual memory, and the size and mask
385  * for the kernel TSB based on the physical memory size but limited
386          * by the amount of dTLB slots available for locked entries if we have
387          * to lock the TSB in the TLB (given that for spitfire-class CPUs all
388          * of the dt64 slots can hold locked entries but there is no large
389          * dTLB for unlocked ones, we don't use more than half of it for the
390          * TSB).
391          * Note that for reasons unknown OpenSolaris doesn't take advantage of
392          * ASI_ATOMIC_QUAD_LDD_PHYS on UltraSPARC-III.  However, given that no
393          * public documentation is available for these, the latter just might
394          * not support it, yet.
395          */
396         if (cpu_impl == CPU_IMPL_SPARC64V ||
397             cpu_impl >= CPU_IMPL_ULTRASPARCIIIp) {
398                 tsb_kernel_ldd_phys = 1;
399                 virtsz = roundup(physsz * 5 / 3, PAGE_SIZE_4M <<
400                     (PAGE_SHIFT - TTE_SHIFT));
401         } else {
402                 dtlb_slots_avail = 0;
403                 for (i = 0; i < dtlb_slots; i++) {
404                         data = dtlb_get_data(cpu_impl ==
405                             CPU_IMPL_ULTRASPARCIII ? TLB_DAR_T16 :
406                             TLB_DAR_T32, i);
407                         if ((data & (TD_V | TD_L)) != (TD_V | TD_L))
408                                 dtlb_slots_avail++;
409                 }
410 #ifdef SMP
411                 dtlb_slots_avail -= PCPU_PAGES;
412 #endif
413                 if (cpu_impl >= CPU_IMPL_ULTRASPARCI &&
414                     cpu_impl < CPU_IMPL_ULTRASPARCIII)
415                         dtlb_slots_avail /= 2;
416                 virtsz = roundup(physsz, PAGE_SIZE_4M <<
417                     (PAGE_SHIFT - TTE_SHIFT));
418                 virtsz = MIN(virtsz, (dtlb_slots_avail * PAGE_SIZE_4M) <<
419                     (PAGE_SHIFT - TTE_SHIFT));
420         }
421         vm_max_kernel_address = VM_MIN_KERNEL_ADDRESS + virtsz;
422         tsb_kernel_size = virtsz >> (PAGE_SHIFT - TTE_SHIFT);
423         tsb_kernel_mask = (tsb_kernel_size >> TTE_SHIFT) - 1;
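        /*
         * A worked example of the arithmetic above, assuming the usual 8KB
         * base page and 16-byte TTE (PAGE_SHIFT 13, TTE_SHIFT 4): one TTE is
         * kept per 8KB of KVA, so the TSB occupies virtsz / 512 bytes, and
         * tsb_kernel_mask is the matching index mask in units of TTEs.
         */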
424
425         /*
426          * Allocate the kernel TSB and lock it in the TLB if necessary.
427          */
428         pa = pmap_bootstrap_alloc(tsb_kernel_size, colors);
429         if (pa & PAGE_MASK_4M)
430                 panic("pmap_bootstrap: TSB unaligned\n");
431         tsb_kernel_phys = pa;
432         if (tsb_kernel_ldd_phys == 0) {
433                 tsb_kernel =
434                     (struct tte *)(VM_MIN_KERNEL_ADDRESS - tsb_kernel_size);
435                 pmap_map_tsb();
436                 bzero(tsb_kernel, tsb_kernel_size);
437         } else {
438                 tsb_kernel =
439                     (struct tte *)TLB_PHYS_TO_DIRECT(tsb_kernel_phys);
440                 aszero(ASI_PHYS_USE_EC, tsb_kernel_phys, tsb_kernel_size);
441         }
442
443         /*
444          * Allocate and map the dynamic per-CPU area for the BSP.
445          */
446         pa = pmap_bootstrap_alloc(DPCPU_SIZE, colors);
447         dpcpu0 = (void *)TLB_PHYS_TO_DIRECT(pa);
448
449         /*
450          * Allocate and map the message buffer.
451          */
452         pa = pmap_bootstrap_alloc(msgbufsize, colors);
453         msgbufp = (struct msgbuf *)TLB_PHYS_TO_DIRECT(pa);
454
455         /*
456          * Patch the TSB addresses and mask as well as the ASIs used to load
457          * it into the trap table.
458          */
459
460 #define LDDA_R_I_R(rd, imm_asi, rs1, rs2)                               \
461         (EIF_OP(IOP_LDST) | EIF_F3_RD(rd) | EIF_F3_OP3(INS3_LDDA) |     \
462             EIF_F3_RS1(rs1) | EIF_F3_I(0) | EIF_F3_IMM_ASI(imm_asi) |   \
463             EIF_F3_RS2(rs2))
464 #define OR_R_I_R(rd, imm13, rs1)                                        \
465         (EIF_OP(IOP_MISC) | EIF_F3_RD(rd) | EIF_F3_OP3(INS2_OR) |       \
466             EIF_F3_RS1(rs1) | EIF_F3_I(1) | EIF_IMM(imm13, 13))
467 #define SETHI(rd, imm22)                                                \
468         (EIF_OP(IOP_FORM2) | EIF_F2_RD(rd) | EIF_F2_OP2(INS0_SETHI) |   \
469             EIF_IMM((imm22) >> 10, 22))
470 #define WR_R_I(rd, imm13, rs1)                                          \
471         (EIF_OP(IOP_MISC) | EIF_F3_RD(rd) | EIF_F3_OP3(INS2_WR) |       \
472             EIF_F3_RS1(rs1) | EIF_F3_I(1) | EIF_IMM(imm13, 13))
473
474 #define PATCH_ASI(addr, asi) do {                                       \
475         if (addr[0] != WR_R_I(IF_F3_RD(addr[0]), 0x0,                   \
476             IF_F3_RS1(addr[0])))                                        \
477                 panic("%s: patched instructions have changed",          \
478                     __func__);                                          \
479         addr[0] |= EIF_IMM((asi), 13);                                  \
480         flush(addr);                                                    \
481 } while (0)
482
483 #define PATCH_LDD(addr, asi) do {                                       \
484         if (addr[0] != LDDA_R_I_R(IF_F3_RD(addr[0]), 0x0,               \
485             IF_F3_RS1(addr[0]), IF_F3_RS2(addr[0])))                    \
486                 panic("%s: patched instructions have changed",          \
487                     __func__);                                          \
488         addr[0] |= EIF_F3_IMM_ASI(asi);                                 \
489         flush(addr);                                                    \
490 } while (0)
491
492 #define PATCH_TSB(addr, val) do {                                       \
493         if (addr[0] != SETHI(IF_F2_RD(addr[0]), 0x0) ||                 \
494             addr[1] != OR_R_I_R(IF_F3_RD(addr[1]), 0x0,                 \
495             IF_F3_RS1(addr[1])) ||                                      \
496             addr[3] != SETHI(IF_F2_RD(addr[3]), 0x0))                   \
497                 panic("%s: patched instructions have changed",          \
498                     __func__);                                          \
499         addr[0] |= EIF_IMM((val) >> 42, 22);                            \
500         addr[1] |= EIF_IMM((val) >> 32, 10);                            \
501         addr[3] |= EIF_IMM((val) >> 10, 22);                            \
502         flush(addr);                                                    \
503         flush(addr + 1);                                                \
504         flush(addr + 3);                                                \
505 } while (0)
506
507 #define PATCH_TSB_MASK(addr, val) do {                                  \
508         if (addr[0] != SETHI(IF_F2_RD(addr[0]), 0x0) ||                 \
509             addr[1] != OR_R_I_R(IF_F3_RD(addr[1]), 0x0,                 \
510             IF_F3_RS1(addr[1])))                                        \
511                 panic("%s: patched instructions have changed",          \
512                     __func__);                                          \
513         addr[0] |= EIF_IMM((val) >> 10, 22);                            \
514         addr[1] |= EIF_IMM((val), 10);                                  \
515         flush(addr);                                                    \
516         flush(addr + 1);                                                \
517 } while (0)
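/*
 * (In the PATCH_TSB case the patched value is scattered across the
 * immediates of a sethi/or/.../sethi sequence: bits 63-42 go into the first
 * sethi, bits 41-32 into the or, bits 31-10 into the second sethi, and bits
 * 9-0 are left untouched.)
 */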
518
519         if (tsb_kernel_ldd_phys == 0) {
520                 asi = ASI_N;
521                 ldd = ASI_NUCLEUS_QUAD_LDD;
522                 off = (vm_offset_t)tsb_kernel;
523         } else {
524                 asi = ASI_PHYS_USE_EC;
525                 ldd = ASI_ATOMIC_QUAD_LDD_PHYS;
526                 off = (vm_offset_t)tsb_kernel_phys;
527         }
528         PATCH_TSB(tl1_dmmu_miss_direct_patch_tsb_phys_1, tsb_kernel_phys);
529         PATCH_TSB(tl1_dmmu_miss_direct_patch_tsb_phys_end_1,
530             tsb_kernel_phys + tsb_kernel_size - 1);
531         PATCH_ASI(tl1_dmmu_miss_patch_asi_1, asi);
532         PATCH_LDD(tl1_dmmu_miss_patch_quad_ldd_1, ldd);
533         PATCH_TSB(tl1_dmmu_miss_patch_tsb_1, off);
534         PATCH_TSB(tl1_dmmu_miss_patch_tsb_2, off);
535         PATCH_TSB_MASK(tl1_dmmu_miss_patch_tsb_mask_1, tsb_kernel_mask);
536         PATCH_TSB_MASK(tl1_dmmu_miss_patch_tsb_mask_2, tsb_kernel_mask);
537         PATCH_ASI(tl1_dmmu_prot_patch_asi_1, asi);
538         PATCH_LDD(tl1_dmmu_prot_patch_quad_ldd_1, ldd);
539         PATCH_TSB(tl1_dmmu_prot_patch_tsb_1, off);
540         PATCH_TSB(tl1_dmmu_prot_patch_tsb_2, off);
541         PATCH_TSB_MASK(tl1_dmmu_prot_patch_tsb_mask_1, tsb_kernel_mask);
542         PATCH_TSB_MASK(tl1_dmmu_prot_patch_tsb_mask_2, tsb_kernel_mask);
543         PATCH_ASI(tl1_immu_miss_patch_asi_1, asi);
544         PATCH_LDD(tl1_immu_miss_patch_quad_ldd_1, ldd);
545         PATCH_TSB(tl1_immu_miss_patch_tsb_1, off);
546         PATCH_TSB(tl1_immu_miss_patch_tsb_2, off);
547         PATCH_TSB_MASK(tl1_immu_miss_patch_tsb_mask_1, tsb_kernel_mask);
548         PATCH_TSB_MASK(tl1_immu_miss_patch_tsb_mask_2, tsb_kernel_mask);
549
550         /*
551          * Enter fake 8k pages for the 4MB kernel pages, so that
552          * pmap_kextract() will work for them.
553          */
554         for (i = 0; i < kernel_tlb_slots; i++) {
555                 pa = kernel_tlbs[i].te_pa;
556                 va = kernel_tlbs[i].te_va;
557                 for (off = 0; off < PAGE_SIZE_4M; off += PAGE_SIZE) {
558                         tp = tsb_kvtotte(va + off);
559                         vpn = TV_VPN(va + off, TS_8K);
560                         data = TD_V | TD_8K | TD_PA(pa + off) | TD_REF |
561                             TD_SW | TD_CP | TD_CV | TD_P | TD_W;
562                         pmap_bootstrap_set_tte(tp, vpn, data);
563                 }
564         }
565
566         /*
567          * Set the start and end of KVA.  The kernel is loaded starting
568          * at the first available 4MB super page, so we advance to the
569          * end of the last one used for it.
570          */
571         virtual_avail = KERNBASE + kernel_tlb_slots * PAGE_SIZE_4M;
572         virtual_end = vm_max_kernel_address;
573         kernel_vm_end = vm_max_kernel_address;
574
575         /*
576          * Allocate kva space for temporary mappings.
577          */
578         pmap_idle_map = virtual_avail;
579         virtual_avail += PAGE_SIZE * colors;
580         pmap_temp_map_1 = virtual_avail;
581         virtual_avail += PAGE_SIZE * colors;
582         pmap_temp_map_2 = virtual_avail;
583         virtual_avail += PAGE_SIZE * colors;
584
585         /*
586          * Allocate a kernel stack with guard page for thread0 and map it
587          * into the kernel TSB.  We must ensure that the virtual address is
588          * colored properly for corresponding CPUs, since we're allocating
589          * from phys_avail so the memory won't have an associated vm_page_t.
590          */
591         pa = pmap_bootstrap_alloc(KSTACK_PAGES * PAGE_SIZE, colors);
592         kstack0_phys = pa;
593         virtual_avail += roundup(KSTACK_GUARD_PAGES, colors) * PAGE_SIZE;
594         kstack0 = virtual_avail;
595         virtual_avail += roundup(KSTACK_PAGES, colors) * PAGE_SIZE;
596         if (dcache_color_ignore == 0)
597                 KASSERT(DCACHE_COLOR(kstack0) == DCACHE_COLOR(kstack0_phys),
598                     ("pmap_bootstrap: kstack0 miscolored"));
599         for (i = 0; i < KSTACK_PAGES; i++) {
600                 pa = kstack0_phys + i * PAGE_SIZE;
601                 va = kstack0 + i * PAGE_SIZE;
602                 tp = tsb_kvtotte(va);
603                 vpn = TV_VPN(va, TS_8K);
604                 data = TD_V | TD_8K | TD_PA(pa) | TD_REF | TD_SW | TD_CP |
605                     TD_CV | TD_P | TD_W;
606                 pmap_bootstrap_set_tte(tp, vpn, data);
607         }
608
609         /*
610          * Calculate the last available physical address.
611          */
612         for (i = 0; phys_avail[i + 2] != 0; i += 2)
613                 ;
614         Maxmem = sparc64_btop(phys_avail[i + 1]);
615
616         /*
617          * Add the PROM mappings to the kernel TSB.
618          */
619         if ((vmem = OF_finddevice("/virtual-memory")) == -1)
620                 panic("pmap_bootstrap: finddevice /virtual-memory");
621         if ((sz = OF_getproplen(vmem, "translations")) == -1)
622                 panic("pmap_bootstrap: getproplen translations");
623         if (sizeof(translations) < sz)
624                 panic("pmap_bootstrap: translations too small");
625         bzero(translations, sz);
626         if (OF_getprop(vmem, "translations", translations, sz) == -1)
627                 panic("pmap_bootstrap: getprop /virtual-memory/translations");
628         sz /= sizeof(*translations);
629         translations_size = sz;
630         CTR0(KTR_PMAP, "pmap_bootstrap: translations");
631         qsort(translations, sz, sizeof (*translations), om_cmp);
632         for (i = 0; i < sz; i++) {
633                 CTR3(KTR_PMAP,
634                     "translation: start=%#lx size=%#lx tte=%#lx",
635                     translations[i].om_start, translations[i].om_size,
636                     translations[i].om_tte);
637                 if ((translations[i].om_tte & TD_V) == 0)
638                         continue;
639                 if (translations[i].om_start < VM_MIN_PROM_ADDRESS ||
640                     translations[i].om_start > VM_MAX_PROM_ADDRESS)
641                         continue;
642                 for (off = 0; off < translations[i].om_size;
643                     off += PAGE_SIZE) {
644                         va = translations[i].om_start + off;
645                         tp = tsb_kvtotte(va);
646                         vpn = TV_VPN(va, TS_8K);
647                         data = ((translations[i].om_tte &
648                             ~((TD_SOFT2_MASK << TD_SOFT2_SHIFT) |
649                             (cpu_impl >= CPU_IMPL_ULTRASPARCI &&
650                             cpu_impl < CPU_IMPL_ULTRASPARCIII ?
651                             (TD_DIAG_SF_MASK << TD_DIAG_SF_SHIFT) :
652                             (TD_RSVD_CH_MASK << TD_RSVD_CH_SHIFT)) |
653                             (TD_SOFT_MASK << TD_SOFT_SHIFT))) | TD_EXEC) +
654                             off;
655                         pmap_bootstrap_set_tte(tp, vpn, data);
656                 }
657         }
658
659         /*
660          * Get the available physical memory ranges from /memory/reg.  These
661          * are only used for kernel dumps, but it may not be wise to do PROM
662          * calls in that situation.
663          */
664         if ((sz = OF_getproplen(pmem, "reg")) == -1)
665                 panic("pmap_bootstrap: getproplen /memory/reg");
666         if (sizeof(sparc64_memreg) < sz)
667                 panic("pmap_bootstrap: sparc64_memreg too small");
668         if (OF_getprop(pmem, "reg", sparc64_memreg, sz) == -1)
669                 panic("pmap_bootstrap: getprop /memory/reg");
670         sparc64_nmemreg = sz / sizeof(*sparc64_memreg);
671
672         /*
673          * Initialize the kernel pmap (which is statically allocated).
674          */
675         pm = kernel_pmap;
676         PMAP_LOCK_INIT(pm);
677         for (i = 0; i < MAXCPU; i++)
678                 pm->pm_context[i] = TLB_CTX_KERNEL;
679         CPU_FILL(&pm->pm_active);
680
681         /*
682          * Flush all non-locked TLB entries possibly left over by the
683          * firmware.
684          */
685         tlb_flush_nonlocked();
686 }
687
688 /*
689  * Map the 4MB kernel TSB pages.
690  */
691 void
692 pmap_map_tsb(void)
693 {
694         vm_offset_t va;
695         vm_paddr_t pa;
696         u_long data;
697         int i;
698
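        /*
         * Each 4MB TSB page is entered as a locked (TD_L) mapping via the
         * dTLB data-in register; locked entries are not subject to automatic
         * replacement, so accesses to the kernel TSB itself cannot miss in
         * the dTLB.
         */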
699         for (i = 0; i < tsb_kernel_size; i += PAGE_SIZE_4M) {
700                 va = (vm_offset_t)tsb_kernel + i;
701                 pa = tsb_kernel_phys + i;
702                 data = TD_V | TD_4M | TD_PA(pa) | TD_L | TD_CP | TD_CV |
703                     TD_P | TD_W;
704                 stxa(AA_DMMU_TAR, ASI_DMMU, TLB_TAR_VA(va) |
705                     TLB_TAR_CTX(TLB_CTX_KERNEL));
706                 stxa_sync(0, ASI_DTLB_DATA_IN_REG, data);
707         }
708 }
709
710 /*
711  * Set the secondary context to be the kernel context (needed for FP block
712  * operations in the kernel).
713  */
714 void
715 pmap_set_kctx(void)
716 {
717
718         stxa(AA_DMMU_SCXR, ASI_DMMU, (ldxa(AA_DMMU_SCXR, ASI_DMMU) &
719             TLB_CXR_PGSZ_MASK) | TLB_CTX_KERNEL);
720         flush(KERNBASE);
721 }
722
723 /*
724  * Allocate a physical page of memory directly from the phys_avail map.
725  * Can only be called from pmap_bootstrap before avail start and end are
726  * calculated.
727  */
728 static vm_paddr_t
729 pmap_bootstrap_alloc(vm_size_t size, uint32_t colors)
730 {
731         vm_paddr_t pa;
732         int i;
733
734         size = roundup(size, PAGE_SIZE * colors);
735         for (i = 0; phys_avail[i + 1] != 0; i += 2) {
736                 if (phys_avail[i + 1] - phys_avail[i] < size)
737                         continue;
738                 pa = phys_avail[i];
739                 phys_avail[i] += size;
740                 return (pa);
741         }
742         panic("pmap_bootstrap_alloc");
743 }
744
745 /*
746  * Set a TTE.  This function is intended as a helper when tsb_kernel is
747  * direct-mapped but we haven't taken over the trap table yet, as is the
748  * case when we are taking advantage of ASI_ATOMIC_QUAD_LDD_PHYS to access
749  * the kernel TSB.
750  */
751 void
752 pmap_bootstrap_set_tte(struct tte *tp, u_long vpn, u_long data)
753 {
754
755         if (tsb_kernel_ldd_phys == 0) {
756                 tp->tte_vpn = vpn;
757                 tp->tte_data = data;
758         } else {
759                 stxa((vm_paddr_t)tp + offsetof(struct tte, tte_vpn),
760                     ASI_PHYS_USE_EC, vpn);
761                 stxa((vm_paddr_t)tp + offsetof(struct tte, tte_data),
762                     ASI_PHYS_USE_EC, data);
763         }
764 }
765
766 /*
767  * Initialize a vm_page's machine-dependent fields.
768  */
769 void
770 pmap_page_init(vm_page_t m)
771 {
772
773         TAILQ_INIT(&m->md.tte_list);
774         m->md.color = DCACHE_COLOR(VM_PAGE_TO_PHYS(m));
775         m->md.flags = 0;
776         m->md.pmap = NULL;
777 }
778
779 /*
780  * Initialize the pmap module.
781  */
782 void
783 pmap_init(void)
784 {
785         vm_offset_t addr;
786         vm_size_t size;
787         int result;
788         int i;
789
790         for (i = 0; i < translations_size; i++) {
791                 addr = translations[i].om_start;
792                 size = translations[i].om_size;
793                 if ((translations[i].om_tte & TD_V) == 0)
794                         continue;
795                 if (addr < VM_MIN_PROM_ADDRESS || addr > VM_MAX_PROM_ADDRESS)
796                         continue;
797                 result = vm_map_find(kernel_map, NULL, 0, &addr, size,
798                     VMFS_NO_SPACE, VM_PROT_ALL, VM_PROT_ALL, MAP_NOFAULT);
799                 if (result != KERN_SUCCESS || addr != translations[i].om_start)
800                         panic("pmap_init: vm_map_find");
801         }
802 }
803
804 /*
805  * Extract the physical page address associated with the given
806  * map/virtual_address pair.
807  */
808 vm_paddr_t
809 pmap_extract(pmap_t pm, vm_offset_t va)
810 {
811         struct tte *tp;
812         vm_paddr_t pa;
813
814         if (pm == kernel_pmap)
815                 return (pmap_kextract(va));
816         PMAP_LOCK(pm);
817         tp = tsb_tte_lookup(pm, va);
818         if (tp == NULL)
819                 pa = 0;
820         else
821                 pa = TTE_GET_PA(tp) | (va & TTE_GET_PAGE_MASK(tp));
822         PMAP_UNLOCK(pm);
823         return (pa);
824 }
825
826 /*
827  * Atomically extract and hold the physical page with the given
828  * pmap and virtual address pair if that mapping permits the given
829  * protection.
830  */
831 vm_page_t
832 pmap_extract_and_hold(pmap_t pm, vm_offset_t va, vm_prot_t prot)
833 {
834         struct tte *tp;
835         vm_page_t m;
836         vm_paddr_t pa;
837
838         m = NULL;
839         pa = 0;
840         PMAP_LOCK(pm);
841 retry:
842         if (pm == kernel_pmap) {
843                 if (va >= VM_MIN_DIRECT_ADDRESS) {
844                         tp = NULL;
845                         m = PHYS_TO_VM_PAGE(TLB_DIRECT_TO_PHYS(va));
846                         (void)vm_page_pa_tryrelock(pm, TLB_DIRECT_TO_PHYS(va),
847                             &pa);
848                         vm_page_hold(m);
849                 } else {
850                         tp = tsb_kvtotte(va);
851                         if ((tp->tte_data & TD_V) == 0)
852                                 tp = NULL;
853                 }
854         } else
855                 tp = tsb_tte_lookup(pm, va);
856         if (tp != NULL && ((tp->tte_data & TD_SW) ||
857             (prot & VM_PROT_WRITE) == 0)) {
858                 if (vm_page_pa_tryrelock(pm, TTE_GET_PA(tp), &pa))
859                         goto retry;
860                 m = PHYS_TO_VM_PAGE(TTE_GET_PA(tp));
861                 vm_page_hold(m);
862         }
863         PA_UNLOCK_COND(pa);
864         PMAP_UNLOCK(pm);
865         return (m);
866 }
867
868 /*
869  * Extract the physical page address associated with the given kernel virtual
870  * address.
871  */
872 vm_paddr_t
873 pmap_kextract(vm_offset_t va)
874 {
875         struct tte *tp;
876
877         if (va >= VM_MIN_DIRECT_ADDRESS)
878                 return (TLB_DIRECT_TO_PHYS(va));
879         tp = tsb_kvtotte(va);
880         if ((tp->tte_data & TD_V) == 0)
881                 return (0);
882         return (TTE_GET_PA(tp) | (va & TTE_GET_PAGE_MASK(tp)));
883 }
884
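/*
 * The color bookkeeping below guards against illegal aliases in a virtually
 * indexed, physically tagged data cache: two mappings of the same physical
 * page that differ in DCACHE_COLOR() may index different cache lines, so a
 * page is kept cacheable (TD_CV) only while all of its mappings share one
 * color; otherwise every mapping is downgraded to non-cacheable.
 */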
885 int
886 pmap_cache_enter(vm_page_t m, vm_offset_t va)
887 {
888         struct tte *tp;
889         int color;
890
891         mtx_assert(&vm_page_queue_mtx, MA_OWNED);
892         KASSERT((m->flags & PG_FICTITIOUS) == 0,
893             ("pmap_cache_enter: fake page"));
894         PMAP_STATS_INC(pmap_ncache_enter);
895
896         if (dcache_color_ignore != 0)
897                 return (1);
898
899         /*
900          * Find the color for this virtual address and note the added mapping.
901          */
902         color = DCACHE_COLOR(va);
903         m->md.colors[color]++;
904
905         /*
906          * If all existing mappings have the same color, the mapping is
907          * cacheable.
908          */
909         if (m->md.color == color) {
910                 KASSERT(m->md.colors[DCACHE_OTHER_COLOR(color)] == 0,
911                     ("pmap_cache_enter: cacheable, mappings of other color"));
912                 if (m->md.color == DCACHE_COLOR(VM_PAGE_TO_PHYS(m)))
913                         PMAP_STATS_INC(pmap_ncache_enter_c);
914                 else
915                         PMAP_STATS_INC(pmap_ncache_enter_oc);
916                 return (1);
917         }
918
919         /*
920          * If there are no mappings of the other color, and the page still has
921          * the wrong color, this must be a new mapping.  Change the color to
922          * match the new mapping, which is cacheable.  We must flush the page
923          * from the cache now.
924          */
925         if (m->md.colors[DCACHE_OTHER_COLOR(color)] == 0) {
926                 KASSERT(m->md.colors[color] == 1,
927                     ("pmap_cache_enter: changing color, not new mapping"));
928                 dcache_page_inval(VM_PAGE_TO_PHYS(m));
929                 m->md.color = color;
930                 if (m->md.color == DCACHE_COLOR(VM_PAGE_TO_PHYS(m)))
931                         PMAP_STATS_INC(pmap_ncache_enter_cc);
932                 else
933                         PMAP_STATS_INC(pmap_ncache_enter_coc);
934                 return (1);
935         }
936
937         /*
938          * If the mapping is already non-cacheable, just return.
939          */
940         if (m->md.color == -1) {
941                 PMAP_STATS_INC(pmap_ncache_enter_nc);
942                 return (0);
943         }
944
945         PMAP_STATS_INC(pmap_ncache_enter_cnc);
946
947         /*
948          * Mark all mappings as uncacheable, flush any lines with the other
949          * color out of the dcache, and set the color to none (-1).
950          */
951         TAILQ_FOREACH(tp, &m->md.tte_list, tte_link) {
952                 atomic_clear_long(&tp->tte_data, TD_CV);
953                 tlb_page_demap(TTE_GET_PMAP(tp), TTE_GET_VA(tp));
954         }
955         dcache_page_inval(VM_PAGE_TO_PHYS(m));
956         m->md.color = -1;
957         return (0);
958 }
959
960 static void
961 pmap_cache_remove(vm_page_t m, vm_offset_t va)
962 {
963         struct tte *tp;
964         int color;
965
966         mtx_assert(&vm_page_queue_mtx, MA_OWNED);
967         CTR3(KTR_PMAP, "pmap_cache_remove: m=%p va=%#lx c=%d", m, va,
968             m->md.colors[DCACHE_COLOR(va)]);
969         KASSERT((m->flags & PG_FICTITIOUS) == 0,
970             ("pmap_cache_remove: fake page"));
971         PMAP_STATS_INC(pmap_ncache_remove);
972
973         if (dcache_color_ignore != 0)
974                 return;
975
976         KASSERT(m->md.colors[DCACHE_COLOR(va)] > 0,
977             ("pmap_cache_remove: no mappings %d <= 0",
978             m->md.colors[DCACHE_COLOR(va)]));
979
980         /*
981          * Find the color for this virtual address and note the removal of
982          * the mapping.
983          */
984         color = DCACHE_COLOR(va);
985         m->md.colors[color]--;
986
987         /*
988          * If the page is cacheable, just return and keep the same color, even
989          * if there are no longer any mappings.
990          */
991         if (m->md.color != -1) {
992                 if (m->md.color == DCACHE_COLOR(VM_PAGE_TO_PHYS(m)))
993                         PMAP_STATS_INC(pmap_ncache_remove_c);
994                 else
995                         PMAP_STATS_INC(pmap_ncache_remove_oc);
996                 return;
997         }
998
999         KASSERT(m->md.colors[DCACHE_OTHER_COLOR(color)] != 0,
1000             ("pmap_cache_remove: uncacheable, no mappings of other color"));
1001
1002         /*
1003          * If the page is not cacheable (color is -1), and the number of
1004          * mappings for this color is not zero, just return.  There are
1005          * mappings of the other color still, so remain non-cacheable.
1006          */
1007         if (m->md.colors[color] != 0) {
1008                 PMAP_STATS_INC(pmap_ncache_remove_nc);
1009                 return;
1010         }
1011
1012         /*
1013          * The number of mappings for this color is now zero.  Recache the
1014          * other colored mappings, and change the page color to the other
1015          * color.  There should be no lines in the data cache for this page,
1016          * so flushing should not be needed.
1017          */
1018         TAILQ_FOREACH(tp, &m->md.tte_list, tte_link) {
1019                 atomic_set_long(&tp->tte_data, TD_CV);
1020                 tlb_page_demap(TTE_GET_PMAP(tp), TTE_GET_VA(tp));
1021         }
1022         m->md.color = DCACHE_OTHER_COLOR(color);
1023
1024         if (m->md.color == DCACHE_COLOR(VM_PAGE_TO_PHYS(m)))
1025                 PMAP_STATS_INC(pmap_ncache_remove_cc);
1026         else
1027                 PMAP_STATS_INC(pmap_ncache_remove_coc);
1028 }
1029
1030 /*
1031  * Map a wired page into kernel virtual address space.
1032  */
1033 void
1034 pmap_kenter(vm_offset_t va, vm_page_t m)
1035 {
1036         vm_offset_t ova;
1037         struct tte *tp;
1038         vm_page_t om;
1039         u_long data;
1040
1041         mtx_assert(&vm_page_queue_mtx, MA_OWNED);
1042         PMAP_STATS_INC(pmap_nkenter);
1043         tp = tsb_kvtotte(va);
1044         CTR4(KTR_PMAP, "pmap_kenter: va=%#lx pa=%#lx tp=%p data=%#lx",
1045             va, VM_PAGE_TO_PHYS(m), tp, tp->tte_data);
1046         if (DCACHE_COLOR(VM_PAGE_TO_PHYS(m)) != DCACHE_COLOR(va)) {
1047                 CTR5(KTR_SPARE2,
1048         "pmap_kenter: off color va=%#lx pa=%#lx o=%p ot=%d pi=%#lx",
1049                     va, VM_PAGE_TO_PHYS(m), m->object,
1050                     m->object ? m->object->type : -1,
1051                     m->pindex);
1052                 PMAP_STATS_INC(pmap_nkenter_oc);
1053         }
1054         if ((tp->tte_data & TD_V) != 0) {
1055                 om = PHYS_TO_VM_PAGE(TTE_GET_PA(tp));
1056                 ova = TTE_GET_VA(tp);
1057                 if (m == om && va == ova) {
1058                         PMAP_STATS_INC(pmap_nkenter_stupid);
1059                         return;
1060                 }
1061                 TAILQ_REMOVE(&om->md.tte_list, tp, tte_link);
1062                 pmap_cache_remove(om, ova);
1063                 if (va != ova)
1064                         tlb_page_demap(kernel_pmap, ova);
1065         }
1066         data = TD_V | TD_8K | VM_PAGE_TO_PHYS(m) | TD_REF | TD_SW | TD_CP |
1067             TD_P | TD_W;
1068         if (pmap_cache_enter(m, va) != 0)
1069                 data |= TD_CV;
1070         tp->tte_vpn = TV_VPN(va, TS_8K);
1071         tp->tte_data = data;
1072         TAILQ_INSERT_TAIL(&m->md.tte_list, tp, tte_link);
1073 }
1074
1075 /*
1076  * Map a wired page into kernel virtual address space.  This additionally
1077  * takes a flag argument which is or'ed to the TTE data.  This is used by
1078  * sparc64_bus_mem_map().
1079  * NOTE: if the mapping is non-cacheable, it's the caller's responsibility
1080  * to flush entries that might still be in the cache, if applicable.
1081  */
1082 void
1083 pmap_kenter_flags(vm_offset_t va, vm_paddr_t pa, u_long flags)
1084 {
1085         struct tte *tp;
1086
1087         tp = tsb_kvtotte(va);
1088         CTR4(KTR_PMAP, "pmap_kenter_flags: va=%#lx pa=%#lx tp=%p data=%#lx",
1089             va, pa, tp, tp->tte_data);
1090         tp->tte_vpn = TV_VPN(va, TS_8K);
1091         tp->tte_data = TD_V | TD_8K | TD_PA(pa) | TD_REF | TD_P | flags;
1092 }
1093
1094 /*
1095  * Remove a wired page from kernel virtual address space.
1096  */
1097 void
1098 pmap_kremove(vm_offset_t va)
1099 {
1100         struct tte *tp;
1101         vm_page_t m;
1102
1103         mtx_assert(&vm_page_queue_mtx, MA_OWNED);
1104         PMAP_STATS_INC(pmap_nkremove);
1105         tp = tsb_kvtotte(va);
1106         CTR3(KTR_PMAP, "pmap_kremove: va=%#lx tp=%p data=%#lx", va, tp,
1107             tp->tte_data);
1108         if ((tp->tte_data & TD_V) == 0)
1109                 return;
1110         m = PHYS_TO_VM_PAGE(TTE_GET_PA(tp));
1111         TAILQ_REMOVE(&m->md.tte_list, tp, tte_link);
1112         pmap_cache_remove(m, va);
1113         TTE_ZERO(tp);
1114 }
1115
1116 /*
1117  * Inverse of pmap_kenter_flags, used by bus_space_unmap().
1118  */
1119 void
1120 pmap_kremove_flags(vm_offset_t va)
1121 {
1122         struct tte *tp;
1123
1124         tp = tsb_kvtotte(va);
1125         CTR3(KTR_PMAP, "pmap_kremove_flags: va=%#lx tp=%p data=%#lx", va, tp,
1126             tp->tte_data);
1127         TTE_ZERO(tp);
1128 }
1129
1130 /*
1131  * Map a range of physical addresses into kernel virtual address space.
1132  *
1133  * The value passed in *virt is a suggested virtual address for the mapping.
1134  * Architectures which can support a direct-mapped physical to virtual region
1135  * can return the appropriate address within that region, leaving '*virt'
1136  * unchanged.
1137  */
1138 vm_offset_t
1139 pmap_map(vm_offset_t *virt, vm_paddr_t start, vm_paddr_t end, int prot)
1140 {
1141
1142         return (TLB_PHYS_TO_DIRECT(start));
1143 }
1144
1145 /*
1146  * Map a list of wired pages into kernel virtual address space.  This is
1147  * intended for temporary mappings which do not need page modification or
1148  * references recorded.  Existing mappings in the region are overwritten.
1149  */
1150 void
1151 pmap_qenter(vm_offset_t sva, vm_page_t *m, int count)
1152 {
1153         vm_offset_t va;
1154         int locked;
1155
1156         PMAP_STATS_INC(pmap_nqenter);
1157         va = sva;
1158         if (!(locked = mtx_owned(&vm_page_queue_mtx)))
1159                 vm_page_lock_queues();
1160         while (count-- > 0) {
1161                 pmap_kenter(va, *m);
1162                 va += PAGE_SIZE;
1163                 m++;
1164         }
1165         if (!locked)
1166                 vm_page_unlock_queues();
1167         tlb_range_demap(kernel_pmap, sva, va);
1168 }
1169
1170 /*
1171  * Remove page mappings from kernel virtual address space.  Intended for
1172  * temporary mappings entered by pmap_qenter.
1173  */
1174 void
1175 pmap_qremove(vm_offset_t sva, int count)
1176 {
1177         vm_offset_t va;
1178         int locked;
1179
1180         PMAP_STATS_INC(pmap_nqremove);
1181         va = sva;
1182         if (!(locked = mtx_owned(&vm_page_queue_mtx)))
1183                 vm_page_lock_queues();
1184         while (count-- > 0) {
1185                 pmap_kremove(va);
1186                 va += PAGE_SIZE;
1187         }
1188         if (!locked)
1189                 vm_page_unlock_queues();
1190         tlb_range_demap(kernel_pmap, sva, va);
1191 }
1192
1193 /*
1194  * Initialize the pmap associated with process 0.
1195  */
1196 void
1197 pmap_pinit0(pmap_t pm)
1198 {
1199         int i;
1200
1201         PMAP_LOCK_INIT(pm);
1202         for (i = 0; i < MAXCPU; i++)
1203                 pm->pm_context[i] = TLB_CTX_KERNEL;
1204         CPU_ZERO(&pm->pm_active);
1205         pm->pm_tsb = NULL;
1206         pm->pm_tsb_obj = NULL;
1207         bzero(&pm->pm_stats, sizeof(pm->pm_stats));
1208 }
1209
1210 /*
1211  * Initialize a preallocated and zeroed pmap structure, such as one in a
1212  * vmspace structure.
1213  */
1214 int
1215 pmap_pinit(pmap_t pm)
1216 {
1217         vm_page_t ma[TSB_PAGES];
1218         vm_page_t m;
1219         int i;
1220
1221         PMAP_LOCK_INIT(pm);
1222
1223         /*
1224          * Allocate KVA space for the TSB.
1225          */
1226         if (pm->pm_tsb == NULL) {
1227                 pm->pm_tsb = (struct tte *)kmem_alloc_nofault(kernel_map,
1228                     TSB_BSIZE);
1229                 if (pm->pm_tsb == NULL) {
1230                         PMAP_LOCK_DESTROY(pm);
1231                         return (0);
1232                 }
1233         }
1234
1235         /*
1236          * Allocate an object for it.
1237          */
1238         if (pm->pm_tsb_obj == NULL)
1239                 pm->pm_tsb_obj = vm_object_allocate(OBJT_PHYS, TSB_PAGES);
1240
1241         mtx_lock_spin(&sched_lock);
1242         for (i = 0; i < MAXCPU; i++)
1243                 pm->pm_context[i] = -1;
1244         CPU_ZERO(&pm->pm_active);
1245         mtx_unlock_spin(&sched_lock);
1246
1247         VM_OBJECT_LOCK(pm->pm_tsb_obj);
1248         for (i = 0; i < TSB_PAGES; i++) {
1249                 m = vm_page_grab(pm->pm_tsb_obj, i, VM_ALLOC_NOBUSY |
1250                     VM_ALLOC_RETRY | VM_ALLOC_WIRED | VM_ALLOC_ZERO);
1251                 m->valid = VM_PAGE_BITS_ALL;
1252                 m->md.pmap = pm;
1253                 ma[i] = m;
1254         }
1255         VM_OBJECT_UNLOCK(pm->pm_tsb_obj);
1256         pmap_qenter((vm_offset_t)pm->pm_tsb, ma, TSB_PAGES);
1257
1258         bzero(&pm->pm_stats, sizeof(pm->pm_stats));
1259         return (1);
1260 }
1261
1262 /*
1263  * Release any resources held by the given physical map.
1264  * Called when a pmap initialized by pmap_pinit is being released.
1265  * Should only be called if the map contains no valid mappings.
1266  */
1267 void
1268 pmap_release(pmap_t pm)
1269 {
1270         vm_object_t obj;
1271         vm_page_t m;
1272         struct pcpu *pc;
1273
1274         CTR2(KTR_PMAP, "pmap_release: ctx=%#x tsb=%p",
1275             pm->pm_context[curcpu], pm->pm_tsb);
1276         KASSERT(pmap_resident_count(pm) == 0,
1277             ("pmap_release: resident pages %ld != 0",
1278             pmap_resident_count(pm)));
1279
1280         /*
1281          * After the pmap was freed, it might be reallocated to a new process.
1282          * When switching, this might lead us to wrongly assume that we need
1283  * not switch contexts because old and new pmap pointers are equal.
1284          * Therefore, make sure that this pmap is not referenced by any PCPU
1285          * pointer any more.  This could happen in two cases:
1286          * - A process that referenced the pmap is currently exiting on a CPU.
1287          *   However, it is guaranteed to not switch in any more after setting
1288          *   its state to PRS_ZOMBIE.
1289          * - A process that referenced this pmap ran on a CPU, but we switched
1290          *   to a kernel thread, leaving the pmap pointer unchanged.
1291          */
1292         mtx_lock_spin(&sched_lock);
1293         STAILQ_FOREACH(pc, &cpuhead, pc_allcpu)
1294                 if (pc->pc_pmap == pm)
1295                         pc->pc_pmap = NULL;
1296         mtx_unlock_spin(&sched_lock);
1297
1298         pmap_qremove((vm_offset_t)pm->pm_tsb, TSB_PAGES);
1299         obj = pm->pm_tsb_obj;
1300         VM_OBJECT_LOCK(obj);
1301         KASSERT(obj->ref_count == 1, ("pmap_release: tsbobj ref count != 1"));
1302         while (!TAILQ_EMPTY(&obj->memq)) {
1303                 m = TAILQ_FIRST(&obj->memq);
1304                 m->md.pmap = NULL;
1305                 m->wire_count--;
1306                 atomic_subtract_int(&cnt.v_wire_count, 1);
1307                 vm_page_free_zero(m);
1308         }
1309         VM_OBJECT_UNLOCK(obj);
1310         PMAP_LOCK_DESTROY(pm);
1311 }
1312
1313 /*
1314  * Grow the number of kernel page table entries.  Unneeded.
1315  */
1316 void
1317 pmap_growkernel(vm_offset_t addr)
1318 {
1319
1320         panic("pmap_growkernel: can't grow kernel");
1321 }
1322
1323 int
1324 pmap_remove_tte(struct pmap *pm, struct pmap *pm2, struct tte *tp,
1325     vm_offset_t va)
1326 {
1327         vm_page_t m;
1328         u_long data;
1329
1330         mtx_assert(&vm_page_queue_mtx, MA_OWNED);
1331         data = atomic_readandclear_long(&tp->tte_data);
1332         if ((data & TD_FAKE) == 0) {
1333                 m = PHYS_TO_VM_PAGE(TD_PA(data));
1334                 TAILQ_REMOVE(&m->md.tte_list, tp, tte_link);
1335                 if ((data & TD_WIRED) != 0)
1336                         pm->pm_stats.wired_count--;
1337                 if ((data & TD_PV) != 0) {
1338                         if ((data & TD_W) != 0)
1339                                 vm_page_dirty(m);
1340                         if ((data & TD_REF) != 0)
1341                                 vm_page_aflag_set(m, PGA_REFERENCED);
1342                         if (TAILQ_EMPTY(&m->md.tte_list))
1343                                 vm_page_aflag_clear(m, PGA_WRITEABLE);
1344                         pm->pm_stats.resident_count--;
1345                 }
1346                 pmap_cache_remove(m, va);
1347         }
1348         TTE_ZERO(tp);
1349         if (PMAP_REMOVE_DONE(pm))
1350                 return (0);
1351         return (1);
1352 }
1353
1354 /*
1355  * Remove the given range of addresses from the specified map.
1356  */
1357 void
1358 pmap_remove(pmap_t pm, vm_offset_t start, vm_offset_t end)
1359 {
1360         struct tte *tp;
1361         vm_offset_t va;
1362
1363         CTR3(KTR_PMAP, "pmap_remove: ctx=%#lx start=%#lx end=%#lx",
1364             pm->pm_context[curcpu], start, end);
1365         if (PMAP_REMOVE_DONE(pm))
1366                 return;
1367         vm_page_lock_queues();
1368         PMAP_LOCK(pm);
1369         if (end - start > PMAP_TSB_THRESH) {
1370                 tsb_foreach(pm, NULL, start, end, pmap_remove_tte);
1371                 tlb_context_demap(pm);
1372         } else {
1373                 for (va = start; va < end; va += PAGE_SIZE)
1374                         if ((tp = tsb_tte_lookup(pm, va)) != NULL &&
1375                             !pmap_remove_tte(pm, NULL, tp, va))
1376                                 break;
1377                 tlb_range_demap(pm, start, end - 1);
1378         }
1379         PMAP_UNLOCK(pm);
1380         vm_page_unlock_queues();
1381 }
1382
1383 void
1384 pmap_remove_all(vm_page_t m)
1385 {
1386         struct pmap *pm;
1387         struct tte *tpn;
1388         struct tte *tp;
1389         vm_offset_t va;
1390
1391         KASSERT((m->oflags & VPO_UNMANAGED) == 0,
1392             ("pmap_remove_all: page %p is not managed", m));
1393         vm_page_lock_queues();
1394         for (tp = TAILQ_FIRST(&m->md.tte_list); tp != NULL; tp = tpn) {
1395                 tpn = TAILQ_NEXT(tp, tte_link);
1396                 if ((tp->tte_data & TD_PV) == 0)
1397                         continue;
1398                 pm = TTE_GET_PMAP(tp);
1399                 va = TTE_GET_VA(tp);
1400                 PMAP_LOCK(pm);
1401                 if ((tp->tte_data & TD_WIRED) != 0)
1402                         pm->pm_stats.wired_count--;
1403                 if ((tp->tte_data & TD_REF) != 0)
1404                         vm_page_aflag_set(m, PGA_REFERENCED);
1405                 if ((tp->tte_data & TD_W) != 0)
1406                         vm_page_dirty(m);
1407                 tp->tte_data &= ~TD_V;
1408                 tlb_page_demap(pm, va);
1409                 TAILQ_REMOVE(&m->md.tte_list, tp, tte_link);
1410                 pm->pm_stats.resident_count--;
1411                 pmap_cache_remove(m, va);
1412                 TTE_ZERO(tp);
1413                 PMAP_UNLOCK(pm);
1414         }
1415         vm_page_aflag_clear(m, PGA_WRITEABLE);
1416         vm_page_unlock_queues();
1417 }
1418
1419 static int
1420 pmap_protect_tte(struct pmap *pm, struct pmap *pm2, struct tte *tp,
1421     vm_offset_t va)
1422 {
1423         u_long data;
1424         vm_page_t m;
1425
1426         data = atomic_clear_long(&tp->tte_data, TD_SW | TD_W);
1427         if ((data & (TD_PV | TD_W)) == (TD_PV | TD_W)) {
1428                 m = PHYS_TO_VM_PAGE(TD_PA(data));
1429                 vm_page_dirty(m);
1430         }
1431         return (1);
1432 }
1433
1434 /*
1435  * Set the physical protection on the specified range of this map as requested.
1436  */
1437 void
1438 pmap_protect(pmap_t pm, vm_offset_t sva, vm_offset_t eva, vm_prot_t prot)
1439 {
1440         vm_offset_t va;
1441         struct tte *tp;
1442
1443         CTR4(KTR_PMAP, "pmap_protect: ctx=%#lx sva=%#lx eva=%#lx prot=%#lx",
1444             pm->pm_context[curcpu], sva, eva, prot);
1445
1446         if ((prot & VM_PROT_READ) == VM_PROT_NONE) {
1447                 pmap_remove(pm, sva, eva);
1448                 return;
1449         }
1450
1451         if (prot & VM_PROT_WRITE)
1452                 return;
1453
1454         vm_page_lock_queues();
1455         PMAP_LOCK(pm);
1456         if (eva - sva > PMAP_TSB_THRESH) {
1457                 tsb_foreach(pm, NULL, sva, eva, pmap_protect_tte);
1458                 tlb_context_demap(pm);
1459         } else {
1460                 for (va = sva; va < eva; va += PAGE_SIZE)
1461                         if ((tp = tsb_tte_lookup(pm, va)) != NULL)
1462                                 pmap_protect_tte(pm, NULL, tp, va);
1463                 tlb_range_demap(pm, sva, eva - 1);
1464         }
1465         PMAP_UNLOCK(pm);
1466         vm_page_unlock_queues();
1467 }
1468
1469 /*
1470  * Map the given physical page at the specified virtual address in the
1471  * target pmap with the protection requested.  If specified, the page
1472  * will be wired down.
1473  */
1474 void
1475 pmap_enter(pmap_t pm, vm_offset_t va, vm_prot_t access, vm_page_t m,
1476     vm_prot_t prot, boolean_t wired)
1477 {
1478
1479         vm_page_lock_queues();
1480         PMAP_LOCK(pm);
1481         pmap_enter_locked(pm, va, m, prot, wired);
1482         vm_page_unlock_queues();
1483         PMAP_UNLOCK(pm);
1484 }
1485
1486 /*
1487  * Map the given physical page at the specified virtual address in the
1488  * target pmap with the protection requested.  If specified the page
1489  * will be wired down.
1490  *
1491  * The page queues and pmap must be locked.
1492  */
1493 static void
1494 pmap_enter_locked(pmap_t pm, vm_offset_t va, vm_page_t m, vm_prot_t prot,
1495     boolean_t wired)
1496 {
1497         struct tte *tp;
1498         vm_paddr_t pa;
1499         vm_page_t real;
1500         u_long data;
1501
1502         mtx_assert(&vm_page_queue_mtx, MA_OWNED);
1503         PMAP_LOCK_ASSERT(pm, MA_OWNED);
1504         KASSERT((m->oflags & (VPO_UNMANAGED | VPO_BUSY)) != 0 ||
1505             VM_OBJECT_LOCKED(m->object),
1506             ("pmap_enter_locked: page %p is not busy", m));
1507         PMAP_STATS_INC(pmap_nenter);
1508         pa = VM_PAGE_TO_PHYS(m);
1509
1510         /*
1511          * If this is a fake page from the device_pager, but it covers actual
1512          * physical memory, convert to the real backing page.
1513          */
1514         if ((m->flags & PG_FICTITIOUS) != 0) {
1515                 real = vm_phys_paddr_to_vm_page(pa);
1516                 if (real != NULL)
1517                         m = real;
1518         }
1519
1520         CTR6(KTR_PMAP,
1521             "pmap_enter_locked: ctx=%p m=%p va=%#lx pa=%#lx prot=%#x wired=%d",
1522             pm->pm_context[curcpu], m, va, pa, prot, wired);
1523
1524         /*
1525          * If there is an existing mapping, and the physical address has not
1526  * changed, it must be a protection or wiring change.
1527          */
1528         if ((tp = tsb_tte_lookup(pm, va)) != NULL && TTE_GET_PA(tp) == pa) {
1529                 CTR0(KTR_PMAP, "pmap_enter_locked: update");
1530                 PMAP_STATS_INC(pmap_nenter_update);
1531
1532                 /*
1533                  * Wiring change, just update stats.
1534                  */
1535                 if (wired) {
1536                         if ((tp->tte_data & TD_WIRED) == 0) {
1537                                 tp->tte_data |= TD_WIRED;
1538                                 pm->pm_stats.wired_count++;
1539                         }
1540                 } else {
1541                         if ((tp->tte_data & TD_WIRED) != 0) {
1542                                 tp->tte_data &= ~TD_WIRED;
1543                                 pm->pm_stats.wired_count--;
1544                         }
1545                 }
1546
1547                 /*
1548                  * Save the old bits and clear the ones we're interested in.
1549                  */
1550                 data = tp->tte_data;
1551                 tp->tte_data &= ~(TD_EXEC | TD_SW | TD_W);
1552
1553                 /*
1554                  * Grant or revoke write access; sense modify status when revoking.
1555                  */
1556                 if ((prot & VM_PROT_WRITE) != 0) {
1557                         tp->tte_data |= TD_SW;
1558                         if (wired)
1559                                 tp->tte_data |= TD_W;
1560                         if ((m->oflags & VPO_UNMANAGED) == 0)
1561                                 vm_page_aflag_set(m, PGA_WRITEABLE);
1562                 } else if ((data & TD_W) != 0)
1563                         vm_page_dirty(m);
1564
1565                 /*
1566                  * If we're turning on execute permissions, flush the icache.
1567                  */
1568                 if ((prot & VM_PROT_EXECUTE) != 0) {
1569                         if ((data & TD_EXEC) == 0)
1570                                 icache_page_inval(pa);
1571                         tp->tte_data |= TD_EXEC;
1572                 }
1573
1574                 /*
1575                  * Delete the old mapping.
1576                  */
1577                 tlb_page_demap(pm, TTE_GET_VA(tp));
1578         } else {
1579                 /*
1580                  * If there is an existing mapping, but it's for a different
1581                  * physical address, delete the old mapping.
1582                  */
1583                 if (tp != NULL) {
1584                         CTR0(KTR_PMAP, "pmap_enter_locked: replace");
1585                         PMAP_STATS_INC(pmap_nenter_replace);
1586                         pmap_remove_tte(pm, NULL, tp, va);
1587                         tlb_page_demap(pm, va);
1588                 } else {
1589                         CTR0(KTR_PMAP, "pmap_enter_locked: new");
1590                         PMAP_STATS_INC(pmap_nenter_new);
1591                 }
1592
1593                 /*
1594                  * Now set up the data and install the new mapping.
1595                  */
1596                 data = TD_V | TD_8K | TD_PA(pa);
1597                 if (pm == kernel_pmap)
1598                         data |= TD_P;
1599                 if ((prot & VM_PROT_WRITE) != 0) {
1600                         data |= TD_SW;
1601                         if ((m->oflags & VPO_UNMANAGED) == 0)
1602                                 vm_page_aflag_set(m, PGA_WRITEABLE);
1603                 }
1604                 if (prot & VM_PROT_EXECUTE) {
1605                         data |= TD_EXEC;
1606                         icache_page_inval(pa);
1607                 }
1608
1609                 /*
1610                  * If it's wired, update stats.  We also don't need reference or
1611                  * modify tracking for wired mappings, so set the bits now.
1612                  */
1613                 if (wired) {
1614                         pm->pm_stats.wired_count++;
1615                         data |= TD_REF | TD_WIRED;
1616                         if ((prot & VM_PROT_WRITE) != 0)
1617                                 data |= TD_W;
1618                 }
1619
1620                 tsb_tte_enter(pm, m, va, TS_8K, data);
1621         }
1622 }
1623
1624 /*
1625  * Maps a sequence of resident pages belonging to the same object.
1626  * The sequence begins with the given page m_start.  This page is
1627  * mapped at the given virtual address start.  Each subsequent page is
1628  * mapped at a virtual address that is offset from start by the same
1629  * amount as the page is offset from m_start within the object.  The
1630  * last page in the sequence is the page with the largest offset from
1631  * m_start that can be mapped at a virtual address less than the given
1632  * virtual address end.  Not every virtual page between start and end
1633  * is mapped; only those for which a resident page exists with the
1634  * corresponding offset from m_start are mapped.
1635  */
1636 void
1637 pmap_enter_object(pmap_t pm, vm_offset_t start, vm_offset_t end,
1638     vm_page_t m_start, vm_prot_t prot)
1639 {
1640         vm_page_t m;
1641         vm_pindex_t diff, psize;
1642
1643         psize = atop(end - start);
1644         m = m_start;
1645         vm_page_lock_queues();
1646         PMAP_LOCK(pm);
1647         while (m != NULL && (diff = m->pindex - m_start->pindex) < psize) {
1648                 pmap_enter_locked(pm, start + ptoa(diff), m, prot &
1649                     (VM_PROT_READ | VM_PROT_EXECUTE), FALSE);
1650                 m = TAILQ_NEXT(m, listq);
1651         }
1652         vm_page_unlock_queues();
1653         PMAP_UNLOCK(pm);
1654 }
1655
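     /*
      * Map a single page without granting write access and without wiring it.
      */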
1656 void
1657 pmap_enter_quick(pmap_t pm, vm_offset_t va, vm_page_t m, vm_prot_t prot)
1658 {
1659
1660         vm_page_lock_queues();
1661         PMAP_LOCK(pm);
1662         pmap_enter_locked(pm, va, m, prot & (VM_PROT_READ | VM_PROT_EXECUTE),
1663             FALSE);
1664         vm_page_unlock_queues();
1665         PMAP_UNLOCK(pm);
1666 }
1667
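     /*
      * Nothing is pre-mapped here; just assert that the request is for a
      * device-backed object.
      */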
1668 void
1669 pmap_object_init_pt(pmap_t pm, vm_offset_t addr, vm_object_t object,
1670     vm_pindex_t pindex, vm_size_t size)
1671 {
1672
1673         VM_OBJECT_LOCK_ASSERT(object, MA_OWNED);
1674         KASSERT(object->type == OBJT_DEVICE || object->type == OBJT_SG,
1675             ("pmap_object_init_pt: non-device object"));
1676 }
1677
1678 /*
1679  * Change the wiring attribute for a map/virtual-address pair.
1680  * The mapping must already exist in the pmap.
1681  */
1682 void
1683 pmap_change_wiring(pmap_t pm, vm_offset_t va, boolean_t wired)
1684 {
1685         struct tte *tp;
1686         u_long data;
1687
1688         PMAP_LOCK(pm);
1689         if ((tp = tsb_tte_lookup(pm, va)) != NULL) {
1690                 if (wired) {
1691                         data = atomic_set_long(&tp->tte_data, TD_WIRED);
1692                         if ((data & TD_WIRED) == 0)
1693                                 pm->pm_stats.wired_count++;
1694                 } else {
1695                         data = atomic_clear_long(&tp->tte_data, TD_WIRED);
1696                         if ((data & TD_WIRED) != 0)
1697                                 pm->pm_stats.wired_count--;
1698                 }
1699         }
1700         PMAP_UNLOCK(pm);
1701 }
1702
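     /*
      * Copy a single mapping into the destination pmap as an unreferenced,
      * read-only entry, skipping fake (device) mappings and virtual addresses
      * that are already mapped in the destination.
      */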
1703 static int
1704 pmap_copy_tte(pmap_t src_pmap, pmap_t dst_pmap, struct tte *tp,
1705     vm_offset_t va)
1706 {
1707         vm_page_t m;
1708         u_long data;
1709
1710         if ((tp->tte_data & TD_FAKE) != 0)
1711                 return (1);
1712         if (tsb_tte_lookup(dst_pmap, va) == NULL) {
1713                 data = tp->tte_data &
1714                     ~(TD_PV | TD_REF | TD_SW | TD_CV | TD_W);
1715                 m = PHYS_TO_VM_PAGE(TTE_GET_PA(tp));
1716                 tsb_tte_enter(dst_pmap, m, va, TS_8K, data);
1717         }
1718         return (1);
1719 }
1720
1721 void
1722 pmap_copy(pmap_t dst_pmap, pmap_t src_pmap, vm_offset_t dst_addr,
1723     vm_size_t len, vm_offset_t src_addr)
1724 {
1725         struct tte *tp;
1726         vm_offset_t va;
1727
1728         if (dst_addr != src_addr)
1729                 return;
1730         vm_page_lock_queues();
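             /*
              * Lock the two pmaps in a consistent (address) order to avoid
              * deadlocking against a concurrent copy in the other direction.
              */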
1731         if (dst_pmap < src_pmap) {
1732                 PMAP_LOCK(dst_pmap);
1733                 PMAP_LOCK(src_pmap);
1734         } else {
1735                 PMAP_LOCK(src_pmap);
1736                 PMAP_LOCK(dst_pmap);
1737         }
1738         if (len > PMAP_TSB_THRESH) {
1739                 tsb_foreach(src_pmap, dst_pmap, src_addr, src_addr + len,
1740                     pmap_copy_tte);
1741                 tlb_context_demap(dst_pmap);
1742         } else {
1743                 for (va = src_addr; va < src_addr + len; va += PAGE_SIZE)
1744                         if ((tp = tsb_tte_lookup(src_pmap, va)) != NULL)
1745                                 pmap_copy_tte(src_pmap, dst_pmap, tp, va);
1746                 tlb_range_demap(dst_pmap, src_addr, src_addr + len - 1);
1747         }
1748         vm_page_unlock_queues();
1749         PMAP_UNLOCK(src_pmap);
1750         PMAP_UNLOCK(dst_pmap);
1751 }
1752
1753 void
1754 pmap_zero_page(vm_page_t m)
1755 {
1756         struct tte *tp;
1757         vm_offset_t va;
1758         vm_paddr_t pa;
1759
1760         KASSERT((m->flags & PG_FICTITIOUS) == 0,
1761             ("pmap_zero_page: fake page"));
1762         PMAP_STATS_INC(pmap_nzero_page);
1763         pa = VM_PAGE_TO_PHYS(m);
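             /*
              * Zero via the direct map when the page's cache color matches its
              * physical address, via a physical-address ASI when the page has
              * no valid color, and otherwise via a temporary kernel mapping of
              * the correct color.
              */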
1764         if (dcache_color_ignore != 0 || m->md.color == DCACHE_COLOR(pa)) {
1765                 PMAP_STATS_INC(pmap_nzero_page_c);
1766                 va = TLB_PHYS_TO_DIRECT(pa);
1767                 cpu_block_zero((void *)va, PAGE_SIZE);
1768         } else if (m->md.color == -1) {
1769                 PMAP_STATS_INC(pmap_nzero_page_nc);
1770                 aszero(ASI_PHYS_USE_EC, pa, PAGE_SIZE);
1771         } else {
1772                 PMAP_STATS_INC(pmap_nzero_page_oc);
1773                 PMAP_LOCK(kernel_pmap);
1774                 va = pmap_temp_map_1 + (m->md.color * PAGE_SIZE);
1775                 tp = tsb_kvtotte(va);
1776                 tp->tte_data = TD_V | TD_8K | TD_PA(pa) | TD_CP | TD_CV | TD_W;
1777                 tp->tte_vpn = TV_VPN(va, TS_8K);
1778                 cpu_block_zero((void *)va, PAGE_SIZE);
1779                 tlb_page_demap(kernel_pmap, va);
1780                 PMAP_UNLOCK(kernel_pmap);
1781         }
1782 }
1783
1784 void
1785 pmap_zero_page_area(vm_page_t m, int off, int size)
1786 {
1787         struct tte *tp;
1788         vm_offset_t va;
1789         vm_paddr_t pa;
1790
1791         KASSERT((m->flags & PG_FICTITIOUS) == 0,
1792             ("pmap_zero_page_area: fake page"));
1793         KASSERT(off + size <= PAGE_SIZE, ("pmap_zero_page_area: bad off/size"));
1794         PMAP_STATS_INC(pmap_nzero_page_area);
1795         pa = VM_PAGE_TO_PHYS(m);
1796         if (dcache_color_ignore != 0 || m->md.color == DCACHE_COLOR(pa)) {
1797                 PMAP_STATS_INC(pmap_nzero_page_area_c);
1798                 va = TLB_PHYS_TO_DIRECT(pa);
1799                 bzero((void *)(va + off), size);
1800         } else if (m->md.color == -1) {
1801                 PMAP_STATS_INC(pmap_nzero_page_area_nc);
1802                 aszero(ASI_PHYS_USE_EC, pa + off, size);
1803         } else {
1804                 PMAP_STATS_INC(pmap_nzero_page_area_oc);
1805                 PMAP_LOCK(kernel_pmap);
1806                 va = pmap_temp_map_1 + (m->md.color * PAGE_SIZE);
1807                 tp = tsb_kvtotte(va);
1808                 tp->tte_data = TD_V | TD_8K | TD_PA(pa) | TD_CP | TD_CV | TD_W;
1809                 tp->tte_vpn = TV_VPN(va, TS_8K);
1810                 bzero((void *)(va + off), size);
1811                 tlb_page_demap(kernel_pmap, va);
1812                 PMAP_UNLOCK(kernel_pmap);
1813         }
1814 }
1815
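     /*
      * Like pmap_zero_page(), but uses the dedicated pmap_idle_map window and
      * does not take the kernel pmap lock.
      */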
1816 void
1817 pmap_zero_page_idle(vm_page_t m)
1818 {
1819         struct tte *tp;
1820         vm_offset_t va;
1821         vm_paddr_t pa;
1822
1823         KASSERT((m->flags & PG_FICTITIOUS) == 0,
1824             ("pmap_zero_page_idle: fake page"));
1825         PMAP_STATS_INC(pmap_nzero_page_idle);
1826         pa = VM_PAGE_TO_PHYS(m);
1827         if (dcache_color_ignore != 0 || m->md.color == DCACHE_COLOR(pa)) {
1828                 PMAP_STATS_INC(pmap_nzero_page_idle_c);
1829                 va = TLB_PHYS_TO_DIRECT(pa);
1830                 cpu_block_zero((void *)va, PAGE_SIZE);
1831         } else if (m->md.color == -1) {
1832                 PMAP_STATS_INC(pmap_nzero_page_idle_nc);
1833                 aszero(ASI_PHYS_USE_EC, pa, PAGE_SIZE);
1834         } else {
1835                 PMAP_STATS_INC(pmap_nzero_page_idle_oc);
1836                 va = pmap_idle_map + (m->md.color * PAGE_SIZE);
1837                 tp = tsb_kvtotte(va);
1838                 tp->tte_data = TD_V | TD_8K | TD_PA(pa) | TD_CP | TD_CV | TD_W;
1839                 tp->tte_vpn = TV_VPN(va, TS_8K);
1840                 cpu_block_zero((void *)va, PAGE_SIZE);
1841                 tlb_page_demap(kernel_pmap, va);
1842         }
1843 }
1844
1845 void
1846 pmap_copy_page(vm_page_t msrc, vm_page_t mdst)
1847 {
1848         vm_offset_t vdst;
1849         vm_offset_t vsrc;
1850         vm_paddr_t pdst;
1851         vm_paddr_t psrc;
1852         struct tte *tp;
1853
1854         KASSERT((mdst->flags & PG_FICTITIOUS) == 0,
1855             ("pmap_copy_page: fake dst page"));
1856         KASSERT((msrc->flags & PG_FICTITIOUS) == 0,
1857             ("pmap_copy_page: fake src page"));
1858         PMAP_STATS_INC(pmap_ncopy_page);
1859         pdst = VM_PAGE_TO_PHYS(mdst);
1860         psrc = VM_PAGE_TO_PHYS(msrc);
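             /*
              * As in pmap_zero_page(), choose between the direct map, physical
              * ASI accesses and temporary colored mappings based on the cache
              * colors of the source and destination pages.
              */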
1861         if (dcache_color_ignore != 0 ||
1862             (msrc->md.color == DCACHE_COLOR(psrc) &&
1863             mdst->md.color == DCACHE_COLOR(pdst))) {
1864                 PMAP_STATS_INC(pmap_ncopy_page_c);
1865                 vdst = TLB_PHYS_TO_DIRECT(pdst);
1866                 vsrc = TLB_PHYS_TO_DIRECT(psrc);
1867                 cpu_block_copy((void *)vsrc, (void *)vdst, PAGE_SIZE);
1868         } else if (msrc->md.color == -1 && mdst->md.color == -1) {
1869                 PMAP_STATS_INC(pmap_ncopy_page_nc);
1870                 ascopy(ASI_PHYS_USE_EC, psrc, pdst, PAGE_SIZE);
1871         } else if (msrc->md.color == -1) {
1872                 if (mdst->md.color == DCACHE_COLOR(pdst)) {
1873                         PMAP_STATS_INC(pmap_ncopy_page_dc);
1874                         vdst = TLB_PHYS_TO_DIRECT(pdst);
1875                         ascopyfrom(ASI_PHYS_USE_EC, psrc, (void *)vdst,
1876                             PAGE_SIZE);
1877                 } else {
1878                         PMAP_STATS_INC(pmap_ncopy_page_doc);
1879                         PMAP_LOCK(kernel_pmap);
1880                         vdst = pmap_temp_map_1 + (mdst->md.color * PAGE_SIZE);
1881                         tp = tsb_kvtotte(vdst);
1882                         tp->tte_data =
1883                             TD_V | TD_8K | TD_PA(pdst) | TD_CP | TD_CV | TD_W;
1884                         tp->tte_vpn = TV_VPN(vdst, TS_8K);
1885                         ascopyfrom(ASI_PHYS_USE_EC, psrc, (void *)vdst,
1886                             PAGE_SIZE);
1887                         tlb_page_demap(kernel_pmap, vdst);
1888                         PMAP_UNLOCK(kernel_pmap);
1889                 }
1890         } else if (mdst->md.color == -1) {
1891                 if (msrc->md.color == DCACHE_COLOR(psrc)) {
1892                         PMAP_STATS_INC(pmap_ncopy_page_sc);
1893                         vsrc = TLB_PHYS_TO_DIRECT(psrc);
1894                         ascopyto((void *)vsrc, ASI_PHYS_USE_EC, pdst,
1895                             PAGE_SIZE);
1896                 } else {
1897                         PMAP_STATS_INC(pmap_ncopy_page_soc);
1898                         PMAP_LOCK(kernel_pmap);
1899                         vsrc = pmap_temp_map_1 + (msrc->md.color * PAGE_SIZE);
1900                         tp = tsb_kvtotte(vsrc);
1901                         tp->tte_data =
1902                             TD_V | TD_8K | TD_PA(psrc) | TD_CP | TD_CV | TD_W;
1903                         tp->tte_vpn = TV_VPN(vsrc, TS_8K);
1904                         ascopyto((void *)vsrc, ASI_PHYS_USE_EC, pdst,
1905                             PAGE_SIZE);
1906                         tlb_page_demap(kernel_pmap, vsrc);
1907                         PMAP_UNLOCK(kernel_pmap);
1908                 }
1909         } else {
1910                 PMAP_STATS_INC(pmap_ncopy_page_oc);
1911                 PMAP_LOCK(kernel_pmap);
1912                 vdst = pmap_temp_map_1 + (mdst->md.color * PAGE_SIZE);
1913                 tp = tsb_kvtotte(vdst);
1914                 tp->tte_data =
1915                     TD_V | TD_8K | TD_PA(pdst) | TD_CP | TD_CV | TD_W;
1916                 tp->tte_vpn = TV_VPN(vdst, TS_8K);
1917                 vsrc = pmap_temp_map_2 + (msrc->md.color * PAGE_SIZE);
1918                 tp = tsb_kvtotte(vsrc);
1919                 tp->tte_data =
1920                     TD_V | TD_8K | TD_PA(psrc) | TD_CP | TD_CV | TD_W;
1921                 tp->tte_vpn = TV_VPN(vsrc, TS_8K);
1922                 cpu_block_copy((void *)vsrc, (void *)vdst, PAGE_SIZE);
1923                 tlb_page_demap(kernel_pmap, vdst);
1924                 tlb_page_demap(kernel_pmap, vsrc);
1925                 PMAP_UNLOCK(kernel_pmap);
1926         }
1927 }
1928
1929 /*
1930  * Returns true if the pmap's pv is one of the first
1931  * 16 pvs linked to from this page.  This count may
1932  * be changed upwards or downwards in the future; it
1933  * is only necessary that true be returned for a small
1934  * subset of pmaps for proper page aging.
1935  */
1936 boolean_t
1937 pmap_page_exists_quick(pmap_t pm, vm_page_t m)
1938 {
1939         struct tte *tp;
1940         int loops;
1941         boolean_t rv;
1942
1943         KASSERT((m->oflags & VPO_UNMANAGED) == 0,
1944             ("pmap_page_exists_quick: page %p is not managed", m));
1945         loops = 0;
1946         rv = FALSE;
1947         vm_page_lock_queues();
1948         TAILQ_FOREACH(tp, &m->md.tte_list, tte_link) {
1949                 if ((tp->tte_data & TD_PV) == 0)
1950                         continue;
1951                 if (TTE_GET_PMAP(tp) == pm) {
1952                         rv = TRUE;
1953                         break;
1954                 }
1955                 if (++loops >= 16)
1956                         break;
1957         }
1958         vm_page_unlock_queues();
1959         return (rv);
1960 }
1961
1962 /*
1963  * Return the number of managed mappings to the given physical page
1964  * that are wired.
1965  */
1966 int
1967 pmap_page_wired_mappings(vm_page_t m)
1968 {
1969         struct tte *tp;
1970         int count;
1971
1972         count = 0;
1973         if ((m->oflags & VPO_UNMANAGED) != 0)
1974                 return (count);
1975         vm_page_lock_queues();
1976         TAILQ_FOREACH(tp, &m->md.tte_list, tte_link)
1977                 if ((tp->tte_data & (TD_PV | TD_WIRED)) == (TD_PV | TD_WIRED))
1978                         count++;
1979         vm_page_unlock_queues();
1980         return (count);
1981 }
1982
1983 /*
1984  * Remove all pages from the specified address space; this aids process exit
1985  * speed.  This is much faster than pmap_remove in the case of running down
1986  * an entire address space.  Only works for the current pmap.
1987  */
1988 void
1989 pmap_remove_pages(pmap_t pm)
1990 {
1991
1992 }
1993
1994 /*
1995  * Returns TRUE if the given page has a managed mapping.
1996  */
1997 boolean_t
1998 pmap_page_is_mapped(vm_page_t m)
1999 {
2000         struct tte *tp;
2001         boolean_t rv;
2002
2003         rv = FALSE;
2004         if ((m->oflags & VPO_UNMANAGED) != 0)
2005                 return (rv);
2006         vm_page_lock_queues();
2007         TAILQ_FOREACH(tp, &m->md.tte_list, tte_link)
2008                 if ((tp->tte_data & TD_PV) != 0) {
2009                         rv = TRUE;
2010                         break;
2011                 }
2012         vm_page_unlock_queues();
2013         return (rv);
2014 }
2015
2016 /*
2017  * Return a count of reference bits for a page, clearing those bits.
2018  * It is not necessary for every reference bit to be cleared, but it
2019  * is necessary that 0 only be returned when there are truly no
2020  * reference bits set.
2021  *
2022  * XXX: The exact number of bits to check and clear is a matter that
2023  * should be tested and standardized at some point in the future for
2024  * optimal aging of shared pages.
2025  */
2026 int
2027 pmap_ts_referenced(vm_page_t m)
2028 {
2029         struct tte *tpf;
2030         struct tte *tpn;
2031         struct tte *tp;
2032         u_long data;
2033         int count;
2034
2035         KASSERT((m->oflags & VPO_UNMANAGED) == 0,
2036             ("pmap_ts_referenced: page %p is not managed", m));
2037         count = 0;
2038         vm_page_lock_queues();
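             /*
              * Rotate each examined TTE to the tail of the list so that
              * successive calls start with different mappings, and stop once
              * more than four referenced mappings have been cleared.
              */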
2039         if ((tp = TAILQ_FIRST(&m->md.tte_list)) != NULL) {
2040                 tpf = tp;
2041                 do {
2042                         tpn = TAILQ_NEXT(tp, tte_link);
2043                         TAILQ_REMOVE(&m->md.tte_list, tp, tte_link);
2044                         TAILQ_INSERT_TAIL(&m->md.tte_list, tp, tte_link);
2045                         if ((tp->tte_data & TD_PV) == 0)
2046                                 continue;
2047                         data = atomic_clear_long(&tp->tte_data, TD_REF);
2048                         if ((data & TD_REF) != 0 && ++count > 4)
2049                                 break;
2050                 } while ((tp = tpn) != NULL && tp != tpf);
2051         }
2052         vm_page_unlock_queues();
2053         return (count);
2054 }
2055
2056 boolean_t
2057 pmap_is_modified(vm_page_t m)
2058 {
2059         struct tte *tp;
2060         boolean_t rv;
2061
2062         KASSERT((m->oflags & VPO_UNMANAGED) == 0,
2063             ("pmap_is_modified: page %p is not managed", m));
2064         rv = FALSE;
2065
2066         /*
2067          * If the page is not VPO_BUSY, then PGA_WRITEABLE cannot be
2068          * concurrently set while the object is locked.  Thus, if PGA_WRITEABLE
2069          * is clear, no TTEs can have TD_W set.
2070          */
2071         VM_OBJECT_LOCK_ASSERT(m->object, MA_OWNED);
2072         if ((m->oflags & VPO_BUSY) == 0 &&
2073             (m->aflags & PGA_WRITEABLE) == 0)
2074                 return (rv);
2075         vm_page_lock_queues();
2076         TAILQ_FOREACH(tp, &m->md.tte_list, tte_link) {
2077                 if ((tp->tte_data & TD_PV) == 0)
2078                         continue;
2079                 if ((tp->tte_data & TD_W) != 0) {
2080                         rv = TRUE;
2081                         break;
2082                 }
2083         }
2084         vm_page_unlock_queues();
2085         return (rv);
2086 }
2087
2088 /*
2089  *      pmap_is_prefaultable:
2090  *
2091  *      Return whether or not the specified virtual address is eligible
2092  *      for prefault.
2093  */
2094 boolean_t
2095 pmap_is_prefaultable(pmap_t pmap, vm_offset_t addr)
2096 {
2097         boolean_t rv;
2098
2099         PMAP_LOCK(pmap);
2100         rv = tsb_tte_lookup(pmap, addr) == NULL;
2101         PMAP_UNLOCK(pmap);
2102         return (rv);
2103 }
2104
2105 /*
2106  * Return whether or not the specified physical page was referenced
2107  * in any physical maps.
2108  */
2109 boolean_t
2110 pmap_is_referenced(vm_page_t m)
2111 {
2112         struct tte *tp;
2113         boolean_t rv;
2114
2115         KASSERT((m->oflags & VPO_UNMANAGED) == 0,
2116             ("pmap_is_referenced: page %p is not managed", m));
2117         rv = FALSE;
2118         vm_page_lock_queues();
2119         TAILQ_FOREACH(tp, &m->md.tte_list, tte_link) {
2120                 if ((tp->tte_data & TD_PV) == 0)
2121                         continue;
2122                 if ((tp->tte_data & TD_REF) != 0) {
2123                         rv = TRUE;
2124                         break;
2125                 }
2126         }
2127         vm_page_unlock_queues();
2128         return (rv);
2129 }
2130
2131 void
2132 pmap_clear_modify(vm_page_t m)
2133 {
2134         struct tte *tp;
2135         u_long data;
2136
2137         KASSERT((m->oflags & VPO_UNMANAGED) == 0,
2138             ("pmap_clear_modify: page %p is not managed", m));
2139         VM_OBJECT_LOCK_ASSERT(m->object, MA_OWNED);
2140         KASSERT((m->oflags & VPO_BUSY) == 0,
2141             ("pmap_clear_modify: page %p is busy", m));
2142
2143         /*
2144          * If the page is not PGA_WRITEABLE, then no TTEs can have TD_W set.
2145          * If the object containing the page is locked and the page is not
2146          * VPO_BUSY, then PGA_WRITEABLE cannot be concurrently set.
2147          */
2148         if ((m->aflags & PGA_WRITEABLE) == 0)
2149                 return;
2150         vm_page_lock_queues();
2151         TAILQ_FOREACH(tp, &m->md.tte_list, tte_link) {
2152                 if ((tp->tte_data & TD_PV) == 0)
2153                         continue;
2154                 data = atomic_clear_long(&tp->tte_data, TD_W);
2155                 if ((data & TD_W) != 0)
2156                         tlb_page_demap(TTE_GET_PMAP(tp), TTE_GET_VA(tp));
2157         }
2158         vm_page_unlock_queues();
2159 }
2160
2161 void
2162 pmap_clear_reference(vm_page_t m)
2163 {
2164         struct tte *tp;
2165         u_long data;
2166
2167         KASSERT((m->oflags & VPO_UNMANAGED) == 0,
2168             ("pmap_clear_reference: page %p is not managed", m));
2169         vm_page_lock_queues();
2170         TAILQ_FOREACH(tp, &m->md.tte_list, tte_link) {
2171                 if ((tp->tte_data & TD_PV) == 0)
2172                         continue;
2173                 data = atomic_clear_long(&tp->tte_data, TD_REF);
2174                 if ((data & TD_REF) != 0)
2175                         tlb_page_demap(TTE_GET_PMAP(tp), TTE_GET_VA(tp));
2176         }
2177         vm_page_unlock_queues();
2178 }
2179
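     /*
      * Revoke write access from all managed mappings of the given page,
      * transferring any modified state to the page itself.
      */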
2180 void
2181 pmap_remove_write(vm_page_t m)
2182 {
2183         struct tte *tp;
2184         u_long data;
2185
2186         KASSERT((m->oflags & VPO_UNMANAGED) == 0,
2187             ("pmap_remove_write: page %p is not managed", m));
2188
2189         /*
2190          * If the page is not VPO_BUSY, then PGA_WRITEABLE cannot be set by
2191          * another thread while the object is locked.  Thus, if PGA_WRITEABLE
2192          * is clear, no page table entries need updating.
2193          */
2194         VM_OBJECT_LOCK_ASSERT(m->object, MA_OWNED);
2195         if ((m->oflags & VPO_BUSY) == 0 &&
2196             (m->aflags & PGA_WRITEABLE) == 0)
2197                 return;
2198         vm_page_lock_queues();
2199         TAILQ_FOREACH(tp, &m->md.tte_list, tte_link) {
2200                 if ((tp->tte_data & TD_PV) == 0)
2201                         continue;
2202                 data = atomic_clear_long(&tp->tte_data, TD_SW | TD_W);
2203                 if ((data & TD_W) != 0) {
2204                         vm_page_dirty(m);
2205                         tlb_page_demap(TTE_GET_PMAP(tp), TTE_GET_VA(tp));
2206                 }
2207         }
2208         vm_page_aflag_clear(m, PGA_WRITEABLE);
2209         vm_page_unlock_queues();
2210 }
2211
2212 int
2213 pmap_mincore(pmap_t pm, vm_offset_t addr, vm_paddr_t *locked_pa)
2214 {
2215
2216         /* TODO */
2217         return (0);
2218 }
2219
2220 /*
2221  * Activate a user pmap.  The pmap must be activated before its address space
2222  * can be accessed in any way.
2223  */
2224 void
2225 pmap_activate(struct thread *td)
2226 {
2227         struct vmspace *vm;
2228         struct pmap *pm;
2229         int context;
2230
2231         critical_enter();
2232         vm = td->td_proc->p_vmspace;
2233         pm = vmspace_pmap(vm);
2234
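             /*
              * Allocate the next TLB context number on this CPU, flushing all
              * user TLB entries and wrapping around once the available range
              * of context numbers has been exhausted.
              */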
2235         context = PCPU_GET(tlb_ctx);
2236         if (context == PCPU_GET(tlb_ctx_max)) {
2237                 tlb_flush_user();
2238                 context = PCPU_GET(tlb_ctx_min);
2239         }
2240         PCPU_SET(tlb_ctx, context + 1);
2241
2242         mtx_lock_spin(&sched_lock);
2243         pm->pm_context[curcpu] = context;
2244         CPU_SET(PCPU_GET(cpuid), &pm->pm_active);
2245         PCPU_SET(pmap, pm);
2246         mtx_unlock_spin(&sched_lock);
2247
2248         stxa(AA_DMMU_TSB, ASI_DMMU, pm->pm_tsb);
2249         stxa(AA_IMMU_TSB, ASI_IMMU, pm->pm_tsb);
2250         stxa(AA_DMMU_PCXR, ASI_DMMU, (ldxa(AA_DMMU_PCXR, ASI_DMMU) &
2251             TLB_CXR_PGSZ_MASK) | context);
2252         flush(KERNBASE);
2253         critical_exit();
2254 }
2255
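     /*
      * Nothing is needed here; pages mapped executable are already flushed
      * from the instruction cache when the mapping is entered (see
      * pmap_enter_locked()).
      */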
2256 void
2257 pmap_sync_icache(pmap_t pm, vm_offset_t va, vm_size_t sz)
2258 {
2259
2260 }
2261
2262 /*
2263  * Increase the starting virtual address of the given mapping if a
2264  * different alignment might result in more superpage mappings.
2265  */
2266 void
2267 pmap_align_superpage(vm_object_t object, vm_ooffset_t offset,
2268     vm_offset_t *addr, vm_size_t size)
2269 {
2270
2271 }