1 /*-
2  * Copyright (c) 1991 Regents of the University of California.
3  * All rights reserved.
4  * Copyright (c) 1994 John S. Dyson
5  * All rights reserved.
6  * Copyright (c) 1994 David Greenman
7  * All rights reserved.
8  *
9  * This code is derived from software contributed to Berkeley by
10  * the Systems Programming Group of the University of Utah Computer
11  * Science Department and William Jolitz of UUNET Technologies Inc.
12  *
13  * Redistribution and use in source and binary forms, with or without
14  * modification, are permitted provided that the following conditions
15  * are met:
16  * 1. Redistributions of source code must retain the above copyright
17  *    notice, this list of conditions and the following disclaimer.
18  * 2. Redistributions in binary form must reproduce the above copyright
19  *    notice, this list of conditions and the following disclaimer in the
20  *    documentation and/or other materials provided with the distribution.
21  * 4. Neither the name of the University nor the names of its contributors
22  *    may be used to endorse or promote products derived from this software
23  *    without specific prior written permission.
24  *
25  * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
26  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
27  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
28  * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
29  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
30  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
31  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
32  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
33  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
34  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
35  * SUCH DAMAGE.
36  *
37  *      from:   @(#)pmap.c      7.7 (Berkeley)  5/12/91
38  */
39
40 #include <sys/cdefs.h>
41 __FBSDID("$FreeBSD$");
42
43 /*
44  * Manages physical address maps.
45  *
46  * In addition to hardware address maps, this module is called upon to
47  * provide software-use-only maps which may or may not be stored in the
48  * same form as hardware maps.  These pseudo-maps are used to store
49  * intermediate results from copy operations to and from address spaces.
50  *
51  * Since the information managed by this module is also stored by the
52  * logical address mapping module, this module may throw away valid virtual
53  * to physical mappings at almost any time.  However, invalidations of
54  * mappings must be done as requested.
55  *
56  * In order to cope with hardware architectures which make virtual to
57  * physical map invalidates expensive, this module may delay invalidate
58  * or reduced protection operations until such time as they are actually
59  * necessary.  This module is given full information as to which processors
60  * are currently using which maps, and to when physical maps must be made
61  * correct.
62  */
63
64 #include "opt_kstack_pages.h"
65 #include "opt_pmap.h"
66
67 #include <sys/param.h>
68 #include <sys/kernel.h>
69 #include <sys/ktr.h>
70 #include <sys/lock.h>
71 #include <sys/msgbuf.h>
72 #include <sys/mutex.h>
73 #include <sys/proc.h>
74 #include <sys/smp.h>
75 #include <sys/sysctl.h>
76 #include <sys/systm.h>
77 #include <sys/vmmeter.h>
78
79 #include <dev/ofw/openfirm.h>
80
81 #include <vm/vm.h>
82 #include <vm/vm_param.h>
83 #include <vm/vm_kern.h>
84 #include <vm/vm_page.h>
85 #include <vm/vm_map.h>
86 #include <vm/vm_object.h>
87 #include <vm/vm_extern.h>
88 #include <vm/vm_pageout.h>
89 #include <vm/vm_pager.h>
90
91 #include <machine/cache.h>
92 #include <machine/frame.h>
93 #include <machine/instr.h>
94 #include <machine/md_var.h>
95 #include <machine/metadata.h>
96 #include <machine/ofw_mem.h>
97 #include <machine/smp.h>
98 #include <machine/tlb.h>
99 #include <machine/tte.h>
100 #include <machine/tsb.h>
101 #include <machine/ver.h>
102
103 /* XXX */
104 #include "opt_sched.h"
105 #ifndef SCHED_4BSD
106 #error "sparc64 only works with SCHED_4BSD which uses a global scheduler lock."
107 #endif
108 extern struct mtx sched_lock;
109
110 /*
111  * Virtual address of message buffer
112  */
113 struct msgbuf *msgbufp;
114
115 /*
116  * Map of physical memory regions
117  */
118 vm_paddr_t phys_avail[128];
119 static struct ofw_mem_region mra[128];
120 struct ofw_mem_region sparc64_memreg[128];
121 int sparc64_nmemreg;
122 static struct ofw_map translations[128];
123 static int translations_size;
124
125 static vm_offset_t pmap_idle_map;
126 static vm_offset_t pmap_temp_map_1;
127 static vm_offset_t pmap_temp_map_2;
128
129 /*
130  * First and last available kernel virtual addresses
131  */
132 vm_offset_t virtual_avail;
133 vm_offset_t virtual_end;
134 vm_offset_t kernel_vm_end;
135
136 vm_offset_t vm_max_kernel_address;
137
138 /*
139  * Kernel pmap
140  */
141 struct pmap kernel_pmap_store;
142
143 /*
144  * Allocate physical memory for use in pmap_bootstrap.
145  */
146 static vm_paddr_t pmap_bootstrap_alloc(vm_size_t size, uint32_t colors);
147
148 static void pmap_bootstrap_set_tte(struct tte *tp, u_long vpn, u_long data);
149 static void pmap_cache_remove(vm_page_t m, vm_offset_t va);
150 static int pmap_protect_tte(struct pmap *pm1, struct pmap *pm2,
151     struct tte *tp, vm_offset_t va);
152
153 /*
154  * Map the given physical page at the specified virtual address in the
155  * target pmap with the protection requested.  If specified the page
156  * will be wired down.
157  *
158  * The page queues and pmap must be locked.
159  */
160 static void pmap_enter_locked(pmap_t pm, vm_offset_t va, vm_page_t m,
161     vm_prot_t prot, boolean_t wired);
162
163 extern int tl1_dmmu_miss_direct_patch_tsb_phys_1[];
164 extern int tl1_dmmu_miss_direct_patch_tsb_phys_end_1[];
165 extern int tl1_dmmu_miss_patch_asi_1[];
166 extern int tl1_dmmu_miss_patch_quad_ldd_1[];
167 extern int tl1_dmmu_miss_patch_tsb_1[];
168 extern int tl1_dmmu_miss_patch_tsb_2[];
169 extern int tl1_dmmu_miss_patch_tsb_mask_1[];
170 extern int tl1_dmmu_miss_patch_tsb_mask_2[];
171 extern int tl1_dmmu_prot_patch_asi_1[];
172 extern int tl1_dmmu_prot_patch_quad_ldd_1[];
173 extern int tl1_dmmu_prot_patch_tsb_1[];
174 extern int tl1_dmmu_prot_patch_tsb_2[];
175 extern int tl1_dmmu_prot_patch_tsb_mask_1[];
176 extern int tl1_dmmu_prot_patch_tsb_mask_2[];
177 extern int tl1_immu_miss_patch_asi_1[];
178 extern int tl1_immu_miss_patch_quad_ldd_1[];
179 extern int tl1_immu_miss_patch_tsb_1[];
180 extern int tl1_immu_miss_patch_tsb_2[];
181 extern int tl1_immu_miss_patch_tsb_mask_1[];
182 extern int tl1_immu_miss_patch_tsb_mask_2[];
183
184 /*
185  * If user pmap is processed with pmap_remove and the resident count
186  * drops to 0, there are no more pages to remove, so we need not
187  * continue.
188  */
189 #define PMAP_REMOVE_DONE(pm) \
190         ((pm) != kernel_pmap && (pm)->pm_stats.resident_count == 0)
191
192 /*
193  * The threshold (in bytes) above which tsb_foreach() is used in pmap_remove()
194  * and pmap_protect() instead of trying each virtual address.
195  */
196 #define PMAP_TSB_THRESH ((TSB_SIZE / 2) * PAGE_SIZE)
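/*
 * (TSB_SIZE / 2) * PAGE_SIZE is the span of half as many base pages as the
 * TSB has TTE entries, i.e. roughly the point where scanning the whole TSB
 * once is expected to be cheaper than probing it once per page, as done in
 * pmap_remove():
 *
 *	if (end - start > PMAP_TSB_THRESH)
 *		tsb_foreach(pm, NULL, start, end, pmap_remove_tte);
 */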
197
198 SYSCTL_NODE(_debug, OID_AUTO, pmap_stats, CTLFLAG_RD, 0, "");
199
200 PMAP_STATS_VAR(pmap_nenter);
201 PMAP_STATS_VAR(pmap_nenter_update);
202 PMAP_STATS_VAR(pmap_nenter_replace);
203 PMAP_STATS_VAR(pmap_nenter_new);
204 PMAP_STATS_VAR(pmap_nkenter);
205 PMAP_STATS_VAR(pmap_nkenter_oc);
206 PMAP_STATS_VAR(pmap_nkenter_stupid);
207 PMAP_STATS_VAR(pmap_nkremove);
208 PMAP_STATS_VAR(pmap_nqenter);
209 PMAP_STATS_VAR(pmap_nqremove);
210 PMAP_STATS_VAR(pmap_ncache_enter);
211 PMAP_STATS_VAR(pmap_ncache_enter_c);
212 PMAP_STATS_VAR(pmap_ncache_enter_oc);
213 PMAP_STATS_VAR(pmap_ncache_enter_cc);
214 PMAP_STATS_VAR(pmap_ncache_enter_coc);
215 PMAP_STATS_VAR(pmap_ncache_enter_nc);
216 PMAP_STATS_VAR(pmap_ncache_enter_cnc);
217 PMAP_STATS_VAR(pmap_ncache_remove);
218 PMAP_STATS_VAR(pmap_ncache_remove_c);
219 PMAP_STATS_VAR(pmap_ncache_remove_oc);
220 PMAP_STATS_VAR(pmap_ncache_remove_cc);
221 PMAP_STATS_VAR(pmap_ncache_remove_coc);
222 PMAP_STATS_VAR(pmap_ncache_remove_nc);
223 PMAP_STATS_VAR(pmap_nzero_page);
224 PMAP_STATS_VAR(pmap_nzero_page_c);
225 PMAP_STATS_VAR(pmap_nzero_page_oc);
226 PMAP_STATS_VAR(pmap_nzero_page_nc);
227 PMAP_STATS_VAR(pmap_nzero_page_area);
228 PMAP_STATS_VAR(pmap_nzero_page_area_c);
229 PMAP_STATS_VAR(pmap_nzero_page_area_oc);
230 PMAP_STATS_VAR(pmap_nzero_page_area_nc);
231 PMAP_STATS_VAR(pmap_nzero_page_idle);
232 PMAP_STATS_VAR(pmap_nzero_page_idle_c);
233 PMAP_STATS_VAR(pmap_nzero_page_idle_oc);
234 PMAP_STATS_VAR(pmap_nzero_page_idle_nc);
235 PMAP_STATS_VAR(pmap_ncopy_page);
236 PMAP_STATS_VAR(pmap_ncopy_page_c);
237 PMAP_STATS_VAR(pmap_ncopy_page_oc);
238 PMAP_STATS_VAR(pmap_ncopy_page_nc);
239 PMAP_STATS_VAR(pmap_ncopy_page_dc);
240 PMAP_STATS_VAR(pmap_ncopy_page_doc);
241 PMAP_STATS_VAR(pmap_ncopy_page_sc);
242 PMAP_STATS_VAR(pmap_ncopy_page_soc);
243
244 PMAP_STATS_VAR(pmap_nnew_thread);
245 PMAP_STATS_VAR(pmap_nnew_thread_oc);
246
247 static inline u_long dtlb_get_data(u_int tlb, u_int slot);
248
249 /*
250  * Quick sort callout for comparing memory regions
251  */
252 static int mr_cmp(const void *a, const void *b);
253 static int om_cmp(const void *a, const void *b);
254
255 static int
256 mr_cmp(const void *a, const void *b)
257 {
258         const struct ofw_mem_region *mra;
259         const struct ofw_mem_region *mrb;
260
261         mra = a;
262         mrb = b;
263         if (mra->mr_start < mrb->mr_start)
264                 return (-1);
265         else if (mra->mr_start > mrb->mr_start)
266                 return (1);
267         else
268                 return (0);
269 }
270
271 static int
272 om_cmp(const void *a, const void *b)
273 {
274         const struct ofw_map *oma;
275         const struct ofw_map *omb;
276
277         oma = a;
278         omb = b;
279         if (oma->om_start < omb->om_start)
280                 return (-1);
281         else if (oma->om_start > omb->om_start)
282                 return (1);
283         else
284                 return (0);
285 }
286
287 static inline u_long
288 dtlb_get_data(u_int tlb, u_int slot)
289 {
290         u_long data;
291         register_t s;
292
293         slot = TLB_DAR_SLOT(tlb, slot);
294         /*
295          * We read ASI_DTLB_DATA_ACCESS_REG twice back-to-back in order to
296          * work around errata of USIII and beyond.
297          */
298         s = intr_disable();
299         (void)ldxa(slot, ASI_DTLB_DATA_ACCESS_REG);
300         data = ldxa(slot, ASI_DTLB_DATA_ACCESS_REG);
301         intr_restore(s);
302         return (data);
303 }
304
305 /*
306  * Bootstrap the system enough to run with virtual memory.
307  */
308 void
309 pmap_bootstrap(u_int cpu_impl)
310 {
311         struct pmap *pm;
312         struct tte *tp;
313         vm_offset_t off;
314         vm_offset_t va;
315         vm_paddr_t pa;
316         vm_size_t physsz;
317         vm_size_t virtsz;
318         u_long data;
319         u_long vpn;
320         phandle_t pmem;
321         phandle_t vmem;
322         u_int dtlb_slots_avail;
323         int i;
324         int j;
325         int sz;
326         uint32_t asi;
327         uint32_t colors;
328         uint32_t ldd;
329
330         /*
331          * Set the kernel context.
332          */
333         pmap_set_kctx();
334
335         colors = dcache_color_ignore != 0 ? 1 : DCACHE_COLORS;
336
337         /*
338          * Find out what physical memory is available from the PROM and
339          * initialize the phys_avail array.  This must be done before
340          * pmap_bootstrap_alloc is called.
341          */
342         if ((pmem = OF_finddevice("/memory")) == -1)
343                 OF_panic("%s: finddevice /memory", __func__);
344         if ((sz = OF_getproplen(pmem, "available")) == -1)
345                 OF_panic("%s: getproplen /memory/available", __func__);
346         if (sizeof(phys_avail) < sz)
347                 OF_panic("%s: phys_avail too small", __func__);
348         if (sizeof(mra) < sz)
349                 OF_panic("%s: mra too small", __func__);
350         bzero(mra, sz);
351         if (OF_getprop(pmem, "available", mra, sz) == -1)
352                 OF_panic("%s: getprop /memory/available", __func__);
353         sz /= sizeof(*mra);
354         CTR0(KTR_PMAP, "pmap_bootstrap: physical memory");
355         qsort(mra, sz, sizeof (*mra), mr_cmp);
356         physsz = 0;
357         getenv_quad("hw.physmem", &physmem);
358         physmem = btoc(physmem);
359         for (i = 0, j = 0; i < sz; i++, j += 2) {
360                 CTR2(KTR_PMAP, "start=%#lx size=%#lx", mra[i].mr_start,
361                     mra[i].mr_size);
362                 if (physmem != 0 && btoc(physsz + mra[i].mr_size) >= physmem) {
363                         if (btoc(physsz) < physmem) {
364                                 phys_avail[j] = mra[i].mr_start;
365                                 phys_avail[j + 1] = mra[i].mr_start +
366                                     (ctob(physmem) - physsz);
367                                 physsz = ctob(physmem);
368                         }
369                         break;
370                 }
371                 phys_avail[j] = mra[i].mr_start;
372                 phys_avail[j + 1] = mra[i].mr_start + mra[i].mr_size;
373                 physsz += mra[i].mr_size;
374         }
375         physmem = btoc(physsz);
376
377         /*
378          * Calculate the size of kernel virtual memory, and the size and mask
379  * for the kernel TSB based on the physical memory size but limited
380          * by the amount of dTLB slots available for locked entries if we have
381          * to lock the TSB in the TLB (given that for spitfire-class CPUs all
382          * of the dt64 slots can hold locked entries but there is no large
383          * dTLB for unlocked ones, we don't use more than half of it for the
384          * TSB).
385          * Note that for reasons unknown OpenSolaris doesn't take advantage of
386          * ASI_ATOMIC_QUAD_LDD_PHYS on UltraSPARC-III.  However, given that no
387          * public documentation is available for these, the latter just might
388          * not support it, yet.
389          */
390         if (cpu_impl == CPU_IMPL_SPARC64V ||
391             cpu_impl >= CPU_IMPL_ULTRASPARCIIIp) {
392                 tsb_kernel_ldd_phys = 1;
393                 virtsz = roundup(physsz * 5 / 3, PAGE_SIZE_4M <<
394                     (PAGE_SHIFT - TTE_SHIFT));
395         } else {
396                 dtlb_slots_avail = 0;
397                 for (i = 0; i < dtlb_slots; i++) {
398                         data = dtlb_get_data(cpu_impl ==
399                             CPU_IMPL_ULTRASPARCIII ? TLB_DAR_T16 :
400                             TLB_DAR_T32, i);
401                         if ((data & (TD_V | TD_L)) != (TD_V | TD_L))
402                                 dtlb_slots_avail++;
403                 }
404 #ifdef SMP
405                 dtlb_slots_avail -= PCPU_PAGES;
406 #endif
407                 if (cpu_impl >= CPU_IMPL_ULTRASPARCI &&
408                     cpu_impl < CPU_IMPL_ULTRASPARCIII)
409                         dtlb_slots_avail /= 2;
410                 virtsz = roundup(physsz, PAGE_SIZE_4M <<
411                     (PAGE_SHIFT - TTE_SHIFT));
412                 virtsz = MIN(virtsz, (dtlb_slots_avail * PAGE_SIZE_4M) <<
413                     (PAGE_SHIFT - TTE_SHIFT));
414         }
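        /*
         * The kernel TSB needs one TTE ((1 << TTE_SHIFT) bytes) per base
         * page of KVA, hence virtsz >> (PAGE_SHIFT - TTE_SHIFT) bytes in
         * total; the mask is the entry count minus one and is patched into
         * the miss handlers below to index the TSB.
         */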
415         vm_max_kernel_address = VM_MIN_KERNEL_ADDRESS + virtsz;
416         tsb_kernel_size = virtsz >> (PAGE_SHIFT - TTE_SHIFT);
417         tsb_kernel_mask = (tsb_kernel_size >> TTE_SHIFT) - 1;
418
419         /*
420          * Allocate the kernel TSB and lock it in the TLB if necessary.
421          */
422         pa = pmap_bootstrap_alloc(tsb_kernel_size, colors);
423         if (pa & PAGE_MASK_4M)
424                 OF_panic("%s: TSB unaligned", __func__);
425         tsb_kernel_phys = pa;
426         if (tsb_kernel_ldd_phys == 0) {
427                 tsb_kernel =
428                     (struct tte *)(VM_MIN_KERNEL_ADDRESS - tsb_kernel_size);
429                 pmap_map_tsb();
430                 bzero(tsb_kernel, tsb_kernel_size);
431         } else {
432                 tsb_kernel =
433                     (struct tte *)TLB_PHYS_TO_DIRECT(tsb_kernel_phys);
434                 aszero(ASI_PHYS_USE_EC, tsb_kernel_phys, tsb_kernel_size);
435         }
436
437         /*
438          * Allocate and map the dynamic per-CPU area for the BSP.
439          */
440         pa = pmap_bootstrap_alloc(DPCPU_SIZE, colors);
441         dpcpu0 = (void *)TLB_PHYS_TO_DIRECT(pa);
442
443         /*
444          * Allocate and map the message buffer.
445          */
446         pa = pmap_bootstrap_alloc(msgbufsize, colors);
447         msgbufp = (struct msgbuf *)TLB_PHYS_TO_DIRECT(pa);
448
449         /*
450          * Patch the TSB addresses and mask as well as the ASIs used to load
451          * it into the trap table.
452          */
453
454 #define LDDA_R_I_R(rd, imm_asi, rs1, rs2)                               \
455         (EIF_OP(IOP_LDST) | EIF_F3_RD(rd) | EIF_F3_OP3(INS3_LDDA) |     \
456             EIF_F3_RS1(rs1) | EIF_F3_I(0) | EIF_F3_IMM_ASI(imm_asi) |   \
457             EIF_F3_RS2(rs2))
458 #define OR_R_I_R(rd, imm13, rs1)                                        \
459         (EIF_OP(IOP_MISC) | EIF_F3_RD(rd) | EIF_F3_OP3(INS2_OR) |       \
460             EIF_F3_RS1(rs1) | EIF_F3_I(1) | EIF_IMM(imm13, 13))
461 #define SETHI(rd, imm22)                                                \
462         (EIF_OP(IOP_FORM2) | EIF_F2_RD(rd) | EIF_F2_OP2(INS0_SETHI) |   \
463             EIF_IMM((imm22) >> 10, 22))
464 #define WR_R_I(rd, imm13, rs1)                                          \
465         (EIF_OP(IOP_MISC) | EIF_F3_RD(rd) | EIF_F3_OP3(INS2_WR) |       \
466             EIF_F3_RS1(rs1) | EIF_F3_I(1) | EIF_IMM(imm13, 13))
467
468 #define PATCH_ASI(addr, asi) do {                                       \
469         if (addr[0] != WR_R_I(IF_F3_RD(addr[0]), 0x0,                   \
470             IF_F3_RS1(addr[0])))                                        \
471                 OF_panic("%s: patched instructions have changed",       \
472                     __func__);                                          \
473         addr[0] |= EIF_IMM((asi), 13);                                  \
474         flush(addr);                                                    \
475 } while (0)
476
477 #define PATCH_LDD(addr, asi) do {                                       \
478         if (addr[0] != LDDA_R_I_R(IF_F3_RD(addr[0]), 0x0,               \
479             IF_F3_RS1(addr[0]), IF_F3_RS2(addr[0])))                    \
480                 OF_panic("%s: patched instructions have changed",       \
481                     __func__);                                          \
482         addr[0] |= EIF_F3_IMM_ASI(asi);                                 \
483         flush(addr);                                                    \
484 } while (0)
485
486 #define PATCH_TSB(addr, val) do {                                       \
487         if (addr[0] != SETHI(IF_F2_RD(addr[0]), 0x0) ||                 \
488             addr[1] != OR_R_I_R(IF_F3_RD(addr[1]), 0x0,                 \
489             IF_F3_RS1(addr[1])) ||                                      \
490             addr[3] != SETHI(IF_F2_RD(addr[3]), 0x0))                   \
491                 OF_panic("%s: patched instructions have changed",       \
492                     __func__);                                          \
493         addr[0] |= EIF_IMM((val) >> 42, 22);                            \
494         addr[1] |= EIF_IMM((val) >> 32, 10);                            \
495         addr[3] |= EIF_IMM((val) >> 10, 22);                            \
496         flush(addr);                                                    \
497         flush(addr + 1);                                                \
498         flush(addr + 3);                                                \
499 } while (0)
500
501 #define PATCH_TSB_MASK(addr, val) do {                                  \
502         if (addr[0] != SETHI(IF_F2_RD(addr[0]), 0x0) ||                 \
503             addr[1] != OR_R_I_R(IF_F3_RD(addr[1]), 0x0,                 \
504             IF_F3_RS1(addr[1])))                                        \
505                 OF_panic("%s: patched instructions have changed",       \
506                     __func__);                                          \
507         addr[0] |= EIF_IMM((val) >> 10, 22);                            \
508         addr[1] |= EIF_IMM((val), 10);                                  \
509         flush(addr);                                                    \
510         flush(addr + 1);                                                \
511 } while (0)
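        /*
         * The tl1_*_patch_* locations are assembled with zero immediates.
         * Each macro above first checks that the instruction still matches
         * its template (OF_panic() otherwise, in case the trap table code
         * changed), then ORs the real ASI, address or mask bits into the
         * immediate field and flush()es the patched word from the
         * instruction cache.  PATCH_TSB() spreads a page-aligned 64-bit
         * value over two sethi and one or immediate (bits 63-42, 41-32 and
         * 31-10); addr[2] is skipped, presumably being the shift that joins
         * the halves and thus needing no patching.
         */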
512
513         if (tsb_kernel_ldd_phys == 0) {
514                 asi = ASI_N;
515                 ldd = ASI_NUCLEUS_QUAD_LDD;
516                 off = (vm_offset_t)tsb_kernel;
517         } else {
518                 asi = ASI_PHYS_USE_EC;
519                 ldd = ASI_ATOMIC_QUAD_LDD_PHYS;
520                 off = (vm_offset_t)tsb_kernel_phys;
521         }
522         PATCH_TSB(tl1_dmmu_miss_direct_patch_tsb_phys_1, tsb_kernel_phys);
523         PATCH_TSB(tl1_dmmu_miss_direct_patch_tsb_phys_end_1,
524             tsb_kernel_phys + tsb_kernel_size - 1);
525         PATCH_ASI(tl1_dmmu_miss_patch_asi_1, asi);
526         PATCH_LDD(tl1_dmmu_miss_patch_quad_ldd_1, ldd);
527         PATCH_TSB(tl1_dmmu_miss_patch_tsb_1, off);
528         PATCH_TSB(tl1_dmmu_miss_patch_tsb_2, off);
529         PATCH_TSB_MASK(tl1_dmmu_miss_patch_tsb_mask_1, tsb_kernel_mask);
530         PATCH_TSB_MASK(tl1_dmmu_miss_patch_tsb_mask_2, tsb_kernel_mask);
531         PATCH_ASI(tl1_dmmu_prot_patch_asi_1, asi);
532         PATCH_LDD(tl1_dmmu_prot_patch_quad_ldd_1, ldd);
533         PATCH_TSB(tl1_dmmu_prot_patch_tsb_1, off);
534         PATCH_TSB(tl1_dmmu_prot_patch_tsb_2, off);
535         PATCH_TSB_MASK(tl1_dmmu_prot_patch_tsb_mask_1, tsb_kernel_mask);
536         PATCH_TSB_MASK(tl1_dmmu_prot_patch_tsb_mask_2, tsb_kernel_mask);
537         PATCH_ASI(tl1_immu_miss_patch_asi_1, asi);
538         PATCH_LDD(tl1_immu_miss_patch_quad_ldd_1, ldd);
539         PATCH_TSB(tl1_immu_miss_patch_tsb_1, off);
540         PATCH_TSB(tl1_immu_miss_patch_tsb_2, off);
541         PATCH_TSB_MASK(tl1_immu_miss_patch_tsb_mask_1, tsb_kernel_mask);
542         PATCH_TSB_MASK(tl1_immu_miss_patch_tsb_mask_2, tsb_kernel_mask);
543
544         /*
545          * Enter fake 8k pages for the 4MB kernel pages, so that
546          * pmap_kextract() will work for them.
547          */
548         for (i = 0; i < kernel_tlb_slots; i++) {
549                 pa = kernel_tlbs[i].te_pa;
550                 va = kernel_tlbs[i].te_va;
551                 for (off = 0; off < PAGE_SIZE_4M; off += PAGE_SIZE) {
552                         tp = tsb_kvtotte(va + off);
553                         vpn = TV_VPN(va + off, TS_8K);
554                         data = TD_V | TD_8K | TD_PA(pa + off) | TD_REF |
555                             TD_SW | TD_CP | TD_CV | TD_P | TD_W;
556                         pmap_bootstrap_set_tte(tp, vpn, data);
557                 }
558         }
559
560         /*
561          * Set the start and end of KVA.  The kernel is loaded starting
562          * at the first available 4MB super page, so we advance to the
563          * end of the last one used for it.
564          */
565         virtual_avail = KERNBASE + kernel_tlb_slots * PAGE_SIZE_4M;
566         virtual_end = vm_max_kernel_address;
567         kernel_vm_end = vm_max_kernel_address;
568
569         /*
570          * Allocate kva space for temporary mappings.
571          */
572         pmap_idle_map = virtual_avail;
573         virtual_avail += PAGE_SIZE * colors;
574         pmap_temp_map_1 = virtual_avail;
575         virtual_avail += PAGE_SIZE * colors;
576         pmap_temp_map_2 = virtual_avail;
577         virtual_avail += PAGE_SIZE * colors;
578
579         /*
580          * Allocate a kernel stack with guard page for thread0 and map it
581          * into the kernel TSB.  We must ensure that the virtual address is
582          * colored properly for corresponding CPUs, since we're allocating
583          * from phys_avail so the memory won't have an associated vm_page_t.
584          */
585         pa = pmap_bootstrap_alloc(KSTACK_PAGES * PAGE_SIZE, colors);
586         kstack0_phys = pa;
587         virtual_avail += roundup(KSTACK_GUARD_PAGES, colors) * PAGE_SIZE;
588         kstack0 = virtual_avail;
589         virtual_avail += roundup(KSTACK_PAGES, colors) * PAGE_SIZE;
590         if (dcache_color_ignore == 0)
591                 KASSERT(DCACHE_COLOR(kstack0) == DCACHE_COLOR(kstack0_phys),
592                     ("pmap_bootstrap: kstack0 miscolored"));
593         for (i = 0; i < KSTACK_PAGES; i++) {
594                 pa = kstack0_phys + i * PAGE_SIZE;
595                 va = kstack0 + i * PAGE_SIZE;
596                 tp = tsb_kvtotte(va);
597                 vpn = TV_VPN(va, TS_8K);
598                 data = TD_V | TD_8K | TD_PA(pa) | TD_REF | TD_SW | TD_CP |
599                     TD_CV | TD_P | TD_W;
600                 pmap_bootstrap_set_tte(tp, vpn, data);
601         }
602
603         /*
604          * Calculate the last available physical address.
605          */
606         for (i = 0; phys_avail[i + 2] != 0; i += 2)
607                 ;
608         Maxmem = sparc64_btop(phys_avail[i + 1]);
609
610         /*
611          * Add the PROM mappings to the kernel TSB.
612          */
613         if ((vmem = OF_finddevice("/virtual-memory")) == -1)
614                 OF_panic("%s: finddevice /virtual-memory", __func__);
615         if ((sz = OF_getproplen(vmem, "translations")) == -1)
616                 OF_panic("%s: getproplen translations", __func__);
617         if (sizeof(translations) < sz)
618                 OF_panic("%s: translations too small", __func__);
619         bzero(translations, sz);
620         if (OF_getprop(vmem, "translations", translations, sz) == -1)
621                 OF_panic("%s: getprop /virtual-memory/translations",
622                     __func__);
623         sz /= sizeof(*translations);
624         translations_size = sz;
625         CTR0(KTR_PMAP, "pmap_bootstrap: translations");
626         qsort(translations, sz, sizeof (*translations), om_cmp);
627         for (i = 0; i < sz; i++) {
628                 CTR3(KTR_PMAP,
629                     "translation: start=%#lx size=%#lx tte=%#lx",
630                     translations[i].om_start, translations[i].om_size,
631                     translations[i].om_tte);
632                 if ((translations[i].om_tte & TD_V) == 0)
633                         continue;
634                 if (translations[i].om_start < VM_MIN_PROM_ADDRESS ||
635                     translations[i].om_start > VM_MAX_PROM_ADDRESS)
636                         continue;
637                 for (off = 0; off < translations[i].om_size;
638                     off += PAGE_SIZE) {
639                         va = translations[i].om_start + off;
640                         tp = tsb_kvtotte(va);
641                         vpn = TV_VPN(va, TS_8K);
642                         data = ((translations[i].om_tte &
643                             ~((TD_SOFT2_MASK << TD_SOFT2_SHIFT) |
644                             (cpu_impl >= CPU_IMPL_ULTRASPARCI &&
645                             cpu_impl < CPU_IMPL_ULTRASPARCIII ?
646                             (TD_DIAG_SF_MASK << TD_DIAG_SF_SHIFT) :
647                             (TD_RSVD_CH_MASK << TD_RSVD_CH_SHIFT)) |
648                             (TD_SOFT_MASK << TD_SOFT_SHIFT))) | TD_EXEC) +
649                             off;
650                         pmap_bootstrap_set_tte(tp, vpn, data);
651                 }
652         }
653
654         /*
655          * Get the available physical memory ranges from /memory/reg.  These
656          * are only used for kernel dumps, but it may not be wise to do PROM
657          * calls in that situation.
658          */
659         if ((sz = OF_getproplen(pmem, "reg")) == -1)
660                 OF_panic("%s: getproplen /memory/reg", __func__);
661         if (sizeof(sparc64_memreg) < sz)
662                 OF_panic("%s: sparc64_memreg too small", __func__);
663         if (OF_getprop(pmem, "reg", sparc64_memreg, sz) == -1)
664                 OF_panic("%s: getprop /memory/reg", __func__);
665         sparc64_nmemreg = sz / sizeof(*sparc64_memreg);
666
667         /*
668          * Initialize the kernel pmap (which is statically allocated).
669          */
670         pm = kernel_pmap;
671         PMAP_LOCK_INIT(pm);
672         for (i = 0; i < MAXCPU; i++)
673                 pm->pm_context[i] = TLB_CTX_KERNEL;
674         pm->pm_active = ~0;
675
676         /*
677          * Flush all non-locked TLB entries possibly left over by the
678          * firmware.
679          */
680         tlb_flush_nonlocked();
681 }
682
683 /*
684  * Map the 4MB kernel TSB pages.
685  */
686 void
687 pmap_map_tsb(void)
688 {
689         vm_offset_t va;
690         vm_paddr_t pa;
691         u_long data;
692         int i;
693
694         for (i = 0; i < tsb_kernel_size; i += PAGE_SIZE_4M) {
695                 va = (vm_offset_t)tsb_kernel + i;
696                 pa = tsb_kernel_phys + i;
697                 data = TD_V | TD_4M | TD_PA(pa) | TD_L | TD_CP | TD_CV |
698                     TD_P | TD_W;
699                 stxa(AA_DMMU_TAR, ASI_DMMU, TLB_TAR_VA(va) |
700                     TLB_TAR_CTX(TLB_CTX_KERNEL));
701                 stxa_sync(0, ASI_DTLB_DATA_IN_REG, data);
702         }
703 }
704
705 /*
706  * Set the secondary context to be the kernel context (needed for FP block
707  * operations in the kernel).
708  */
709 void
710 pmap_set_kctx(void)
711 {
712
713         stxa(AA_DMMU_SCXR, ASI_DMMU, (ldxa(AA_DMMU_SCXR, ASI_DMMU) &
714             TLB_CXR_PGSZ_MASK) | TLB_CTX_KERNEL);
715         flush(KERNBASE);
716 }
717
718 /*
719  * Allocate a block of physical memory directly from the phys_avail map.
720  * Can only be called from pmap_bootstrap before avail start and end are
721  * calculated.
722  */
723 static vm_paddr_t
724 pmap_bootstrap_alloc(vm_size_t size, uint32_t colors)
725 {
726         vm_paddr_t pa;
727         int i;
728
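        /*
         * Round each request up to a whole set of D-cache colors so that
         * successive allocations keep their starting color; pmap_bootstrap()
         * presumably relies on this when matching the virtual and physical
         * colors of kstack0.
         */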
729         size = roundup(size, PAGE_SIZE * colors);
730         for (i = 0; phys_avail[i + 1] != 0; i += 2) {
731                 if (phys_avail[i + 1] - phys_avail[i] < size)
732                         continue;
733                 pa = phys_avail[i];
734                 phys_avail[i] += size;
735                 return (pa);
736         }
737         OF_panic("%s: no suitable region found", __func__);
738 }
739
740 /*
741  * Set a TTE.  This function is intended as a helper when tsb_kernel is
742  * direct-mapped but we haven't taken over the trap table, yet, as it's the
743  * case when we are taking advantage of ASI_ATOMIC_QUAD_LDD_PHYS to access
744  * the kernel TSB.
745  */
746 static void
747 pmap_bootstrap_set_tte(struct tte *tp, u_long vpn, u_long data)
748 {
749
750         if (tsb_kernel_ldd_phys == 0) {
751                 tp->tte_vpn = vpn;
752                 tp->tte_data = data;
753         } else {
754                 stxa((vm_paddr_t)tp + offsetof(struct tte, tte_vpn),
755                     ASI_PHYS_USE_EC, vpn);
756                 stxa((vm_paddr_t)tp + offsetof(struct tte, tte_data),
757                     ASI_PHYS_USE_EC, data);
758         }
759 }
760
761 /*
762  * Initialize a vm_page's machine-dependent fields.
763  */
764 void
765 pmap_page_init(vm_page_t m)
766 {
767
768         TAILQ_INIT(&m->md.tte_list);
769         m->md.color = DCACHE_COLOR(VM_PAGE_TO_PHYS(m));
770         m->md.flags = 0;
771         m->md.pmap = NULL;
772 }
773
774 /*
775  * Initialize the pmap module.
776  */
777 void
778 pmap_init(void)
779 {
780         vm_offset_t addr;
781         vm_size_t size;
782         int result;
783         int i;
784
785         for (i = 0; i < translations_size; i++) {
786                 addr = translations[i].om_start;
787                 size = translations[i].om_size;
788                 if ((translations[i].om_tte & TD_V) == 0)
789                         continue;
790                 if (addr < VM_MIN_PROM_ADDRESS || addr > VM_MAX_PROM_ADDRESS)
791                         continue;
792                 result = vm_map_find(kernel_map, NULL, 0, &addr, size,
793                     VMFS_NO_SPACE, VM_PROT_ALL, VM_PROT_ALL, MAP_NOFAULT);
794                 if (result != KERN_SUCCESS || addr != translations[i].om_start)
795                         panic("pmap_init: vm_map_find");
796         }
797 }
798
799 /*
800  * Extract the physical page address associated with the given
801  * map/virtual_address pair.
802  */
803 vm_paddr_t
804 pmap_extract(pmap_t pm, vm_offset_t va)
805 {
806         struct tte *tp;
807         vm_paddr_t pa;
808
809         if (pm == kernel_pmap)
810                 return (pmap_kextract(va));
811         PMAP_LOCK(pm);
812         tp = tsb_tte_lookup(pm, va);
813         if (tp == NULL)
814                 pa = 0;
815         else
816                 pa = TTE_GET_PA(tp) | (va & TTE_GET_PAGE_MASK(tp));
817         PMAP_UNLOCK(pm);
818         return (pa);
819 }
820
821 /*
822  * Atomically extract and hold the physical page with the given
823  * pmap and virtual address pair if that mapping permits the given
824  * protection.
825  */
826 vm_page_t
827 pmap_extract_and_hold(pmap_t pm, vm_offset_t va, vm_prot_t prot)
828 {
829         struct tte *tp;
830         vm_page_t m;
831
832         m = NULL;
833         vm_page_lock_queues();
834         if (pm == kernel_pmap) {
835                 if (va >= VM_MIN_DIRECT_ADDRESS) {
836                         tp = NULL;
837                         m = PHYS_TO_VM_PAGE(TLB_DIRECT_TO_PHYS(va));
838                         vm_page_hold(m);
839                 } else {
840                         tp = tsb_kvtotte(va);
841                         if ((tp->tte_data & TD_V) == 0)
842                                 tp = NULL;
843                 }
844         } else {
845                 PMAP_LOCK(pm);
846                 tp = tsb_tte_lookup(pm, va);
847         }
848         if (tp != NULL && ((tp->tte_data & TD_SW) ||
849             (prot & VM_PROT_WRITE) == 0)) {
850                 m = PHYS_TO_VM_PAGE(TTE_GET_PA(tp));
851                 vm_page_hold(m);
852         }
853         vm_page_unlock_queues();
854         if (pm != kernel_pmap)
855                 PMAP_UNLOCK(pm);
856         return (m);
857 }
858
859 /*
860  * Extract the physical page address associated with the given kernel virtual
861  * address.
862  */
863 vm_paddr_t
864 pmap_kextract(vm_offset_t va)
865 {
866         struct tte *tp;
867
868         if (va >= VM_MIN_DIRECT_ADDRESS)
869                 return (TLB_DIRECT_TO_PHYS(va));
870         tp = tsb_kvtotte(va);
871         if ((tp->tte_data & TD_V) == 0)
872                 return (0);
873         return (TTE_GET_PA(tp) | (va & TTE_GET_PAGE_MASK(tp)));
874 }
875
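/*
 * Record a new mapping of the given page at the given virtual address for
 * data cache coloring purposes.  Returns non-zero if the mapping may be
 * entered cacheable and zero if it must be uncacheable; callers use the
 * return value to decide whether to set TD_CV, as in pmap_kenter():
 *
 *	if (pmap_cache_enter(m, va) != 0)
 *		data |= TD_CV;
 */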
876 int
877 pmap_cache_enter(vm_page_t m, vm_offset_t va)
878 {
879         struct tte *tp;
880         int color;
881
882         mtx_assert(&vm_page_queue_mtx, MA_OWNED);
883         KASSERT((m->flags & PG_FICTITIOUS) == 0,
884             ("pmap_cache_enter: fake page"));
885         PMAP_STATS_INC(pmap_ncache_enter);
886
887         if (dcache_color_ignore != 0)
888                 return (1);
889
890         /*
891          * Find the color for this virtual address and note the added mapping.
892          */
893         color = DCACHE_COLOR(va);
894         m->md.colors[color]++;
895
896         /*
897          * If all existing mappings have the same color, the mapping is
898          * cacheable.
899          */
900         if (m->md.color == color) {
901                 KASSERT(m->md.colors[DCACHE_OTHER_COLOR(color)] == 0,
902                     ("pmap_cache_enter: cacheable, mappings of other color"));
903                 if (m->md.color == DCACHE_COLOR(VM_PAGE_TO_PHYS(m)))
904                         PMAP_STATS_INC(pmap_ncache_enter_c);
905                 else
906                         PMAP_STATS_INC(pmap_ncache_enter_oc);
907                 return (1);
908         }
909
910         /*
911          * If there are no mappings of the other color, and the page still has
912          * the wrong color, this must be a new mapping.  Change the color to
913          * match the new mapping, which is cacheable.  We must flush the page
914          * from the cache now.
915          */
916         if (m->md.colors[DCACHE_OTHER_COLOR(color)] == 0) {
917                 KASSERT(m->md.colors[color] == 1,
918                     ("pmap_cache_enter: changing color, not new mapping"));
919                 dcache_page_inval(VM_PAGE_TO_PHYS(m));
920                 m->md.color = color;
921                 if (m->md.color == DCACHE_COLOR(VM_PAGE_TO_PHYS(m)))
922                         PMAP_STATS_INC(pmap_ncache_enter_cc);
923                 else
924                         PMAP_STATS_INC(pmap_ncache_enter_coc);
925                 return (1);
926         }
927
928         /*
929          * If the mapping is already non-cacheable, just return.
930          */
931         if (m->md.color == -1) {
932                 PMAP_STATS_INC(pmap_ncache_enter_nc);
933                 return (0);
934         }
935
936         PMAP_STATS_INC(pmap_ncache_enter_cnc);
937
938         /*
939          * Mark all mappings as uncacheable, flush any lines with the other
940          * color out of the dcache, and set the color to none (-1).
941          */
942         TAILQ_FOREACH(tp, &m->md.tte_list, tte_link) {
943                 atomic_clear_long(&tp->tte_data, TD_CV);
944                 tlb_page_demap(TTE_GET_PMAP(tp), TTE_GET_VA(tp));
945         }
946         dcache_page_inval(VM_PAGE_TO_PHYS(m));
947         m->md.color = -1;
948         return (0);
949 }
950
951 static void
952 pmap_cache_remove(vm_page_t m, vm_offset_t va)
953 {
954         struct tte *tp;
955         int color;
956
957         mtx_assert(&vm_page_queue_mtx, MA_OWNED);
958         CTR3(KTR_PMAP, "pmap_cache_remove: m=%p va=%#lx c=%d", m, va,
959             m->md.colors[DCACHE_COLOR(va)]);
960         KASSERT((m->flags & PG_FICTITIOUS) == 0,
961             ("pmap_cache_remove: fake page"));
962         PMAP_STATS_INC(pmap_ncache_remove);
963
964         if (dcache_color_ignore != 0)
965                 return;
966
967         KASSERT(m->md.colors[DCACHE_COLOR(va)] > 0,
968             ("pmap_cache_remove: no mappings %d <= 0",
969             m->md.colors[DCACHE_COLOR(va)]));
970
971         /*
972          * Find the color for this virtual address and note the removal of
973          * the mapping.
974          */
975         color = DCACHE_COLOR(va);
976         m->md.colors[color]--;
977
978         /*
979          * If the page is cacheable, just return and keep the same color, even
980          * if there are no longer any mappings.
981          */
982         if (m->md.color != -1) {
983                 if (m->md.color == DCACHE_COLOR(VM_PAGE_TO_PHYS(m)))
984                         PMAP_STATS_INC(pmap_ncache_remove_c);
985                 else
986                         PMAP_STATS_INC(pmap_ncache_remove_oc);
987                 return;
988         }
989
990         KASSERT(m->md.colors[DCACHE_OTHER_COLOR(color)] != 0,
991             ("pmap_cache_remove: uncacheable, no mappings of other color"));
992
993         /*
994          * If the page is not cacheable (color is -1), and the number of
995          * mappings for this color is not zero, just return.  There are
996          * mappings of the other color still, so remain non-cacheable.
997          */
998         if (m->md.colors[color] != 0) {
999                 PMAP_STATS_INC(pmap_ncache_remove_nc);
1000                 return;
1001         }
1002
1003         /*
1004          * The number of mappings for this color is now zero.  Recache the
1005          * other colored mappings, and change the page color to the other
1006          * color.  There should be no lines in the data cache for this page,
1007          * so flushing should not be needed.
1008          */
1009         TAILQ_FOREACH(tp, &m->md.tte_list, tte_link) {
1010                 atomic_set_long(&tp->tte_data, TD_CV);
1011                 tlb_page_demap(TTE_GET_PMAP(tp), TTE_GET_VA(tp));
1012         }
1013         m->md.color = DCACHE_OTHER_COLOR(color);
1014
1015         if (m->md.color == DCACHE_COLOR(VM_PAGE_TO_PHYS(m)))
1016                 PMAP_STATS_INC(pmap_ncache_remove_cc);
1017         else
1018                 PMAP_STATS_INC(pmap_ncache_remove_coc);
1019 }
1020
1021 /*
1022  * Map a wired page into kernel virtual address space.
1023  */
1024 void
1025 pmap_kenter(vm_offset_t va, vm_page_t m)
1026 {
1027         vm_offset_t ova;
1028         struct tte *tp;
1029         vm_page_t om;
1030         u_long data;
1031
1032         mtx_assert(&vm_page_queue_mtx, MA_OWNED);
1033         PMAP_STATS_INC(pmap_nkenter);
1034         tp = tsb_kvtotte(va);
1035         CTR4(KTR_PMAP, "pmap_kenter: va=%#lx pa=%#lx tp=%p data=%#lx",
1036             va, VM_PAGE_TO_PHYS(m), tp, tp->tte_data);
1037         if (DCACHE_COLOR(VM_PAGE_TO_PHYS(m)) != DCACHE_COLOR(va)) {
1038                 CTR5(KTR_CT2,
1039         "pmap_kenter: off color va=%#lx pa=%#lx o=%p ot=%d pi=%#lx",
1040                     va, VM_PAGE_TO_PHYS(m), m->object,
1041                     m->object ? m->object->type : -1,
1042                     m->pindex);
1043                 PMAP_STATS_INC(pmap_nkenter_oc);
1044         }
1045         if ((tp->tte_data & TD_V) != 0) {
1046                 om = PHYS_TO_VM_PAGE(TTE_GET_PA(tp));
1047                 ova = TTE_GET_VA(tp);
1048                 if (m == om && va == ova) {
1049                         PMAP_STATS_INC(pmap_nkenter_stupid);
1050                         return;
1051                 }
1052                 TAILQ_REMOVE(&om->md.tte_list, tp, tte_link);
1053                 pmap_cache_remove(om, ova);
1054                 if (va != ova)
1055                         tlb_page_demap(kernel_pmap, ova);
1056         }
1057         data = TD_V | TD_8K | VM_PAGE_TO_PHYS(m) | TD_REF | TD_SW | TD_CP |
1058             TD_P | TD_W;
1059         if (pmap_cache_enter(m, va) != 0)
1060                 data |= TD_CV;
1061         tp->tte_vpn = TV_VPN(va, TS_8K);
1062         tp->tte_data = data;
1063         TAILQ_INSERT_TAIL(&m->md.tte_list, tp, tte_link);
1064 }
1065
1066 /*
1067  * Map a wired page into kernel virtual address space.  This additionally
1068  * takes a flag argument which is or'ed to the TTE data.  This is used by
1069  * sparc64_bus_mem_map().
1070  * NOTE: if the mapping is non-cacheable, it's the caller's responsibility
1071  * to flush entries that might still be in the cache, if applicable.
1072  */
1073 void
1074 pmap_kenter_flags(vm_offset_t va, vm_paddr_t pa, u_long flags)
1075 {
1076         struct tte *tp;
1077
1078         tp = tsb_kvtotte(va);
1079         CTR4(KTR_PMAP, "pmap_kenter_flags: va=%#lx pa=%#lx tp=%p data=%#lx",
1080             va, pa, tp, tp->tte_data);
1081         tp->tte_vpn = TV_VPN(va, TS_8K);
1082         tp->tte_data = TD_V | TD_8K | TD_PA(pa) | TD_REF | TD_P | flags;
1083 }
1084
1085 /*
1086  * Remove a wired page from kernel virtual address space.
1087  */
1088 void
1089 pmap_kremove(vm_offset_t va)
1090 {
1091         struct tte *tp;
1092         vm_page_t m;
1093
1094         mtx_assert(&vm_page_queue_mtx, MA_OWNED);
1095         PMAP_STATS_INC(pmap_nkremove);
1096         tp = tsb_kvtotte(va);
1097         CTR3(KTR_PMAP, "pmap_kremove: va=%#lx tp=%p data=%#lx", va, tp,
1098             tp->tte_data);
1099         if ((tp->tte_data & TD_V) == 0)
1100                 return;
1101         m = PHYS_TO_VM_PAGE(TTE_GET_PA(tp));
1102         TAILQ_REMOVE(&m->md.tte_list, tp, tte_link);
1103         pmap_cache_remove(m, va);
1104         TTE_ZERO(tp);
1105 }
1106
1107 /*
1108  * Inverse of pmap_kenter_flags, used by bus_space_unmap().
1109  */
1110 void
1111 pmap_kremove_flags(vm_offset_t va)
1112 {
1113         struct tte *tp;
1114
1115         tp = tsb_kvtotte(va);
1116         CTR3(KTR_PMAP, "pmap_kremove_flags: va=%#lx tp=%p data=%#lx", va, tp,
1117             tp->tte_data);
1118         TTE_ZERO(tp);
1119 }
1120
1121 /*
1122  * Map a range of physical addresses into kernel virtual address space.
1123  *
1124  * The value passed in *virt is a suggested virtual address for the mapping.
1125  * Architectures which can support a direct-mapped physical to virtual region
1126  * can return the appropriate address within that region, leaving '*virt'
1127  * unchanged.
1128  */
1129 vm_offset_t
1130 pmap_map(vm_offset_t *virt, vm_paddr_t start, vm_paddr_t end, int prot)
1131 {
1132
1133         return (TLB_PHYS_TO_DIRECT(start));
1134 }
1135
1136 /*
1137  * Map a list of wired pages into kernel virtual address space.  This is
1138  * intended for temporary mappings which do not need page modification or
1139  * references recorded.  Existing mappings in the region are overwritten.
1140  */
1141 void
1142 pmap_qenter(vm_offset_t sva, vm_page_t *m, int count)
1143 {
1144         vm_offset_t va;
1145         int locked;
1146
1147         PMAP_STATS_INC(pmap_nqenter);
1148         va = sva;
1149         if (!(locked = mtx_owned(&vm_page_queue_mtx)))
1150                 vm_page_lock_queues();
1151         while (count-- > 0) {
1152                 pmap_kenter(va, *m);
1153                 va += PAGE_SIZE;
1154                 m++;
1155         }
1156         if (!locked)
1157                 vm_page_unlock_queues();
1158         tlb_range_demap(kernel_pmap, sva, va);
1159 }
1160
1161 /*
1162  * Remove page mappings from kernel virtual address space.  Intended for
1163  * temporary mappings entered by pmap_qenter.
1164  */
1165 void
1166 pmap_qremove(vm_offset_t sva, int count)
1167 {
1168         vm_offset_t va;
1169         int locked;
1170
1171         PMAP_STATS_INC(pmap_nqremove);
1172         va = sva;
1173         if (!(locked = mtx_owned(&vm_page_queue_mtx)))
1174                 vm_page_lock_queues();
1175         while (count-- > 0) {
1176                 pmap_kremove(va);
1177                 va += PAGE_SIZE;
1178         }
1179         if (!locked)
1180                 vm_page_unlock_queues();
1181         tlb_range_demap(kernel_pmap, sva, va);
1182 }
1183
1184 /*
1185  * Initialize the pmap associated with process 0.
1186  */
1187 void
1188 pmap_pinit0(pmap_t pm)
1189 {
1190         int i;
1191
1192         PMAP_LOCK_INIT(pm);
1193         for (i = 0; i < MAXCPU; i++)
1194                 pm->pm_context[i] = TLB_CTX_KERNEL;
1195         pm->pm_active = 0;
1196         pm->pm_tsb = NULL;
1197         pm->pm_tsb_obj = NULL;
1198         bzero(&pm->pm_stats, sizeof(pm->pm_stats));
1199 }
1200
1201 /*
1202  * Initialize a preallocated and zeroed pmap structure, such as one in a
1203  * vmspace structure.
1204  */
1205 int
1206 pmap_pinit(pmap_t pm)
1207 {
1208         vm_page_t ma[TSB_PAGES];
1209         vm_page_t m;
1210         int i;
1211
1212         PMAP_LOCK_INIT(pm);
1213
1214         /*
1215          * Allocate KVA space for the TSB.
1216          */
1217         if (pm->pm_tsb == NULL) {
1218                 pm->pm_tsb = (struct tte *)kmem_alloc_nofault(kernel_map,
1219                     TSB_BSIZE);
1220                 if (pm->pm_tsb == NULL) {
1221                         PMAP_LOCK_DESTROY(pm);
1222                         return (0);
1223                 }
1224         }
1225
1226         /*
1227          * Allocate an object for it.
1228          */
1229         if (pm->pm_tsb_obj == NULL)
1230                 pm->pm_tsb_obj = vm_object_allocate(OBJT_DEFAULT, TSB_PAGES);
1231
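        /*
         * Mark all per-CPU contexts as unallocated (-1); a real hardware
         * context is presumably assigned the first time the pmap is
         * activated on a given CPU.
         */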
1232         mtx_lock_spin(&sched_lock);
1233         for (i = 0; i < MAXCPU; i++)
1234                 pm->pm_context[i] = -1;
1235         pm->pm_active = 0;
1236         mtx_unlock_spin(&sched_lock);
1237
1238         VM_OBJECT_LOCK(pm->pm_tsb_obj);
1239         for (i = 0; i < TSB_PAGES; i++) {
1240                 m = vm_page_grab(pm->pm_tsb_obj, i, VM_ALLOC_NOBUSY |
1241                     VM_ALLOC_RETRY | VM_ALLOC_WIRED | VM_ALLOC_ZERO);
1242                 m->valid = VM_PAGE_BITS_ALL;
1243                 m->md.pmap = pm;
1244                 ma[i] = m;
1245         }
1246         VM_OBJECT_UNLOCK(pm->pm_tsb_obj);
1247         pmap_qenter((vm_offset_t)pm->pm_tsb, ma, TSB_PAGES);
1248
1249         bzero(&pm->pm_stats, sizeof(pm->pm_stats));
1250         return (1);
1251 }
1252
1253 /*
1254  * Release any resources held by the given physical map.
1255  * Called when a pmap initialized by pmap_pinit is being released.
1256  * Should only be called if the map contains no valid mappings.
1257  */
1258 void
1259 pmap_release(pmap_t pm)
1260 {
1261         vm_object_t obj;
1262         vm_page_t m;
1263         struct pcpu *pc;
1264
1265         CTR2(KTR_PMAP, "pmap_release: ctx=%#x tsb=%p",
1266             pm->pm_context[curcpu], pm->pm_tsb);
1267         KASSERT(pmap_resident_count(pm) == 0,
1268             ("pmap_release: resident pages %ld != 0",
1269             pmap_resident_count(pm)));
1270
1271         /*
1272          * After the pmap was freed, it might be reallocated to a new process.
1273          * When switching, this might lead us to wrongly assume that we need
1274          * not switch contexts because old and new pmap pointer are equal.
1275          * Therefore, make sure that this pmap is not referenced by any PCPU
1276          * pointer any more.  This could happen in two cases:
1277          * - A process that referenced the pmap is currently exiting on a CPU.
1278          *   However, it is guaranteed to not switch in any more after setting
1279          *   its state to PRS_ZOMBIE.
1280          * - A process that referenced this pmap ran on a CPU, but we switched
1281          *   to a kernel thread, leaving the pmap pointer unchanged.
1282          */
1283         mtx_lock_spin(&sched_lock);
1284         SLIST_FOREACH(pc, &cpuhead, pc_allcpu)
1285                 if (pc->pc_pmap == pm)
1286                         pc->pc_pmap = NULL;
1287         mtx_unlock_spin(&sched_lock);
1288
1289         pmap_qremove((vm_offset_t)pm->pm_tsb, TSB_PAGES);
1290         obj = pm->pm_tsb_obj;
1291         VM_OBJECT_LOCK(obj);
1292         KASSERT(obj->ref_count == 1, ("pmap_release: tsbobj ref count != 1"));
1293         while (!TAILQ_EMPTY(&obj->memq)) {
1294                 m = TAILQ_FIRST(&obj->memq);
1295                 vm_page_lock_queues();
1296                 if (vm_page_sleep_if_busy(m, FALSE, "pmaprl"))
1297                         continue;
1298                 KASSERT(m->hold_count == 0,
1299                     ("pmap_release: freeing held tsb page"));
1300                 m->md.pmap = NULL;
1301                 m->wire_count--;
1302                 atomic_subtract_int(&cnt.v_wire_count, 1);
1303                 vm_page_free_zero(m);
1304                 vm_page_unlock_queues();
1305         }
1306         VM_OBJECT_UNLOCK(obj);
1307         PMAP_LOCK_DESTROY(pm);
1308 }
1309
1310 /*
1311  * Grow the number of kernel page table entries.  Unneeded.
1312  */
1313 void
1314 pmap_growkernel(vm_offset_t addr)
1315 {
1316
1317         panic("pmap_growkernel: can't grow kernel");
1318 }
1319
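/*
 * Tear down a single mapping: unlink the TTE from the page's list, update
 * the wired and resident counts and transfer the referenced/modified bits
 * to the vm_page for managed (TD_PV) mappings, then zero the TTE.  Returns
 * zero once PMAP_REMOVE_DONE() reports that the pmap has no resident pages
 * left, which lets the callers stop scanning early.
 */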
1320 int
1321 pmap_remove_tte(struct pmap *pm, struct pmap *pm2, struct tte *tp,
1322     vm_offset_t va)
1323 {
1324         vm_page_t m;
1325         u_long data;
1326
1327         mtx_assert(&vm_page_queue_mtx, MA_OWNED);
1328         data = atomic_readandclear_long(&tp->tte_data);
1329         if ((data & TD_FAKE) == 0) {
1330                 m = PHYS_TO_VM_PAGE(TD_PA(data));
1331                 TAILQ_REMOVE(&m->md.tte_list, tp, tte_link);
1332                 if ((data & TD_WIRED) != 0)
1333                         pm->pm_stats.wired_count--;
1334                 if ((data & TD_PV) != 0) {
1335                         if ((data & TD_W) != 0)
1336                                 vm_page_dirty(m);
1337                         if ((data & TD_REF) != 0)
1338                                 vm_page_flag_set(m, PG_REFERENCED);
1339                         if (TAILQ_EMPTY(&m->md.tte_list))
1340                                 vm_page_flag_clear(m, PG_WRITEABLE);
1341                         pm->pm_stats.resident_count--;
1342                 }
1343                 pmap_cache_remove(m, va);
1344         }
1345         TTE_ZERO(tp);
1346         if (PMAP_REMOVE_DONE(pm))
1347                 return (0);
1348         return (1);
1349 }
1350
1351 /*
1352  * Remove the given range of addresses from the specified map.
1353  */
1354 void
1355 pmap_remove(pmap_t pm, vm_offset_t start, vm_offset_t end)
1356 {
1357         struct tte *tp;
1358         vm_offset_t va;
1359
1360         CTR3(KTR_PMAP, "pmap_remove: ctx=%#lx start=%#lx end=%#lx",
1361             pm->pm_context[curcpu], start, end);
1362         if (PMAP_REMOVE_DONE(pm))
1363                 return;
1364         vm_page_lock_queues();
1365         PMAP_LOCK(pm);
1366         if (end - start > PMAP_TSB_THRESH) {
1367                 tsb_foreach(pm, NULL, start, end, pmap_remove_tte);
1368                 tlb_context_demap(pm);
1369         } else {
1370                 for (va = start; va < end; va += PAGE_SIZE)
1371                         if ((tp = tsb_tte_lookup(pm, va)) != NULL &&
1372                             !pmap_remove_tte(pm, NULL, tp, va))
1373                                 break;
1374                 tlb_range_demap(pm, start, end - 1);
1375         }
1376         PMAP_UNLOCK(pm);
1377         vm_page_unlock_queues();
1378 }
1379
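/*
 * Remove all managed (TD_PV) mappings of the given page from every pmap
 * and clear its PG_WRITEABLE flag; unmanaged mappings on the page's TTE
 * list, such as those entered by pmap_kenter(), are skipped.
 */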
1380 void
1381 pmap_remove_all(vm_page_t m)
1382 {
1383         struct pmap *pm;
1384         struct tte *tpn;
1385         struct tte *tp;
1386         vm_offset_t va;
1387
1388         mtx_assert(&vm_page_queue_mtx, MA_OWNED);
1389         for (tp = TAILQ_FIRST(&m->md.tte_list); tp != NULL; tp = tpn) {
1390                 tpn = TAILQ_NEXT(tp, tte_link);
1391                 if ((tp->tte_data & TD_PV) == 0)
1392                         continue;
1393                 pm = TTE_GET_PMAP(tp);
1394                 va = TTE_GET_VA(tp);
1395                 PMAP_LOCK(pm);
1396                 if ((tp->tte_data & TD_WIRED) != 0)
1397                         pm->pm_stats.wired_count--;
1398                 if ((tp->tte_data & TD_REF) != 0)
1399                         vm_page_flag_set(m, PG_REFERENCED);
1400                 if ((tp->tte_data & TD_W) != 0)
1401                         vm_page_dirty(m);
1402                 tp->tte_data &= ~TD_V;
1403                 tlb_page_demap(pm, va);
1404                 TAILQ_REMOVE(&m->md.tte_list, tp, tte_link);
1405                 pm->pm_stats.resident_count--;
1406                 pmap_cache_remove(m, va);
1407                 TTE_ZERO(tp);
1408                 PMAP_UNLOCK(pm);
1409         }
1410         vm_page_flag_clear(m, PG_WRITEABLE);
1411 }
1412
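/*
 * Revoke write access for a single mapping: clear the TD_REF, TD_SW and
 * TD_W bits and propagate the referenced/modified state to the vm_page for
 * managed mappings.  Always returns 1 so that a tsb_foreach() scan in
 * pmap_protect() keeps going.
 */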
1413 static int
1414 pmap_protect_tte(struct pmap *pm, struct pmap *pm2, struct tte *tp,
1415     vm_offset_t va)
1416 {
1417         u_long data;
1418         vm_page_t m;
1419
1420         data = atomic_clear_long(&tp->tte_data, TD_REF | TD_SW | TD_W);
1421         if ((data & TD_PV) != 0) {
1422                 m = PHYS_TO_VM_PAGE(TD_PA(data));
1423                 if ((data & TD_REF) != 0)
1424                         vm_page_flag_set(m, PG_REFERENCED);
1425                 if ((data & TD_W) != 0)
1426                         vm_page_dirty(m);
1427         }
1428         return (1);
1429 }
1430
1431 /*
1432  * Set the physical protection on the specified range of this map as requested.
1433  */
1434 void
1435 pmap_protect(pmap_t pm, vm_offset_t sva, vm_offset_t eva, vm_prot_t prot)
1436 {
1437         vm_offset_t va;
1438         struct tte *tp;
1439
1440         CTR4(KTR_PMAP, "pmap_protect: ctx=%#lx sva=%#lx eva=%#lx prot=%#lx",
1441             pm->pm_context[curcpu], sva, eva, prot);
1442
1443         if ((prot & VM_PROT_READ) == VM_PROT_NONE) {
1444                 pmap_remove(pm, sva, eva);
1445                 return;
1446         }
1447
1448         if (prot & VM_PROT_WRITE)
1449                 return;
1450
1451         vm_page_lock_queues();
1452         PMAP_LOCK(pm);
1453         if (eva - sva > PMAP_TSB_THRESH) {
1454                 tsb_foreach(pm, NULL, sva, eva, pmap_protect_tte);
1455                 tlb_context_demap(pm);
1456         } else {
1457                 for (va = sva; va < eva; va += PAGE_SIZE)
1458                         if ((tp = tsb_tte_lookup(pm, va)) != NULL)
1459                                 pmap_protect_tte(pm, NULL, tp, va);
1460                 tlb_range_demap(pm, sva, eva - 1);
1461         }
1462         PMAP_UNLOCK(pm);
1463         vm_page_unlock_queues();
1464 }
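
#ifdef PMAP_USAGE_SKETCHES
/*
 * Hypothetical usage sketch: downgrade a range to read/execute, as an
 * mprotect(2)-style request might.  pmap_protect() only revokes access;
 * granting it is left to the fault path.  The guard macro and helper name
 * are assumed for illustration only.
 */
static void
pmap_sketch_write_protect(pmap_t pm, vm_offset_t sva, vm_offset_t eva)
{

        /* A prot without VM_PROT_READ would make pmap_protect() remove the range. */
        pmap_protect(pm, sva, eva, VM_PROT_READ | VM_PROT_EXECUTE);
}
#endif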
1465
1466 /*
1467  * Map the given physical page at the specified virtual address in the
1468  * target pmap with the protection requested.  If specified the page
1469  * will be wired down.
1470  */
1471 void
1472 pmap_enter(pmap_t pm, vm_offset_t va, vm_prot_t access, vm_page_t m,
1473     vm_prot_t prot, boolean_t wired)
1474 {
1475
1476         vm_page_lock_queues();
1477         PMAP_LOCK(pm);
1478         pmap_enter_locked(pm, va, m, prot, wired);
1479         vm_page_unlock_queues();
1480         PMAP_UNLOCK(pm);
1481 }
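
#ifdef PMAP_USAGE_SKETCHES
/*
 * Hypothetical usage sketch: install a mapping for the resident page "m"
 * at "va" with the protection a fault requested, without wiring it.  The
 * guard macro and helper name are assumed for illustration only.
 */
static void
pmap_sketch_enter_page(pmap_t pm, vm_offset_t va, vm_page_t m, vm_prot_t prot)
{

        pmap_enter(pm, va, prot, m, prot, FALSE);
}
#endif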
1482
1483 /*
1484  * Map the given physical page at the specified virtual address in the
1485  * target pmap with the protection requested.  If specified the page
1486  * will be wired down.
1487  *
1488  * The page queues and pmap must be locked.
1489  */
1490 static void
1491 pmap_enter_locked(pmap_t pm, vm_offset_t va, vm_page_t m, vm_prot_t prot,
1492     boolean_t wired)
1493 {
1494         struct tte *tp;
1495         vm_paddr_t pa;
1496         u_long data;
1497         int i;
1498
1499         mtx_assert(&vm_page_queue_mtx, MA_OWNED);
1500         PMAP_LOCK_ASSERT(pm, MA_OWNED);
1501         PMAP_STATS_INC(pmap_nenter);
1502         pa = VM_PAGE_TO_PHYS(m);
1503
1504         /*
1505          * If this is a fake page from the device_pager, but it covers actual
1506          * physical memory, convert to the real backing page.
1507          */
1508         if ((m->flags & PG_FICTITIOUS) != 0) {
1509                 for (i = 0; phys_avail[i + 1] != 0; i += 2) {
1510                         if (pa >= phys_avail[i] && pa <= phys_avail[i + 1]) {
1511                                 m = PHYS_TO_VM_PAGE(pa);
1512                                 break;
1513                         }
1514                 }
1515         }
1516
1517         CTR6(KTR_PMAP,
1518             "pmap_enter_locked: ctx=%p m=%p va=%#lx pa=%#lx prot=%#x wired=%d",
1519             pm->pm_context[curcpu], m, va, pa, prot, wired);
1520
1521         /*
1522          * If there is an existing mapping, and the physical address has not
1523          * changed, this must be a protection or wiring change.
1524          */
1525         if ((tp = tsb_tte_lookup(pm, va)) != NULL && TTE_GET_PA(tp) == pa) {
1526                 CTR0(KTR_PMAP, "pmap_enter_locked: update");
1527                 PMAP_STATS_INC(pmap_nenter_update);
1528
1529                 /*
1530                  * Wiring change, just update stats.
1531                  */
1532                 if (wired) {
1533                         if ((tp->tte_data & TD_WIRED) == 0) {
1534                                 tp->tte_data |= TD_WIRED;
1535                                 pm->pm_stats.wired_count++;
1536                         }
1537                 } else {
1538                         if ((tp->tte_data & TD_WIRED) != 0) {
1539                                 tp->tte_data &= ~TD_WIRED;
1540                                 pm->pm_stats.wired_count--;
1541                         }
1542                 }
1543
1544                 /*
1545                  * Save the old bits and clear the ones we're interested in.
1546                  */
1547                 data = tp->tte_data;
1548                 tp->tte_data &= ~(TD_EXEC | TD_SW | TD_W);
1549
1550                 /*
1551                  * If we're turning off write permissions, sense the modify status.
1552                  */
1553                 if ((prot & VM_PROT_WRITE) != 0) {
1554                         tp->tte_data |= TD_SW;
1555                         if (wired)
1556                                 tp->tte_data |= TD_W;
1557                         vm_page_flag_set(m, PG_WRITEABLE);
1558                 } else if ((data & TD_W) != 0)
1559                         vm_page_dirty(m);
1560
1561                 /*
1562                  * If we're turning on execute permissions, flush the icache.
1563                  */
1564                 if ((prot & VM_PROT_EXECUTE) != 0) {
1565                         if ((data & TD_EXEC) == 0)
1566                                 icache_page_inval(pa);
1567                         tp->tte_data |= TD_EXEC;
1568                 }
1569
1570                 /*
1571                  * Delete the old mapping.
1572                  */
1573                 tlb_page_demap(pm, TTE_GET_VA(tp));
1574         } else {
1575                 /*
1576                  * If there is an existing mapping, but its for a different
1577                  * If there is an existing mapping, but it's for a different
1578                  * physical address, delete the old mapping.
1579                 if (tp != NULL) {
1580                         CTR0(KTR_PMAP, "pmap_enter_locked: replace");
1581                         PMAP_STATS_INC(pmap_nenter_replace);
1582                         pmap_remove_tte(pm, NULL, tp, va);
1583                         tlb_page_demap(pm, va);
1584                 } else {
1585                         CTR0(KTR_PMAP, "pmap_enter_locked: new");
1586                         PMAP_STATS_INC(pmap_nenter_new);
1587                 }
1588
1589                 /*
1590                  * Now set up the data and install the new mapping.
1591                  */
1592                 data = TD_V | TD_8K | TD_PA(pa);
1593                 if (pm == kernel_pmap)
1594                         data |= TD_P;
1595                 if ((prot & VM_PROT_WRITE) != 0) {
1596                         data |= TD_SW;
1597                         vm_page_flag_set(m, PG_WRITEABLE);
1598                 }
1599                 if (prot & VM_PROT_EXECUTE) {
1600                         data |= TD_EXEC;
1601                         icache_page_inval(pa);
1602                 }
1603
1604                 /*
1605                  * If it's wired, update stats.  We also don't need reference or
1606                  * modify tracking for wired mappings, so set the bits now.
1607                  */
1608                 if (wired) {
1609                         pm->pm_stats.wired_count++;
1610                         data |= TD_REF | TD_WIRED;
1611                         if ((prot & VM_PROT_WRITE) != 0)
1612                                 data |= TD_W;
1613                 }
1614
1615                 tsb_tte_enter(pm, m, va, TS_8K, data);
1616         }
1617 }
1618
1619 /*
1620  * Maps a sequence of resident pages belonging to the same object.
1621  * The sequence begins with the given page m_start.  This page is
1622  * mapped at the given virtual address start.  Each subsequent page is
1623  * mapped at a virtual address that is offset from start by the same
1624  * amount as the page is offset from m_start within the object.  The
1625  * last page in the sequence is the page with the largest offset from
1626  * m_start that can be mapped at a virtual address less than the given
1627  * virtual address end.  Not every virtual page between start and end
1628  * is mapped; only those for which a resident page exists with the
1629  * corresponding offset from m_start are mapped.
1630  */
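/*
 * For example, if m_start->pindex is 8 and a later resident page in the
 * run has pindex 11, that page is mapped at start + ptoa(3), provided that
 * address is below end.
 */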
1631 void
1632 pmap_enter_object(pmap_t pm, vm_offset_t start, vm_offset_t end,
1633     vm_page_t m_start, vm_prot_t prot)
1634 {
1635         vm_page_t m;
1636         vm_pindex_t diff, psize;
1637
1638         psize = atop(end - start);
1639         m = m_start;
1640         PMAP_LOCK(pm);
1641         while (m != NULL && (diff = m->pindex - m_start->pindex) < psize) {
1642                 pmap_enter_locked(pm, start + ptoa(diff), m, prot &
1643                     (VM_PROT_READ | VM_PROT_EXECUTE), FALSE);
1644                 m = TAILQ_NEXT(m, listq);
1645         }
1646         PMAP_UNLOCK(pm);
1647 }
1648
1649 void
1650 pmap_enter_quick(pmap_t pm, vm_offset_t va, vm_page_t m, vm_prot_t prot)
1651 {
1652
1653         PMAP_LOCK(pm);
1654         pmap_enter_locked(pm, va, m, prot & (VM_PROT_READ | VM_PROT_EXECUTE),
1655             FALSE);
1656         PMAP_UNLOCK(pm);
1657 }
1658
1659 void
1660 pmap_object_init_pt(pmap_t pm, vm_offset_t addr, vm_object_t object,
1661     vm_pindex_t pindex, vm_size_t size)
1662 {
1663
1664         VM_OBJECT_LOCK_ASSERT(object, MA_OWNED);
1665         KASSERT(object->type == OBJT_DEVICE || object->type == OBJT_SG,
1666             ("pmap_object_init_pt: non-device object"));
1667 }
1668
1669 /*
1670  * Change the wiring attribute for a map/virtual-address pair.
1671  * The mapping must already exist in the pmap.
1672  */
1673 void
1674 pmap_change_wiring(pmap_t pm, vm_offset_t va, boolean_t wired)
1675 {
1676         struct tte *tp;
1677         u_long data;
1678
1679         PMAP_LOCK(pm);
1680         if ((tp = tsb_tte_lookup(pm, va)) != NULL) {
1681                 if (wired) {
1682                         data = atomic_set_long(&tp->tte_data, TD_WIRED);
1683                         if ((data & TD_WIRED) == 0)
1684                                 pm->pm_stats.wired_count++;
1685                 } else {
1686                         data = atomic_clear_long(&tp->tte_data, TD_WIRED);
1687                         if ((data & TD_WIRED) != 0)
1688                                 pm->pm_stats.wired_count--;
1689                 }
1690         }
1691         PMAP_UNLOCK(pm);
1692 }
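
#ifdef PMAP_USAGE_SKETCHES
/*
 * Hypothetical usage sketch: wire and later unwire a single existing
 * mapping, as an mlock(2)-style path might.  The guard macro and helper
 * name are assumed for illustration only.
 */
static void
pmap_sketch_wire_page(pmap_t pm, vm_offset_t va)
{

        /* The mapping must already exist; only the wired accounting changes. */
        pmap_change_wiring(pm, va, TRUE);
        /* ...the page cannot be paged out while it is wired... */
        pmap_change_wiring(pm, va, FALSE);
}
#endif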
1693
1694 static int
1695 pmap_copy_tte(pmap_t src_pmap, pmap_t dst_pmap, struct tte *tp,
1696     vm_offset_t va)
1697 {
1698         vm_page_t m;
1699         u_long data;
1700
1701         if ((tp->tte_data & TD_FAKE) != 0)
1702                 return (1);
1703         if (tsb_tte_lookup(dst_pmap, va) == NULL) {
1704                 data = tp->tte_data &
1705                     ~(TD_PV | TD_REF | TD_SW | TD_CV | TD_W);
1706                 m = PHYS_TO_VM_PAGE(TTE_GET_PA(tp));
1707                 tsb_tte_enter(dst_pmap, m, va, TS_8K, data);
1708         }
1709         return (1);
1710 }
1711
1712 void
1713 pmap_copy(pmap_t dst_pmap, pmap_t src_pmap, vm_offset_t dst_addr,
1714     vm_size_t len, vm_offset_t src_addr)
1715 {
1716         struct tte *tp;
1717         vm_offset_t va;
1718
1719         if (dst_addr != src_addr)
1720                 return;
1721         vm_page_lock_queues();
1722         if (dst_pmap < src_pmap) {
1723                 PMAP_LOCK(dst_pmap);
1724                 PMAP_LOCK(src_pmap);
1725         } else {
1726                 PMAP_LOCK(src_pmap);
1727                 PMAP_LOCK(dst_pmap);
1728         }
1729         if (len > PMAP_TSB_THRESH) {
1730                 tsb_foreach(src_pmap, dst_pmap, src_addr, src_addr + len,
1731                     pmap_copy_tte);
1732                 tlb_context_demap(dst_pmap);
1733         } else {
1734                 for (va = src_addr; va < src_addr + len; va += PAGE_SIZE)
1735                         if ((tp = tsb_tte_lookup(src_pmap, va)) != NULL)
1736                                 pmap_copy_tte(src_pmap, dst_pmap, tp, va);
1737                 tlb_range_demap(dst_pmap, src_addr, src_addr + len - 1);
1738         }
1739         vm_page_unlock_queues();
1740         PMAP_UNLOCK(src_pmap);
1741         PMAP_UNLOCK(dst_pmap);
1742 }
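
#ifdef PMAP_USAGE_SKETCHES
/*
 * Hypothetical usage sketch: pre-copy a parent's mappings into a child at
 * the same virtual addresses, as is typically done when an address space
 * is duplicated.  pmap_copy() silently does nothing unless
 * dst_addr == src_addr.  The guard macro and helper name are assumed for
 * illustration only.
 */
static void
pmap_sketch_copy_on_fork(pmap_t child, pmap_t parent, vm_offset_t start,
    vm_size_t len)
{

        pmap_copy(child, parent, start, len, start);
}
#endif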
1743
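/*
 * The page zeroing and copying helpers below distinguish three cases in
 * order to avoid illegal aliases in the virtually indexed data cache.  If
 * cache coloring is disabled (dcache_color_ignore) or the page's recorded
 * color matches the color of its physical address, the direct mapping is
 * used.  If the page currently has no color (md.color == -1), it is
 * accessed by physical address through ASI_PHYS_USE_EC (aszero()/ascopy()).
 * Otherwise the page is temporarily mapped at a kernel virtual address of
 * the matching color (pmap_temp_map_1/_2 or pmap_idle_map) and that
 * mapping is demapped once the operation completes.
 */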
1744 void
1745 pmap_zero_page(vm_page_t m)
1746 {
1747         struct tte *tp;
1748         vm_offset_t va;
1749         vm_paddr_t pa;
1750
1751         KASSERT((m->flags & PG_FICTITIOUS) == 0,
1752             ("pmap_zero_page: fake page"));
1753         PMAP_STATS_INC(pmap_nzero_page);
1754         pa = VM_PAGE_TO_PHYS(m);
1755         if (dcache_color_ignore != 0 || m->md.color == DCACHE_COLOR(pa)) {
1756                 PMAP_STATS_INC(pmap_nzero_page_c);
1757                 va = TLB_PHYS_TO_DIRECT(pa);
1758                 cpu_block_zero((void *)va, PAGE_SIZE);
1759         } else if (m->md.color == -1) {
1760                 PMAP_STATS_INC(pmap_nzero_page_nc);
1761                 aszero(ASI_PHYS_USE_EC, pa, PAGE_SIZE);
1762         } else {
1763                 PMAP_STATS_INC(pmap_nzero_page_oc);
1764                 PMAP_LOCK(kernel_pmap);
1765                 va = pmap_temp_map_1 + (m->md.color * PAGE_SIZE);
1766                 tp = tsb_kvtotte(va);
1767                 tp->tte_data = TD_V | TD_8K | TD_PA(pa) | TD_CP | TD_CV | TD_W;
1768                 tp->tte_vpn = TV_VPN(va, TS_8K);
1769                 cpu_block_zero((void *)va, PAGE_SIZE);
1770                 tlb_page_demap(kernel_pmap, va);
1771                 PMAP_UNLOCK(kernel_pmap);
1772         }
1773 }
1774
1775 void
1776 pmap_zero_page_area(vm_page_t m, int off, int size)
1777 {
1778         struct tte *tp;
1779         vm_offset_t va;
1780         vm_paddr_t pa;
1781
1782         KASSERT((m->flags & PG_FICTITIOUS) == 0,
1783             ("pmap_zero_page_area: fake page"));
1784         KASSERT(off + size <= PAGE_SIZE, ("pmap_zero_page_area: bad off/size"));
1785         PMAP_STATS_INC(pmap_nzero_page_area);
1786         pa = VM_PAGE_TO_PHYS(m);
1787         if (dcache_color_ignore != 0 || m->md.color == DCACHE_COLOR(pa)) {
1788                 PMAP_STATS_INC(pmap_nzero_page_area_c);
1789                 va = TLB_PHYS_TO_DIRECT(pa);
1790                 bzero((void *)(va + off), size);
1791         } else if (m->md.color == -1) {
1792                 PMAP_STATS_INC(pmap_nzero_page_area_nc);
1793                 aszero(ASI_PHYS_USE_EC, pa + off, size);
1794         } else {
1795                 PMAP_STATS_INC(pmap_nzero_page_area_oc);
1796                 PMAP_LOCK(kernel_pmap);
1797                 va = pmap_temp_map_1 + (m->md.color * PAGE_SIZE);
1798                 tp = tsb_kvtotte(va);
1799                 tp->tte_data = TD_V | TD_8K | TD_PA(pa) | TD_CP | TD_CV | TD_W;
1800                 tp->tte_vpn = TV_VPN(va, TS_8K);
1801                 bzero((void *)(va + off), size);
1802                 tlb_page_demap(kernel_pmap, va);
1803                 PMAP_UNLOCK(kernel_pmap);
1804         }
1805 }
1806
1807 void
1808 pmap_zero_page_idle(vm_page_t m)
1809 {
1810         struct tte *tp;
1811         vm_offset_t va;
1812         vm_paddr_t pa;
1813
1814         KASSERT((m->flags & PG_FICTITIOUS) == 0,
1815             ("pmap_zero_page_idle: fake page"));
1816         PMAP_STATS_INC(pmap_nzero_page_idle);
1817         pa = VM_PAGE_TO_PHYS(m);
1818         if (dcache_color_ignore != 0 || m->md.color == DCACHE_COLOR(pa)) {
1819                 PMAP_STATS_INC(pmap_nzero_page_idle_c);
1820                 va = TLB_PHYS_TO_DIRECT(pa);
1821                 cpu_block_zero((void *)va, PAGE_SIZE);
1822         } else if (m->md.color == -1) {
1823                 PMAP_STATS_INC(pmap_nzero_page_idle_nc);
1824                 aszero(ASI_PHYS_USE_EC, pa, PAGE_SIZE);
1825         } else {
1826                 PMAP_STATS_INC(pmap_nzero_page_idle_oc);
1827                 va = pmap_idle_map + (m->md.color * PAGE_SIZE);
1828                 tp = tsb_kvtotte(va);
1829                 tp->tte_data = TD_V | TD_8K | TD_PA(pa) | TD_CP | TD_CV | TD_W;
1830                 tp->tte_vpn = TV_VPN(va, TS_8K);
1831                 cpu_block_zero((void *)va, PAGE_SIZE);
1832                 tlb_page_demap(kernel_pmap, va);
1833         }
1834 }
1835
1836 void
1837 pmap_copy_page(vm_page_t msrc, vm_page_t mdst)
1838 {
1839         vm_offset_t vdst;
1840         vm_offset_t vsrc;
1841         vm_paddr_t pdst;
1842         vm_paddr_t psrc;
1843         struct tte *tp;
1844
1845         KASSERT((mdst->flags & PG_FICTITIOUS) == 0,
1846             ("pmap_copy_page: fake dst page"));
1847         KASSERT((msrc->flags & PG_FICTITIOUS) == 0,
1848             ("pmap_copy_page: fake src page"));
1849         PMAP_STATS_INC(pmap_ncopy_page);
1850         pdst = VM_PAGE_TO_PHYS(mdst);
1851         psrc = VM_PAGE_TO_PHYS(msrc);
1852         if (dcache_color_ignore != 0 ||
1853             (msrc->md.color == DCACHE_COLOR(psrc) &&
1854             mdst->md.color == DCACHE_COLOR(pdst))) {
1855                 PMAP_STATS_INC(pmap_ncopy_page_c);
1856                 vdst = TLB_PHYS_TO_DIRECT(pdst);
1857                 vsrc = TLB_PHYS_TO_DIRECT(psrc);
1858                 cpu_block_copy((void *)vsrc, (void *)vdst, PAGE_SIZE);
1859         } else if (msrc->md.color == -1 && mdst->md.color == -1) {
1860                 PMAP_STATS_INC(pmap_ncopy_page_nc);
1861                 ascopy(ASI_PHYS_USE_EC, psrc, pdst, PAGE_SIZE);
1862         } else if (msrc->md.color == -1) {
1863                 if (mdst->md.color == DCACHE_COLOR(pdst)) {
1864                         PMAP_STATS_INC(pmap_ncopy_page_dc);
1865                         vdst = TLB_PHYS_TO_DIRECT(pdst);
1866                         ascopyfrom(ASI_PHYS_USE_EC, psrc, (void *)vdst,
1867                             PAGE_SIZE);
1868                 } else {
1869                         PMAP_STATS_INC(pmap_ncopy_page_doc);
1870                         PMAP_LOCK(kernel_pmap);
1871                         vdst = pmap_temp_map_1 + (mdst->md.color * PAGE_SIZE);
1872                         tp = tsb_kvtotte(vdst);
1873                         tp->tte_data =
1874                             TD_V | TD_8K | TD_PA(pdst) | TD_CP | TD_CV | TD_W;
1875                         tp->tte_vpn = TV_VPN(vdst, TS_8K);
1876                         ascopyfrom(ASI_PHYS_USE_EC, psrc, (void *)vdst,
1877                             PAGE_SIZE);
1878                         tlb_page_demap(kernel_pmap, vdst);
1879                         PMAP_UNLOCK(kernel_pmap);
1880                 }
1881         } else if (mdst->md.color == -1) {
1882                 if (msrc->md.color == DCACHE_COLOR(psrc)) {
1883                         PMAP_STATS_INC(pmap_ncopy_page_sc);
1884                         vsrc = TLB_PHYS_TO_DIRECT(psrc);
1885                         ascopyto((void *)vsrc, ASI_PHYS_USE_EC, pdst,
1886                             PAGE_SIZE);
1887                 } else {
1888                         PMAP_STATS_INC(pmap_ncopy_page_soc);
1889                         PMAP_LOCK(kernel_pmap);
1890                         vsrc = pmap_temp_map_1 + (msrc->md.color * PAGE_SIZE);
1891                         tp = tsb_kvtotte(vsrc);
1892                         tp->tte_data =
1893                             TD_V | TD_8K | TD_PA(psrc) | TD_CP | TD_CV | TD_W;
1894                         tp->tte_vpn = TV_VPN(vsrc, TS_8K);
1895                         ascopyto((void *)vsrc, ASI_PHYS_USE_EC, pdst,
1896                             PAGE_SIZE);
1897                         tlb_page_demap(kernel_pmap, vsrc);
1898                         PMAP_UNLOCK(kernel_pmap);
1899                 }
1900         } else {
1901                 PMAP_STATS_INC(pmap_ncopy_page_oc);
1902                 PMAP_LOCK(kernel_pmap);
1903                 vdst = pmap_temp_map_1 + (mdst->md.color * PAGE_SIZE);
1904                 tp = tsb_kvtotte(vdst);
1905                 tp->tte_data =
1906                     TD_V | TD_8K | TD_PA(pdst) | TD_CP | TD_CV | TD_W;
1907                 tp->tte_vpn = TV_VPN(vdst, TS_8K);
1908                 vsrc = pmap_temp_map_2 + (msrc->md.color * PAGE_SIZE);
1909                 tp = tsb_kvtotte(vsrc);
1910                 tp->tte_data =
1911                     TD_V | TD_8K | TD_PA(psrc) | TD_CP | TD_CV | TD_W;
1912                 tp->tte_vpn = TV_VPN(vsrc, TS_8K);
1913                 cpu_block_copy((void *)vsrc, (void *)vdst, PAGE_SIZE);
1914                 tlb_page_demap(kernel_pmap, vdst);
1915                 tlb_page_demap(kernel_pmap, vsrc);
1916                 PMAP_UNLOCK(kernel_pmap);
1917         }
1918 }
1919
1920 /*
1921  * Returns true if the pmap's pv is one of the first
1922  * 16 pvs linked to from this page.  This count may
1923  * be changed upwards or downwards in the future; it
1924  * is only necessary that true be returned for a small
1925  * subset of pmaps for proper page aging.
1926  */
1927 boolean_t
1928 pmap_page_exists_quick(pmap_t pm, vm_page_t m)
1929 {
1930         struct tte *tp;
1931         int loops;
1932
1933         mtx_assert(&vm_page_queue_mtx, MA_OWNED);
1934         if ((m->flags & (PG_FICTITIOUS | PG_UNMANAGED)) != 0)
1935                 return (FALSE);
1936         loops = 0;
1937         TAILQ_FOREACH(tp, &m->md.tte_list, tte_link) {
1938                 if ((tp->tte_data & TD_PV) == 0)
1939                         continue;
1940                 if (TTE_GET_PMAP(tp) == pm)
1941                         return (TRUE);
1942                 if (++loops >= 16)
1943                         break;
1944         }
1945         return (FALSE);
1946 }
1947
1948 /*
1949  * Return the number of managed mappings to the given physical page
1950  * that are wired.
1951  */
1952 int
1953 pmap_page_wired_mappings(vm_page_t m)
1954 {
1955         struct tte *tp;
1956         int count;
1957
1958         count = 0;
1959         if ((m->flags & PG_FICTITIOUS) != 0)
1960                 return (count);
1961         mtx_assert(&vm_page_queue_mtx, MA_OWNED);
1962         TAILQ_FOREACH(tp, &m->md.tte_list, tte_link)
1963                 if ((tp->tte_data & (TD_PV | TD_WIRED)) == (TD_PV | TD_WIRED))
1964                         count++;
1965         return (count);
1966 }
1967
1968 /*
1969  * Remove all pages from the specified address space; this aids process exit
1970  * speed.  This is much faster than pmap_remove in the case of running down
1971  * an entire address space.  Only works for the current pmap.
1972  */
1973 void
1974 pmap_remove_pages(pmap_t pm)
1975 {
1976
1977 }
1978
1979 /*
1980  * Returns TRUE if the given page has a managed mapping.
1981  */
1982 boolean_t
1983 pmap_page_is_mapped(vm_page_t m)
1984 {
1985         struct tte *tp;
1986
1987         if ((m->flags & (PG_FICTITIOUS | PG_UNMANAGED)) != 0)
1988                 return (FALSE);
1989         mtx_assert(&vm_page_queue_mtx, MA_OWNED);
1990         TAILQ_FOREACH(tp, &m->md.tte_list, tte_link)
1991                 if ((tp->tte_data & TD_PV) != 0)
1992                         return (TRUE);
1993         return (FALSE);
1994 }
1995
1996 /*
1997  * Return a count of reference bits for a page, clearing those bits.
1998  * It is not necessary for every reference bit to be cleared, but it
1999  * is necessary that 0 only be returned when there are truly no
2000  * reference bits set.
2001  *
2002  * XXX: The exact number of bits to check and clear is a matter that
2003  * should be tested and standardized at some point in the future for
2004  * optimal aging of shared pages.
2005  */
2006 int
2007 pmap_ts_referenced(vm_page_t m)
2008 {
2009         struct tte *tpf;
2010         struct tte *tpn;
2011         struct tte *tp;
2012         u_long data;
2013         int count;
2014
2015         mtx_assert(&vm_page_queue_mtx, MA_OWNED);
2016         if ((m->flags & (PG_FICTITIOUS | PG_UNMANAGED)) != 0)
2017                 return (0);
2018         count = 0;
2019         if ((tp = TAILQ_FIRST(&m->md.tte_list)) != NULL) {
2020                 tpf = tp;
2021                 do {
2022                         tpn = TAILQ_NEXT(tp, tte_link);
2023                         TAILQ_REMOVE(&m->md.tte_list, tp, tte_link);
2024                         TAILQ_INSERT_TAIL(&m->md.tte_list, tp, tte_link);
2025                         if ((tp->tte_data & TD_PV) == 0)
2026                                 continue;
2027                         data = atomic_clear_long(&tp->tte_data, TD_REF);
2028                         if ((data & TD_REF) != 0 && ++count > 4)
2029                                 break;
2030                 } while ((tp = tpn) != NULL && tp != tpf);
2031         }
2032         return (count);
2033 }
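
#ifdef PMAP_USAGE_SKETCHES
/*
 * Hypothetical usage sketch: with the page queues lock held, a non-zero
 * return from pmap_ts_referenced() means at least one managed mapping
 * referenced the page since the last scan.  The guard macro and helper
 * name are assumed for illustration only.
 */
static boolean_t
pmap_sketch_page_was_referenced(vm_page_t m)
{

        mtx_assert(&vm_page_queue_mtx, MA_OWNED);
        return (pmap_ts_referenced(m) > 0);
}
#endif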
2034
2035 boolean_t
2036 pmap_is_modified(vm_page_t m)
2037 {
2038         struct tte *tp;
2039
2040         mtx_assert(&vm_page_queue_mtx, MA_OWNED);
2041         if ((m->flags & (PG_FICTITIOUS | PG_UNMANAGED)) != 0)
2042                 return (FALSE);
2043         TAILQ_FOREACH(tp, &m->md.tte_list, tte_link) {
2044                 if ((tp->tte_data & TD_PV) == 0)
2045                         continue;
2046                 if ((tp->tte_data & TD_W) != 0)
2047                         return (TRUE);
2048         }
2049         return (FALSE);
2050 }
2051
2052 /*
2053  *      pmap_is_prefaultable:
2054  *
2055  *      Return whether or not the specified virtual address is eligible
2056  *      for prefault.
2057  */
2058 boolean_t
2059 pmap_is_prefaultable(pmap_t pmap, vm_offset_t addr)
2060 {
2061         boolean_t rv;
2062
2063         PMAP_LOCK(pmap);
2064         rv = tsb_tte_lookup(pmap, addr) == NULL;
2065         PMAP_UNLOCK(pmap);
2066         return (rv);
2067 }
2068
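/*
 * Clear the modify bit in every managed mapping of the given page,
 * demapping any mapping in which it was actually set.
 */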
2069 void
2070 pmap_clear_modify(vm_page_t m)
2071 {
2072         struct tte *tp;
2073         u_long data;
2074
2075         mtx_assert(&vm_page_queue_mtx, MA_OWNED);
2076         if ((m->flags & (PG_FICTITIOUS | PG_UNMANAGED)) != 0)
2077                 return;
2078         TAILQ_FOREACH(tp, &m->md.tte_list, tte_link) {
2079                 if ((tp->tte_data & TD_PV) == 0)
2080                         continue;
2081                 data = atomic_clear_long(&tp->tte_data, TD_W);
2082                 if ((data & TD_W) != 0)
2083                         tlb_page_demap(TTE_GET_PMAP(tp), TTE_GET_VA(tp));
2084         }
2085 }
2086
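/*
 * Clear the reference bit in every managed mapping of the given page,
 * demapping any mapping in which it was actually set.
 */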
2087 void
2088 pmap_clear_reference(vm_page_t m)
2089 {
2090         struct tte *tp;
2091         u_long data;
2092
2093         mtx_assert(&vm_page_queue_mtx, MA_OWNED);
2094         if ((m->flags & (PG_FICTITIOUS | PG_UNMANAGED)) != 0)
2095                 return;
2096         TAILQ_FOREACH(tp, &m->md.tte_list, tte_link) {
2097                 if ((tp->tte_data & TD_PV) == 0)
2098                         continue;
2099                 data = atomic_clear_long(&tp->tte_data, TD_REF);
2100                 if ((data & TD_REF) != 0)
2101                         tlb_page_demap(TTE_GET_PMAP(tp), TTE_GET_VA(tp));
2102         }
2103 }
2104
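/*
 * Revoke write access to the given page in all of its managed mappings,
 * recording any pending modification before the write bits are cleared.
 */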
2105 void
2106 pmap_remove_write(vm_page_t m)
2107 {
2108         struct tte *tp;
2109         u_long data;
2110
2111         mtx_assert(&vm_page_queue_mtx, MA_OWNED);
2112         if ((m->flags & (PG_FICTITIOUS | PG_UNMANAGED)) != 0 ||
2113             (m->flags & PG_WRITEABLE) == 0)
2114                 return;
2115         TAILQ_FOREACH(tp, &m->md.tte_list, tte_link) {
2116                 if ((tp->tte_data & TD_PV) == 0)
2117                         continue;
2118                 data = atomic_clear_long(&tp->tte_data, TD_SW | TD_W);
2119                 if ((data & TD_W) != 0) {
2120                         vm_page_dirty(m);
2121                         tlb_page_demap(TTE_GET_PMAP(tp), TTE_GET_VA(tp));
2122                 }
2123         }
2124         vm_page_flag_clear(m, PG_WRITEABLE);
2125 }
2126
2127 int
2128 pmap_mincore(pmap_t pm, vm_offset_t addr)
2129 {
2130
2131         /* TODO; */
2132         return (0);
2133 }
2134
2135 /*
2136  * Activate a user pmap.  The pmap must be activated before its address space
2137  * can be accessed in any way.
2138  */
2139 void
2140 pmap_activate(struct thread *td)
2141 {
2142         struct vmspace *vm;
2143         struct pmap *pm;
2144         int context;
2145
2146         critical_enter();
2147         vm = td->td_proc->p_vmspace;
2148         pm = vmspace_pmap(vm);
2149
2150         context = PCPU_GET(tlb_ctx);
2151         if (context == PCPU_GET(tlb_ctx_max)) {
2152                 tlb_flush_user();
2153                 context = PCPU_GET(tlb_ctx_min);
2154         }
2155         PCPU_SET(tlb_ctx, context + 1);
2156
2157         mtx_lock_spin(&sched_lock);
2158         pm->pm_context[curcpu] = context;
2159         pm->pm_active |= PCPU_GET(cpumask);
2160         PCPU_SET(pmap, pm);
2161         mtx_unlock_spin(&sched_lock);
2162
2163         stxa(AA_DMMU_TSB, ASI_DMMU, pm->pm_tsb);
2164         stxa(AA_IMMU_TSB, ASI_IMMU, pm->pm_tsb);
2165         stxa(AA_DMMU_PCXR, ASI_DMMU, (ldxa(AA_DMMU_PCXR, ASI_DMMU) &
2166             TLB_CXR_PGSZ_MASK) | context);
2167         flush(KERNBASE);
2168         critical_exit();
2169 }
2170
2171 void
2172 pmap_sync_icache(pmap_t pm, vm_offset_t va, vm_size_t sz)
2173 {
2174
2175 }
2176
2177 /*
2178  * Increase the starting virtual address of the given mapping if a
2179  * different alignment might result in more superpage mappings.
2180  */
2181 void
2182 pmap_align_superpage(vm_object_t object, vm_ooffset_t offset,
2183     vm_offset_t *addr, vm_size_t size)
2184 {
2185
2186 }