sys/arm/arm/pmap.c

   1 /* From: $NetBSD: pmap.c,v 1.148 2004/04/03 04:35:48 bsh Exp $ */
   2 /*-
   3  * Copyright 2004 Olivier Houchard.
   4  * Copyright 2003 Wasabi Systems, Inc.
   5  * All rights reserved.
   6  *
   7  * Written by Steve C. Woodford for Wasabi Systems, Inc.
   8  *
   9  * Redistribution and use in source and binary forms, with or without
  10  * modification, are permitted provided that the following conditions
  11  * are met:
  12  * 1. Redistributions of source code must retain the above copyright
  13  *    notice, this list of conditions and the following disclaimer.
  14  * 2. Redistributions in binary form must reproduce the above copyright
  15  *    notice, this list of conditions and the following disclaimer in the
  16  *    documentation and/or other materials provided with the distribution.
  17  * 3. All advertising materials mentioning features or use of this software
  18  *    must display the following acknowledgement:
  19  *      This product includes software developed for the NetBSD Project by
  20  *      Wasabi Systems, Inc.
  21  * 4. The name of Wasabi Systems, Inc. may not be used to endorse
  22  *    or promote products derived from this software without specific prior
  23  *    written permission.
  24  *
  25  * THIS SOFTWARE IS PROVIDED BY WASABI SYSTEMS, INC. ``AS IS'' AND
  26  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
  27  * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
  28  * PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL WASABI SYSTEMS, INC
  29  * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
  30  * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
  31  * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
  32  * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
  33  * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
  34  * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
  35  * POSSIBILITY OF SUCH DAMAGE.
  36  */
  37
  38 /*-
  39  * Copyright (c) 2002-2003 Wasabi Systems, Inc.
  40  * Copyright (c) 2001 Richard Earnshaw
  41  * Copyright (c) 2001-2002 Christopher Gilbert
  42  * All rights reserved.
  43  *
  44  * 1. Redistributions of source code must retain the above copyright
  45  *    notice, this list of conditions and the following disclaimer.
  46  * 2. Redistributions in binary form must reproduce the above copyright
  47  *    notice, this list of conditions and the following disclaimer in the
  48  *    documentation and/or other materials provided with the distribution.
  49  * 3. The name of the company nor the name of the author may be used to
  50  *    endorse or promote products derived from this software without specific
  51  *    prior written permission.
  52  *
  53  * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR IMPLIED
  54  * WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF
  55  * MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
  56  * IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT,
  57  * INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
  58  * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
  59  * SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
  60  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
  61  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
  62  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
  63  * SUCH DAMAGE.
  64  */
  65 /*-
  66  * Copyright (c) 1999 The NetBSD Foundation, Inc.
  67  * All rights reserved.
  68  *
  69  * This code is derived from software contributed to The NetBSD Foundation
  70  * by Charles M. Hannum.
  71  *
  72  * Redistribution and use in source and binary forms, with or without
  73  * modification, are permitted provided that the following conditions
  74  * are met:
  75  * 1. Redistributions of source code must retain the above copyright
  76  *    notice, this list of conditions and the following disclaimer.
  77  * 2. Redistributions in binary form must reproduce the above copyright
  78  *    notice, this list of conditions and the following disclaimer in the
  79  *    documentation and/or other materials provided with the distribution.
  80  * 3. All advertising materials mentioning features or use of this software
  81  *    must display the following acknowledgement:
  82  *        This product includes software developed by the NetBSD
  83  *        Foundation, Inc. and its contributors.
  84  * 4. Neither the name of The NetBSD Foundation nor the names of its
  85  *    contributors may be used to endorse or promote products derived
  86  *    from this software without specific prior written permission.
  87  *
  88  * THIS SOFTWARE IS PROVIDED BY THE NETBSD FOUNDATION, INC. AND CONTRIBUTORS
  89  * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
  90  * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
  91  * PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL THE FOUNDATION OR CONTRIBUTORS
  92  * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
  93  * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
  94  * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
  95  * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
  96  * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
  97  * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
  98  * POSSIBILITY OF SUCH DAMAGE.
  99  */
 100
 101 /*-
 102  * Copyright (c) 1994-1998 Mark Brinicombe.
 103  * Copyright (c) 1994 Brini.
 104  * All rights reserved.
 105  *
 106  * This code is derived from software written for Brini by Mark Brinicombe
 107  *
 108  * Redistribution and use in source and binary forms, with or without
 109  * modification, are permitted provided that the following conditions
 110  * are met:
 111  * 1. Redistributions of source code must retain the above copyright
 112  *    notice, this list of conditions and the following disclaimer.
 113  * 2. Redistributions in binary form must reproduce the above copyright
 114  *    notice, this list of conditions and the following disclaimer in the
 115  *    documentation and/or other materials provided with the distribution.
 116  * 3. All advertising materials mentioning features or use of this software
 117  *    must display the following acknowledgement:
 118  *      This product includes software developed by Mark Brinicombe.
 119  * 4. The name of the author may not be used to endorse or promote products
 120  *    derived from this software without specific prior written permission.
 121  *
 122  * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
 123  * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
 124  * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
 125  * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT,
 126  * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
 127  * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
 128  * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
 129  * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
 130  * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
 131  *
 132  * RiscBSD kernel project
 133  *
 134  * pmap.c
 135  *
  136  * Machine dependent vm stuff
 137  *
 138  * Created      : 20/09/94
 139  */
 140
 141 /*
 142  * Special compilation symbols
 143  * PMAP_DEBUG           - Build in pmap_debug_level code
 144  */
 145 /* Include header files */
 146
 147 #include "opt_vm.h"
 148
 149 #include <sys/cdefs.h>
 150 __FBSDID("$FreeBSD$");
 151 #include <sys/param.h>
 152 #include <sys/systm.h>
 153 #include <sys/kernel.h>
 154 #include <sys/proc.h>
 155 #include <sys/malloc.h>
 156 #include <sys/msgbuf.h>
 157 #include <sys/vmmeter.h>
 158 #include <sys/mman.h>
 159 #include <sys/smp.h>
 160 #include <sys/sx.h>
 161 #include <sys/sched.h>
 162
 163 #include <vm/vm.h>
 164 #include <vm/uma.h>
 165 #include <vm/pmap.h>
 166 #include <vm/vm_kern.h>
 167 #include <vm/vm_object.h>
 168 #include <vm/vm_map.h>
 169 #include <vm/vm_page.h>
 170 #include <vm/vm_pageout.h>
 171 #include <vm/vm_extern.h>
 172 #include <sys/lock.h>
 173 #include <sys/mutex.h>
 174 #include <machine/md_var.h>
 175 #include <machine/vmparam.h>
 176 #include <machine/cpu.h>
 177 #include <machine/cpufunc.h>
 178 #include <machine/pcb.h>
 179
 180 #ifdef PMAP_DEBUG
     /*
      * Debug build: PDEBUG(level, stmt) evaluates stmt only when
      * pmap_debug_level is at least level, dprintf is an alias for printf,
      * and PMAP_INLINE expands to nothing.
      *
      * NOTE(review): PDEBUG expands to a bare `if`, so an `else` written
      * directly after a use would bind to the macro's `if`.
      */
 181 #define PDEBUG(_lev_,_stat_) \
 182         if (pmap_debug_level >= (_lev_)) \
 183                 ((_stat_))
 184 #define dprintf printf
 185
 186 int pmap_debug_level = 0;
 187 #define PMAP_INLINE
 188 #else   /* PMAP_DEBUG */
     /*
      * Non-debug build: PDEBUG and dprintf expand to nothing and
      * PMAP_INLINE becomes __inline.
      */
 189 #define PDEBUG(_lev_,_stat_) /* Nothing */
 190 #define dprintf(x, arg...)
 191 #define PMAP_INLINE __inline
 192 #endif  /* PMAP_DEBUG */
 193
 194 extern struct pv_addr systempage;
 195 /*
 196  * Internal function prototypes
 197  */
     /* pv_entry allocation helpers (defined elsewhere in this file). */
 198 static void pmap_free_pv_entry (pv_entry_t);
 199 static pv_entry_t pmap_get_pv_entry(void);
 200
 201 static void             pmap_enter_locked(pmap_t, vm_offset_t, vm_page_t,
 202     vm_prot_t, boolean_t, int);
 203 static void             pmap_vac_me_harder(struct vm_page *, pmap_t,
 204     vm_offset_t);
 205 static void             pmap_vac_me_kpmap(struct vm_page *, pmap_t,
 206     vm_offset_t);
 207 static void             pmap_vac_me_user(struct vm_page *, pmap_t, vm_offset_t);
     /* L1 translation table allocation, release and LRU bookkeeping. */
 208 static void             pmap_alloc_l1(pmap_t);
 209 static void             pmap_free_l1(pmap_t);
 210 static void             pmap_use_l1(pmap_t);
 211
 212 static int              pmap_clearbit(struct vm_page *, u_int);
 213
     /*
      * L2 bucket lookup/allocation/release, plus the lookup that maps the
      * physical address of a kernel page table to its KVA via kernel_pt_list.
      */
 214 static struct l2_bucket *pmap_get_l2_bucket(pmap_t, vm_offset_t);
 215 static struct l2_bucket *pmap_alloc_l2_bucket(pmap_t, vm_offset_t);
 216 static void             pmap_free_l2_bucket(pmap_t, struct l2_bucket *, u_int);
 217 static vm_offset_t      kernel_pt_lookup(vm_paddr_t);
 218
 219 static MALLOC_DEFINE(M_VMPMAP, "pmap", "PMAP L1");
 220
 221 vm_offset_t avail_end;          /* PA of last available physical page */
 222 vm_offset_t virtual_avail;      /* VA of first avail page (after kernel bss) */
 223 vm_offset_t virtual_end;        /* VA of last avail page (end of kernel AS) */
 224 vm_offset_t pmap_curmaxkvaddr;  /* NOTE(review): presumably the highest kernel VA currently backed by page tables -- confirm */
 225 vm_paddr_t kernel_l1pa;         /* NOTE(review): presumably the PA of the kernel L1 table -- confirm */
 226
 227 extern void *end;
 228 vm_offset_t kernel_vm_end = 0;
 229
     /* Storage for the kernel pmap and the pointer other code uses to reach it. */
 230 struct pmap kernel_pmap_store;
 231 pmap_t kernel_pmap;
 232
     /*
      * NOTE(review): presumably the PTEs (csrc_pte/cdst_pte) and VAs
      * (csrcp/cdstp) of the temporary source/destination windows used by
      * the page copy/zero routines, serialized by cmtx -- their users are
      * outside this part of the file; confirm.
      */
 233 static pt_entry_t *csrc_pte, *cdst_pte;
 234 static vm_offset_t csrcp, cdstp;
 235 static struct mtx cmtx;
 236
     /* Declared here rather than with the other prototypes; struct l1_ttable is defined further down. */
 237 static void             pmap_init_l1(struct l1_ttable *, pd_entry_t *);
 238 /*
 239  * These routines are called when the CPU type is identified to set up
 240  * the PTE prototypes, cache modes, etc.
 241  *
 242  * The variables are always here, just in case LKMs need to reference
 243  * them (though, they shouldn't).
 244  */
 245
 246 pt_entry_t      pte_l1_s_cache_mode;
     /*
      * Naming used by the three groups below (l1_s, l2_l, l2_s):
      *   *_cache_mode    - cache bits (L1_S_B/L1_S_C or L2_B/L2_C, plus
      *                     XScale extras) applied to ordinary mappings;
      *   *_cache_mode_pt - cache bits applied to mappings of the page
      *                     tables themselves;
      *   *_cache_mask    - loaded from the matching *_CACHE_MASK_* constant.
      * All of them are filled in by the pmap_pte_init_*() routines.
      */
 247 pt_entry_t      pte_l1_s_cache_mode_pt;
 248 pt_entry_t      pte_l1_s_cache_mask;
 249
 250 pt_entry_t      pte_l2_l_cache_mode;
 251 pt_entry_t      pte_l2_l_cache_mode_pt;
 252 pt_entry_t      pte_l2_l_cache_mask;
 253
 254 pt_entry_t      pte_l2_s_cache_mode;
 255 pt_entry_t      pte_l2_s_cache_mode_pt;
 256 pt_entry_t      pte_l2_s_cache_mask;
 257
     /* Protection bits, loaded from L2_S_PROT_{U,W,MASK}_generic or _xscale. */
 258 pt_entry_t      pte_l2_s_prot_u;
 259 pt_entry_t      pte_l2_s_prot_w;
 260 pt_entry_t      pte_l2_s_prot_mask;
 261
     /* Descriptor prototypes, loaded from L1_S/L1_C/L2_S _PROTO_generic or _xscale. */
 262 pt_entry_t      pte_l1_s_proto;
 263 pt_entry_t      pte_l1_c_proto;
 264 pt_entry_t      pte_l2_s_proto;
 265
     /*
      * Page copy/zero implementations; pmap_pte_init_generic() and
      * pmap_pte_init_xscale() point these at the matching *_generic or
      * *_xscale routine.
      */
 266 void            (*pmap_copy_page_func)(vm_paddr_t, vm_paddr_t);
 267 void            (*pmap_zero_page_func)(vm_paddr_t, int, int);
 268 /*
 269  * Which pmap is currently 'live' in the cache
 270  *
 271  * XXXSCW: Fix for SMP ...
 272  */
 273 union pmap_cache_state *pmap_cache_state;
 274
 275 /* static pt_entry_t *msgbufmap;*/
     /* Kernel message buffer pointer; starts out null. */
 276 struct msgbuf *msgbufp = 0;
 277
     /* Page copy/zero primitives implemented outside this file. */
 278 extern void bcopy_page(vm_offset_t, vm_offset_t);
 279 extern void bzero_page(vm_offset_t);
 280
 281 extern vm_offset_t alloc_firstaddr;
 282
     /* NOTE(review): no user of _tmppt is visible in this part of the file -- confirm its purpose. */
 283 char *_tmppt;
 284
 285 /*
 286  * Metadata for L1 translation tables.
      *
      * One record describes one L1 table.  pmap_alloc_l1() hands a domain
      * number of the table to each pmap that uses it, so a single table
      * can be shared by up to PMAP_DOMAINS pmaps.
 287  */
 288 struct l1_ttable {
 289         /* Entry on the L1 Table list */
 290         SLIST_ENTRY(l1_ttable) l1_link;
 291
 292         /* Entry on the L1 Least Recently Used list */
             /*
              * The record is only linked on the LRU list while
              * l1_domain_use_count is below PMAP_DOMAINS.
              */
 293         TAILQ_ENTRY(l1_ttable) l1_lru;
 294
 295         /* Track how many domains are allocated from this L1 */
 296         volatile u_int l1_domain_use_count;
 297
 298         /*
 299          * A free-list of domain numbers for this L1.
 300          * We avoid using ffs() and a bitmap to track domains since ffs()
 301          * is slow on ARM.
              *
              * l1_domain_first is the next domain number to hand out, and
              * l1_domain_free[d] holds the domain number that follows d on
              * the free list.  pmap_init_l1() seeds it with d + 1.
 302          */
 303         u_int8_t l1_domain_first;
 304         u_int8_t l1_domain_free[PMAP_DOMAINS];
 305
 306         /* Physical address of this L1 page table */
 307         vm_paddr_t l1_physaddr;
 308
 309         /* KVA of this L1 page table */
 310         pd_entry_t *l1_kva;
 311 };
 312
 313 /*
 314  * Convert a virtual address into its L1 table index. That is, the
 315  * index used to locate the L2 descriptor table pointer in an L1 table.
 316  * This is basically used to index l1->l1_kva[].
 317  *
 318  * Each L2 descriptor table represents 1MB of VA space.
      *
      * The argument is cast to vm_offset_t before it is shifted right by
      * L1_S_SHIFT.
 319  */
 320 #define L1_IDX(va)              (((vm_offset_t)(va)) >> L1_S_SHIFT)
 321
 322 /*
 323  * L1 Page Tables are tracked using a Least Recently Used list.
 324  *  - New L1s are allocated from the HEAD.
 325  *  - Freed L1s are added to the TAIL.
 326  *  - Recently accessed L1s (where an 'access' is some change to one of
 327  *    the userland pmaps which owns this L1) are moved to the TAIL.
      *
      * NOTE(review): pmap_free_l1() re-inserts an L1 at the HEAD when its
      * domain use count drops to zero and at the tail otherwise.  An L1
      * whose domains are all in use is kept off this list entirely.
 328  */
 329 static TAILQ_HEAD(, l1_ttable) l1_lru_list;
 330 /*
 331  * A list of all L1 tables
 332  */
 333 static SLIST_HEAD(, l1_ttable) l1_list;
     /* Taken by pmap_alloc_l1(), pmap_free_l1() and pmap_use_l1() around their LRU list updates. */
 334 static struct mtx l1_lru_lock;
 335
 336 /*
 337  * The l2_dtable tracks L2_BUCKET_SIZE worth of L1 slots.
 338  *
 339  * This is normally 16MB worth of L2 page descriptors for any given pmap.
 340  * Reference counts are maintained for L2 descriptors so they can be
 341  * freed when empty.
      *
      * A pmap reaches these through its pm_l2[] array: L2_IDX() selects
      * the l2_dtable and L2_BUCKET() selects the slot in l2_bucket[]
      * (see pmap_get_l2_bucket()).
 342  */
 343 struct l2_dtable {
 344         /* The number of L2 page descriptors allocated to this l2_dtable */
 345         u_int l2_occupancy;
 346
 347         /* List of L2 page descriptors */
             /*
              * struct l2_bucket is declared here but referred to by name
              * elsewhere in the file.  A bucket whose l2b_kva is NULL has
              * no L2 table; pmap_get_l2_bucket() returns NULL for it.
              */
 348         struct l2_bucket {
 349                 pt_entry_t *l2b_kva;    /* KVA of L2 Descriptor Table */
 350                 vm_paddr_t l2b_phys;    /* Physical address of same */
 351                 u_short l2b_l1idx;      /* This L2 table's L1 index */
 352                 u_short l2b_occupancy;  /* How many active descriptors */
 353         } l2_bucket[L2_BUCKET_SIZE];
 354 };
 355
 356 /* pmap_kenter_internal flags */
 357 #define KENTER_CACHE    0x1
 358 #define KENTER_USER     0x2
 359
 360 /*
 361  * Given an L1 table index, calculate the corresponding l2_dtable index
 362  * and bucket index within the l2_dtable.
      *
      * L2_IDX(l1idx) is the index into pm->pm_l2[]; L2_BUCKET(l1idx) is
      * the index into that l2_dtable's l2_bucket[] array.
 363  */
 364 #define L2_IDX(l1idx)           (((l1idx) >> L2_BUCKET_LOG2) & \
 365                                  (L2_SIZE - 1))
 366 #define L2_BUCKET(l1idx)        ((l1idx) & (L2_BUCKET_SIZE - 1))
 367
 368 /*
 369  * Given a virtual address, this macro returns the
 370  * virtual address required to drop into the next L2 bucket.
      *
      * That is: va rounded down with L1_S_FRAME, plus L1_S_SIZE.
 371  */
 372 #define L2_NEXT_BUCKET(va)      (((va) & L1_S_FRAME) + L1_S_SIZE)
 373
 374 /*
 375  * L2 allocation.
      *
      * Both macros operate on l2table_zone.  The allocation is requested
      * with M_NOWAIT|M_USE_RESERVE, so it does not sleep.
      * NOTE(review): an M_NOWAIT allocation can return NULL -- callers
      * are expected to check; confirm at the call sites.
 376  */
 377 #define pmap_alloc_l2_dtable()          \
 378                 (void*)uma_zalloc(l2table_zone, M_NOWAIT|M_USE_RESERVE)
 379 #define pmap_free_l2_dtable(l2)         \
 380                 uma_zfree(l2table_zone, l2)
 381
 382 /*
 383  * We try to map the page tables write-through, if possible.  However, not
 384  * all CPUs have a write-through cache mode, so on those we have to sync
 385  * the cache when we frob page tables.
 386  *
 387  * We try to evaluate this at compile time, if possible.  However, it's
 388  * not always possible to do that, hence this run-time var.
      *
      * pmap_pte_init_sa1() sets this to 1.
 389  */
 390 int     pmap_needs_pte_sync;
 391
 392 /*
 393  * Macro to determine if a mapping might be resident in the
 394  * instruction cache and/or TLB
      *
      * True only when both PVF_REF and PVF_EXEC are set in f.
 395  */
 396 #define PV_BEEN_EXECD(f)  (((f) & (PVF_REF | PVF_EXEC)) == (PVF_REF | PVF_EXEC))
 397
 398 /*
 399  * Macro to determine if a mapping might be resident in the
 400  * data cache and/or TLB
      *
      * True when PVF_REF is set in f.
 401  */
 402 #define PV_BEEN_REFD(f)   (((f) & PVF_REF) != 0)
 403
 404 #ifndef PMAP_SHPGPERPROC
     /* Default used when the kernel configuration does not define PMAP_SHPGPERPROC. */
 405 #define PMAP_SHPGPERPROC 200
 406 #endif
 407
     /*
      * True when pm is the kernel pmap or the pmap of the current
      * process's vmspace.  Note that pm is evaluated twice.
      */
 408 #define pmap_is_current(pm)     ((pm) == pmap_kernel() || \
 409             curproc->p_vmspace->vm_map.pmap == (pm))
     /*
      * UMA zones.  l2table_zone backs pmap_alloc_l2_dtable() and
      * pmap_free_l2_dtable().
      * NOTE(review): pvzone presumably holds pv entries and l2zone the L2
      * page tables -- their users are outside this part of the file.
      */
 410 static uma_zone_t pvzone;
 411 uma_zone_t l2zone;
 412 static uma_zone_t l2table_zone;
 413 static vm_offset_t pmap_kernel_l2dtable_kva;
 414 static vm_offset_t pmap_kernel_l2ptp_kva;
 415 static vm_paddr_t pmap_kernel_l2ptp_phys;
 416 static struct vm_object pvzone_obj;
     /* pv entry accounting; all three counters start at zero. */
 417 static int pv_entry_count=0, pv_entry_max=0, pv_entry_high_water=0;
 418
 419 /*
 420  * This list exists for the benefit of pmap_map_chunk().  It keeps track
 421  * of the kernel L2 tables during bootstrap, so that pmap_map_chunk() can
 422  * find them as necessary.
 423  *
 424  * Note that the data on this list MUST remain valid after initarm() returns,
 425  * as pmap_bootstrap() uses it to construct L2 table metadata.
      *
      * kernel_pt_lookup() walks this list to turn the physical address of
      * a page table into its virtual address.
 426  */
 427 SLIST_HEAD(, pv_addr) kernel_pt_list = SLIST_HEAD_INITIALIZER(kernel_pt_list);
 428
 429 static void
 430 pmap_init_l1(struct l1_ttable *l1, pd_entry_t *l1pt)
 431 {
 432         int i;
 433
 434         l1->l1_kva = l1pt;
 435         l1->l1_domain_use_count = 0;
 436         l1->l1_domain_first = 0;
 437
 438         for (i = 0; i < PMAP_DOMAINS; i++)
 439                 l1->l1_domain_free[i] = i + 1;
 440
 441         /*
 442          * Copy the kernel's L1 entries to each new L1.
 443          */
 444         if (l1pt != pmap_kernel()->pm_l1->l1_kva)
 445                 memcpy(l1pt, pmap_kernel()->pm_l1->l1_kva, L1_TABLE_SIZE);
 446
 447         if ((l1->l1_physaddr = pmap_extract(pmap_kernel(), (vm_offset_t)l1pt)) == 0)
 448                 panic("pmap_init_l1: can't get PA of L1 at %p", l1pt);
 449         SLIST_INSERT_HEAD(&l1_list, l1, l1_link);
 450         TAILQ_INSERT_TAIL(&l1_lru_list, l1, l1_lru);
 451 }
 452
 453 static vm_offset_t
 454 kernel_pt_lookup(vm_paddr_t pa)
 455 {
 456         struct pv_addr *pv;
 457
 458         SLIST_FOREACH(pv, &kernel_pt_list, pv_list) {
 459                 if (pv->pv_pa == pa)
 460                         return (pv->pv_va);
 461         }
 462         return (0);
 463 }
 464
 465 #if (ARM_MMU_GENERIC + ARM_MMU_SA1) != 0
 466 void
 467 pmap_pte_init_generic(void)
 468 {
 469
 470         pte_l1_s_cache_mode = L1_S_B|L1_S_C;
 471         pte_l1_s_cache_mask = L1_S_CACHE_MASK_generic;
 472
 473         pte_l2_l_cache_mode = L2_B|L2_C;
 474         pte_l2_l_cache_mask = L2_L_CACHE_MASK_generic;
 475
 476         pte_l2_s_cache_mode = L2_B|L2_C;
 477         pte_l2_s_cache_mask = L2_S_CACHE_MASK_generic;
 478
 479         /*
 480          * If we have a write-through cache, set B and C.  If
 481          * we have a write-back cache, then we assume setting
 482          * only C will make those pages write-through.
 483          */
 484         if (cpufuncs.cf_dcache_wb_range == (void *) cpufunc_nullop) {
 485                 pte_l1_s_cache_mode_pt = L1_S_B|L1_S_C;
 486                 pte_l2_l_cache_mode_pt = L2_B|L2_C;
 487                 pte_l2_s_cache_mode_pt = L2_B|L2_C;
 488         } else {
 489                 pte_l1_s_cache_mode_pt = L1_S_C;
 490                 pte_l2_l_cache_mode_pt = L2_C;
 491                 pte_l2_s_cache_mode_pt = L2_C;
 492         }
 493
 494         pte_l2_s_prot_u = L2_S_PROT_U_generic;
 495         pte_l2_s_prot_w = L2_S_PROT_W_generic;
 496         pte_l2_s_prot_mask = L2_S_PROT_MASK_generic;
 497
 498         pte_l1_s_proto = L1_S_PROTO_generic;
 499         pte_l1_c_proto = L1_C_PROTO_generic;
 500         pte_l2_s_proto = L2_S_PROTO_generic;
 501
 502         pmap_copy_page_func = pmap_copy_page_generic;
 503         pmap_zero_page_func = pmap_zero_page_generic;
 504 }
 505
 506 #if defined(CPU_ARM8)
 507 void
 508 pmap_pte_init_arm8(void)
 509 {
 510
 511         /*
 512          * ARM8 is compatible with generic, but we need to use
 513          * the page tables uncached.
 514          */
 515         pmap_pte_init_generic();
 516
 517         pte_l1_s_cache_mode_pt = 0;
 518         pte_l2_l_cache_mode_pt = 0;
 519         pte_l2_s_cache_mode_pt = 0;
 520 }
 521 #endif /* CPU_ARM8 */
 522
 523 #if defined(CPU_ARM9) && defined(ARM9_CACHE_WRITE_THROUGH)
 524 void
 525 pmap_pte_init_arm9(void)
 526 {
 527
 528         /*
 529          * ARM9 is compatible with generic, but we want to use
 530          * write-through caching for now.
 531          */
 532         pmap_pte_init_generic();
 533
 534         pte_l1_s_cache_mode = L1_S_C;
 535         pte_l2_l_cache_mode = L2_C;
 536         pte_l2_s_cache_mode = L2_C;
 537
 538         pte_l1_s_cache_mode_pt = L1_S_C;
 539         pte_l2_l_cache_mode_pt = L2_C;
 540         pte_l2_s_cache_mode_pt = L2_C;
 541 }
 542 #endif /* CPU_ARM9 */
 543 #endif /* (ARM_MMU_GENERIC + ARM_MMU_SA1) != 0 */
 544
 545 #if defined(CPU_ARM10)
 546 void
 547 pmap_pte_init_arm10(void)
 548 {
 549
 550         /*
 551          * ARM10 is compatible with generic, but we want to use
 552          * write-through caching for now.
 553          */
 554         pmap_pte_init_generic();
 555
 556         pte_l1_s_cache_mode = L1_S_B | L1_S_C;
 557         pte_l2_l_cache_mode = L2_B | L2_C;
 558         pte_l2_s_cache_mode = L2_B | L2_C;
 559
 560         pte_l1_s_cache_mode_pt = L1_S_C;
 561         pte_l2_l_cache_mode_pt = L2_C;
 562         pte_l2_s_cache_mode_pt = L2_C;
 563
 564 }
 565 #endif /* CPU_ARM10 */
 566
 567 #if  ARM_MMU_SA1 == 1
 568 void
 569 pmap_pte_init_sa1(void)
 570 {
 571
 572         /*
 573          * The StrongARM SA-1 cache does not have a write-through
 574          * mode.  So, do the generic initialization, then reset
 575          * the page table cache mode to B=1,C=1, and note that
 576          * the PTEs need to be sync'd.
 577          */
 578         pmap_pte_init_generic();
 579
 580         pte_l1_s_cache_mode_pt = L1_S_B|L1_S_C;
 581         pte_l2_l_cache_mode_pt = L2_B|L2_C;
 582         pte_l2_s_cache_mode_pt = L2_B|L2_C;
 583
 584         pmap_needs_pte_sync = 1;
 585 }
 586 #endif /* ARM_MMU_SA1 == 1*/
 587
 588 #if ARM_MMU_XSCALE == 1
 589 #if (ARM_NMMUS > 1)
 590 static u_int xscale_use_minidata;
 591 #endif
 592
 593 void
 594 pmap_pte_init_xscale(void)
 595 {
 596         uint32_t auxctl;
 597         int write_through = 0;
 598
 599         pte_l1_s_cache_mode = L1_S_B|L1_S_C|L1_S_XSCALE_P;
 600         pte_l1_s_cache_mask = L1_S_CACHE_MASK_xscale;
 601
 602         pte_l2_l_cache_mode = L2_B|L2_C;
 603         pte_l2_l_cache_mask = L2_L_CACHE_MASK_xscale;
 604
 605         pte_l2_s_cache_mode = L2_B|L2_C;
 606         pte_l2_s_cache_mask = L2_S_CACHE_MASK_xscale;
 607
 608         pte_l1_s_cache_mode_pt = L1_S_C;
 609         pte_l2_l_cache_mode_pt = L2_C;
 610         pte_l2_s_cache_mode_pt = L2_C;
 611 #ifdef XSCALE_CACHE_READ_WRITE_ALLOCATE
 612         /*
 613          * The XScale core has an enhanced mode where writes that
 614          * miss the cache cause a cache line to be allocated.  This
 615          * is significantly faster than the traditional, write-through
 616          * behavior of this case.
 617          */
 618         pte_l1_s_cache_mode |= L1_S_XSCALE_TEX(TEX_XSCALE_X);
 619         pte_l2_l_cache_mode |= L2_XSCALE_L_TEX(TEX_XSCALE_X);
 620         pte_l2_s_cache_mode |= L2_XSCALE_T_TEX(TEX_XSCALE_X);
 621 #endif /* XSCALE_CACHE_READ_WRITE_ALLOCATE */
 622 #ifdef XSCALE_CACHE_WRITE_THROUGH
 623         /*
 624          * Some versions of the XScale core have various bugs in
 625          * their cache units, the work-around for which is to run
 626          * the cache in write-through mode.  Unfortunately, this
 627          * has a major (negative) impact on performance.  So, we
 628          * go ahead and run fast-and-loose, in the hopes that we
 629          * don't line up the planets in a way that will trip the
 630          * bugs.
 631          *
 632          * However, we give you the option to be slow-but-correct.
 633          */
 634         write_through = 1;
 635 #elif defined(XSCALE_CACHE_WRITE_BACK)
 636         /* force write back cache mode */
 637         write_through = 0;
 638 #elif defined(CPU_XSCALE_PXA2X0)
 639         /*
 640          * Intel PXA2[15]0 processors are known to have a bug in
 641          * write-back cache on revision 4 and earlier (stepping
 642          * A[01] and B[012]).  Fixed for C0 and later.
 643          */
 644         {
 645                 uint32_t id, type;
 646
 647                 id = cpufunc_id();
 648                 type = id & ~(CPU_ID_XSCALE_COREREV_MASK|CPU_ID_REVISION_MASK);
 649
 650                 if (type == CPU_ID_PXA250 || type == CPU_ID_PXA210) {
 651                         if ((id & CPU_ID_REVISION_MASK) < 5) {
 652                                 /* write through for stepping A0-1 and B0-2 */
 653                                 write_through = 1;
 654                         }
 655                 }
 656         }
 657 #endif /* XSCALE_CACHE_WRITE_THROUGH */
 658
 659         if (write_through) {
 660                 pte_l1_s_cache_mode = L1_S_C;
 661                 pte_l2_l_cache_mode = L2_C;
 662                 pte_l2_s_cache_mode = L2_C;
 663         }
 664
 665 #if (ARM_NMMUS > 1)
 666         xscale_use_minidata = 1;
 667 #endif
 668
 669         pte_l2_s_prot_u = L2_S_PROT_U_xscale;
 670         pte_l2_s_prot_w = L2_S_PROT_W_xscale;
 671         pte_l2_s_prot_mask = L2_S_PROT_MASK_xscale;
 672
 673         pte_l1_s_proto = L1_S_PROTO_xscale;
 674         pte_l1_c_proto = L1_C_PROTO_xscale;
 675         pte_l2_s_proto = L2_S_PROTO_xscale;
 676
 677         pmap_copy_page_func = pmap_copy_page_xscale;
 678         pmap_zero_page_func = pmap_zero_page_xscale;
 679
 680         /*
 681          * Disable ECC protection of page table access, for now.
 682          */
 683         __asm __volatile("mrc p15, 0, %0, c1, c0, 1" : "=r" (auxctl));
 684         auxctl &= ~XSCALE_AUXCTL_P;
 685         __asm __volatile("mcr p15, 0, %0, c1, c0, 1" : : "r" (auxctl));
 686 }
 687
 688 /*
 689  * xscale_setup_minidata:
 690  *
 691  *      Set up the mini-data cache clean area.  We require the
 692  *      caller to allocate the right amount of physically and
 693  *      virtually contiguous space.
 694  */
 695 extern vm_offset_t xscale_minidata_clean_addr;
 696 extern vm_size_t xscale_minidata_clean_size; /* already initialized */
 697 void
 698 xscale_setup_minidata(vm_offset_t l1pt, vm_offset_t va, vm_paddr_t pa)
 699 {
 700         pd_entry_t *pde = (pd_entry_t *) l1pt;
 701         pt_entry_t *pte;
 702         vm_size_t size;
 703         uint32_t auxctl;
 704
 705         xscale_minidata_clean_addr = va;
 706
 707         /* Round it to page size. */
 708         size = (xscale_minidata_clean_size + L2_S_OFFSET) & L2_S_FRAME;
 709
 710         for (; size != 0;
 711              va += L2_S_SIZE, pa += L2_S_SIZE, size -= L2_S_SIZE) {
 712                 pte = (pt_entry_t *) kernel_pt_lookup(
 713                     pde[L1_IDX(va)] & L1_C_ADDR_MASK);
 714                 if (pte == NULL)
 715                         panic("xscale_setup_minidata: can't find L2 table for "
 716                             "VA 0x%08x", (u_int32_t) va);
 717                 pte[l2pte_index(va)] =
 718                     L2_S_PROTO | pa | L2_S_PROT(PTE_KERNEL, VM_PROT_READ) |
 719                     L2_C | L2_XSCALE_T_TEX(TEX_XSCALE_X);
 720         }
 721
 722         /*
 723          * Configure the mini-data cache for write-back with
 724          * read/write-allocate.
 725          *
 726          * NOTE: In order to reconfigure the mini-data cache, we must
 727          * make sure it contains no valid data!  In order to do that,
 728          * we must issue a global data cache invalidate command!
 729          *
 730          * WE ASSUME WE ARE RUNNING UN-CACHED WHEN THIS ROUTINE IS CALLED!
 731          * THIS IS VERY IMPORTANT!
 732          */
 733
 734         /* Invalidate data and mini-data. */
 735         __asm __volatile("mcr p15, 0, %0, c7, c6, 0" : : "r" (0));
 736         __asm __volatile("mrc p15, 0, %0, c1, c0, 1" : "=r" (auxctl));
 737         auxctl = (auxctl & ~XSCALE_AUXCTL_MD_MASK) | XSCALE_AUXCTL_MD_WB_RWA;
 738         __asm __volatile("mcr p15, 0, %0, c1, c0, 1" : : "r" (auxctl));
 739 }
 740 #endif
 741
 742 /*
 743  * Allocate an L1 translation table for the specified pmap.
 744  * This is called at pmap creation time.
 745  */
 746 static void
 747 pmap_alloc_l1(pmap_t pm)
 748 {
 749         struct l1_ttable *l1;
 750         u_int8_t domain;
 751
 752         /*
 753          * Remove the L1 at the head of the LRU list
 754          */
 755         mtx_lock(&l1_lru_lock);
 756         l1 = TAILQ_FIRST(&l1_lru_list);
 757         TAILQ_REMOVE(&l1_lru_list, l1, l1_lru);
 758
 759         /*
 760          * Pick the first available domain number, and update
 761          * the link to the next number.
 762          */
 763         domain = l1->l1_domain_first;
 764         l1->l1_domain_first = l1->l1_domain_free[domain];
 765
 766         /*
 767          * If there are still free domain numbers in this L1,
 768          * put it back on the TAIL of the LRU list.
 769          */
 770         if (++l1->l1_domain_use_count < PMAP_DOMAINS)
 771                 TAILQ_INSERT_TAIL(&l1_lru_list, l1, l1_lru);
 772
 773         mtx_unlock(&l1_lru_lock);
 774
 775         /*
 776          * Fix up the relevant bits in the pmap structure
 777          */
 778         pm->pm_l1 = l1;
 779         pm->pm_domain = domain;
 780 }
 781
 782 /*
 783  * Free an L1 translation table.
 784  * This is called at pmap destruction time.
 785  */
 786 static void
 787 pmap_free_l1(pmap_t pm)
 788 {
 789         struct l1_ttable *l1 = pm->pm_l1;
 790
 791         mtx_lock(&l1_lru_lock);
 792
 793         /*
 794          * If this L1 is currently on the LRU list, remove it.
 795          */
 796         if (l1->l1_domain_use_count < PMAP_DOMAINS)
 797                 TAILQ_REMOVE(&l1_lru_list, l1, l1_lru);
 798
 799         /*
 800          * Free up the domain number which was allocated to the pmap
 801          */
 802         l1->l1_domain_free[pm->pm_domain] = l1->l1_domain_first;
 803         l1->l1_domain_first = pm->pm_domain;
 804         l1->l1_domain_use_count--;
 805
 806         /*
 807          * The L1 now must have at least 1 free domain, so add
 808          * it back to the LRU list. If the use count is zero,
 809          * put it at the head of the list, otherwise it goes
 810          * to the tail.
 811          */
 812         if (l1->l1_domain_use_count == 0) {
 813                 TAILQ_INSERT_HEAD(&l1_lru_list, l1, l1_lru);
 814         }       else
 815                 TAILQ_INSERT_TAIL(&l1_lru_list, l1, l1_lru);
 816
 817         mtx_unlock(&l1_lru_lock);
 818 }
 819
 820 static PMAP_INLINE void
 821 pmap_use_l1(pmap_t pm)
 822 {
 823         struct l1_ttable *l1;
 824
 825         /*
 826          * Do nothing if we're in interrupt context.
 827          * Access to an L1 by the kernel pmap must not affect
 828          * the LRU list.
 829          */
 830         if (pm == pmap_kernel())
 831                 return;
 832
 833         l1 = pm->pm_l1;
 834
 835         /*
 836          * If the L1 is not currently on the LRU list, just return
 837          */
 838         if (l1->l1_domain_use_count == PMAP_DOMAINS)
 839                 return;
 840
 841         mtx_lock(&l1_lru_lock);
 842
 843         /*
 844          * Check the use count again, now that we've acquired the lock
 845          */
 846         if (l1->l1_domain_use_count == PMAP_DOMAINS) {
 847                 mtx_unlock(&l1_lru_lock);
 848                 return;
 849         }
 850
 851         /*
 852          * Move the L1 to the back of the LRU list
 853          */
 854         TAILQ_REMOVE(&l1_lru_list, l1, l1_lru);
 855         TAILQ_INSERT_TAIL(&l1_lru_list, l1, l1_lru);
 856
 857         mtx_unlock(&l1_lru_lock);
 858 }
 859
 860
 861 /*
 862  * Returns a pointer to the L2 bucket associated with the specified pmap
 863  * and VA, or NULL if no L2 bucket exists for the address.
 864  */
 865 static PMAP_INLINE struct l2_bucket *
 866 pmap_get_l2_bucket(pmap_t pm, vm_offset_t va)
 867 {
 868         struct l2_dtable *l2;
 869         struct l2_bucket *l2b;
 870         u_short l1idx;
 871
 872         l1idx = L1_IDX(va);
 873
 874         if ((l2 = pm->pm_l2[L2_IDX(l1idx)]) == NULL ||
 875             (l2b = &l2->l2_bucket[L2_BUCKET(l1idx)])->l2b_kva == NULL)
 876                 return (NULL);
 877
 878         return (l2b);
 879 }
 880
 881 /*
 882  * Returns a pointer to the L2 bucket associated with the specified pmap
 883  * and VA.
 884  *
 885  * If no L2 bucket exists, perform the necessary allocations to put an L2
 886  * bucket/page table in place.
 887  *
 888  * Note that if a new L2 bucket/page was allocated, the caller *must*
 889  * increment the bucket occupancy counter appropriately *before*
 890  * releasing the pmap's lock to ensure no other thread or cpu deallocates
 891  * the bucket/page in the meantime.
 892  */
 893 static struct l2_bucket *
 894 pmap_alloc_l2_bucket(pmap_t pm, vm_offset_t va)
 895 {
 896         struct l2_dtable *l2;
 897         struct l2_bucket *l2b;
 898         u_short l1idx;
 899
 900         l1idx = L1_IDX(va);
 901
 902         PMAP_ASSERT_LOCKED(pm);
 903         mtx_assert(&vm_page_queue_mtx, MA_OWNED);
 904         if ((l2 = pm->pm_l2[L2_IDX(l1idx)]) == NULL) {
 905                 /*
 906                  * No mapping at this address, as there is
 907                  * no entry in the L1 table.
 908                  * Need to allocate a new l2_dtable.
 909                  */
 910 again_l2table:
 911                 PMAP_UNLOCK(pm);
 912                 vm_page_unlock_queues();
 913                 if ((l2 = pmap_alloc_l2_dtable()) == NULL) {
 914                         vm_page_lock_queues();
 915                         PMAP_LOCK(pm);
 916                         return (NULL);
 917                 }
 918                 vm_page_lock_queues();
 919                 PMAP_LOCK(pm);
 920                 if (pm->pm_l2[L2_IDX(l1idx)] != NULL) {
 921                         PMAP_UNLOCK(pm);
 922                         vm_page_unlock_queues();
 923                         uma_zfree(l2table_zone, l2);
 924                         vm_page_lock_queues();
 925                         PMAP_LOCK(pm);
 926                         l2 = pm->pm_l2[L2_IDX(l1idx)];
 927                         if (l2 == NULL)
 928                                 goto again_l2table;
 929                         /*
 930                          * Someone already allocated the l2_dtable while
 931                          * we were doing the same.
 932                          */
 933                 } else {
 934                         bzero(l2, sizeof(*l2));
 935                         /*
 936                          * Link it into the parent pmap
 937                          */
 938                         pm->pm_l2[L2_IDX(l1idx)] = l2;
 939                 }
 940         }
 941
 942         l2b = &l2->l2_bucket[L2_BUCKET(l1idx)];
 943
 944         /*
 945          * Fetch pointer to the L2 page table associated with the address.
 946          */
 947         if (l2b->l2b_kva == NULL) {
 948                 pt_entry_t *ptep;
 949
 950                 /*
 951                  * No L2 page table has been allocated. Chances are, this
 952                  * is because we just allocated the l2_dtable, above.
 953                  */
 954 again_ptep:
 955                 PMAP_UNLOCK(pm);
 956                 vm_page_unlock_queues();
 957                 ptep = (void*)uma_zalloc(l2zone, M_NOWAIT|M_USE_RESERVE);
 958                 vm_page_lock_queues();
 959                 PMAP_LOCK(pm);
 960                 if (l2b->l2b_kva != 0) {
 961                         /* We lost the race. */
 962                         PMAP_UNLOCK(pm);
 963                         vm_page_unlock_queues();
 964                         uma_zfree(l2zone, ptep);
 965                         vm_page_lock_queues();
 966                         PMAP_LOCK(pm);
 967                         if (l2b->l2b_kva == 0)
 968                                 goto again_ptep;
 969                         return (l2b);
 970                 }
 971                 l2b->l2b_phys = vtophys(ptep);
 972                 if (ptep == NULL) {
 973                         /*
 974                          * Oops, no more L2 page tables available at this
 975                          * time. We may need to deallocate the l2_dtable
 976                          * if we allocated a new one above.
 977                          */
 978                         if (l2->l2_occupancy == 0) {
 979                                 pm->pm_l2[L2_IDX(l1idx)] = NULL;
 980                                 pmap_free_l2_dtable(l2);
 981                         }
 982                         return (NULL);
 983                 }
 984
 985                 l2->l2_occupancy++;
 986                 l2b->l2b_kva = ptep;
 987                 l2b->l2b_l1idx = l1idx;
 988         }
 989
 990         return (l2b);
 991 }
 992
 993 static PMAP_INLINE void
 994 #ifndef PMAP_INCLUDE_PTE_SYNC
 995 pmap_free_l2_ptp(pt_entry_t *l2)
 996 #else
 997 pmap_free_l2_ptp(boolean_t need_sync, pt_entry_t *l2)
 998 #endif
 999 {
1000 #ifdef PMAP_INCLUDE_PTE_SYNC
1001         /*
1002          * Note: With a write-back cache, we may need to sync this
1003          * L2 table before re-using it.
1004          * This is because it may have belonged to a non-current
1005          * pmap, in which case the cache syncs would have been
1006          * skipped when the pages were being unmapped. If the
1007          * L2 table were then to be immediately re-allocated to
1008          * the *current* pmap, it may well contain stale mappings
1009          * which have not yet been cleared by a cache write-back
1010          * and so would still be visible to the mmu.
1011          */
1012         if (need_sync)
1013                 PTE_SYNC_RANGE(l2, L2_TABLE_SIZE_REAL / sizeof(pt_entry_t));
1014 #endif
1015         uma_zfree(l2zone, l2);
1016 }
1017 /*
1018  * One or more mappings in the specified L2 descriptor table have just been
1019  * invalidated.
1020  *
1021  * Garbage collect the metadata and descriptor table itself if necessary.
1022  *
1023  * The pmap lock must be acquired when this is called (not necessary
1024  * for the kernel pmap).
1025  */
1026 static void
1027 pmap_free_l2_bucket(pmap_t pm, struct l2_bucket *l2b, u_int count)
1028 {
1029         struct l2_dtable *l2;
1030         pd_entry_t *pl1pd, l1pd;
1031         pt_entry_t *ptep;
1032         u_short l1idx;
1033
1034
1035         /*
1036          * Update the bucket's reference count according to how many
1037          * PTEs the caller has just invalidated.
1038          */
1039         l2b->l2b_occupancy -= count;
1040
1041         /*
1042          * Note:
1043          *
1044          * Level 2 page tables allocated to the kernel pmap are never freed
1045          * as that would require checking all Level 1 page tables and
1046          * removing any references to the Level 2 page table. See also the
1047          * comment elsewhere about never freeing bootstrap L2 descriptors.
1048          *
1049          * We make do with just invalidating the mapping in the L2 table.
1050          *
1051          * This isn't really a big deal in practice and, in fact, leads
1052          * to a performance win over time as we don't need to continually
1053          * alloc/free.
1054          */
1055         if (l2b->l2b_occupancy > 0 || pm == pmap_kernel())
1056                 return;
1057
1058         /*
1059          * There are no more valid mappings in this level 2 page table.
1060          * Go ahead and NULL-out the pointer in the bucket, then
1061          * free the page table.
1062          */
1063         l1idx = l2b->l2b_l1idx;
1064         ptep = l2b->l2b_kva;
1065         l2b->l2b_kva = NULL;
1066
1067         pl1pd = &pm->pm_l1->l1_kva[l1idx];
1068
1069         /*
1070          * If the L1 slot matches the pmap's domain
1071          * number, then invalidate it.
1072          */
1073         l1pd = *pl1pd & (L1_TYPE_MASK | L1_C_DOM_MASK);
1074         if (l1pd == (L1_C_DOM(pm->pm_domain) | L1_TYPE_C)) {
1075                 *pl1pd = 0;
1076                 PTE_SYNC(pl1pd);
1077         }
1078
1079         /*
1080          * Release the L2 descriptor table back to the pool cache.
1081          */
1082 #ifndef PMAP_INCLUDE_PTE_SYNC
1083         pmap_free_l2_ptp(ptep);
1084 #else
1085         pmap_free_l2_ptp(!pmap_is_current(pm), ptep);
1086 #endif
1087
1088         /*
1089          * Update the reference count in the associated l2_dtable
1090          */
1091         l2 = pm->pm_l2[L2_IDX(l1idx)];
1092         if (--l2->l2_occupancy > 0)
1093                 return;
1094
1095         /*
1096          * There are no more valid mappings in any of the Level 1
1097          * slots managed by this l2_dtable. Go ahead and NULL-out
1098          * the pointer in the parent pmap and free the l2_dtable.
1099          */
1100         pm->pm_l2[L2_IDX(l1idx)] = NULL;
1101         pmap_free_l2_dtable(l2);
1102 }
1103
1104 /*
1105  * Pool cache constructors for L2 descriptor tables, metadata and pmap
1106  * structures.
1107  */
1108 static int
1109 pmap_l2ptp_ctor(void *mem, int size, void *arg, int flags)
1110 {
1111 #ifndef PMAP_INCLUDE_PTE_SYNC
1112         struct l2_bucket *l2b;
1113         pt_entry_t *ptep, pte;
1114 #ifdef ARM_USE_SMALL_ALLOC
1115         pd_entry_t *pde;
1116 #endif
1117         vm_offset_t va = (vm_offset_t)mem & ~PAGE_MASK;
1118
1119         /*
1120          * The mappings for these page tables were initially made using
1121          * pmap_kenter() by the pool subsystem. Therefore, the cache-
1122          * mode will not be right for page table mappings. To avoid
1123          * polluting the pmap_kenter() code with a special case for
1124          * page tables, we simply fix up the cache-mode here if it's not
1125          * correct.
1126          */
1127 #ifdef ARM_USE_SMALL_ALLOC
1128         pde = &kernel_pmap->pm_l1->l1_kva[L1_IDX(va)];
1129         if (!l1pte_section_p(*pde)) {
1130 #endif
1131                 l2b = pmap_get_l2_bucket(pmap_kernel(), va);
1132                 ptep = &l2b->l2b_kva[l2pte_index(va)];
1133                 pte = *ptep;
1134
1135                 if ((pte & L2_S_CACHE_MASK) != pte_l2_s_cache_mode_pt) {
1136                         /*
1137                          * Page tables must have the cache-mode set to
1138                          * Write-Thru.
1139                          */
1140                         *ptep = (pte & ~L2_S_CACHE_MASK) | pte_l2_s_cache_mode_pt;
1141                         PTE_SYNC(ptep);
1142                         cpu_tlb_flushD_SE(va);
1143                         cpu_cpwait();
1144                 }
1145 #ifdef ARM_USE_SMALL_ALLOC
1146         }
1147 #endif
1148 #endif
1149         memset(mem, 0, L2_TABLE_SIZE_REAL);
1150         PTE_SYNC_RANGE(mem, L2_TABLE_SIZE_REAL / sizeof(pt_entry_t));
1151         return (0);
1152 }
1153
1154 /*
1155  * A bunch of routines to conditionally flush the caches/TLB depending
1156  * on whether the specified pmap actually needs to be flushed at any
1157  * given time.
1158  */
1159 static PMAP_INLINE void
1160 pmap_tlb_flushID_SE(pmap_t pm, vm_offset_t va)
1161 {
1162
1163         if (pmap_is_current(pm))
1164                 cpu_tlb_flushID_SE(va);
1165 }
1166
1167 static PMAP_INLINE void
1168 pmap_tlb_flushD_SE(pmap_t pm, vm_offset_t va)
1169 {
1170
1171         if (pmap_is_current(pm))
1172                 cpu_tlb_flushD_SE(va);
1173 }
1174
1175 static PMAP_INLINE void
1176 pmap_tlb_flushID(pmap_t pm)
1177 {
1178
1179         if (pmap_is_current(pm))
1180                 cpu_tlb_flushID();
1181 }
1182 static PMAP_INLINE void
1183 pmap_tlb_flushD(pmap_t pm)
1184 {
1185
1186         if (pmap_is_current(pm))
1187                 cpu_tlb_flushD();
1188 }
1189
1190 static PMAP_INLINE void
1191 pmap_idcache_wbinv_range(pmap_t pm, vm_offset_t va, vm_size_t len)
1192 {
1193
1194         if (pmap_is_current(pm))
1195                 cpu_idcache_wbinv_range(va, len);
1196 }
1197
1198 static PMAP_INLINE void
1199 pmap_dcache_wb_range(pmap_t pm, vm_offset_t va, vm_size_t len,
1200     boolean_t do_inv, boolean_t rd_only)
1201 {
1202
1203         if (pmap_is_current(pm)) {
1204                 if (do_inv) {
1205                         if (rd_only)
1206                                 cpu_dcache_inv_range(va, len);
1207                         else
1208                                 cpu_dcache_wbinv_range(va, len);
1209                 } else
1210                 if (!rd_only)
1211                         cpu_dcache_wb_range(va, len);
1212         }
1213 }
1214
1215 static PMAP_INLINE void
1216 pmap_idcache_wbinv_all(pmap_t pm)
1217 {
1218
1219         if (pmap_is_current(pm))
1220                 cpu_idcache_wbinv_all();
1221 }
1222
1223 static PMAP_INLINE void
1224 pmap_dcache_wbinv_all(pmap_t pm)
1225 {
1226
1227         if (pmap_is_current(pm))
1228                 cpu_dcache_wbinv_all();
1229 }
1230
/*
 * PTE_SYNC_CURRENT:
 *
 *     Make sure the pte is written out to RAM via PTE_SYNC(), but only
 *     when PMAP_NEEDS_PTE_SYNC is true and pmap_is_current() holds for
 *     the pmap.  The situations in which the sync is wanted are:
 *       - We're dealing with the kernel pmap
 *       - There is no pmap active in the cache/tlb.
 *       - The specified pmap is 'active' in the cache/tlb.
 *     NOTE(review): presumably pmap_is_current() covers all three of
 *     these; confirm against its definition.
 *
 *     Expands to nothing unless PMAP_INCLUDE_PTE_SYNC is defined.
 */
#ifndef PMAP_INCLUDE_PTE_SYNC
#define PTE_SYNC_CURRENT(pm, ptep)      /* nothing */
#else
#define PTE_SYNC_CURRENT(pm, ptep)              \
do {                                            \
        if (PMAP_NEEDS_PTE_SYNC) {              \
                if (pmap_is_current(pm))        \
                        PTE_SYNC(ptep);         \
        }                                       \
} while (/*CONSTCOND*/0)
#endif
1250
1251 /*
1252  * Since we have a virtually indexed cache, we may need to inhibit caching if
1253  * there is more than one mapping and at least one of them is writable.
1254  * Since we purge the cache on every context switch, we only need to check for
1255  * other mappings within the same pmap, or kernel_pmap.
1256  * This function is also called when a page is unmapped, to possibly reenable
1257  * caching on any remaining mappings.
1258  *
1259  * The code implements the following logic, where:
1260  *
1261  * KW = # of kernel read/write pages
1262  * KR = # of kernel read only pages
1263  * UW = # of user read/write pages
1264  * UR = # of user read only pages
1265  *
1266  * KC = kernel mapping is cacheable
1267  * UC = user mapping is cacheable
1268  *
1269  *               KW=0,KR=0  KW=0,KR>0  KW=1,KR=0  KW>1,KR>=0
1270  *             +---------------------------------------------
1271  * UW=0,UR=0   | ---        KC=1       KC=1       KC=0
1272  * UW=0,UR>0   | UC=1       KC=1,UC=1  KC=0,UC=0  KC=0,UC=0
1273  * UW=1,UR=0   | UC=1       KC=0,UC=0  KC=0,UC=0  KC=0,UC=0
1274  * UW>1,UR>=0  | UC=0       KC=0,UC=0  KC=0,UC=0  KC=0,UC=0
1275  */
1276
1277 static const int pmap_vac_flags[4][4] = {
1278         {-1,            0,              0,              PVF_KNC},
1279         {0,             0,              PVF_NC,         PVF_NC},
1280         {0,             PVF_NC,         PVF_NC,         PVF_NC},
1281         {PVF_UNC,       PVF_NC,         PVF_NC,         PVF_NC}
1282 };
1283
1284 static PMAP_INLINE int
1285 pmap_get_vac_flags(const struct vm_page *pg)
1286 {
1287         int kidx, uidx;
1288
1289         kidx = 0;
1290         if (pg->md.kro_mappings || pg->md.krw_mappings > 1)
1291                 kidx |= 1;
1292         if (pg->md.krw_mappings)
1293                 kidx |= 2;
1294
1295         uidx = 0;
1296         if (pg->md.uro_mappings || pg->md.urw_mappings > 1)
1297                 uidx |= 1;
1298         if (pg->md.urw_mappings)
1299                 uidx |= 2;
1300
1301         return (pmap_vac_flags[uidx][kidx]);
1302 }
1303
1304 static __inline void
1305 pmap_vac_me_harder(struct vm_page *pg, pmap_t pm, vm_offset_t va)
1306 {
1307         int nattr;
1308
1309         mtx_assert(&vm_page_queue_mtx, MA_OWNED);
1310         nattr = pmap_get_vac_flags(pg);
1311
1312         if (nattr < 0) {
1313                 pg->md.pvh_attrs &= ~PVF_NC;
1314                 return;
1315         }
1316
1317         if (nattr == 0 && (pg->md.pvh_attrs & PVF_NC) == 0) {
1318                 return;
1319         }
1320
1321         if (pm == pmap_kernel())
1322                 pmap_vac_me_kpmap(pg, pm, va);
1323         else
1324                 pmap_vac_me_user(pg, pm, va);
1325
1326         pg->md.pvh_attrs = (pg->md.pvh_attrs & ~PVF_NC) | nattr;
1327 }
1328
1329 static void
1330 pmap_vac_me_kpmap(struct vm_page *pg, pmap_t pm, vm_offset_t va)
1331 {
1332         u_int u_cacheable, u_entries;
1333         struct pv_entry *pv;
1334         pmap_t last_pmap = pm;
1335
1336         /*
1337          * Pass one, see if there are both kernel and user pmaps for
1338          * this page.  Calculate whether there are user-writable or
1339          * kernel-writable pages.
1340          */
1341         u_cacheable = 0;
1342         TAILQ_FOREACH(pv, &pg->md.pv_list, pv_list) {
1343                 if (pv->pv_pmap != pm && (pv->pv_flags & PVF_NC) == 0)
1344                         u_cacheable++;
1345         }
1346
1347         u_entries = pg->md.urw_mappings + pg->md.uro_mappings;
1348
1349         /*
1350          * We know we have just been updating a kernel entry, so if
1351          * all user pages are already cacheable, then there is nothing
1352          * further to do.
1353          */
1354         if (pg->md.k_mappings == 0 && u_cacheable == u_entries)
1355                 return;
1356
1357         if (u_entries) {
1358                 /*
1359                  * Scan over the list again, for each entry, if it
1360                  * might not be set correctly, call pmap_vac_me_user
1361                  * to recalculate the settings.
1362                  */
1363                 TAILQ_FOREACH(pv, &pg->md.pv_list, pv_list) {
1364                         /*
1365                          * We know kernel mappings will get set
1366                          * correctly in other calls.  We also know
1367                          * that if the pmap is the same as last_pmap
1368                          * then we've just handled this entry.
1369                          */
1370                         if (pv->pv_pmap == pm || pv->pv_pmap == last_pmap)
1371                                 continue;
1372
1373                         /*
1374                          * If there are kernel entries and this page
1375                          * is writable but non-cacheable, then we can
1376                          * skip this entry also.
1377                          */
1378                         if (pg->md.k_mappings &&
1379                             (pv->pv_flags & (PVF_NC | PVF_WRITE)) ==
1380                             (PVF_NC | PVF_WRITE))
1381                                 continue;
1382
1383                         /*
1384                          * Similarly if there are no kernel-writable
1385                          * entries and the page is already
1386                          * read-only/cacheable.
1387                          */
1388                         if (pg->md.krw_mappings == 0 &&
1389                             (pv->pv_flags & (PVF_NC | PVF_WRITE)) == 0)
1390                                 continue;
1391
1392                         /*
1393                          * For some of the remaining cases, we know
1394                          * that we must recalculate, but for others we
1395                          * can't tell if they are correct or not, so
1396                          * we recalculate anyway.
1397                          */
1398                         pmap_vac_me_user(pg, (last_pmap = pv->pv_pmap), 0);
1399                 }
1400
1401                 if (pg->md.k_mappings == 0)
1402                         return;
1403         }
1404
1405         pmap_vac_me_user(pg, pm, va);
1406 }
1407
1408 static void
1409 pmap_vac_me_user(struct vm_page *pg, pmap_t pm, vm_offset_t va)
1410 {
1411         pmap_t kpmap = pmap_kernel();
1412         struct pv_entry *pv, *npv;
1413         struct l2_bucket *l2b;
1414         pt_entry_t *ptep, pte;
1415         u_int entries = 0;
1416         u_int writable = 0;
1417         u_int cacheable_entries = 0;
1418         u_int kern_cacheable = 0;
1419         u_int other_writable = 0;
1420
1421         /*
1422          * Count mappings and writable mappings in this pmap.
1423          * Include kernel mappings as part of our own.
1424          * Keep a pointer to the first one.
1425          */
1426         npv = TAILQ_FIRST(&pg->md.pv_list);
1427         TAILQ_FOREACH(pv, &pg->md.pv_list, pv_list) {
1428                 /* Count mappings in the same pmap */
1429                 if (pm == pv->pv_pmap || kpmap == pv->pv_pmap) {
1430                         if (entries++ == 0)
1431                                 npv = pv;
1432
1433                         /* Cacheable mappings */
1434                         if ((pv->pv_flags & PVF_NC) == 0) {
1435                                 cacheable_entries++;
1436                                 if (kpmap == pv->pv_pmap)
1437                                         kern_cacheable++;
1438                         }
1439
1440                         /* Writable mappings */
1441                         if (pv->pv_flags & PVF_WRITE)
1442                                 ++writable;
1443                 } else
1444                 if (pv->pv_flags & PVF_WRITE)
1445                         other_writable = 1;
1446         }
1447
1448         /*
1449          * Enable or disable caching as necessary.
1450          * Note: the first entry might be part of the kernel pmap,
1451          * so we can't assume this is indicative of the state of the
1452          * other (maybe non-kpmap) entries.
1453          */
1454         if ((entries > 1 && writable) ||
1455             (entries > 0 && pm == kpmap && other_writable)) {
1456                 if (cacheable_entries == 0)
1457                         return;
1458
1459                 for (pv = npv; pv; pv = TAILQ_NEXT(pv, pv_list)) {
1460                         if ((pm != pv->pv_pmap && kpmap != pv->pv_pmap) ||
1461                             (pv->pv_flags & PVF_NC))
1462                                 continue;
1463
1464                         pv->pv_flags |= PVF_NC;
1465
1466                         l2b = pmap_get_l2_bucket(pv->pv_pmap, pv->pv_va);
1467                         ptep = &l2b->l2b_kva[l2pte_index(pv->pv_va)];
1468                         pte = *ptep & ~L2_S_CACHE_MASK;
1469
1470                         if ((va != pv->pv_va || pm != pv->pv_pmap) &&
1471                             l2pte_valid(pte)) {
1472                                 if (PV_BEEN_EXECD(pv->pv_flags)) {
1473                                         pmap_idcache_wbinv_range(pv->pv_pmap,
1474                                             pv->pv_va, PAGE_SIZE);
1475                                         pmap_tlb_flushID_SE(pv->pv_pmap,
1476                                             pv->pv_va);
1477                                 } else
1478                                 if (PV_BEEN_REFD(pv->pv_flags)) {
1479                                         pmap_dcache_wb_range(pv->pv_pmap,
1480                                             pv->pv_va, PAGE_SIZE, TRUE,
1481                                             (pv->pv_flags & PVF_WRITE) == 0);
1482                                         pmap_tlb_flushD_SE(pv->pv_pmap,
1483                                             pv->pv_va);
1484                                 }
1485                         }
1486
1487                         *ptep = pte;
1488                         PTE_SYNC_CURRENT(pv->pv_pmap, ptep);
1489                 }
1490                 cpu_cpwait();
1491         } else
1492         if (entries > cacheable_entries) {
1493                 /*
1494                  * Turn cacheing back on for some pages.  If it is a kernel
1495                  * page, only do so if there are no other writable pages.
1496                  */
1497                 for (pv = npv; pv; pv = TAILQ_NEXT(pv, pv_list)) {
1498                         if (!(pv->pv_flags & PVF_NC) || (pm != pv->pv_pmap &&
1499                             (kpmap != pv->pv_pmap || other_writable)))
1500                                 continue;
1501
1502                         pv->pv_flags &= ~PVF_NC;
1503
1504                         l2b = pmap_get_l2_bucket(pv->pv_pmap, pv->pv_va);
1505                         ptep = &l2b->l2b_kva[l2pte_index(pv->pv_va)];
1506                         pte = (*ptep & ~L2_S_CACHE_MASK) | pte_l2_s_cache_mode;
1507
1508                         if (l2pte_valid(pte)) {
1509                                 if (PV_BEEN_EXECD(pv->pv_flags)) {
1510                                         pmap_tlb_flushID_SE(pv->pv_pmap,
1511                                             pv->pv_va);
1512                                 } else
1513                                 if (PV_BEEN_REFD(pv->pv_flags)) {
1514                                         pmap_tlb_flushD_SE(pv->pv_pmap,
1515                                             pv->pv_va);
1516                                 }
1517                         }
1518
1519                         *ptep = pte;
1520                         PTE_SYNC_CURRENT(pv->pv_pmap, ptep);
1521                 }
1522         }
1523 }
1524
1525 /*
1526  * Modify pte bits for all ptes corresponding to the given physical address.
1527  * We use `maskbits' rather than `clearbits' because we're always passing
1528  * constants and the latter would require an extra inversion at run-time.
1529  */
1530 static int
1531 pmap_clearbit(struct vm_page *pg, u_int maskbits)
1532 {
1533         struct l2_bucket *l2b;
1534         struct pv_entry *pv;
1535         pt_entry_t *ptep, npte, opte;
1536         pmap_t pm;
1537         vm_offset_t va;
1538         u_int oflags;
1539         int count = 0;
1540
1541         mtx_assert(&vm_page_queue_mtx, MA_OWNED);
1542
1543         /*
1544          * Clear saved attributes (modify, reference)
1545          */
1546         pg->md.pvh_attrs &= ~(maskbits & (PVF_MOD | PVF_REF));
1547
1548         if (TAILQ_EMPTY(&pg->md.pv_list)) {
1549                 return (0);
1550         }
1551
1552         /*
1553          * Loop over all current mappings setting/clearing as appropos
1554          */
1555         TAILQ_FOREACH(pv, &pg->md.pv_list, pv_list) {
1556                 va = pv->pv_va;
1557                 pm = pv->pv_pmap;
1558                 oflags = pv->pv_flags;
1559                 pv->pv_flags &= ~maskbits;
1560
1561                 PMAP_LOCK(pm);
1562
1563                 l2b = pmap_get_l2_bucket(pm, va);
1564
1565                 ptep = &l2b->l2b_kva[l2pte_index(va)];
1566                 npte = opte = *ptep;
1567
1568                 if (maskbits & (PVF_WRITE|PVF_MOD)) {
1569                         if ((pv->pv_flags & PVF_NC)) {
1570                                 /*
1571                                  * Entry is not cacheable:
1572                                  *
1573                                  * Don't turn caching on again if this is a
1574                                  * modified emulation. This would be
1575                                  * inconsitent with the settings created by
1576                                  * pmap_vac_me_harder(). Otherwise, it's safe
1577                                  * to re-enable cacheing.
1578                                  *
1579                                  * There's no need to call pmap_vac_me_harder()
1580                                  * here: all pages are losing their write
1581                                  * permission.
1582                                  */
1583                                 if (maskbits & PVF_WRITE) {
1584                                         npte |= pte_l2_s_cache_mode;
1585                                         pv->pv_flags &= ~PVF_NC;
1586                                 }
1587                         } else
1588                         if (opte & L2_S_PROT_W) {
1589                                 vm_page_dirty(pg);
1590                                 /*
1591                                  * Entry is writable/cacheable: check if pmap
1592                                  * is current if it is flush it, otherwise it
1593                                  * won't be in the cache
1594                                  */
1595                                 if (PV_BEEN_EXECD(oflags))
1596                                         pmap_idcache_wbinv_range(pm, pv->pv_va,
1597                                             PAGE_SIZE);
1598                                 else
1599                                 if (PV_BEEN_REFD(oflags))
1600                                         pmap_dcache_wb_range(pm, pv->pv_va,
1601                                             PAGE_SIZE,
1602                                             (maskbits & PVF_REF) ? TRUE : FALSE,
1603                                             FALSE);
1604                         }
1605
1606                         /* make the pte read only */
1607                         npte &= ~L2_S_PROT_W;
1608
1609                         if (maskbits & PVF_WRITE) {
1610                                 /*
1611                                  * Keep alias accounting up to date
1612                                  */
1613                                 if (pv->pv_pmap == pmap_kernel()) {
1614                                         if (oflags & PVF_WRITE) {
1615                                                 pg->md.krw_mappings--;
1616                                                 pg->md.kro_mappings++;
1617                                         }
1618                                 } else
1619                                 if (oflags & PVF_WRITE) {
1620                                         pg->md.urw_mappings--;
1621                                         pg->md.uro_mappings++;
1622                                 }
1623                         }
1624                 }
1625
1626                 if (maskbits & PVF_REF) {
1627                         if ((pv->pv_flags & PVF_NC) == 0 &&
1628                             (maskbits & (PVF_WRITE|PVF_MOD)) == 0) {
1629                                 /*
1630                                  * Check npte here; we may have already
1631                                  * done the wbinv above, and the validity
1632                                  * of the PTE is the same for opte and
1633                                  * npte.
1634                                  */
1635                                 if (npte & L2_S_PROT_W) {
1636                                         if (PV_BEEN_EXECD(oflags))
1637                                                 pmap_idcache_wbinv_range(pm,
1638                                                     pv->pv_va, PAGE_SIZE);
1639                                         else
1640                                         if (PV_BEEN_REFD(oflags))
1641                                                 pmap_dcache_wb_range(pm,
1642                                                     pv->pv_va, PAGE_SIZE,
1643                                                     TRUE, FALSE);
1644                                 } else
1645                                 if ((npte & L2_TYPE_MASK) != L2_TYPE_INV) {
1646                                         /* XXXJRT need idcache_inv_range */
1647                                         if (PV_BEEN_EXECD(oflags))
1648                                                 pmap_idcache_wbinv_range(pm,
1649                                                     pv->pv_va, PAGE_SIZE);
1650                                         else
1651                                         if (PV_BEEN_REFD(oflags))
1652                                                 pmap_dcache_wb_range(pm,
1653                                                     pv->pv_va, PAGE_SIZE,
1654                                                     TRUE, TRUE);
1655                                 }
1656                         }
1657
1658                         /*
1659                          * Make the PTE invalid so that we will take a
1660                          * page fault the next time the mapping is
1661                          * referenced.
1662                          */
1663                         npte &= ~L2_TYPE_MASK;
1664                         npte |= L2_TYPE_INV;
1665                 }
1666
1667                 if (npte != opte) {
1668                         count++;
1669                         *ptep = npte;
1670                         PTE_SYNC(ptep);
1671                         /* Flush the TLB entry if a current pmap. */
1672                         if (PV_BEEN_EXECD(oflags))
1673                                 pmap_tlb_flushID_SE(pm, pv->pv_va);
1674                         else
1675                         if (PV_BEEN_REFD(oflags))
1676                                 pmap_tlb_flushD_SE(pm, pv->pv_va);
1677                 }
1678
1679                 PMAP_UNLOCK(pm);
1680
1681         }
1682
1683         if (maskbits & PVF_WRITE)
1684                 vm_page_flag_clear(pg, PG_WRITEABLE);
1685         return (count);
1686 }
1687
1688 /*
1689  * main pv_entry manipulation functions:
1690  *   pmap_enter_pv: enter a mapping onto a vm_page list
1691  *   pmap_remove_pv: remove a mapping from a vm_page list
1692  *
1693  * NOTE: pmap_enter_pv expects to lock the pvh itself
1694  *       pmap_remove_pv expects the caller to lock the pvh before calling
1695  */
1696
1697 /*
1698  * pmap_enter_pv: enter a mapping onto a vm_page list
1699  *
1700  * => caller should hold the proper lock on pmap_main_lock
1701  * => caller should have pmap locked
1702  * => caller must hold vm_page_queue_mtx and supply the pv_entry (pve)
1703  * => caller should adjust ptp's wire_count before calling
1704  * => caller should not adjust pmap's wire_count
      *
      * Fills in pve, links it onto both the page's and the pmap's pv list,
      * merges the REF/MOD bits of 'flags' into the page attributes and
      * counts the mapping in exactly one of the page's four alias counters:
      * krw/kro for mappings in the kernel pmap, urw/uro for any other pmap.
      * pmap_nuke_pv() decrements that same single counter on removal.
1705  */
1706 static void
1707 pmap_enter_pv(struct vm_page *pg, struct pv_entry *pve, pmap_t pm,
1708     vm_offset_t va, u_int flags)
1709 {
1710
1711         mtx_assert(&vm_page_queue_mtx, MA_OWNED);
1712         PMAP_ASSERT_LOCKED(pm);
1713         pve->pv_pmap = pm;
1714         pve->pv_va = va;
1715         pve->pv_flags = flags;
1716
1717         TAILQ_INSERT_HEAD(&pg->md.pv_list, pve, pv_list);
1718         TAILQ_INSERT_HEAD(&pm->pm_pvlist, pve, pv_plist);
1719         pg->md.pvh_attrs |= flags & (PVF_REF | PVF_MOD);
             /*
              * Kernel and user mappings are counted separately.  The 'else'
              * matters: without it a kernel mapping would also bump a user
              * counter that pmap_nuke_pv() never decrements for it.
              */
1720         if (pm == pmap_kernel()) {
1721                 if (flags & PVF_WRITE)
1722                         pg->md.krw_mappings++;
1723                 else
1724                         pg->md.kro_mappings++;
1725         } else
1726         if (flags & PVF_WRITE)
1727                 pg->md.urw_mappings++;
1728         else
1729                 pg->md.uro_mappings++;
1730         pg->md.pv_list_count++;
1731         if (pve->pv_flags & PVF_WIRED)
1732                 ++pm->pm_stats.wired_count;
1733         vm_page_flag_set(pg, PG_REFERENCED);
1734 }
1735
1736 /*
1737  * pmap_find_pv: look up the pv entry for (pm, va) on a page's pv list
1738  *
1739  * => vm_page_queue_mtx must be held (asserted below)
1740  * => returns the matching entry, or NULL when the list has no match
1741  */
1742 static PMAP_INLINE struct pv_entry *
1743 pmap_find_pv(struct vm_page *pg, pmap_t pm, vm_offset_t va)
1744 {
1745         struct pv_entry *cur;
1746
1747         mtx_assert(&vm_page_queue_mtx, MA_OWNED);
1748         cur = TAILQ_FIRST(&pg->md.pv_list);
1749         while (cur != NULL && (cur->pv_pmap != pm || cur->pv_va != va))
1750                 cur = TAILQ_NEXT(cur, pv_list);
1751         return (cur);
1752 }
1753
1754 /*
1755  * vector_page_setprot:
1756  *
1757  *      Manipulate the protection of the vector page.
      *
      *      Replaces the permission bits of the kernel L2 PTE that maps
      *      vector_page with L2_S_PROT(PTE_KERNEL, prot), then syncs the
      *      PTE and flushes the data TLB entry for that page.
1758  */
1759 void
1760 vector_page_setprot(int prot)
1761 {
1762         struct l2_bucket *l2b;
1763         pt_entry_t *ptep;
1764
1765         l2b = pmap_get_l2_bucket(pmap_kernel(), vector_page);
1766
1767         ptep = &l2b->l2b_kva[l2pte_index(vector_page)];
1768
             /*
              * This is an L2 small-page entry, so the old permissions have
              * to be cleared with the L2 permission bits.  The L1 section
              * mask used here before does not describe this descriptor and
              * could leave stale permission bits set.
              */
1769         *ptep = (*ptep & ~(L2_S_PROT_U | L2_S_PROT_W)) |
                 L2_S_PROT(PTE_KERNEL, prot);
1770         PTE_SYNC(ptep);
1771         cpu_tlb_flushD_SE(vector_page);
1772         cpu_cpwait();
1773 }
1774
1775 /*
1776  * pmap_remove_pv: try to remove a mapping from a pv_list
1777  *
1778  * => caller should hold proper lock on pmap_main_lock
1779  * => pmap should be locked
1780  * => caller should hold lock on vm_page [so that attrs can be adjusted]
1781  * => caller should adjust ptp's wire_count and free PTP if needed
1782  * => caller should NOT adjust pmap's wire_count
1783  * => we return the removed pve
      *
      * pmap_remove_pv() itself is defined after pmap_nuke_pv(); it only
      * searches the list and hands the entry it finds to pmap_nuke_pv().
1784  */
1785
     /*
      * pmap_nuke_pv: unlink a known pv entry and update the page's accounting
      *
      * => vm_page_queue_mtx must be held and pm must be locked (both asserted)
      * => pve is removed from pg's pv list and from pm's pv list
      * => the pve itself is not freed here
      */
1786 static void
1787 pmap_nuke_pv(struct vm_page *pg, pmap_t pm, struct pv_entry *pve)
1788 {
1789
1790         mtx_assert(&vm_page_queue_mtx, MA_OWNED);
1791         PMAP_ASSERT_LOCKED(pm);
             /* Drop the entry from both the per-page and the per-pmap list. */
1792         TAILQ_REMOVE(&pg->md.pv_list, pve, pv_list);
1793         TAILQ_REMOVE(&pm->pm_pvlist, pve, pv_plist);
1794         if (pve->pv_flags & PVF_WIRED)
1795                 --pm->pm_stats.wired_count;
1796         pg->md.pv_list_count--;
             /* Report a pending modified bit before it can be cleared below. */
1797         if (pg->md.pvh_attrs & PVF_MOD)
1798                 vm_page_dirty(pg);
             /* Undo the single alias counter this mapping was counted in. */
1799         if (pm == pmap_kernel()) {
1800                 if (pve->pv_flags & PVF_WRITE)
1801                         pg->md.krw_mappings--;
1802                 else
1803                         pg->md.kro_mappings--;
1804         } else
1805                 if (pve->pv_flags & PVF_WRITE)
1806                         pg->md.urw_mappings--;
1807                 else
1808                         pg->md.uro_mappings--;
             /*
              * With no mappings left, or no writable ones, the page can no
              * longer be modified through a mapping: clear PVF_MOD and
              * PG_WRITEABLE.  PVF_REF is only cleared once the list is empty.
              */
1809         if (TAILQ_FIRST(&pg->md.pv_list) == NULL ||
1810             (pg->md.krw_mappings == 0 && pg->md.urw_mappings == 0)) {
1811                 pg->md.pvh_attrs &= ~PVF_MOD;
1812                 if (TAILQ_FIRST(&pg->md.pv_list) == NULL)
1813                         pg->md.pvh_attrs &= ~PVF_REF;
1814                 vm_page_flag_clear(pg, PG_WRITEABLE);
1815         }
             /* Other mappings remain: mark the page referenced. */
1816         if (TAILQ_FIRST(&pg->md.pv_list))
1817                 vm_page_flag_set(pg, PG_REFERENCED);
             /* A writable mapping went away, so re-run the alias handling. */
1818         if (pve->pv_flags & PVF_WRITE)
1819                 pmap_vac_me_harder(pg, pm, 0);
1820 }
1821
     /*
      * pmap_remove_pv: remove the (pm, va) mapping from pg's pv list, if any
      *
      * => vm_page_queue_mtx must be held (asserted)
      * => a matching entry is unlinked through pmap_nuke_pv() and returned;
      *    it is not freed here
      * => returns NULL when pg has no pv entry for (pm, va)
      */
1822 static struct pv_entry *
1823 pmap_remove_pv(struct vm_page *pg, pmap_t pm, vm_offset_t va)
1824 {
1825         struct pv_entry *cur;
1826
1827         mtx_assert(&vm_page_queue_mtx, MA_OWNED);
1828
1829         /* Walk the page's pv list looking for the (pm, va) mapping. */
1830         TAILQ_FOREACH(cur, &pg->md.pv_list, pv_list) {
1831                 if (cur->pv_pmap != pm || cur->pv_va != va)
1832                         continue;
1833                 /* Found it: unlink it and fix up the accounting. */
1834                 pmap_nuke_pv(pg, pm, cur);
1835                 break;
1836         }
1837
1838         return (cur);           /* NULL if nothing matched */
1839 }
1840 /*
1841  *
1842  * pmap_modify_pv: Update pv flags
1843  *
1844  * => caller should hold lock on vm_page [so that attrs can be adjusted]
1845  * => caller should NOT adjust pmap's wire_count
1846  * => caller must call pmap_vac_me_harder() if writable status of a page
1847  *    may have changed.
1848  * => we return the old flags
1849  *
1850  * Modify a physical-virtual mapping in the pv table
      *
      * The new flags are (old & ~clr_mask) | set_mask.  pm must be locked and
      * vm_page_queue_mtx held (both asserted).  0 is returned when pg has no
      * pv entry for (pm, va), which a caller cannot tell apart from an entry
      * whose old flags were 0.  When PVF_WRITE toggles, this function itself
      * moves the mapping between the ro/rw alias counters and calls
      * pmap_vac_me_harder().
1851  */
1852 static u_int
1853 pmap_modify_pv(struct vm_page *pg, pmap_t pm, vm_offset_t va,
1854     u_int clr_mask, u_int set_mask)
1855 {
1856         struct pv_entry *npv;
1857         u_int flags, oflags;
1858
1859         PMAP_ASSERT_LOCKED(pm);
1860         mtx_assert(&vm_page_queue_mtx, MA_OWNED);
1861         if ((npv = pmap_find_pv(pg, pm, va)) == NULL)
1862                 return (0);
1863
1864         /*
1865          * There is at least one VA mapping this page.
1866          */
1867
             /*
              * If the caller clears REF/MOD on this mapping, fold any REF/MOD
              * bits it sets at the same time into the page-wide attributes.
              */
1868         if (clr_mask & (PVF_REF | PVF_MOD))
1869                 pg->md.pvh_attrs |= set_mask & (PVF_REF | PVF_MOD);
1870
1871         oflags = npv->pv_flags;
1872         npv->pv_flags = flags = (oflags & ~clr_mask) | set_mask;
1873
             /* Wired state changed: keep the pmap's wired count in step. */
1874         if ((flags ^ oflags) & PVF_WIRED) {
1875                 if (flags & PVF_WIRED)
1876                         ++pm->pm_stats.wired_count;
1877                 else
1878                         --pm->pm_stats.wired_count;
1879         }
1880
             /* Writable state changed: move the mapping between ro and rw. */
1881         if ((flags ^ oflags) & PVF_WRITE) {
1882                 if (pm == pmap_kernel()) {
1883                         if (flags & PVF_WRITE) {
1884                                 pg->md.krw_mappings++;
1885                                 pg->md.kro_mappings--;
1886                         } else {
1887                                 pg->md.kro_mappings++;
1888                                 pg->md.krw_mappings--;
1889                         }
1890                 } else
1891                 if (flags & PVF_WRITE) {
1892                         pg->md.urw_mappings++;
1893                         pg->md.uro_mappings--;
1894                 } else {
1895                         pg->md.uro_mappings++;
1896                         pg->md.urw_mappings--;
1897                 }
                     /* No writable mapping is left anywhere for this page. */
1898                 if (pg->md.krw_mappings == 0 && pg->md.urw_mappings == 0) {
1899                         pg->md.pvh_attrs &= ~PVF_MOD;
1900                         vm_page_flag_clear(pg, PG_WRITEABLE);
1901                 }
1902                 pmap_vac_me_harder(pg, pm, 0);
1903         }
1904
1905         return (oflags);
1906 }
1907
1908 /* Function to set the debug level of the pmap code */
     /*
      * Only built when PMAP_DEBUG is defined.  Stores 'level' in
      * pmap_debug_level and reports the stored value through dprintf().
      */
1909 #ifdef PMAP_DEBUG
1910 void
1911 pmap_debug(int level)
1912 {
1913         pmap_debug_level = level;
1914         dprintf("pmap_debug: level=%d\n", pmap_debug_level);
1915 }
1916 #endif  /* PMAP_DEBUG */
1917
     /*
      * pmap_pinit0: initialise 'pmap' as a copy of the kernel pmap.
      *
      * The whole structure is copied from kernel_pmap, after which the
      * copied lock is zeroed and initialised afresh so that the new pmap
      * gets a lock of its own.
      */
1918 void
1919 pmap_pinit0(struct pmap *pmap)
1920 {
1921         PDEBUG(1, printf("pmap_pinit0: pmap = %08x\n", (u_int32_t) pmap));
1922
             /*
              * NOTE(review): pm_pdir is printed before bcopy() fills in the
              * structure, so this shows whatever the caller left there.
              */
1923         dprintf("pmap_pinit0: pmap = %08x, pm_pdir = %08x\n",
1924                 (u_int32_t) pmap, (u_int32_t) pmap->pm_pdir);
1925         bcopy(kernel_pmap, pmap, sizeof(*pmap));
             /* Discard the copied lock state before initialising our own. */
1926         bzero(&pmap->pm_mtx, sizeof(pmap->pm_mtx));
1927         PMAP_LOCK_INIT(pmap);
1928 }
1929
1930 /*
1931  *      Initialize a vm_page's machine-dependent fields.
      *
      *      Only the pv list head and its length counter are set up here.
      *      NOTE(review): pvh_attrs and the krw/kro/urw/uro counters are
      *      not touched; presumably the page arrives zeroed - confirm.
1932  */
1933 void
1934 pmap_page_init(vm_page_t m)
1935 {
1936
1937         TAILQ_INIT(&m->md.pv_list);
1938         m->md.pv_list_count = 0;
1939 }
1940
1941 /*
1942  *      Initialize the pmap module.
1943  *      Called by vm_init, to initialize any structures that the pmap
1944  *      system needs to map virtual memory.
      *
      *      Creates the UMA zones for pv entries and for the two L2
      *      structures, derives pv_entry_max from the vm.pmap.shpgperproc
      *      tunable (default PMAP_SHPGPERPROC), and hands pvzone_obj and
      *      pv_entry_max to uma_zone_set_obj() for the pv entry zone.
1945  */
1946 void
1947 pmap_init(void)
1948 {
1949         int shpgperproc = PMAP_SHPGPERPROC;
1950
             /*
              * No argument is passed, so the format must not contain a
              * conversion; the old "%08x" made printf() read a missing one.
              */
1951         PDEBUG(1, printf("pmap_init: start\n"));
1952
1953         /*
1954          * init the pv free list
1955          */
1956         pvzone = uma_zcreate("PV ENTRY", sizeof (struct pv_entry), NULL, NULL,
1957             NULL, NULL, UMA_ALIGN_PTR, UMA_ZONE_VM | UMA_ZONE_NOFREE);
1958         /*
1959          * Now it is safe to enable pv_table recording.
1960          */
1961         PDEBUG(1, printf("pmap_init: done!\n"));
1962
             /* The tunable, when set, overrides the compiled-in default. */
1963         TUNABLE_INT_FETCH("vm.pmap.shpgperproc", &shpgperproc);
1964
1965         pv_entry_max = shpgperproc * maxproc + cnt.v_page_count;
             /* High-water mark at 90% of the maximum. */
1966         pv_entry_high_water = 9 * (pv_entry_max / 10);
             /*
              * NOTE(review): both zones below are named "L2 Table" although
              * one holds L2 page tables and the other struct l2_dtable;
              * the shared name is kept as-is here.
              */
1967         l2zone = uma_zcreate("L2 Table", L2_TABLE_SIZE_REAL, pmap_l2ptp_ctor,
1968             NULL, NULL, NULL, UMA_ALIGN_PTR, UMA_ZONE_VM | UMA_ZONE_NOFREE);
1969         l2table_zone = uma_zcreate("L2 Table", sizeof(struct l2_dtable),
1970             NULL, NULL, NULL, NULL, UMA_ALIGN_PTR,
1971             UMA_ZONE_VM | UMA_ZONE_NOFREE);
1972
1973         uma_zone_set_obj(pvzone, &pvzone_obj, pv_entry_max);
1974
1975 }
1976
     /*
      * pmap_fault_fixup: try to resolve a fault on 'va' in 'pm' without
      * involving the VM system.
      *
      *   pm     pmap the faulting address belongs to
      *   va     faulting virtual address
      *   ftype  access type; only VM_PROT_WRITE is examined
      *   user   non-zero to require L2_S_PROT_U in the PTE
      *
      * Three fix-ups are attempted: "page modified" emulation (write to a
      * mapping whose pv entry is writable but whose PTE is not),
      * "page referenced" emulation (PTE of type L2_TYPE_INV) and rewriting
      * a stale L1 descriptor.
      *
      * Returns 0 when one of the checks bails out to 'out', and 1 on every
      * path that reaches the final TLB flush.  The page queues lock and the
      * pmap lock are held for the whole call.
      */
1977 int
1978 pmap_fault_fixup(pmap_t pm, vm_offset_t va, vm_prot_t ftype, int user)
1979 {
1980         struct l2_dtable *l2;
1981         struct l2_bucket *l2b;
1982         pd_entry_t *pl1pd, l1pd;
1983         pt_entry_t *ptep, pte;
1984         vm_paddr_t pa;
1985         u_int l1idx;
1986         int rv = 0;
1987
1988         l1idx = L1_IDX(va);
1989         vm_page_lock_queues();
1990         PMAP_LOCK(pm);
1991
1992         /*
1993          * If there is no l2_dtable for this address, then the process
1994          * has no business accessing it.
1995          *
1996          * Note: This will catch userland processes trying to access
1997          * kernel addresses.
1998          */
1999         l2 = pm->pm_l2[L2_IDX(l1idx)];
2000         if (l2 == NULL)
2001                 goto out;
2002
2003         /*
2004          * Likewise if there is no L2 descriptor table
2005          */
2006         l2b = &l2->l2_bucket[L2_BUCKET(l1idx)];
2007         if (l2b->l2b_kva == NULL)
2008                 goto out;
2009
2010         /*
2011          * Check the PTE itself.
2012          */
2013         ptep = &l2b->l2b_kva[l2pte_index(va)];
2014         pte = *ptep;
2015         if (pte == 0)
2016                 goto out;
2017
2018         /*
2019          * Catch a userland access to the vector page mapped at 0x0
2020          */
2021         if (user && (pte & L2_S_PROT_U) == 0)
2022                 goto out;
             /* The vector page itself is never fixed up here. */
2023         if (va == vector_page)
2024                 goto out;
2025
2026         pa = l2pte_pa(pte);
2027
2028         if ((ftype & VM_PROT_WRITE) && (pte & L2_S_PROT_W) == 0) {
2029                 /*
2030                  * This looks like a good candidate for "page modified"
2031                  * emulation...
2032                  */
2033                 struct pv_entry *pv;
2034                 struct vm_page *pg;
2035
2036                 /* Extract the physical address of the page */
2037                 if ((pg = PHYS_TO_VM_PAGE(pa)) == NULL) {
2038                         goto out;
2039                 }
2040                 /* Get the current flags for this page. */
2041
2042                 pv = pmap_find_pv(pg, pm, va);
2043                 if (pv == NULL) {
2044                         goto out;
2045                 }
2046
2047                 /*
2048                  * Do the flags say this page is writable? If not then it
2049                  * is a genuine write fault. If yes then the write fault is
2050                  * our fault as we did not reflect the write access in the
2051                  * PTE. Now we know a write has occurred we can correct this
2052                  * and also set the modified bit
2053                  */
2054                 if ((pv->pv_flags & PVF_WRITE) == 0) {
2055                         goto out;
2056                 }
2057
2058                 pg->md.pvh_attrs |= PVF_REF | PVF_MOD;
2059                 vm_page_dirty(pg);
2060                 pv->pv_flags |= PVF_REF | PVF_MOD;
2061
2062                 /*
2063                  * Re-enable write permissions for the page.  No need to call
2064                  * pmap_vac_me_harder(), since this is just a
2065                  * modified-emulation fault, and the PVF_WRITE bit isn't
2066                  * changing. We've already set the cacheable bits based on
2067                  * the assumption that we can write to this page.
2068                  */
2069                 *ptep = (pte & ~L2_TYPE_MASK) | L2_S_PROTO | L2_S_PROT_W;
2070                 PTE_SYNC(ptep);
2071                 rv = 1;
2072         } else
2073         if ((pte & L2_TYPE_MASK) == L2_TYPE_INV) {
2074                 /*
2075                  * This looks like a good candidate for "page referenced"
2076                  * emulation.
2077                  */
2078                 struct pv_entry *pv;
2079                 struct vm_page *pg;
2080
2081                 /* Extract the physical address of the page */
2082                 if ((pg = PHYS_TO_VM_PAGE(pa)) == NULL)
2083                         goto out;
2084                 /* Get the current flags for this page. */
2085
2086                 pv = pmap_find_pv(pg, pm, va);
2087                 if (pv == NULL)
2088                         goto out;
2089
2090                 pg->md.pvh_attrs |= PVF_REF;
2091                 pv->pv_flags |= PVF_REF;
2092
2093
                     /* Make the PTE valid again; its other bits are kept. */
2094                 *ptep = (pte & ~L2_TYPE_MASK) | L2_S_PROTO;
2095                 PTE_SYNC(ptep);
2096                 rv = 1;
2097         }
2098
2099         /*
2100          * We know there is a valid mapping here, so simply
2101          * fix up the L1 if necessary.
2102          */
2103         pl1pd = &pm->pm_l1->l1_kva[l1idx];
2104         l1pd = l2b->l2b_phys | L1_C_DOM(pm->pm_domain) | L1_C_PROTO;
2105         if (*pl1pd != l1pd) {
2106                 *pl1pd = l1pd;
2107                 PTE_SYNC(pl1pd);
2108                 rv = 1;
2109         }
2110
2111 #ifdef CPU_SA110
2112         /*
2113          * There are bugs in the rev K SA110.  This is a check for one
2114          * of them.
2115          */
2116         if (rv == 0 && curcpu()->ci_arm_cputype == CPU_ID_SA110 &&
2117             curcpu()->ci_arm_cpurev < 3) {
2118                 /* Always current pmap */
2119                 if (l2pte_valid(pte)) {
2120                         extern int kernel_debug;
2121                         if (kernel_debug & 1) {
2122                                 struct proc *p = curlwp->l_proc;
2123                                 printf("prefetch_abort: page is already "
2124                                     "mapped - pte=%p *pte=%08x\n", ptep, pte);
2125                                 printf("prefetch_abort: pc=%08lx proc=%p "
2126                                     "process=%s\n", va, p, p->p_comm);
2127                                 printf("prefetch_abort: far=%08x fs=%x\n",
2128                                     cpu_faultaddress(), cpu_faultstatus());
2129                         }
2130 #ifdef DDB
2131                         if (kernel_debug & 2)
2132                                 Debugger();
2133 #endif
2134                         rv = 1;
2135                 }
2136         }
2137 #endif /* CPU_SA110 */
2138
2139 #ifdef DEBUG
2140         /*
2141          * If 'rv == 0' at this point, it generally indicates that there is a
2142          * stale TLB entry for the faulting address. This happens when two or
2143          * more processes are sharing an L1. Since we don't flush the TLB on
2144          * a context switch between such processes, we can take domain faults
2145          * for mappings which exist at the same VA in both processes. EVEN IF
2146          * WE'VE RECENTLY FIXED UP THE CORRESPONDING L1 in pmap_enter(), for
2147          * example.
2148          *
2149          * This is extremely likely to happen if pmap_enter() updated the L1
2150          * entry for a recently entered mapping. In this case, the TLB is
2151          * flushed for the new mapping, but there may still be TLB entries for
2152          * other mappings belonging to other processes in the 1MB range
2153          * covered by the L1 entry.
2154          *
2155          * Since 'rv == 0', we know that the L1 already contains the correct
2156          * value, so the fault must be due to a stale TLB entry.
2157          *
2158          * Since we always need to flush the TLB anyway in the case where we
2159          * fixed up the L1, or frobbed the L2 PTE, we effectively deal with
2160          * stale TLB entries dynamically.
2161          *
2162          * However, the above condition can ONLY happen if the current L1 is
2163          * being shared. If it happens when the L1 is unshared, it indicates
2164          * that other parts of the pmap are not doing their job WRT managing
2165          * the TLB.
2166          */
2167         if (rv == 0 && pm->pm_l1->l1_domain_use_count == 1) {
2168                 extern int last_fault_code;
2169                 printf("fixup: pm %p, va 0x%lx, ftype %d - nothing to do!\n",
2170                     pm, va, ftype);
2171                 printf("fixup: l2 %p, l2b %p, ptep %p, pl1pd %p\n",
2172                     l2, l2b, ptep, pl1pd);
2173                 printf("fixup: pte 0x%x, l1pd 0x%x, last code 0x%x\n",
2174                     pte, l1pd, last_fault_code);
2175 #ifdef DDB
2176                 Debugger();
2177 #endif
2178         }
2179 #endif
2180
2181         cpu_tlb_flushID_SE(va);
2182         cpu_cpwait();
2183
             /*
              * Reaching this point always counts as handled; the earlier
              * 'rv = 1' assignments only feed the CPU_SA110/DEBUG checks.
              */
2184         rv = 1;
2185
2186 out:
2187         vm_page_unlock_queues();
2188         PMAP_UNLOCK(pm);
2189         return (rv);
2190 }
2191
     /*
      * pmap_postinit: allocate and initialise the additional L1 tables.
      *
      * The number of tables is maxproc / PMAP_DOMAINS rounded up, minus
      * one.  Each table is L1_TABLE_SIZE bytes from contigmalloc(); every
      * page of it has the cache bits of its kernel PTE replaced by
      * pte_l2_s_cache_mode_pt before the table is handed to pmap_init_l1().
      * Panics if a table cannot be allocated.
      */
2192 void
2193 pmap_postinit(void)
2194 {
2195         struct l2_bucket *l2b;
2196         struct l1_ttable *l1;
2197         pd_entry_t *pl1pt;
2198         pt_entry_t *ptep, pte;
2199         vm_offset_t va, eva;
2200         u_int loop, needed;
2201
             /* ceil(maxproc / PMAP_DOMAINS), less one. */
2202         needed = (maxproc / PMAP_DOMAINS) + ((maxproc % PMAP_DOMAINS) ? 1 : 0);
2203         needed -= 1;
             /* One l1_ttable descriptor per table; l1 walks this array below. */
2204         l1 = malloc(sizeof(*l1) * needed, M_VMPMAP, M_WAITOK);
2205
2206         for (loop = 0; loop < needed; loop++, l1++) {
2207                 /* Allocate a L1 page table */
2208                 va = (vm_offset_t)contigmalloc(L1_TABLE_SIZE, M_VMPMAP, 0, 0x0,
2209                     0xffffffff, L1_TABLE_SIZE, 0);
2210
2211                 if (va == 0)
2212                         panic("Cannot allocate L1 KVM");
2213
2214                 eva = va + L1_TABLE_SIZE;
2215                 pl1pt = (pd_entry_t *)va;
2216
                     /*
                      * For each page of the new table: set the page-table
                      * cache mode in its PTE, sync it and flush the TLB entry.
                      */
2217                 while (va < eva) {
2218                                 l2b = pmap_get_l2_bucket(pmap_kernel(), va);
2219                                 ptep = &l2b->l2b_kva[l2pte_index(va)];
2220                                 pte = *ptep;
2221                                 pte = (pte & ~L2_S_CACHE_MASK) | pte_l2_s_cache_mode_pt;
2222                                 *ptep = pte;
2223                                 PTE_SYNC(ptep);
2224                                 cpu_tlb_flushD_SE(va);
2225
2226                                 va += PAGE_SIZE;
2227                 }
2228                 pmap_init_l1(l1, pl1pt);
2229         }
2230
2231
2232 #ifdef DEBUG
2233         printf("pmap_postinit: Allocated %d static L1 descriptor tables\n",
2234             needed);
2235 #endif
2236 }
2237
2238 /*
2239  * This is used to stuff certain critical values into the PCB where they
2240  * can be accessed quickly from cpu_switch() et al.
      *
      * Stored values: the physical address of pm's L1 table (pcb_pagedir),
      * a domain access value granting DOMAIN_CLIENT to the kernel domain
      * and to pm's own domain (pcb_dacr) and, when vector_page lies below
      * KERNBASE, the address of the L1 slot covering vector_page
      * (pcb_pl1vec) and the descriptor to store in it (pcb_l1vec).
      * Otherwise pcb_pl1vec is NULL and pcb_l1vec is left untouched.
2241  */
2242 void
2243 pmap_set_pcb_pagedir(pmap_t pm, struct pcb *pcb)
2244 {
2245         struct l2_bucket *l2b;
2246
2247         pcb->pcb_pagedir = pm->pm_l1->l1_physaddr;
             /* Each domain takes two bits in the access-control value. */
2248         pcb->pcb_dacr = (DOMAIN_CLIENT << (PMAP_DOMAIN_KERNEL * 2)) |
2249             (DOMAIN_CLIENT << (pm->pm_domain * 2));
2250
2251         if (vector_page < KERNBASE) {
2252                 pcb->pcb_pl1vec = &pm->pm_l1->l1_kva[L1_IDX(vector_page)];
2253                 l2b = pmap_get_l2_bucket(pm, vector_page);
2254                 pcb->pcb_l1vec = l2b->l2b_phys | L1_C_PROTO |
2255                     L1_C_DOM(pm->pm_domain) | L1_C_DOM(PMAP_DOMAIN_KERNEL);
2256         } else
2257                 pcb->pcb_pl1vec = NULL;
2258 }
2259
     /*
      * pmap_activate: record the pmap of td's process in td's PCB and, if
      * td is the running thread, switch the MMU over to it.
      *
      * The whole operation runs inside a critical section.  The switch is
      * skipped when the translation table base and the domain access
      * control register already hold the values stored in the PCB.
      */
2260 void
2261 pmap_activate(struct thread *td)
2262 {
2263         pmap_t pm;
2264         struct pcb *pcb;
2265
2266         pm = vmspace_pmap(td->td_proc->p_vmspace);
2267         pcb = td->td_pcb;
2268
2269         critical_enter();
2270         pmap_set_pcb_pagedir(pm, pcb);
2271
2272         if (td == curthread) {
2273                 u_int cur_dacr, cur_ttb;
2274
                     /* Read the current values from CP15 registers c2 and c3. */
2275                 __asm __volatile("mrc p15, 0, %0, c2, c0, 0" : "=r"(cur_ttb));
2276                 __asm __volatile("mrc p15, 0, %0, c3, c0, 0" : "=r"(cur_dacr));
2277
                     /* Keep only the table base bits for the comparison. */
2278                 cur_ttb &= ~(L1_TABLE_SIZE - 1);
2279
2280                 if (cur_ttb == (u_int)pcb->pcb_pagedir &&
2281                     cur_dacr == pcb->pcb_dacr) {
2282                         /*
2283                          * No need to switch address spaces.
2284                          */
2285                         critical_exit();
2286                         return;
2287                 }
2288
2289
2290                 /*
2291                  * We MUST, I repeat, MUST fix up the L1 entry corresponding
2292                  * to 'vector_page' in the incoming L1 table before switching
2293                  * to it otherwise subsequent interrupts/exceptions (including
2294                  * domain faults!) will jump into hyperspace.
2295                  */
2296                 if (pcb->pcb_pl1vec) {
2297
2298                         *pcb->pcb_pl1vec = pcb->pcb_l1vec;
2299                         /*
2300                          * Don't need to PTE_SYNC() at this point since
2301                          * cpu_setttb() is about to flush both the cache
2302                          * and the TLB.
2303                          */
2304                 }
2305
2306                 cpu_domains(pcb->pcb_dacr);
2307                 cpu_setttb(pcb->pcb_pagedir);
2308         }
2309         critical_exit();
2310 }
2311
     /*
      * pmap_set_pt_cache_mode: make sure the mapping of 'va' reached through
      * the L1 table 'kl1' uses the cache mode required for page tables.
      *
      * A section mapping is checked against pte_l1_s_cache_mode_pt in the
      * L1 descriptor itself.  Otherwise the L2 table is located with
      * kernel_pt_lookup() and the PTE for 'va' is checked against
      * pte_l2_s_cache_mode_pt.  A wrong descriptor is rewritten, synced and
      * written back/invalidated from the data cache.
      *
      * Returns 1 if a descriptor had to be changed, 0 if it was already
      * correct.  Panics if the L2 table cannot be found.
      */
2312 static int
2313 pmap_set_pt_cache_mode(pd_entry_t *kl1, vm_offset_t va)
2314 {
2315         pd_entry_t *pdep, pde;
2316         pt_entry_t *ptep, pte;
             /* A physical address, so vm_paddr_t as in pmap_bootstrap(). */
2317         vm_paddr_t pa;
2318         int rv = 0;
2319
2320         /*
2321          * Make sure the descriptor itself has the correct cache mode
2322          */
2323         pdep = &kl1[L1_IDX(va)];
2324         pde = *pdep;
2325
2326         if (l1pte_section_p(pde)) {
2327                 if ((pde & L1_S_CACHE_MASK) != pte_l1_s_cache_mode_pt) {
2328                         *pdep = (pde & ~L1_S_CACHE_MASK) |
2329                             pte_l1_s_cache_mode_pt;
2330                         PTE_SYNC(pdep);
2331                         cpu_dcache_wbinv_range((vm_offset_t)pdep,
2332                             sizeof(*pdep));
2333                         rv = 1;
2334                 }
2335         } else {
2336                 pa = (vm_paddr_t)(pde & L1_C_ADDR_MASK);
2337                 ptep = (pt_entry_t *)kernel_pt_lookup(pa);
                     /*
                      * Report the address being looked up: ptep is always
                      * NULL on this path, so printing it said nothing.
                      */
2338                 if (ptep == NULL)
2339                         panic("pmap_bootstrap: No L2 for L2 @ va %p\n",
                                 (void *)va);
2340
2341                 ptep = &ptep[l2pte_index(va)];
2342                 pte = *ptep;
2343                 if ((pte & L2_S_CACHE_MASK) != pte_l2_s_cache_mode_pt) {
2344                         *ptep = (pte & ~L2_S_CACHE_MASK) |
2345                             pte_l2_s_cache_mode_pt;
2346                         PTE_SYNC(ptep);
2347                         cpu_dcache_wbinv_range((vm_offset_t)ptep,
2348                             sizeof(*ptep));
2349                         rv = 1;
2350                 }
2351         }
2352
2353         return (rv);
2354 }
2355
     /*
      * pmap_alloc_specials: carve 'pages' pages of virtual address space
      * off the front of *availp.
      *
      * The start of the range is stored in *vap and *availp is advanced
      * past it.  When 'ptep' is non-NULL, *ptep receives the address of the
      * kernel L2 PTE for the first page; the function panics if the kernel
      * pmap has no L2 bucket for that address.
      */
2356 static void
2357 pmap_alloc_specials(vm_offset_t *availp, int pages, vm_offset_t *vap,
2358     pt_entry_t **ptep)
2359 {
2360         const vm_offset_t base = *availp;
2361         struct l2_bucket *bucket;
2362
2363         if (ptep != NULL) {
2364                 bucket = pmap_get_l2_bucket(pmap_kernel(), base);
2365                 if (bucket == NULL)
2366                         panic("pmap_alloc_specials: no l2b for 0x%x", base);
2367
2368                 *ptep = bucket->l2b_kva + l2pte_index(base);
2369         }
2370
2371         *vap = base;
2372         *availp = base + (PAGE_SIZE * pages);
2373 }
2374
2375 /*
2376  *      Bootstrap the system enough to run with virtual memory.
2377  *
2378  *      On the arm this is called after mapping has already been enabled
2379  *      and just syncs the pmap module with what has already been done.
2380  *      [We can't call it easily with mapping off since the kernel is not
2381  *      mapped with PA == VA, hence we would have to relocate every address
2382  *      from the linked base (virtual) address "KERNBASE" to the actual
2383  *      (physical) address starting relative to 0]
2384  */
2385 #define PMAP_STATIC_L2_SIZE 16  /* entries in pmap_bootstrap()'s static_l2[] */
     /* Declared here only when ARM_USE_SMALL_ALLOC is configured. */
2386 #ifdef ARM_USE_SMALL_ALLOC
2387 extern struct mtx smallalloc_mtx;
2388 #endif
2389
2390 void
2391 pmap_bootstrap(vm_offset_t firstaddr, vm_offset_t lastaddr, struct pv_addr *l1pt)
2392 {
2393         static struct l1_ttable static_l1;
2394         static struct l2_dtable static_l2[PMAP_STATIC_L2_SIZE];
2395         struct l1_ttable *l1 = &static_l1;
2396         struct l2_dtable *l2;
2397         struct l2_bucket *l2b;
2398         pd_entry_t pde;
2399         pd_entry_t *kernel_l1pt = (pd_entry_t *)l1pt->pv_va;
2400         pt_entry_t *ptep;
2401         vm_paddr_t pa;
2402         vm_offset_t va;
2403         vm_size_t size;
2404         int l1idx, l2idx, l2next = 0;
2405
2406         PDEBUG(1, printf("firstaddr = %08x, loadaddr = %08x\n",
2407             firstaddr, loadaddr));
2408
2409         virtual_avail = firstaddr;
2410         kernel_pmap = &kernel_pmap_store;
2411         kernel_pmap->pm_l1 = l1;
2412         kernel_l1pa = l1pt->pv_pa;
2413
2414         /*
2415          * Scan the L1 translation table created by initarm() and create
2416          * the required metadata for all valid mappings found in it.
2417          */
2418         for (l1idx = 0; l1idx < (L1_TABLE_SIZE / sizeof(pd_entry_t)); l1idx++) {
2419                 pde = kernel_l1pt[l1idx];
2420
2421                 /*
2422                  * We're only interested in Coarse mappings.
2423                  * pmap_extract() can deal with section mappings without
2424                  * recourse to checking L2 metadata.
2425                  */
2426                 if ((pde & L1_TYPE_MASK) != L1_TYPE_C)
2427                         continue;
2428
2429                 /*
2430                  * Lookup the KVA of this L2 descriptor table
2431                  */
2432                 pa = (vm_paddr_t)(pde & L1_C_ADDR_MASK);
2433                 ptep = (pt_entry_t *)kernel_pt_lookup(pa);
2434
2435                 if (ptep == NULL) {
2436                         panic("pmap_bootstrap: No L2 for va 0x%x, pa 0x%lx",
2437                             (u_int)l1idx << L1_S_SHIFT, (long unsigned int)pa);
2438                 }
2439
2440                 /*
2441                  * Fetch the associated L2 metadata structure.
2442                  * Allocate a new one if necessary.
2443                  */
2444                 if ((l2 = kernel_pmap->pm_l2[L2_IDX(l1idx)]) == NULL) {
2445                         if (l2next == PMAP_STATIC_L2_SIZE)
2446                                 panic("pmap_bootstrap: out of static L2s");
2447                         kernel_pmap->pm_l2[L2_IDX(l1idx)] = l2 =
2448                             &static_l2[l2next++];
2449                 }
2450
2451                 /*
2452                  * One more L1 slot tracked...
2453                  */
2454                 l2->l2_occupancy++;
2455
2456                 /*
2457                  * Fill in the details of the L2 descriptor in the
2458                  * appropriate bucket.
2459                  */
2460                 l2b = &l2->l2_bucket[L2_BUCKET(l1idx)];
2461                 l2b->l2b_kva = ptep;
2462                 l2b->l2b_phys = pa;
2463                 l2b->l2b_l1idx = l1idx;
2464
2465                 /*
2466                  * Establish an initial occupancy count for this descriptor
2467                  */
2468                 for (l2idx = 0;
2469                     l2idx < (L2_TABLE_SIZE_REAL / sizeof(pt_entry_t));
2470                     l2idx++) {
2471                         if ((ptep[l2idx] & L2_TYPE_MASK) != L2_TYPE_INV) {
2472                                 l2b->l2b_occupancy++;
2473                         }
2474                 }
2475
2476                 /*
2477                  * Make sure the descriptor itself has the correct cache mode.
2478                  * If not, fix it, but whine about the problem. Port-meisters
2479                  * should consider this a clue to fix up their initarm()
2480                  * function. :)
2481                  */
2482                 if (pmap_set_pt_cache_mode(kernel_l1pt, (vm_offset_t)ptep)) {
2483                         printf("pmap_bootstrap: WARNING! wrong cache mode for "
2484                             "L2 pte @ %p\n", ptep);
2485                 }
2486         }
2487
2488
2489         /*
2490          * Ensure the primary (kernel) L1 has the correct cache mode for
2491          * a page table. Bitch if it is not correctly set.
2492          */
2493         for (va = (vm_offset_t)kernel_l1pt;
2494             va < ((vm_offset_t)kernel_l1pt + L1_TABLE_SIZE); va += PAGE_SIZE) {
2495                 if (pmap_set_pt_cache_mode(kernel_l1pt, va))
2496                         printf("pmap_bootstrap: WARNING! wrong cache mode for "
2497                             "primary L1 @ 0x%x\n", va);
2498         }
2499
2500         cpu_dcache_wbinv_all();
2501         cpu_tlb_flushID();
2502         cpu_cpwait();
2503
2504         PMAP_LOCK_INIT(kernel_pmap);
2505         kernel_pmap->pm_active = -1;
2506         kernel_pmap->pm_domain = PMAP_DOMAIN_KERNEL;
2507         TAILQ_INIT(&kernel_pmap->pm_pvlist);
2508
2509         /*
2510          * Reserve some special page table entries/VA space for temporary
2511          * mapping of pages.
2512          */
2513 #define SYSMAP(c, p, v, n)                                              \
2514     v = (c)va; va += ((n)*PAGE_SIZE); p = pte; pte += (n);
2515
2516         pmap_alloc_specials(&virtual_avail, 1, &csrcp, &csrc_pte);
2517         pmap_set_pt_cache_mode(kernel_l1pt, (vm_offset_t)csrc_pte);
2518         pmap_alloc_specials(&virtual_avail, 1, &cdstp, &cdst_pte);
2519         pmap_set_pt_cache_mode(kernel_l1pt, (vm_offset_t)cdst_pte);
2520         size = ((lastaddr - pmap_curmaxkvaddr) + L1_S_OFFSET) / L1_S_SIZE;
2521         pmap_alloc_specials(&virtual_avail,
2522             round_page(size * L2_TABLE_SIZE_REAL) / PAGE_SIZE,
2523             &pmap_kernel_l2ptp_kva, NULL);
2524
2525         size = (size + (L2_BUCKET_SIZE - 1)) / L2_BUCKET_SIZE;
2526         pmap_alloc_specials(&virtual_avail,
2527             round_page(size * sizeof(struct l2_dtable)) / PAGE_SIZE,
2528             &pmap_kernel_l2dtable_kva, NULL);
2529
2530         pmap_alloc_specials(&virtual_avail,
2531             1, (vm_offset_t*)&_tmppt, NULL);
2532         SLIST_INIT(&l1_list);
2533         TAILQ_INIT(&l1_lru_list);
2534         mtx_init(&l1_lru_lock, "l1 list lock", NULL, MTX_DEF);
2535         pmap_init_l1(l1, kernel_l1pt);
2536         cpu_dcache_wbinv_all();
2537
2538         virtual_avail = round_page(virtual_avail);
2539         virtual_end = lastaddr;
2540         kernel_vm_end = pmap_curmaxkvaddr;
2541         arm_nocache_startaddr = lastaddr;
2542         mtx_init(&cmtx, "TMP mappings mtx", NULL, MTX_DEF);
2543
2544 #ifdef ARM_USE_SMALL_ALLOC
2545         mtx_init(&smallalloc_mtx, "Small alloc page list", NULL, MTX_DEF);
2546         arm_init_smallalloc();
2547 #endif
2548         pmap_set_pcb_pagedir(kernel_pmap, thread0.td_pcb);
2549 }
2550
2551 /***************************************************
2552  * Pmap allocation/deallocation routines.
2553  ***************************************************/
2554
2555 /*
2556  * Release any resources held by the given physical map.
2557  * Called when a pmap initialized by pmap_pinit is being released.
2558  * Should only be called if the map contains no valid mappings.
2559  */
2560 void
2561 pmap_release(pmap_t pmap)
2562 {
2563         struct pcb *pcb;
2564
2565         pmap_idcache_wbinv_all(pmap);
2566         pmap_tlb_flushID(pmap);
2567         cpu_cpwait();
2568         if (vector_page < KERNBASE) {
2569                 struct pcb *curpcb = PCPU_GET(curpcb);
2570                 pcb = thread0.td_pcb;
2571                 if (pmap_is_current(pmap)) {
2572                         /*
2573                          * Frob the L1 entry corresponding to the vector
2574                          * page so that it contains the kernel pmap's domain
2575                          * number. This will ensure pmap_remove() does not
2576                          * pull the current vector page out from under us.
2577                          */
2578                         critical_enter();
2579                         *pcb->pcb_pl1vec = pcb->pcb_l1vec;
2580                         cpu_domains(pcb->pcb_dacr);
2581                         cpu_setttb(pcb->pcb_pagedir);
2582                         critical_exit();
2583                 }
2584                 pmap_remove(pmap, vector_page, vector_page + PAGE_SIZE);
2585                 /*
2586                  * Make sure cpu_switch(), et al, DTRT. This is safe to do
2587                  * since this process has no remaining mappings of its own.
2588                  */
2589                 curpcb->pcb_pl1vec = pcb->pcb_pl1vec;
2590                 curpcb->pcb_l1vec = pcb->pcb_l1vec;
2591                 curpcb->pcb_dacr = pcb->pcb_dacr;
2592                 curpcb->pcb_pagedir = pcb->pcb_pagedir;
2593
2594         }
2595         pmap_free_l1(pmap);
2596         PMAP_LOCK_DESTROY(pmap);
2597
2598         dprintf("pmap_release()\n");
2599 }
2600
2601
2602
2603 /*
2604  * Helper function for pmap_grow_l2_bucket()
2605  */
2606 static __inline int
2607 pmap_grow_map(vm_offset_t va, pt_entry_t cache_mode, vm_paddr_t *pap)
2608 {
2609         struct l2_bucket *l2b;
2610         pt_entry_t *ptep;
2611         vm_paddr_t pa;
2612         struct vm_page *pg;
2613
2614         pg = vm_page_alloc(NULL, 0, VM_ALLOC_NOOBJ | VM_ALLOC_WIRED);
2615         if (pg == NULL)
2616                 return (1);
2617         pa = VM_PAGE_TO_PHYS(pg);
2618
2619         if (pap)
2620                 *pap = pa;
2621
2622         l2b = pmap_get_l2_bucket(pmap_kernel(), va);
2623
2624         ptep = &l2b->l2b_kva[l2pte_index(va)];
2625         *ptep = L2_S_PROTO | pa | cache_mode |
2626             L2_S_PROT(PTE_KERNEL, VM_PROT_READ | VM_PROT_WRITE);
2627         PTE_SYNC(ptep);
2628         return (0);
2629 }
2630
2631 /*
2632  * This is the same as pmap_alloc_l2_bucket(), except that it is only
2633  * used by pmap_growkernel().
2634  */
2635 static __inline struct l2_bucket *
2636 pmap_grow_l2_bucket(pmap_t pm, vm_offset_t va)
2637 {
2638         struct l2_dtable *l2;
2639         struct l2_bucket *l2b;
2640         struct l1_ttable *l1;
2641         pd_entry_t *pl1pd;
2642         u_short l1idx;
2643         vm_offset_t nva;
2644
2645         l1idx = L1_IDX(va);
2646
2647         if ((l2 = pm->pm_l2[L2_IDX(l1idx)]) == NULL) {
2648                 /*
2649                  * No mapping at this address, as there is
2650                  * no entry in the L1 table.
2651                  * Need to allocate a new l2_dtable.
2652                  */
2653                 nva = pmap_kernel_l2dtable_kva;
2654                 if ((nva & PAGE_MASK) == 0) {
2655                         /*
2656                          * Need to allocate a backing page
2657                          */
2658                         if (pmap_grow_map(nva, pte_l2_s_cache_mode, NULL))
2659                                 return (NULL);
2660                 }
2661
2662                 l2 = (struct l2_dtable *)nva;
2663                 nva += sizeof(struct l2_dtable);
2664
2665                 if ((nva & PAGE_MASK) < (pmap_kernel_l2dtable_kva &
2666                     PAGE_MASK)) {
2667                         /*
2668                          * The new l2_dtable straddles a page boundary.
2669                          * Map in another page to cover it.
2670                          */
2671                         if (pmap_grow_map(nva, pte_l2_s_cache_mode, NULL))
2672                                 return (NULL);
2673                 }
2674
2675                 pmap_kernel_l2dtable_kva = nva;
2676
2677                 /*
2678                  * Link it into the parent pmap
2679                  */
2680                 pm->pm_l2[L2_IDX(l1idx)] = l2;
2681                 memset(l2, 0, sizeof(*l2));
2682         }
2683
2684         l2b = &l2->l2_bucket[L2_BUCKET(l1idx)];
2685
2686         /*
2687          * Fetch pointer to the L2 page table associated with the address.
2688          */
2689         if (l2b->l2b_kva == NULL) {
2690                 pt_entry_t *ptep;
2691
2692                 /*
2693                  * No L2 page table has been allocated. Chances are, this
2694                  * is because we just allocated the l2_dtable, above.
2695                  */
2696                 nva = pmap_kernel_l2ptp_kva;
2697                 ptep = (pt_entry_t *)nva;
2698                 if ((nva & PAGE_MASK) == 0) {
2699                         /*
2700                          * Need to allocate a backing page
2701                          */
2702                         if (pmap_grow_map(nva, pte_l2_s_cache_mode_pt,
2703                             &pmap_kernel_l2ptp_phys))
2704                                 return (NULL);
2705                         PTE_SYNC_RANGE(ptep, PAGE_SIZE / sizeof(pt_entry_t));
2706                 }
2707                 memset(ptep, 0, L2_TABLE_SIZE_REAL);
2708                 l2->l2_occupancy++;
2709                 l2b->l2b_kva = ptep;
2710                 l2b->l2b_l1idx = l1idx;
2711                 l2b->l2b_phys = pmap_kernel_l2ptp_phys;
2712
2713                 pmap_kernel_l2ptp_kva += L2_TABLE_SIZE_REAL;
2714                 pmap_kernel_l2ptp_phys += L2_TABLE_SIZE_REAL;
2715         }
2716
2717         /* Distribute new L1 entry to all other L1s */
2718         SLIST_FOREACH(l1, &l1_list, l1_link) {
2719                         pl1pd = &l1->l1_kva[L1_IDX(va)];
2720                         *pl1pd = l2b->l2b_phys | L1_C_DOM(PMAP_DOMAIN_KERNEL) |
2721                             L1_C_PROTO;
2722                         PTE_SYNC(pl1pd);
2723         }
2724
2725         return (l2b);
2726 }
2727
2728
2729 /*
2730  * grow the number of kernel page table entries, if needed
2731  */
2732 void
2733 pmap_growkernel(vm_offset_t addr)
2734 {
2735         pmap_t kpm = pmap_kernel();
2736
2737         if (addr <= pmap_curmaxkvaddr)
2738                 return;         /* we are OK */
2739
2740         /*
2741          * whoops!   we need to add kernel PTPs
2742          */
2743
2744         /* Map 1MB at a time */
2745         for (; pmap_curmaxkvaddr < addr; pmap_curmaxkvaddr += L1_S_SIZE)
2746                 pmap_grow_l2_bucket(kpm, pmap_curmaxkvaddr);
2747
2748         /*
2749          * flush out the cache, expensive but growkernel will happen so
2750          * rarely
2751          */
2752         cpu_dcache_wbinv_all();
2753         cpu_tlb_flushD();
2754         cpu_cpwait();
2755         kernel_vm_end = pmap_curmaxkvaddr;
2756
2757 }
2758
2759
2760 /*
2761  * Remove all pages from specified address space
2762  * this aids process exit speeds.  Also, this code
2763  * is special cased for current process only, but
2764  * can have the more generic (and slightly slower)
2765  * mode enabled.  This is much faster than pmap_remove
2766  * in the case of running down an entire address space.
2767  */
2768 void
2769 pmap_remove_pages(pmap_t pmap)
2770 {
2771         struct pv_entry *pv, *npv;
2772         struct l2_bucket *l2b = NULL;
2773         vm_page_t m;
2774         pt_entry_t *pt;
2775
2776         vm_page_lock_queues();
2777         PMAP_LOCK(pmap);
2778         for (pv = TAILQ_FIRST(&pmap->pm_pvlist); pv; pv = npv) {
2779                 if (pv->pv_flags & PVF_WIRED) {
2780                         /* The page is wired, cannot remove it now. */
2781                         npv = TAILQ_NEXT(pv, pv_plist);
2782                         continue;
2783                 }
2784                 pmap->pm_stats.resident_count--;
2785                 l2b = pmap_get_l2_bucket(pmap, pv->pv_va);
2786                 KASSERT(l2b != NULL, ("No L2 bucket in pmap_remove_pages"));
2787                 pt = &l2b->l2b_kva[l2pte_index(pv->pv_va)];
2788                 m = PHYS_TO_VM_PAGE(*pt & L2_ADDR_MASK);
2789 #ifdef ARM_USE_SMALL_ALLOC
2790                 KASSERT((vm_offset_t)m >= alloc_firstaddr, ("Trying to access non-existent page va %x pte %x", pv->pv_va, *pt));
2791 #else
2792                 KASSERT((vm_offset_t)m >= KERNBASE, ("Trying to access non-existent page va %x pte %x", pv->pv_va, *pt));
2793 #endif
2794                 *pt = 0;
2795                 PTE_SYNC(pt);
2796                 npv = TAILQ_NEXT(pv, pv_plist);
2797                 pmap_nuke_pv(m, pmap, pv);
2798                 if (TAILQ_EMPTY(&m->md.pv_list))
2799                         vm_page_flag_clear(m, PG_WRITEABLE);
2800                 pmap_free_pv_entry(pv);
2801         }
2802         vm_page_unlock_queues();
2803         cpu_idcache_wbinv_all();
2804         cpu_tlb_flushID();
2805         cpu_cpwait();
2806         PMAP_UNLOCK(pmap);
2807 }
2808
2809
2810 /***************************************************
2811  * Low level mapping routines.....
2812  ***************************************************/
2813
2814 /* Map a section into the KVA. */
2815
2816 void
2817 pmap_kenter_section(vm_offset_t va, vm_offset_t pa, int flags)
2818 {
2819         pd_entry_t pd = L1_S_PROTO | pa | L1_S_PROT(PTE_KERNEL,
2820             VM_PROT_READ|VM_PROT_WRITE) | L1_S_DOM(PMAP_DOMAIN_KERNEL);
2821         struct l1_ttable *l1;
2822
2823         KASSERT(((va | pa) & L1_S_OFFSET) == 0,
2824             ("Not a valid section mapping"));
2825         if (flags & SECTION_CACHE)
2826                 pd |= pte_l1_s_cache_mode;
2827         else if (flags & SECTION_PT)
2828                 pd |= pte_l1_s_cache_mode_pt;
2829         SLIST_FOREACH(l1, &l1_list, l1_link) {
2830                 l1->l1_kva[L1_IDX(va)] = pd;
2831                 PTE_SYNC(&l1->l1_kva[L1_IDX(va)]);
2832         }
2833 }
2834
2835 /*
2836  * add a wired page to the kva
2837  * note that in order for the mapping to take effect -- you
2838  * should do a invltlb after doing the pmap_kenter...
2839  */
2840 static PMAP_INLINE void
2841 pmap_kenter_internal(vm_offset_t va, vm_offset_t pa, int flags)
2842 {
2843         struct l2_bucket *l2b;
2844         pt_entry_t *pte;
2845         pt_entry_t opte;
2846         PDEBUG(1, printf("pmap_kenter: va = %08x, pa = %08x\n",
2847             (uint32_t) va, (uint32_t) pa));
2848
2849
2850         l2b = pmap_get_l2_bucket(pmap_kernel(), va);
2851         if (l2b == NULL)
2852                 l2b = pmap_grow_l2_bucket(pmap_kernel(), va);
2853         KASSERT(l2b != NULL, ("No L2 Bucket"));
2854         pte = &l2b->l2b_kva[l2pte_index(va)];
2855         opte = *pte;
2856         PDEBUG(1, printf("pmap_kenter: pte = %08x, opte = %08x, npte = %08x\n",
2857             (uint32_t) pte, opte, *pte));
2858         if (l2pte_valid(opte)) {
2859                 cpu_dcache_wbinv_range(va, PAGE_SIZE);
2860                 cpu_tlb_flushD_SE(va);
2861                 cpu_cpwait();
2862         } else {
2863                 if (opte == 0)
2864                         l2b->l2b_occupancy++;
2865         }
2866         *pte = L2_S_PROTO | pa | L2_S_PROT(PTE_KERNEL,
2867             VM_PROT_READ | VM_PROT_WRITE);
2868         if (flags & KENTER_CACHE)
2869                 *pte |= pte_l2_s_cache_mode;
2870         if (flags & KENTER_USER)
2871                 *pte |= L2_S_PROT_U;
2872         PTE_SYNC(pte);
2873 }
2874
2875 void
2876 pmap_kenter(vm_offset_t va, vm_paddr_t pa)
2877 {
2878         pmap_kenter_internal(va, pa, KENTER_CACHE);
2879 }
2880
2881 void
2882 pmap_kenter_nocache(vm_offset_t va, vm_paddr_t pa)
2883 {
2884
2885         pmap_kenter_internal(va, pa, 0);
2886 }
2887
2888 void
2889 pmap_kenter_user(vm_offset_t va, vm_paddr_t pa)
2890 {
2891
2892         pmap_kenter_internal(va, pa, KENTER_CACHE|KENTER_USER);
2893         /*
2894          * Call pmap_fault_fixup now, to make sure we'll have no exception
2895          * at the first use of the new address, or bad things will happen,
2896          * as we use one of these addresses in the exception handlers.
2897          */
2898         pmap_fault_fixup(pmap_kernel(), va, VM_PROT_READ|VM_PROT_WRITE, 1);
2899 }
2900
2901 /*
2902  * remove a page rom the kernel pagetables
2903  */
2904 PMAP_INLINE void
2905 pmap_kremove(vm_offset_t va)
2906 {
2907         struct l2_bucket *l2b;
2908         pt_entry_t *pte, opte;
2909
2910         l2b = pmap_get_l2_bucket(pmap_kernel(), va);
2911         if (!l2b)
2912                 return;
2913         KASSERT(l2b != NULL, ("No L2 Bucket"));
2914         pte = &l2b->l2b_kva[l2pte_index(va)];
2915         opte = *pte;
2916         if (l2pte_valid(opte)) {
2917                 cpu_dcache_wbinv_range(va, PAGE_SIZE);
2918                 cpu_tlb_flushD_SE(va);
2919                 cpu_cpwait();
2920                 *pte = 0;
2921         }
2922 }
2923
2924
2925 /*
2926  *      Used to map a range of physical addresses into kernel
2927  *      virtual address space.
2928  *
2929  *      The value passed in '*virt' is a suggested virtual address for
2930  *      the mapping. Architectures which can support a direct-mapped
2931  *      physical to virtual region can return the appropriate address
2932  *      within that region, leaving '*virt' unchanged. Other
2933  *      architectures should map the pages starting at '*virt' and
2934  *      update '*virt' with the first usable address after the mapped
2935  *      region.
2936  */
2937 vm_offset_t
2938 pmap_map(vm_offset_t *virt, vm_offset_t start, vm_offset_t end, int prot)
2939 {
2940 #ifdef ARM_USE_SMALL_ALLOC
2941         return (arm_ptovirt(start));
2942 #else
2943         vm_offset_t sva = *virt;
2944         vm_offset_t va = sva;
2945
2946         PDEBUG(1, printf("pmap_map: virt = %08x, start = %08x, end = %08x, "
2947             "prot = %d\n", (uint32_t) *virt, (uint32_t) start, (uint32_t) end,
2948             prot));
2949
2950         while (start < end) {
2951                 pmap_kenter(va, start);
2952                 va += PAGE_SIZE;
2953                 start += PAGE_SIZE;
2954         }
2955         *virt = va;
2956         return (sva);
2957 #endif
2958 }
2959
2960 static void
2961 pmap_wb_page(vm_page_t m)
2962 {
2963         struct pv_entry *pv;
2964
2965         TAILQ_FOREACH(pv, &m->md.pv_list, pv_list)
2966             pmap_dcache_wb_range(pv->pv_pmap, pv->pv_va, PAGE_SIZE, FALSE,
2967                 (pv->pv_flags & PVF_WRITE) == 0);
2968 }
2969
2970 static void
2971 pmap_inv_page(vm_page_t m)
2972 {
2973         struct pv_entry *pv;
2974
2975         TAILQ_FOREACH(pv, &m->md.pv_list, pv_list)
2976             pmap_dcache_wb_range(pv->pv_pmap, pv->pv_va, PAGE_SIZE, TRUE, TRUE);
2977 }
2978 /*
2979  * Add a list of wired pages to the kva
2980  * this routine is only used for temporary
2981  * kernel mappings that do not need to have
2982  * page modification or references recorded.
2983  * Note that old mappings are simply written
2984  * over.  The page *must* be wired.
2985  */
2986 void
2987 pmap_qenter(vm_offset_t va, vm_page_t *m, int count)
2988 {
2989         int i;
2990
2991         for (i = 0; i < count; i++) {
2992                 pmap_wb_page(m[i]);
2993                 pmap_kenter_internal(va, VM_PAGE_TO_PHYS(m[i]),
2994                     KENTER_CACHE);
2995                 va += PAGE_SIZE;
2996         }
2997 }
2998
2999
3000 /*
3001  * this routine jerks page mappings from the
3002  * kernel -- it is meant only for temporary mappings.
3003  */
3004 void
3005 pmap_qremove(vm_offset_t va, int count)
3006 {
3007         vm_paddr_t pa;
3008         int i;
3009
3010         for (i = 0; i < count; i++) {
3011                 pa = vtophys(va);
3012                 if (pa) {
3013                         pmap_inv_page(PHYS_TO_VM_PAGE(pa));
3014                         pmap_kremove(va);
3015                 }
3016                 va += PAGE_SIZE;
3017         }
3018 }
3019
3020
3021 /*
3022  * pmap_object_init_pt preloads the ptes for a given object
3023  * into the specified pmap.  This eliminates the blast of soft
3024  * faults on process startup and immediately after an mmap.
3025  */
3026 void
3027 pmap_object_init_pt(pmap_t pmap, vm_offset_t addr, vm_object_t object,
3028     vm_pindex_t pindex, vm_size_t size)
3029 {
3030
3031         VM_OBJECT_LOCK_ASSERT(object, MA_OWNED);
3032         KASSERT(object->type == OBJT_DEVICE,
3033             ("pmap_object_init_pt: non-device object"));
3034 }
3035
3036
3037 /*
3038  *      pmap_is_prefaultable:
3039  *
3040  *      Return whether or not the specified virtual address is elgible
3041  *      for prefault.
3042  */
3043 boolean_t
3044 pmap_is_prefaultable(pmap_t pmap, vm_offset_t addr)
3045 {
3046         pd_entry_t *pde;
3047         pt_entry_t *pte;
3048
3049         if (!pmap_get_pde_pte(pmap, addr, &pde, &pte))
3050                 return (FALSE);
3051         KASSERT(pte != NULL, ("Valid mapping but no pte ?"));
3052         if (*pte == 0)
3053                 return (TRUE);
3054         return (FALSE);
3055 }
3056
3057 /*
3058  * Fetch pointers to the PDE/PTE for the given pmap/VA pair.
3059  * Returns TRUE if the mapping exists, else FALSE.
3060  *
3061  * NOTE: This function is only used by a couple of arm-specific modules.
3062  * It is not safe to take any pmap locks here, since we could be right
3063  * in the middle of debugging the pmap anyway...
3064  *
3065  * It is possible for this routine to return FALSE even though a valid
3066  * mapping does exist. This is because we don't lock, so the metadata
3067  * state may be inconsistent.
3068  *
3069  * NOTE: We can return a NULL *ptp in the case where the L1 pde is
3070  * a "section" mapping.
3071  */
3072 boolean_t
3073 pmap_get_pde_pte(pmap_t pm, vm_offset_t va, pd_entry_t **pdp, pt_entry_t **ptp)
3074 {
3075         struct l2_dtable *l2;
3076         pd_entry_t *pl1pd, l1pd;
3077         pt_entry_t *ptep;
3078         u_short l1idx;
3079
3080         if (pm->pm_l1 == NULL)
3081                 return (FALSE);
3082
3083         l1idx = L1_IDX(va);
3084         *pdp = pl1pd = &pm->pm_l1->l1_kva[l1idx];
3085         l1pd = *pl1pd;
3086
3087         if (l1pte_section_p(l1pd)) {
3088                 *ptp = NULL;
3089                 return (TRUE);
3090         }
3091
3092         if (pm->pm_l2 == NULL)
3093                 return (FALSE);
3094
3095         l2 = pm->pm_l2[L2_IDX(l1idx)];
3096
3097         if (l2 == NULL ||
3098             (ptep = l2->l2_bucket[L2_BUCKET(l1idx)].l2b_kva) == NULL) {
3099                 return (FALSE);
3100         }
3101
3102         *ptp = &ptep[l2pte_index(va)];
3103         return (TRUE);
3104 }
3105
3106 /*
3107  *      Routine:        pmap_remove_all
3108  *      Function:
3109  *              Removes this physical page from
3110  *              all physical maps in which it resides.
3111  *              Reflects back modify bits to the pager.
3112  *
3113  *      Notes:
3114  *              Original versions of this routine were very
3115  *              inefficient because they iteratively called
3116  *              pmap_remove (slow...)
3117  */
3118 void
3119 pmap_remove_all(vm_page_t m)
3120 {
3121         pv_entry_t pv;
3122         pt_entry_t *ptep, pte;
3123         struct l2_bucket *l2b;
3124         boolean_t flush = FALSE;
3125         pmap_t curpm;
3126         int flags = 0;
3127
3128 #if defined(PMAP_DEBUG)
3129         /*
3130          * XXX this makes pmap_page_protect(NONE) illegal for non-managed
3131          * pages!
3132          */
3133         if (m->flags & PG_FICTITIOUS) {
3134                 panic("pmap_page_protect: illegal for unmanaged page, va: 0x%x", VM_PAGE_TO_PHYS(m));
3135         }
3136 #endif
3137
3138         if (TAILQ_EMPTY(&m->md.pv_list))
3139                 return;
3140         curpm = vmspace_pmap(curproc->p_vmspace);
3141         while ((pv = TAILQ_FIRST(&m->md.pv_list)) != NULL) {
3142                 if (flush == FALSE && (pv->pv_pmap == curpm ||
3143                     pv->pv_pmap == pmap_kernel()))
3144                         flush = TRUE;
3145                 PMAP_LOCK(pv->pv_pmap);
3146                 l2b = pmap_get_l2_bucket(pv->pv_pmap, pv->pv_va);
3147                 KASSERT(l2b != NULL, ("No l2 bucket"));
3148                 ptep = &l2b->l2b_kva[l2pte_index(pv->pv_va)];
3149                 pte = *ptep;
3150                 *ptep = 0;
3151                 PTE_SYNC_CURRENT(pv->pv_pmap, ptep);
3152                 pmap_free_l2_bucket(pv->pv_pmap, l2b, 1);
3153                 if (pv->pv_flags & PVF_WIRED)
3154                         pv->pv_pmap->pm_stats.wired_count--;
3155                 pv->pv_pmap->pm_stats.resident_count--;
3156                 flags |= pv->pv_flags;
3157                 pmap_nuke_pv(m, pv->pv_pmap, pv);
3158                 PMAP_UNLOCK(pv->pv_pmap);
3159                 pmap_free_pv_entry(pv);
3160         }
3161
3162         if (flush) {
3163                 if (PV_BEEN_EXECD(flags))
3164                         pmap_tlb_flushID(curpm);
3165                 else
3166                         pmap_tlb_flushD(curpm);
3167         }
3168         vm_page_flag_clear(m, PG_WRITEABLE);
3169 }
3170
3171
3172 /*
3173  *      Set the physical protection on the
3174  *      specified range of this map as requested.
3175  */
3176 void
3177 pmap_protect(pmap_t pm, vm_offset_t sva, vm_offset_t eva, vm_prot_t prot)
3178 {
3179         struct l2_bucket *l2b;
3180         pt_entry_t *ptep, pte;
3181         vm_offset_t next_bucket;
3182         u_int flags;
3183         int flush;
3184
3185         if ((prot & VM_PROT_READ) == 0) {
3186                 pmap_remove(pm, sva, eva);
3187                 return;
3188         }
3189
3190         if (prot & VM_PROT_WRITE) {
3191                 /*
3192                  * If this is a read->write transition, just ignore it and let
3193                  * vm_fault() take care of it later.
3194                  */
3195                 return;
3196         }
3197
3198         vm_page_lock_queues();
3199         PMAP_LOCK(pm);
3200
3201         /*
3202          * OK, at this point, we know we're doing write-protect operation.
3203          * If the pmap is active, write-back the range.
3204          */
3205         pmap_dcache_wb_range(pm, sva, eva - sva, FALSE, FALSE);
3206
3207         flush = ((eva - sva) >= (PAGE_SIZE * 4)) ? 0 : -1;
3208         flags = 0;
3209
3210         while (sva < eva) {
3211                 next_bucket = L2_NEXT_BUCKET(sva);
3212                 if (next_bucket > eva)
3213                         next_bucket = eva;
3214
3215                 l2b = pmap_get_l2_bucket(pm, sva);
3216                 if (l2b == NULL) {
3217                         sva = next_bucket;
3218                         continue;
3219                 }
3220
3221                 ptep = &l2b->l2b_kva[l2pte_index(sva)];
3222
3223                 while (sva < next_bucket) {
3224                         if ((pte = *ptep) != 0 && (pte & L2_S_PROT_W) != 0) {
3225                                 struct vm_page *pg;
3226                                 u_int f;
3227
3228                                 pg = PHYS_TO_VM_PAGE(l2pte_pa(pte));
3229                                 pte &= ~L2_S_PROT_W;
3230                                 *ptep = pte;
3231                                 PTE_SYNC(ptep);
3232
3233                                 if (pg != NULL) {
3234                                         f = pmap_modify_pv(pg, pm, sva,
3235                                             PVF_WRITE, 0);
3236                                         pmap_vac_me_harder(pg, pm, sva);
3237                                         vm_page_dirty(pg);
3238                                 } else
3239                                         f = PVF_REF | PVF_EXEC;
3240
3241                                 if (flush >= 0) {
3242                                         flush++;
3243                                         flags |= f;
3244                                 } else
3245                                 if (PV_BEEN_EXECD(f))
3246                                         pmap_tlb_flushID_SE(pm, sva);
3247                                 else
3248                                 if (PV_BEEN_REFD(f))
3249                                         pmap_tlb_flushD_SE(pm, sva);
3250                         }
3251
3252                         sva += PAGE_SIZE;
3253                         ptep++;
3254                 }
3255         }
3256
3257
3258         if (flush) {
3259                 if (PV_BEEN_EXECD(flags))
3260                         pmap_tlb_flushID(pm);
3261                 else
3262                 if (PV_BEEN_REFD(flags))
3263                         pmap_tlb_flushD(pm);
3264         }
3265         vm_page_unlock_queues();
3266
3267         PMAP_UNLOCK(pm);
3268 }
3269
3270
3271 /*
3272  *      Insert the given physical page (p) at
3273  *      the specified virtual address (v) in the
3274  *      target physical map with the protection requested.
3275  *
3276  *      If specified, the page will be wired down, meaning
3277  *      that the related pte can not be reclaimed.
3278  *
3279  *      NB:  This is the only routine which MAY NOT lazy-evaluate
3280  *      or lose information.  That is, this routine must actually
3281  *      insert this page into the given map NOW.
3282  */
3283
3284 void
3285 pmap_enter(pmap_t pmap, vm_offset_t va, vm_page_t m, vm_prot_t prot,
3286     boolean_t wired)
3287 {
3288
3289         vm_page_lock_queues();
3290         PMAP_LOCK(pmap);
3291         pmap_enter_locked(pmap, va, m, prot, wired, M_WAITOK);
3292         vm_page_unlock_queues();
3293         PMAP_UNLOCK(pmap);
3294 }
3295
3296 /*
3297  *      The page queues and pmap must be locked.
3298  */
3299 static void
3300 pmap_enter_locked(pmap_t pmap, vm_offset_t va, vm_page_t m, vm_prot_t prot,
3301     boolean_t wired, int flags)
3302 {
3303         struct l2_bucket *l2b = NULL;
3304         struct vm_page *opg;
3305         struct pv_entry *pve = NULL;
3306         pt_entry_t *ptep, npte, opte;
3307         u_int nflags;
3308         u_int oflags;
3309         vm_paddr_t pa;
3310
3311         PMAP_ASSERT_LOCKED(pmap);
3312         mtx_assert(&vm_page_queue_mtx, MA_OWNED);
3313         if (va == vector_page) {
3314                 pa = systempage.pv_pa;
3315                 m = NULL;
3316         } else
3317                 pa = VM_PAGE_TO_PHYS(m);
3318         nflags = 0;
3319         if (prot & VM_PROT_WRITE)
3320                 nflags |= PVF_WRITE;
3321         if (prot & VM_PROT_EXECUTE)
3322                 nflags |= PVF_EXEC;
3323         if (wired)
3324                 nflags |= PVF_WIRED;
3325         PDEBUG(1, printf("pmap_enter: pmap = %08x, va = %08x, m = %08x, prot = %x, "
3326             "wired = %x\n", (uint32_t) pmap, va, (uint32_t) m, prot, wired));
3327
3328         if (pmap == pmap_kernel()) {
3329                 l2b = pmap_get_l2_bucket(pmap, va);
3330                 if (l2b == NULL)
3331                         l2b = pmap_grow_l2_bucket(pmap, va);
3332         } else {
3333 do_l2b_alloc:
3334                 l2b = pmap_alloc_l2_bucket(pmap, va);
3335                 if (l2b == NULL) {
3336                         if (flags & M_WAITOK) {
3337                                 PMAP_UNLOCK(pmap);
3338                                 vm_page_unlock_queues();
3339                                 VM_WAIT;
3340                                 vm_page_lock_queues();
3341                                 PMAP_LOCK(pmap);
3342                                 goto do_l2b_alloc;
3343                         }
3344                         return;
3345                 }
3346         }
3347
3348         ptep = &l2b->l2b_kva[l2pte_index(va)];
3349
3350         opte = *ptep;
3351         npte = pa;
3352         oflags = 0;
3353         if (opte) {
3354                 /*
3355                  * There is already a mapping at this address.
3356                  * If the physical address is different, lookup the
3357                  * vm_page.
3358                  */
3359                 if (l2pte_pa(opte) != pa)
3360                         opg = PHYS_TO_VM_PAGE(l2pte_pa(opte));
3361                 else
3362                         opg = m;
3363         } else
3364                 opg = NULL;
3365
3366         if ((prot & (VM_PROT_ALL)) ||
3367             (!m || m->md.pvh_attrs & PVF_REF)) {
3368                 /*
3369                  * - The access type indicates that we don't need
3370                  *   to do referenced emulation.
3371                  * OR
3372                  * - The physical page has already been referenced
3373                  *   so no need to re-do referenced emulation here.
3374                  */
3375                 npte |= L2_S_PROTO;
3376
3377                 nflags |= PVF_REF;
3378
3379                 if (m && ((prot & VM_PROT_WRITE) != 0 ||
3380                     (m->md.pvh_attrs & PVF_MOD))) {
3381                         /*
3382                          * This is a writable mapping, and the
3383                          * page's mod state indicates it has
3384                          * already been modified. Make it
3385                          * writable from the outset.
3386                          */
3387                         nflags |= PVF_MOD;
3388                         if (!(m->md.pvh_attrs & PVF_MOD))
3389                                 vm_page_dirty(m);
3390                 }
3391                 if (m && opte)
3392                         vm_page_flag_set(m, PG_REFERENCED);
3393         } else {
3394                 /*
3395                  * Need to do page referenced emulation.
3396                  */
3397                 npte |= L2_TYPE_INV;
3398         }
3399
3400         if (prot & VM_PROT_WRITE)
3401                 npte |= L2_S_PROT_W;
3402         npte |= pte_l2_s_cache_mode;
3403         if (m && m == opg) {
3404                 /*
3405                  * We're changing the attrs of an existing mapping.
3406                  */
3407                 oflags = pmap_modify_pv(m, pmap, va,
3408                     PVF_WRITE | PVF_EXEC | PVF_WIRED |
3409                     PVF_MOD | PVF_REF, nflags);
3410
3411                 /*
3412                  * We may need to flush the cache if we're
3413                  * doing rw-ro...
3414                  */
3415                 if (pmap_is_current(pmap) &&
3416                     (oflags & PVF_NC) == 0 &&
3417                             (opte & L2_S_PROT_W) != 0 &&
3418                             (prot & VM_PROT_WRITE) == 0)
3419                         cpu_dcache_wb_range(va, PAGE_SIZE);
3420         } else {
3421                 /*
3422                  * New mapping, or changing the backing page
3423                  * of an existing mapping.
3424                  */
3425                 if (opg) {
3426                         /*
3427                          * Replacing an existing mapping with a new one.
3428                          * It is part of our managed memory so we
3429                          * must remove it from the PV list
3430                          */
3431                         pve = pmap_remove_pv(opg, pmap, va);
3432                         if (m && (m->flags & (PG_UNMANAGED | PG_FICTITIOUS)) &&
3433                             pve)
3434                                 pmap_free_pv_entry(pve);
3435                         else if (!pve &&
3436                             !(m->flags & (PG_UNMANAGED | PG_FICTITIOUS)))
3437                                 pve = pmap_get_pv_entry();
3438                         KASSERT(pve != NULL, ("No pv"));
3439                         oflags = pve->pv_flags;
3440
3441                         /*
3442                          * If the old mapping was valid (ref/mod
3443                          * emulation creates 'invalid' mappings
3444                          * initially) then make sure to frob
3445                          * the cache.
3446                          */
3447                         if ((oflags & PVF_NC) == 0 &&
3448                             l2pte_valid(opte)) {
3449                                 if (PV_BEEN_EXECD(oflags)) {
3450                                         pmap_idcache_wbinv_range(pmap, va,
3451                                             PAGE_SIZE);
3452                                 } else
3453                                         if (PV_BEEN_REFD(oflags)) {
3454                                                 pmap_dcache_wb_range(pmap, va,
3455                                                     PAGE_SIZE, TRUE,
3456                                                     (oflags & PVF_WRITE) == 0);
3457                                         }
3458                         }
3459                 } else if (m && !(m->flags & (PG_UNMANAGED | PG_FICTITIOUS)))
3460                         if ((pve = pmap_get_pv_entry()) == NULL) {
3461                                 panic("pmap_enter: no pv entries");
3462                         }
3463                 if (m && !(m->flags & (PG_UNMANAGED | PG_FICTITIOUS))) {
3464                         KASSERT(va < kmi.clean_sva || va >= kmi.clean_eva,
3465                             ("pmap_enter: managed mapping within the clean submap"));
3466                         pmap_enter_pv(m, pve, pmap, va, nflags);
3467                 }
3468         }
3469         /*
3470          * Make sure userland mappings get the right permissions
3471          */
3472         if (pmap != pmap_kernel() && va != vector_page) {
3473                 npte |= L2_S_PROT_U;
3474         }
3475
3476         /*
3477          * Keep the stats up to date
3478          */
3479         if (opte == 0) {
3480                 l2b->l2b_occupancy++;
3481                 pmap->pm_stats.resident_count++;
3482         }
3483
3484
3485         /*
3486          * If this is just a wiring change, the two PTEs will be
3487          * identical, so there's no need to update the page table.
3488          */
3489         if (npte != opte) {
3490                 boolean_t is_cached = pmap_is_current(pmap);
3491
3492                 *ptep = npte;
3493                 if (is_cached) {
3494                         /*
3495                          * We only need to frob the cache/tlb if this pmap
3496                          * is current
3497                          */
3498                         PTE_SYNC(ptep);
3499                         if (L1_IDX(va) != L1_IDX(vector_page) &&
3500                             l2pte_valid(npte)) {
3501                                 /*
3502                                  * This mapping is likely to be accessed as
3503                                  * soon as we return to userland. Fix up the
3504                                  * L1 entry to avoid taking another
3505                                  * page/domain fault.
3506                                  */
3507                                 pd_entry_t *pl1pd, l1pd;
3508
3509                                 pl1pd = &pmap->pm_l1->l1_kva[L1_IDX(va)];
3510                                 l1pd = l2b->l2b_phys | L1_C_DOM(pmap->pm_domain) |
3511                                     L1_C_PROTO;
3512                                 if (*pl1pd != l1pd) {
3513                                         *pl1pd = l1pd;
3514                                         PTE_SYNC(pl1pd);
3515                                 }
3516                         }
3517                 }
3518
3519                 if (PV_BEEN_EXECD(oflags))
3520                         pmap_tlb_flushID_SE(pmap, va);
3521                 else if (PV_BEEN_REFD(oflags))
3522                         pmap_tlb_flushD_SE(pmap, va);
3523
3524
3525                 if (m)
3526                         pmap_vac_me_harder(m, pmap, va);
3527         }
3528 }
3529
3530 /*
3531  * Maps a sequence of resident pages belonging to the same object.
3532  * The sequence begins with the given page m_start.  This page is
3533  * mapped at the given virtual address start.  Each subsequent page is
3534  * mapped at a virtual address that is offset from start by the same
3535  * amount as the page is offset from m_start within the object.  The
3536  * last page in the sequence is the page with the largest offset from
3537  * m_start that can be mapped at a virtual address less than the given
3538  * virtual address end.  Not every virtual page between start and end
3539  * is mapped; only those for which a resident page exists with the
3540  * corresponding offset from m_start are mapped.
3541  */
3542 void
3543 pmap_enter_object(pmap_t pmap, vm_offset_t start, vm_offset_t end,
3544     vm_page_t m_start, vm_prot_t prot)
3545 {
3546         vm_page_t m;
3547         vm_pindex_t diff, psize;
3548
3549         psize = atop(end - start);
3550         m = m_start;
3551         PMAP_LOCK(pmap);
3552         while (m != NULL && (diff = m->pindex - m_start->pindex) < psize) {
3553                 pmap_enter_locked(pmap, start + ptoa(diff), m, prot &
3554                     (VM_PROT_READ | VM_PROT_EXECUTE), FALSE, M_NOWAIT);
3555                 m = TAILQ_NEXT(m, listq);
3556         }
3557         PMAP_UNLOCK(pmap);
3558 }
3559
3560 /*
3561  * this code makes some *MAJOR* assumptions:
3562  * 1. Current pmap & pmap exists.
3563  * 2. Not wired.
3564  * 3. Read access.
3565  * 4. No page table pages.
3566  * but is *MUCH* faster than pmap_enter...
3567  */
3568
3569 void
3570 pmap_enter_quick(pmap_t pmap, vm_offset_t va, vm_page_t m, vm_prot_t prot)
3571 {
3572
3573         PMAP_LOCK(pmap);
3574         pmap_enter_locked(pmap, va, m, prot & (VM_PROT_READ | VM_PROT_EXECUTE),
3575             FALSE, M_NOWAIT);
3576         PMAP_UNLOCK(pmap);
3577 }
3578
3579 /*
3580  *      Routine:        pmap_change_wiring
3581  *      Function:       Change the wiring attribute for a map/virtual-address
3582  *                      pair.
3583  *      In/out conditions:
3584  *                      The mapping must already exist in the pmap.
3585  */
3586 void
3587 pmap_change_wiring(pmap_t pmap, vm_offset_t va, boolean_t wired)
3588 {
3589         struct l2_bucket *l2b;
3590         pt_entry_t *ptep, pte;
3591         vm_page_t pg;
3592
3593         vm_page_lock_queues();
3594         PMAP_LOCK(pmap);
3595         l2b = pmap_get_l2_bucket(pmap, va);
3596         KASSERT(l2b, ("No l2b bucket in pmap_change_wiring"));
3597         ptep = &l2b->l2b_kva[l2pte_index(va)];
3598         pte = *ptep;
3599         pg = PHYS_TO_VM_PAGE(l2pte_pa(pte));
3600         if (pg)
3601                 pmap_modify_pv(pg, pmap, va, PVF_WIRED, wired);
3602         vm_page_unlock_queues();
3603         PMAP_UNLOCK(pmap);
3604 }
3605
3606
3607 /*
3608  *      Copy the range specified by src_addr/len
3609  *      from the source map to the range dst_addr/len
3610  *      in the destination map.
3611  *
3612  *      This routine is only advisory and need not do anything.
3613  */
3614 void
3615 pmap_copy(pmap_t dst_pmap, pmap_t src_pmap, vm_offset_t dst_addr,
3616     vm_size_t len, vm_offset_t src_addr)
3617 {
3618 }
3619
3620
3621 /*
3622  *      Routine:        pmap_extract
3623  *      Function:
3624  *              Extract the physical page address associated
3625  *              with the given map/virtual_address pair.
3626  */
3627 vm_paddr_t
3628 pmap_extract(pmap_t pm, vm_offset_t va)
3629 {
3630         struct l2_dtable *l2;
3631         pd_entry_t l1pd;
3632         pt_entry_t *ptep, pte;
3633         vm_paddr_t pa;
3634         u_int l1idx;
3635         l1idx = L1_IDX(va);
3636
3637         PMAP_LOCK(pm);
3638         l1pd = pm->pm_l1->l1_kva[l1idx];
3639         if (l1pte_section_p(l1pd)) {
3640                 /*
3641                  * These should only happen for pmap_kernel()
3642                  */
3643                 KASSERT(pm == pmap_kernel(), ("huh"));
3644                 pa = (l1pd & L1_S_FRAME) | (va & L1_S_OFFSET);
3645         } else {
3646                 /*
3647                  * Note that we can't rely on the validity of the L1
3648                  * descriptor as an indication that a mapping exists.
3649                  * We have to look it up in the L2 dtable.
3650                  */
3651                 l2 = pm->pm_l2[L2_IDX(l1idx)];
3652
3653                 if (l2 == NULL ||
3654                     (ptep = l2->l2_bucket[L2_BUCKET(l1idx)].l2b_kva) == NULL) {
3655                         PMAP_UNLOCK(pm);
3656                         return (0);
3657                 }
3658
3659                 ptep = &ptep[l2pte_index(va)];
3660                 pte = *ptep;
3661
3662                 if (pte == 0) {
3663                         PMAP_UNLOCK(pm);
3664                         return (0);
3665                 }
3666
3667                 switch (pte & L2_TYPE_MASK) {
3668                 case L2_TYPE_L:
3669                         pa = (pte & L2_L_FRAME) | (va & L2_L_OFFSET);
3670                         break;
3671
3672                 default:
3673                         pa = (pte & L2_S_FRAME) | (va & L2_S_OFFSET);
3674                         break;
3675                 }
3676         }
3677
3678         PMAP_UNLOCK(pm);
3679         return (pa);
3680 }
3681
3682 /*
3683  * Atomically extract and hold the physical page with the given
3684  * pmap and virtual address pair if that mapping permits the given
3685  * protection.
3686  *
3687  */
3688 vm_page_t
3689 pmap_extract_and_hold(pmap_t pmap, vm_offset_t va, vm_prot_t prot)
3690 {
3691         struct l2_dtable *l2;
3692         pd_entry_t l1pd;
3693         pt_entry_t *ptep, pte;
3694         vm_paddr_t pa;
3695         vm_page_t m = NULL;
3696         u_int l1idx;
3697         l1idx = L1_IDX(va);
3698
3699         vm_page_lock_queues();
3700         PMAP_LOCK(pmap);
3701         l1pd = pmap->pm_l1->l1_kva[l1idx];
3702         if (l1pte_section_p(l1pd)) {
3703                 /*
3704                  * These should only happen for pmap_kernel()
3705                  */
3706                 KASSERT(pmap == pmap_kernel(), ("huh"));
3707                 pa = (l1pd & L1_S_FRAME) | (va & L1_S_OFFSET);
3708                 if (l1pd & L1_S_PROT_W || (prot & VM_PROT_WRITE) == 0) {
3709                         m = PHYS_TO_VM_PAGE(pa);
3710                         vm_page_hold(m);
3711                 }
3712
3713         } else {
3714                 /*
3715                  * Note that we can't rely on the validity of the L1
3716                  * descriptor as an indication that a mapping exists.
3717                  * We have to look it up in the L2 dtable.
3718                  */
3719                 l2 = pmap->pm_l2[L2_IDX(l1idx)];
3720
3721                 if (l2 == NULL ||
3722                     (ptep = l2->l2_bucket[L2_BUCKET(l1idx)].l2b_kva) == NULL) {
3723                         PMAP_UNLOCK(pmap);
3724                         vm_page_unlock_queues();
3725                         return (NULL);
3726                 }
3727
3728                 ptep = &ptep[l2pte_index(va)];
3729                 pte = *ptep;
3730
3731                 if (pte == 0) {
3732                         PMAP_UNLOCK(pmap);
3733                         vm_page_unlock_queues();
3734                         return (NULL);
3735                 }
3736                 if (pte & L2_S_PROT_W || (prot & VM_PROT_WRITE) == 0) {
3737                         switch (pte & L2_TYPE_MASK) {
3738                         case L2_TYPE_L:
3739                                 pa = (pte & L2_L_FRAME) | (va & L2_L_OFFSET);
3740                                 break;
3741
3742                         default:
3743                                 pa = (pte & L2_S_FRAME) | (va & L2_S_OFFSET);
3744                                 break;
3745                         }
3746                         m = PHYS_TO_VM_PAGE(pa);
3747                         vm_page_hold(m);
3748                 }
3749         }
3750
3751         PMAP_UNLOCK(pmap);
3752         vm_page_unlock_queues();
3753         return (m);
3754 }
3755
3756 /*
3757  * Initialize a preallocated and zeroed pmap structure,
3758  * such as one in a vmspace structure.
3759  */
3760
3761 void
3762 pmap_pinit(pmap_t pmap)
3763 {
3764         PDEBUG(1, printf("pmap_pinit: pmap = %08x\n", (uint32_t) pmap));
3765
3766         PMAP_LOCK_INIT(pmap);
3767         pmap_alloc_l1(pmap);
3768         bzero(pmap->pm_l2, sizeof(pmap->pm_l2));
3769
3770         pmap->pm_count = 1;
3771         pmap->pm_active = 0;
3772
3773         TAILQ_INIT(&pmap->pm_pvlist);
3774         bzero(&pmap->pm_stats, sizeof pmap->pm_stats);
3775         pmap->pm_stats.resident_count = 1;
3776         if (vector_page < KERNBASE) {
3777                 pmap_enter(pmap, vector_page, PHYS_TO_VM_PAGE(systempage.pv_pa),
3778                     VM_PROT_READ, 1);
3779         }
3780 }
3781
3782
3783 /***************************************************
3784  * page management routines.
3785  ***************************************************/
3786
3787
3788 static void
3789 pmap_free_pv_entry(pv_entry_t pv)
3790 {
3791         pv_entry_count--;
3792         uma_zfree(pvzone, pv);
3793 }
3794
3795
3796 /*
3797  * get a new pv_entry, allocating a block from the system
3798  * when needed.
3799  * the memory allocation is performed bypassing the malloc code
3800  * because of the possibility of allocations at interrupt time.
3801  */
3802 static pv_entry_t
3803 pmap_get_pv_entry(void)
3804 {
3805         pv_entry_t ret_value;
3806
3807         pv_entry_count++;
3808         if (pv_entry_count > pv_entry_high_water)
3809                 pagedaemon_wakeup();
3810         ret_value = uma_zalloc(pvzone, M_NOWAIT);
3811         return ret_value;
3812 }
3813
3814
3815 /*
3816  *      Remove the given range of addresses from the specified map.
3817  *
3818  *      It is assumed that the start and end are properly
3819  *      rounded to the page size.
3820  */
3821 #define  PMAP_REMOVE_CLEAN_LIST_SIZE     3
3822 void
3823 pmap_remove(pmap_t pm, vm_offset_t sva, vm_offset_t eva)
3824 {
3825         struct l2_bucket *l2b;
3826         vm_offset_t next_bucket;
3827         pt_entry_t *ptep;
3828         u_int cleanlist_idx, total, cnt;
3829         struct {
3830                 vm_offset_t va;
3831                 pt_entry_t *pte;
3832         } cleanlist[PMAP_REMOVE_CLEAN_LIST_SIZE];
3833         u_int mappings, is_exec, is_refd;
3834         int flushall = 0;
3835
3836
3837         /*
3838          * we lock in the pmap => pv_head direction
3839          */
3840
3841         vm_page_lock_queues();
3842         PMAP_LOCK(pm);
3843         if (!pmap_is_current(pm)) {
3844                 cleanlist_idx = PMAP_REMOVE_CLEAN_LIST_SIZE + 1;
3845         } else
3846                 cleanlist_idx = 0;
3847
3848         total = 0;
3849         while (sva < eva) {
3850                 /*
3851                  * Do one L2 bucket's worth at a time.
3852                  */
3853                 next_bucket = L2_NEXT_BUCKET(sva);
3854                 if (next_bucket > eva)
3855                         next_bucket = eva;
3856
3857                 l2b = pmap_get_l2_bucket(pm, sva);
3858                 if (l2b == NULL) {
3859                         sva = next_bucket;
3860                         continue;
3861                 }
3862
3863                 ptep = &l2b->l2b_kva[l2pte_index(sva)];
3864                 mappings = 0;
3865
3866                 while (sva < next_bucket) {
3867                         struct vm_page *pg;
3868                         pt_entry_t pte;
3869                         vm_paddr_t pa;
3870
3871                         pte = *ptep;
3872
3873                         if (pte == 0) {
3874                                 /*
3875                                  * Nothing here, move along
3876                                  */
3877                                 sva += PAGE_SIZE;
3878                                 ptep++;
3879                                 continue;
3880                         }
3881
3882                         pm->pm_stats.resident_count--;
3883                         pa = l2pte_pa(pte);
3884                         is_exec = 0;
3885                         is_refd = 1;
3886
3887                         /*
3888                          * Update flags. In a number of circumstances,
3889                          * we could cluster a lot of these and do a
3890                          * number of sequential pages in one go.
3891                          */
3892                         if ((pg = PHYS_TO_VM_PAGE(pa)) != NULL) {
3893                                 struct pv_entry *pve;
3894
3895                                 pve = pmap_remove_pv(pg, pm, sva);
3896                                 if (pve) {
3897                                         is_exec = PV_BEEN_EXECD(pve->pv_flags);
3898                                         is_refd = PV_BEEN_REFD(pve->pv_flags);
3899                                         pmap_free_pv_entry(pve);
3900                                 }
3901                         }
3902
3903                         if (!l2pte_valid(pte)) {
3904                                 *ptep = 0;
3905                                 PTE_SYNC_CURRENT(pm, ptep);
3906                                 sva += PAGE_SIZE;
3907                                 ptep++;
3908                                 mappings++;
3909                                 continue;
3910                         }
3911
3912                         if (cleanlist_idx < PMAP_REMOVE_CLEAN_LIST_SIZE) {
3913                                 /* Add to the clean list. */
3914                                 cleanlist[cleanlist_idx].pte = ptep;
3915                                 cleanlist[cleanlist_idx].va =
3916                                     sva | (is_exec & 1);
3917                                 cleanlist_idx++;
3918                         } else
3919                         if (cleanlist_idx == PMAP_REMOVE_CLEAN_LIST_SIZE) {
3920                                 /* Nuke everything if needed. */
3921                                 pmap_idcache_wbinv_all(pm);
3922                                 pmap_tlb_flushID(pm);
3923
3924                                 /*
3925                                  * Roll back the previous PTE list,
3926                                  * and zero out the current PTE.
3927                                  */
3928                                 for (cnt = 0;
3929                                      cnt < PMAP_REMOVE_CLEAN_LIST_SIZE; cnt++) {
3930                                         *cleanlist[cnt].pte = 0;
3931                                 }
3932                                 *ptep = 0;
3933                                 PTE_SYNC(ptep);
3934                                 cleanlist_idx++;
3935                                 flushall = 1;
3936                         } else {
3937                                 *ptep = 0;
3938                                 PTE_SYNC(ptep);
3939                                         if (is_exec)
3940                                                 pmap_tlb_flushID_SE(pm, sva);
3941                                         else
3942                                         if (is_refd)
3943                                                 pmap_tlb_flushD_SE(pm, sva);
3944                         }
3945
3946                         sva += PAGE_SIZE;
3947                         ptep++;
3948                         mappings++;
3949                 }
3950
3951                 /*
3952                  * Deal with any left overs
3953                  */
3954                 if (cleanlist_idx <= PMAP_REMOVE_CLEAN_LIST_SIZE) {
3955                         total += cleanlist_idx;
3956                         for (cnt = 0; cnt < cleanlist_idx; cnt++) {
3957                                 vm_offset_t clva =
3958                                     cleanlist[cnt].va & ~1;
3959                                 if (cleanlist[cnt].va & 1) {
3960                                         pmap_idcache_wbinv_range(pm,
3961                                             clva, PAGE_SIZE);
3962                                         pmap_tlb_flushID_SE(pm, clva);
3963                                 } else {
3964                                         pmap_dcache_wb_range(pm,
3965                                             clva, PAGE_SIZE, TRUE,
3966                                             FALSE);
3967                                         pmap_tlb_flushD_SE(pm, clva);
3968                                 }
3969                                 *cleanlist[cnt].pte = 0;
3970                                 PTE_SYNC_CURRENT(pm, cleanlist[cnt].pte);
3971                         }
3972
3973                         if (total <= PMAP_REMOVE_CLEAN_LIST_SIZE)
3974                                 cleanlist_idx = 0;
3975                         else {
3976                                 /*
3977                                  * We are removing so much entries it's just
3978                                  * easier to flush the whole cache.
3979                                  */
3980                                 cleanlist_idx = PMAP_REMOVE_CLEAN_LIST_SIZE + 1;
3981                                 pmap_idcache_wbinv_all(pm);
3982                                 flushall = 1;
3983                         }
3984                 }
3985
3986                 pmap_free_l2_bucket(pm, l2b, mappings);
3987         }
3988
3989         vm_page_unlock_queues();
3990         if (flushall)
3991                 cpu_tlb_flushID();
3992         PMAP_UNLOCK(pm);
3993 }
3994
3995
3996
3997
/*
 * pmap_zero_page()
 *
 * Zero "size" bytes at offset "off" of a physical page by mapping the
 * page at a page hook point.
 * The page is mapped cachable while it is zeroed because, on StrongARM,
 * accesses to non-cached pages are non-burst, which makes writing
 * _any_ bulk data very slow.
 */
#if (ARM_MMU_GENERIC + ARM_MMU_SA1) != 0
void
pmap_zero_page_generic(vm_paddr_t phys, int off, int size)
{
#ifdef ARM_USE_SMALL_ALLOC
	char *dstpg;
#endif

#ifdef DEBUG
	struct vm_page *pg = PHYS_TO_VM_PAGE(phys);

	if (pg->md.pvh_list != NULL)
		panic("pmap_zero_page: page has mappings");
#endif

	/* Use the _arm_bzero hook when it is set and reports success. */
	if (_arm_bzero &&
	    _arm_bzero((void *)(phys + off), size, IS_PHYSICAL) == 0)
		return;

#ifdef ARM_USE_SMALL_ALLOC
	dstpg = (char *)arm_ptovirt(phys);
	if (off == 0 && size == PAGE_SIZE) {
		/* Whole page. */
		bzero_page((vm_offset_t)dstpg);
		cpu_dcache_wbinv_range((vm_offset_t)dstpg, PAGE_SIZE);
	} else {
		/* Partial page. */
		bzero(dstpg + off, size);
		cpu_dcache_wbinv_range((vm_offset_t)(dstpg + off), size);
	}
#else

	mtx_lock(&cmtx);
	/*
	 * Hook in the page, zero it, and purge the cache for that
	 * zeroed page. Invalidate the TLB as needed.
	 */
	*cdst_pte = L2_S_PROTO | phys |
	    L2_S_PROT(PTE_KERNEL, VM_PROT_WRITE) | pte_l2_s_cache_mode;
	PTE_SYNC(cdst_pte);
	cpu_tlb_flushD_SE(cdstp);
	cpu_cpwait();
	if (off == 0 && size == PAGE_SIZE) {
		/* Whole page. */
		bzero_page(cdstp);
		cpu_dcache_wbinv_range(cdstp, PAGE_SIZE);
	} else {
		/* Partial page. */
		bzero((void *)(cdstp + off), size);
		cpu_dcache_wbinv_range(cdstp + off, size);
	}
	mtx_unlock(&cmtx);
#endif
}
#endif /* (ARM_MMU_GENERIC + ARM_MMU_SA1) != 0 */
4057
4058 #if ARM_MMU_XSCALE == 1
4059 void
4060 pmap_zero_page_xscale(vm_paddr_t phys, int off, int size)
4061 {
4062         if (_arm_bzero &&
4063             _arm_bzero((void *)(phys + off), size, IS_PHYSICAL) == 0)
4064                 return;
4065         mtx_lock(&cmtx);
4066         /*
4067          * Hook in the page, zero it, and purge the cache for that
4068          * zeroed page. Invalidate the TLB as needed.
4069          */
4070         *cdst_pte = L2_S_PROTO | phys |
4071             L2_S_PROT(PTE_KERNEL, VM_PROT_WRITE) |
4072             L2_C | L2_XSCALE_T_TEX(TEX_XSCALE_X);       /* mini-data */
4073         PTE_SYNC(cdst_pte);
4074         cpu_tlb_flushD_SE(cdstp);
4075         cpu_cpwait();
4076         if (off || size != PAGE_SIZE)
4077                 bzero((void *)(cdstp + off), size);
4078         else
4079                 bzero_page(cdstp);
4080         mtx_unlock(&cmtx);
4081         xscale_cache_clean_minidata();
4082 }
4083
4084 /*
4085  * Change the PTEs for the specified kernel mappings such that they
4086  * will use the mini data cache instead of the main data cache.
4087  */
4088 void
4089 pmap_use_minicache(vm_offset_t va, vm_size_t size)
4090 {
4091         struct l2_bucket *l2b;
4092         pt_entry_t *ptep, *sptep, pte;
4093         vm_offset_t next_bucket, eva;
4094
4095 #if (ARM_NMMUS > 1)
4096         if (xscale_use_minidata == 0)
4097                 return;
4098 #endif
4099
4100         eva = va + size;
4101
4102         while (va < eva) {
4103                 next_bucket = L2_NEXT_BUCKET(va);
4104                 if (next_bucket > eva)
4105                         next_bucket = eva;
4106
4107                 l2b = pmap_get_l2_bucket(pmap_kernel(), va);
4108
4109                 sptep = ptep = &l2b->l2b_kva[l2pte_index(va)];
4110
4111                 while (va < next_bucket) {
4112                         pte = *ptep;
4113                         if (!l2pte_minidata(pte)) {
4114                                 cpu_dcache_wbinv_range(va, PAGE_SIZE);
4115                                 cpu_tlb_flushD_SE(va);
4116                                 *ptep = pte & ~L2_B;
4117                         }
4118                         ptep++;
4119                         va += PAGE_SIZE;
4120                 }
4121                 PTE_SYNC_RANGE(sptep, (u_int)(ptep - sptep));
4122         }
4123         cpu_cpwait();
4124 }
4125 #endif /* ARM_MMU_XSCALE == 1 */
4126
4127 /*
4128  *      pmap_zero_page zeros the specified hardware page by mapping
4129  *      the page into KVM and using bzero to clear its contents.
4130  */
4131 void
4132 pmap_zero_page(vm_page_t m)
4133 {
4134         pmap_zero_page_func(VM_PAGE_TO_PHYS(m), 0, PAGE_SIZE);
4135 }
4136
4137
4138 /*
4139  *      pmap_zero_page_area zeros the specified hardware page by mapping
4140  *      the page into KVM and using bzero to clear its contents.
4141  *
4142  *      off and size may not cover an area beyond a single hardware page.
4143  */
4144 void
4145 pmap_zero_page_area(vm_page_t m, int off, int size)
4146 {
4147
4148         pmap_zero_page_func(VM_PAGE_TO_PHYS(m), off, size);
4149 }
4150
4151
4152 /*
4153  *      pmap_zero_page_idle zeros the specified hardware page by mapping
4154  *      the page into KVM and using bzero to clear its contents.  This
4155  *      is intended to be called from the vm_pagezero process only and
4156  *      outside of Giant.
4157  */
4158 void
4159 pmap_zero_page_idle(vm_page_t m)
4160 {
4161
4162         pmap_zero_page(m);
4163 }
4164
#if 0
/*
 * pmap_clean_page()
 *
 * Local helper (currently compiled out) that decides how much of the cache
 * has to be cleaned for a page, given the head of its PV entry chain.
 *
 * Only mappings that belong to the kernel pmap or to the current pmap are
 * considered.  The policy is:
 *  o No PV entry at all: nothing is done.
 *  o A non-cacheable (PVF_NC) mapping is met: stop scanning.
 *  o When is_src is true, read-only mappings are ignored.
 *  o Exactly one relevant mapping: clean just that page.
 *  o More than one relevant mapping: clean the entire cache.
 *
 * Returns 1 if the entire cache was cleaned, 0 otherwise.
 *
 * XXX A page_to_clean value of 0 doubles as "no single page selected", so
 * a lone mapping at virtual address 0x00000000 results in a whole cache
 * clean instead of a one page clean.  That is only less efficient, not
 * incorrect.
 */
static int
pmap_clean_page(struct pv_entry *pv, boolean_t is_src)
{
	pmap_t pm, pm_to_clean = NULL;
	struct pv_entry *npv;
	u_int cache_needs_cleaning = 0;
	u_int flags = 0;
	vm_offset_t page_to_clean = 0;

	/* Nothing mapped in, so nothing to flush. */
	if (pv == NULL)
		return (0);

	/*
	 * Since we flush the cache each time we change to a different
	 * user vmspace, we only need to flush the page if it is in the
	 * current pmap.
	 */
	pm = curthread ? vmspace_pmap(curproc->p_vmspace) : pmap_kernel();

	for (npv = pv; npv != NULL; npv = TAILQ_NEXT(npv, pv_list)) {
		/* Skip mappings owned by any other pmap. */
		if (npv->pv_pmap != pmap_kernel() && npv->pv_pmap != pm)
			continue;

		flags |= npv->pv_flags;

		/*
		 * The page is mapped non-cacheable in this map.
		 * No need to flush the cache.
		 */
		if ((npv->pv_flags & PVF_NC) != 0) {
#ifdef DIAGNOSTIC
			if (cache_needs_cleaning)
				panic("pmap_clean_page: "
				    "cache inconsistency");
#endif
			break;
		}

		/* A read-only mapping of a source page is ignored. */
		if (is_src && (npv->pv_flags & PVF_WRITE) == 0)
			continue;

		/* Second relevant mapping: give up on the single page. */
		if (cache_needs_cleaning) {
			page_to_clean = 0;
			break;
		}

		/* First relevant mapping: remember it. */
		page_to_clean = npv->pv_va;
		pm_to_clean = npv->pv_pmap;
		cache_needs_cleaning = 1;
	}

	if (page_to_clean != 0) {
		/* Exactly one mapping was selected; clean only that page. */
		if (PV_BEEN_EXECD(flags))
			pmap_idcache_wbinv_range(pm_to_clean, page_to_clean,
			    PAGE_SIZE);
		else
			pmap_dcache_wb_range(pm_to_clean, page_to_clean,
			    PAGE_SIZE, !is_src, (flags & PVF_WRITE) == 0);
		return (0);
	}

	if (!cache_needs_cleaning)
		return (0);

	/* No single page could be picked; clean everything. */
	if (PV_BEEN_EXECD(flags))
		pmap_idcache_wbinv_all(pm);
	else
		pmap_dcache_wbinv_all(pm);
	return (1);
}
#endif
4253
4254 /*
4255  *      pmap_copy_page copies the specified (machine independent)
4256  *      page by mapping the page into virtual memory and using
4257  *      bcopy to copy the page, one machine dependent page at a
4258  *      time.
4259  */
4260
/*
 * pmap_copy_page_generic()
 *
 * Copy the physical page "src" to the physical page "dst".  While cmtx
 * is held, the two pages are mapped at the csrcp/cdstp hook addresses
 * through csrc_pte/cdst_pte (read-only for the source, writable for
 * the destination, both with pte_l2_s_cache_mode) and copied with
 * bcopy_page().  Afterwards the source range is invalidated in the
 * data cache and the destination range is written back and invalidated.
 */
#if  (ARM_MMU_GENERIC + ARM_MMU_SA1) != 0
void
pmap_copy_page_generic(vm_paddr_t src, vm_paddr_t dst)
{
#if 0
	struct vm_page *src_pg = PHYS_TO_VM_PAGE(src);
#endif
#ifdef DEBUG
	struct vm_page *dst_pg = PHYS_TO_VM_PAGE(dst);

	/*
	 * The mappings of a page are kept on the md.pv_list tail queue,
	 * as used by the other PV list walkers in this file.
	 */
	if (!TAILQ_EMPTY(&dst_pg->md.pv_list))
		panic("pmap_copy_page: dst page has mappings");
#endif

#if 0
	/*
	 * Clean the source page.
	 *
	 * XXX: Not needed while we call cpu_dcache_wbinv_all() in
	 * pmap_copy_page().
	 */
	(void) pmap_clean_page(TAILQ_FIRST(&src_pg->md.pv_list), TRUE);
#endif
	/*
	 * Map the pages into the page hook points, copy them, and purge
	 * the cache for the appropriate page. Invalidate the TLB
	 * as required.
	 */
	mtx_lock(&cmtx);
	*csrc_pte = L2_S_PROTO | src |
	    L2_S_PROT(PTE_KERNEL, VM_PROT_READ) | pte_l2_s_cache_mode;
	PTE_SYNC(csrc_pte);
	*cdst_pte = L2_S_PROTO | dst |
	    L2_S_PROT(PTE_KERNEL, VM_PROT_WRITE) | pte_l2_s_cache_mode;
	PTE_SYNC(cdst_pte);
	/* Drop any stale TLB entries for the two hook addresses. */
	cpu_tlb_flushD_SE(csrcp);
	cpu_tlb_flushD_SE(cdstp);
	cpu_cpwait();
	bcopy_page(csrcp, cdstp);
	mtx_unlock(&cmtx);
	/*
	 * NOTE(review): the cache maintenance below runs after cmtx has
	 * been released; confirm that another user of the hook mappings
	 * cannot interfere at this point.
	 */
	cpu_dcache_inv_range(csrcp, PAGE_SIZE);
	cpu_dcache_wbinv_range(cdstp, PAGE_SIZE);
}
#endif /* (ARM_MMU_GENERIC + ARM_MMU_SA1) != 0 */
4316
#if ARM_MMU_XSCALE == 1
/*
 * pmap_copy_page_xscale()
 *
 * XScale flavour of the page copy.  While cmtx is held, "src" and "dst"
 * are mapped at the csrcp/cdstp hook addresses through
 * csrc_pte/cdst_pte (read-only for the source, writable for the
 * destination) using the mini-data cache attributes, and copied with
 * bcopy_page().  The mini-data cache is cleaned afterwards.
 */
void
pmap_copy_page_xscale(vm_paddr_t src, vm_paddr_t dst)
{
#if 0
	/* XXX: Only needed for pmap_clean_page(), which is commented out. */
	struct vm_page *src_pg = PHYS_TO_VM_PAGE(src);
#endif
#ifdef DEBUG
	struct vm_page *dst_pg = PHYS_TO_VM_PAGE(dst);

	/*
	 * The mappings of a page are kept on the md.pv_list tail queue,
	 * as used by the other PV list walkers in this file.
	 */
	if (!TAILQ_EMPTY(&dst_pg->md.pv_list))
		panic("pmap_copy_page: dst page has mappings");
#endif

#if 0
	/*
	 * Clean the source page.
	 *
	 * XXX: Not needed while we call cpu_dcache_wbinv_all() in
	 * pmap_copy_page().
	 */
	(void) pmap_clean_page(TAILQ_FIRST(&src_pg->md.pv_list), TRUE);
#endif
	/*
	 * Map the pages into the page hook points, copy them, and purge
	 * the cache for the appropriate page. Invalidate the TLB
	 * as required.
	 */
	mtx_lock(&cmtx);
	*csrc_pte = L2_S_PROTO | src |
	    L2_S_PROT(PTE_KERNEL, VM_PROT_READ) |
	    L2_C | L2_XSCALE_T_TEX(TEX_XSCALE_X);	/* mini-data */
	PTE_SYNC(csrc_pte);
	*cdst_pte = L2_S_PROTO | dst |
	    L2_S_PROT(PTE_KERNEL, VM_PROT_WRITE) |
	    L2_C | L2_XSCALE_T_TEX(TEX_XSCALE_X);	/* mini-data */
	PTE_SYNC(cdst_pte);
	/* Drop any stale TLB entries for the two hook addresses. */
	cpu_tlb_flushD_SE(csrcp);
	cpu_tlb_flushD_SE(cdstp);
	cpu_cpwait();
	bcopy_page(csrcp, cdstp);
	mtx_unlock(&cmtx);
	xscale_cache_clean_minidata();
}
#endif /* ARM_MMU_XSCALE == 1 */
4367
4368 void
4369 pmap_copy_page(vm_page_t src, vm_page_t dst)
4370 {
4371 #ifdef ARM_USE_SMALL_ALLOC
4372         vm_offset_t srcpg, dstpg;
4373 #endif
4374
4375         cpu_dcache_wbinv_all();
4376         if (_arm_memcpy &&
4377             _arm_memcpy((void *)VM_PAGE_TO_PHYS(dst),
4378             (void *)VM_PAGE_TO_PHYS(src), PAGE_SIZE, IS_PHYSICAL) == 0)
4379                 return;
4380 #ifdef ARM_USE_SMALL_ALLOC
4381         srcpg = arm_ptovirt(VM_PAGE_TO_PHYS(src));
4382         dstpg = arm_ptovirt(VM_PAGE_TO_PHYS(dst));
4383         bcopy_page(srcpg, dstpg);
4384         cpu_dcache_wbinv_range(dstpg, PAGE_SIZE);
4385 #else
4386         pmap_copy_page_func(VM_PAGE_TO_PHYS(src), VM_PAGE_TO_PHYS(dst));
4387 #endif
4388 }
4389
4390
4391
4392
4393 /*
4394  * this routine returns true if a physical page resides
4395  * in the given pmap.
4396  */
4397 boolean_t
4398 pmap_page_exists_quick(pmap_t pmap, vm_page_t m)
4399 {
4400         pv_entry_t pv;
4401         int loops = 0;
4402
4403         if (m->flags & PG_FICTITIOUS)
4404                 return (FALSE);
4405
4406         /*
4407          * Not found, check current mappings returning immediately
4408          */
4409         for (pv = TAILQ_FIRST(&m->md.pv_list);
4410             pv;
4411             pv = TAILQ_NEXT(pv, pv_list)) {
4412                 if (pv->pv_pmap == pmap) {
4413                         return (TRUE);
4414                 }
4415                 loops++;
4416                 if (loops >= 16)
4417                         break;
4418         }
4419         return (FALSE);
4420 }
4421
4422
4423 /*
4424  *      pmap_ts_referenced:
4425  *
4426  *      Return the count of reference bits for a page, clearing all of them.
4427  */
4428 int
4429 pmap_ts_referenced(vm_page_t m)
4430 {
4431         return (pmap_clearbit(m, PVF_REF));
4432 }
4433
4434
4435 boolean_t
4436 pmap_is_modified(vm_page_t m)
4437 {
4438
4439         if (m->md.pvh_attrs & PVF_MOD)
4440                 return (TRUE);
4441
4442         return(FALSE);
4443 }
4444
4445
4446 /*
4447  *      Clear the modify bits on the specified physical page.
4448  */
4449 void
4450 pmap_clear_modify(vm_page_t m)
4451 {
4452
4453         if (m->md.pvh_attrs & PVF_MOD)
4454                 pmap_clearbit(m, PVF_MOD);
4455 }
4456
4457
4458 /*
4459  *      pmap_clear_reference:
4460  *
4461  *      Clear the reference bit on the specified physical page.
4462  */
4463 void
4464 pmap_clear_reference(vm_page_t m)
4465 {
4466
4467         if (m->md.pvh_attrs & PVF_REF)
4468                 pmap_clearbit(m, PVF_REF);
4469 }
4470
4471
4472 /*
4473  * Clear the write and modified bits in each of the given page's mappings.
4474  */
4475 void
4476 pmap_remove_write(vm_page_t m)
4477 {
4478
4479         if (m->flags & PG_WRITEABLE)
4480                 pmap_clearbit(m, PVF_WRITE);
4481 }
4482
4483
4484 /*
4485  * perform the pmap work for mincore
4486  */
4487 int
4488 pmap_mincore(pmap_t pmap, vm_offset_t addr)
4489 {
4490         printf("pmap_mincore()\n");
4491
4492         return (0);
4493 }
4494
4495
4496 vm_offset_t
4497 pmap_addr_hint(vm_object_t obj, vm_offset_t addr, vm_size_t size)
4498 {
4499
4500         return(addr);
4501 }
4502
4503
4504 /*
4505  * Map a set of physical memory pages into the kernel virtual
4506  * address space. Return a pointer to where it is mapped. This
4507  * routine is intended to be used for mapping device memory,
4508  * NOT real memory.
4509  */
4510 void *
4511 pmap_mapdev(vm_offset_t pa, vm_size_t size)
4512 {
4513         vm_offset_t va, tmpva, offset;
4514
4515         offset = pa & PAGE_MASK;
4516         size = roundup(size, PAGE_SIZE);
4517
4518         GIANT_REQUIRED;
4519
4520         va = kmem_alloc_nofault(kernel_map, size);
4521         if (!va)
4522                 panic("pmap_mapdev: Couldn't alloc kernel virtual memory");
4523         for (tmpva = va; size > 0;) {
4524                 pmap_kenter_internal(tmpva, pa, 0);
4525                 size -= PAGE_SIZE;
4526                 tmpva += PAGE_SIZE;
4527                 pa += PAGE_SIZE;
4528         }
4529
4530         return ((void *)(va + offset));
4531 }
4532
4533 #define BOOTSTRAP_DEBUG
4534
4535 /*
4536  * pmap_map_section:
4537  *
4538  *      Create a single section mapping.
4539  */
4540 void
4541 pmap_map_section(vm_offset_t l1pt, vm_offset_t va, vm_offset_t pa,
4542     int prot, int cache)
4543 {
4544         pd_entry_t *pde = (pd_entry_t *) l1pt;
4545         pd_entry_t fl;
4546
4547         KASSERT(((va | pa) & L1_S_OFFSET) == 0, ("ouin2"));
4548
4549         switch (cache) {
4550         case PTE_NOCACHE:
4551         default:
4552                 fl = 0;
4553                 break;
4554
4555         case PTE_CACHE:
4556                 fl = pte_l1_s_cache_mode;
4557                 break;
4558
4559         case PTE_PAGETABLE:
4560                 fl = pte_l1_s_cache_mode_pt;
4561                 break;
4562         }
4563
4564         pde[va >> L1_S_SHIFT] = L1_S_PROTO | pa |
4565             L1_S_PROT(PTE_KERNEL, prot) | fl | L1_S_DOM(PMAP_DOMAIN_KERNEL);
4566         PTE_SYNC(&pde[va >> L1_S_SHIFT]);
4567
4568 }
4569
4570 /*
4571  * pmap_link_l2pt:
4572  *
4573  *      Link the L2 page table specified by l2pv.pv_pa into the L1
4574  *      page table at the slot for "va".
4575  */
4576 void
4577 pmap_link_l2pt(vm_offset_t l1pt, vm_offset_t va, struct pv_addr *l2pv)
4578 {
4579         pd_entry_t *pde = (pd_entry_t *) l1pt, proto;
4580         u_int slot = va >> L1_S_SHIFT;
4581
4582         proto = L1_S_DOM(PMAP_DOMAIN_KERNEL) | L1_C_PROTO;
4583
4584 #ifdef VERBOSE_INIT_ARM
4585         printf("pmap_link_l2pt: pa=0x%x va=0x%x\n", l2pv->pv_pa, l2pv->pv_va);
4586 #endif
4587
4588         pde[slot + 0] = proto | (l2pv->pv_pa + 0x000);
4589
4590         PTE_SYNC(&pde[slot]);
4591
4592         SLIST_INSERT_HEAD(&kernel_pt_list, l2pv, pv_list);
4593
4594
4595 }
4596
4597 /*
4598  * pmap_map_entry
4599  *
4600  *      Create a single page mapping.
4601  */
4602 void
4603 pmap_map_entry(vm_offset_t l1pt, vm_offset_t va, vm_offset_t pa, int prot,
4604     int cache)
4605 {
4606         pd_entry_t *pde = (pd_entry_t *) l1pt;
4607         pt_entry_t fl;
4608         pt_entry_t *pte;
4609
4610         KASSERT(((va | pa) & PAGE_MASK) == 0, ("ouin"));
4611
4612         switch (cache) {
4613         case PTE_NOCACHE:
4614         default:
4615                 fl = 0;
4616                 break;
4617
4618         case PTE_CACHE:
4619                 fl = pte_l2_s_cache_mode;
4620                 break;
4621
4622         case PTE_PAGETABLE:
4623                 fl = pte_l2_s_cache_mode_pt;
4624                 break;
4625         }
4626
4627         if ((pde[va >> L1_S_SHIFT] & L1_TYPE_MASK) != L1_TYPE_C)
4628                 panic("pmap_map_entry: no L2 table for VA 0x%08x", va);
4629
4630         pte = (pt_entry_t *) kernel_pt_lookup(pde[L1_IDX(va)] & L1_C_ADDR_MASK);
4631
4632         if (pte == NULL)
4633                 panic("pmap_map_entry: can't find L2 table for VA 0x%08x", va);
4634
4635         pte[l2pte_index(va)] =
4636             L2_S_PROTO | pa | L2_S_PROT(PTE_KERNEL, prot) | fl;
4637         PTE_SYNC(&pte[l2pte_index(va)]);
4638 }
4639
4640 /*
4641  * pmap_map_chunk:
4642  *
4643  *      Map a chunk of memory using the most efficient mappings
4644  *      possible (section. large page, small page) into the
4645  *      provided L1 and L2 tables at the specified virtual address.
4646  */
4647 vm_size_t
4648 pmap_map_chunk(vm_offset_t l1pt, vm_offset_t va, vm_offset_t pa,
4649     vm_size_t size, int prot, int cache)
4650 {
4651         pd_entry_t *pde = (pd_entry_t *) l1pt;
4652         pt_entry_t *pte, f1, f2s, f2l;
4653         vm_size_t resid;
4654         int i;
4655
4656         resid = (size + (PAGE_SIZE - 1)) & ~(PAGE_SIZE - 1);
4657
4658         if (l1pt == 0)
4659                 panic("pmap_map_chunk: no L1 table provided");
4660
4661 #ifdef VERBOSE_INIT_ARM
4662         printf("pmap_map_chunk: pa=0x%x va=0x%x size=0x%x resid=0x%x "
4663             "prot=0x%x cache=%d\n", pa, va, size, resid, prot, cache);
4664 #endif
4665
4666         switch (cache) {
4667         case PTE_NOCACHE:
4668         default:
4669                 f1 = 0;
4670                 f2l = 0;
4671                 f2s = 0;
4672                 break;
4673
4674         case PTE_CACHE:
4675                 f1 = pte_l1_s_cache_mode;
4676                 f2l = pte_l2_l_cache_mode;
4677                 f2s = pte_l2_s_cache_mode;
4678                 break;
4679
4680         case PTE_PAGETABLE:
4681                 f1 = pte_l1_s_cache_mode_pt;
4682                 f2l = pte_l2_l_cache_mode_pt;
4683                 f2s = pte_l2_s_cache_mode_pt;
4684                 break;
4685         }
4686
4687         size = resid;
4688
4689         while (resid > 0) {
4690                 /* See if we can use a section mapping. */
4691                 if (L1_S_MAPPABLE_P(va, pa, resid)) {
4692 #ifdef VERBOSE_INIT_ARM
4693                         printf("S");
4694 #endif
4695                         pde[va >> L1_S_SHIFT] = L1_S_PROTO | pa |
4696                             L1_S_PROT(PTE_KERNEL, prot) | f1 |
4697                             L1_S_DOM(PMAP_DOMAIN_KERNEL);
4698                         PTE_SYNC(&pde[va >> L1_S_SHIFT]);
4699                         va += L1_S_SIZE;
4700                         pa += L1_S_SIZE;
4701                         resid -= L1_S_SIZE;
4702                         continue;
4703                 }
4704
4705                 /*
4706                  * Ok, we're going to use an L2 table.  Make sure
4707                  * one is actually in the corresponding L1 slot
4708                  * for the current VA.
4709                  */
4710                 if ((pde[va >> L1_S_SHIFT] & L1_TYPE_MASK) != L1_TYPE_C)
4711                         panic("pmap_map_chunk: no L2 table for VA 0x%08x", va);
4712
4713                 pte = (pt_entry_t *) kernel_pt_lookup(
4714                     pde[L1_IDX(va)] & L1_C_ADDR_MASK);
4715                 if (pte == NULL)
4716                         panic("pmap_map_chunk: can't find L2 table for VA"
4717                             "0x%08x", va);
4718                 /* See if we can use a L2 large page mapping. */
4719                 if (L2_L_MAPPABLE_P(va, pa, resid)) {
4720 #ifdef VERBOSE_INIT_ARM
4721                         printf("L");
4722 #endif
4723                         for (i = 0; i < 16; i++) {
4724                                 pte[l2pte_index(va) + i] =
4725                                     L2_L_PROTO | pa |
4726                                     L2_L_PROT(PTE_KERNEL, prot) | f2l;
4727                                 PTE_SYNC(&pte[l2pte_index(va) + i]);
4728                         }
4729                         va += L2_L_SIZE;
4730                         pa += L2_L_SIZE;
4731                         resid -= L2_L_SIZE;
4732                         continue;
4733                 }
4734
4735                 /* Use a small page mapping. */
4736 #ifdef VERBOSE_INIT_ARM
4737                 printf("P");
4738 #endif
4739                 pte[l2pte_index(va)] =
4740                     L2_S_PROTO | pa | L2_S_PROT(PTE_KERNEL, prot) | f2s;
4741                 PTE_SYNC(&pte[l2pte_index(va)]);
4742                 va += PAGE_SIZE;
4743                 pa += PAGE_SIZE;
4744                 resid -= PAGE_SIZE;
4745         }
4746 #ifdef VERBOSE_INIT_ARM
4747         printf("\n");
4748 #endif
4749         return (size);
4750
4751 }
4752
4753 /********************** Static device map routines ***************************/
4754
/* Devmap table currently in use; NULL until one has been registered. */
static const struct pmap_devmap *pmap_devmap_table;

/*
 * pmap_devmap_register:
 *
 *	Remember "table" as the devmap table without mapping anything.
 *	This is provided in case early console initialization needs to
 *	register mappings created by bootstrap code before
 *	pmap_devmap_bootstrap() is called.
 */
void
pmap_devmap_register(const struct pmap_devmap *table)
{

	/* Only the pointer is recorded; the table itself is not copied. */
	pmap_devmap_table = table;
}
4768
4769 /*
4770  * Map all of the static regions in the devmap table, and remember
4771  * the devmap table so other parts of the kernel can look up entries
4772  * later.
4773  */
4774 void
4775 pmap_devmap_bootstrap(vm_offset_t l1pt, const struct pmap_devmap *table)
4776 {
4777         int i;
4778
4779         pmap_devmap_table = table;
4780
4781         for (i = 0; pmap_devmap_table[i].pd_size != 0; i++) {
4782 #ifdef VERBOSE_INIT_ARM
4783                 printf("devmap: %08x -> %08x @ %08x\n",
4784                     pmap_devmap_table[i].pd_pa,
4785                     pmap_devmap_table[i].pd_pa +
4786                         pmap_devmap_table[i].pd_size - 1,
4787                     pmap_devmap_table[i].pd_va);
4788 #endif
4789                 pmap_map_chunk(l1pt, pmap_devmap_table[i].pd_va,
4790                     pmap_devmap_table[i].pd_pa,
4791                     pmap_devmap_table[i].pd_size,
4792                     pmap_devmap_table[i].pd_prot,
4793                     pmap_devmap_table[i].pd_cache);
4794         }
4795 }
4796
4797 const struct pmap_devmap *
4798 pmap_devmap_find_pa(vm_paddr_t pa, vm_size_t size)
4799 {
4800         int i;
4801
4802         if (pmap_devmap_table == NULL)
4803                 return (NULL);
4804
4805         for (i = 0; pmap_devmap_table[i].pd_size != 0; i++) {
4806                 if (pa >= pmap_devmap_table[i].pd_pa &&
4807                     pa + size <= pmap_devmap_table[i].pd_pa +
4808                                  pmap_devmap_table[i].pd_size)
4809                         return (&pmap_devmap_table[i]);
4810         }
4811
4812         return (NULL);
4813 }
4814
4815 const struct pmap_devmap *
4816 pmap_devmap_find_va(vm_offset_t va, vm_size_t size)
4817 {
4818         int i;
4819
4820         if (pmap_devmap_table == NULL)
4821                 return (NULL);
4822
4823         for (i = 0; pmap_devmap_table[i].pd_size != 0; i++) {
4824                 if (va >= pmap_devmap_table[i].pd_va &&
4825                     va + size <= pmap_devmap_table[i].pd_va +
4826                                  pmap_devmap_table[i].pd_size)
4827                         return (&pmap_devmap_table[i]);
4828         }
4829
4830         return (NULL);
4831 }
4832