sys/powerpc/booke/pmap.c

   1 /*-
   2  * SPDX-License-Identifier: BSD-2-Clause
   3  *
   4  * Copyright (C) 2007-2009 Semihalf, Rafal Jaworowski <raj@semihalf.com>
   5  * Copyright (C) 2006 Semihalf, Marian Balakowicz <m8@semihalf.com>
   6  * All rights reserved.
   7  *
   8  * Redistribution and use in source and binary forms, with or without
   9  * modification, are permitted provided that the following conditions
  10  * are met:
  11  * 1. Redistributions of source code must retain the above copyright
  12  *    notice, this list of conditions and the following disclaimer.
  13  * 2. Redistributions in binary form must reproduce the above copyright
  14  *    notice, this list of conditions and the following disclaimer in the
  15  *    documentation and/or other materials provided with the distribution.
  16  *
  17  * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
  18  * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
  19  * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.  IN
  20  * NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
  21  * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED
  22  * TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
  23  * PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
  24  * LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
  25  * NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
  26  * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
  27  *
  28  * Some hw specific parts of this pmap were derived or influenced
  29  * by NetBSD's ibm4xx pmap module. More generic code is shared with
  30  * a few other pmap modules from the FreeBSD tree.
  31  */
  32
  33  /*
  34   * VM layout notes:
  35   *
  36   * Kernel and user threads run within one common virtual address space
  37   * defined by AS=0.
  38   *
  39   * 32-bit pmap:
  40   * Virtual address space layout:
  41   * -----------------------------
  42   * 0x0000_0000 - 0x7fff_ffff   : user process
  43   * 0x8000_0000 - 0xbfff_ffff   : pmap_mapdev()-ed area (PCI/PCIE etc.)
  44   * 0xc000_0000 - 0xc0ff_ffff   : kernel reserved
  45   *   0xc000_0000 - data_end    : kernel code+data, env, metadata etc.
  46   * 0xc100_0000 - 0xffff_ffff   : KVA
  47   *   0xc100_0000 - 0xc100_3fff : reserved for page zero/copy
  48   *   0xc100_4000 - 0xc200_3fff : reserved for ptbl bufs
  49   *   0xc200_4000 - 0xc200_8fff : guard page + kstack0
  50   *   0xc200_9000 - 0xfeef_ffff : actual free KVA space
  51   *
  52   * 64-bit pmap:
  53   * Virtual address space layout:
  54   * -----------------------------
  55   * 0x0000_0000_0000_0000 - 0xbfff_ffff_ffff_ffff      : user process
  56   *   0x0000_0000_0000_0000 - 0x8fff_ffff_ffff_ffff    : text, data, heap, maps, libraries
  57   *   0x9000_0000_0000_0000 - 0xafff_ffff_ffff_ffff    : mmio region
  58   *   0xb000_0000_0000_0000 - 0xbfff_ffff_ffff_ffff    : stack
  59   * 0xc000_0000_0000_0000 - 0xcfff_ffff_ffff_ffff      : kernel reserved
  60   *   0xc000_0000_0000_0000 - endkernel-1              : kernel code & data
  61   *               endkernel - msgbufp-1                : flat device tree
  62   *                 msgbufp - kernel_pdir-1            : message buffer
  63   *             kernel_pdir - kernel_pp2d-1            : kernel page directory
  64   *             kernel_pp2d - .                        : kernel pointers to page directory
  65   *      pmap_zero_copy_min - crashdumpmap-1           : reserved for page zero/copy
  66   *            crashdumpmap - ptbl_buf_pool_vabase-1   : reserved for ptbl bufs
  67   *    ptbl_buf_pool_vabase - virtual_avail-1          : user page directories and page tables
  68   *           virtual_avail - 0xcfff_ffff_ffff_ffff    : actual free KVA space
  69   * 0xd000_0000_0000_0000 - 0xdfff_ffff_ffff_ffff      : coprocessor region
  70   * 0xe000_0000_0000_0000 - 0xefff_ffff_ffff_ffff      : mmio region
  71   * 0xf000_0000_0000_0000 - 0xffff_ffff_ffff_ffff      : direct map
  72   *   0xf000_0000_0000_0000 - +Maxmem                  : physmem map
  73   *                         - 0xffff_ffff_ffff_ffff    : device direct map
  74   */
  75
  76 #include <sys/cdefs.h>
  77 __FBSDID("$FreeBSD$");
  78
  79 #include "opt_ddb.h"
  80 #include "opt_kstack_pages.h"
  81
  82 #include <sys/param.h>
  83 #include <sys/conf.h>
  84 #include <sys/malloc.h>
  85 #include <sys/ktr.h>
  86 #include <sys/proc.h>
  87 #include <sys/user.h>
  88 #include <sys/queue.h>
  89 #include <sys/systm.h>
  90 #include <sys/kernel.h>
  91 #include <sys/kerneldump.h>
  92 #include <sys/linker.h>
  93 #include <sys/msgbuf.h>
  94 #include <sys/lock.h>
  95 #include <sys/mutex.h>
  96 #include <sys/rwlock.h>
  97 #include <sys/sched.h>
  98 #include <sys/smp.h>
  99 #include <sys/vmmeter.h>
 100
 101 #include <vm/vm.h>
 102 #include <vm/vm_param.h>
 103 #include <vm/vm_page.h>
 104 #include <vm/vm_kern.h>
 105 #include <vm/vm_pageout.h>
 106 #include <vm/vm_extern.h>
 107 #include <vm/vm_object.h>
 108 #include <vm/vm_map.h>
 109 #include <vm/vm_pager.h>
 110 #include <vm/vm_phys.h>
 111 #include <vm/vm_pagequeue.h>
 112 #include <vm/vm_dumpset.h>
 113 #include <vm/uma.h>
 114
 115 #include <machine/_inttypes.h>
 116 #include <machine/cpu.h>
 117 #include <machine/pcb.h>
 118 #include <machine/platform.h>
 119
 120 #include <machine/tlb.h>
 121 #include <machine/spr.h>
 122 #include <machine/md_var.h>
 123 #include <machine/mmuvar.h>
 124 #include <machine/pmap.h>
 125 #include <machine/pte.h>
 126
 127 #include <ddb/ddb.h>
 128
 129 #define SPARSE_MAPDEV
 130
 131 /* Use power-of-two mappings in mmu_booke_mapdev(), to save entries. */
 132 #define POW2_MAPPINGS
 133
 134 #ifdef  DEBUG
 135 #define debugf(fmt, args...) printf(fmt, ##args)
 136 #define __debug_used
 137 #else
 138 #define debugf(fmt, args...)
 139 #define __debug_used    __unused
 140 #endif
 141
 142 #ifdef __powerpc64__
 143 #define PRI0ptrX        "016lx"
 144 #else
 145 #define PRI0ptrX        "08x"
 146 #endif
 147
 148 #define TODO                    panic("%s: not implemented", __func__);
 149
 150 extern unsigned char _etext[];
 151 extern unsigned char _end[];
 152
 153 extern uint32_t *bootinfo;
 154
 155 vm_paddr_t kernload;
 156 vm_offset_t kernstart;
 157 vm_size_t kernsize;
 158
 159 /* Message buffer and tables. */
 160 static vm_offset_t data_start;
 161 static vm_size_t data_end;
 162
 163 /* Phys/avail memory regions. */
 164 static struct mem_region *availmem_regions;
 165 static int availmem_regions_sz;
 166 static struct mem_region *physmem_regions;
 167 static int physmem_regions_sz;
 168
 169 #ifndef __powerpc64__
 170 /* Reserved KVA space and mutex for mmu_booke_zero_page. */
 171 static vm_offset_t zero_page_va;
 172 static struct mtx zero_page_mutex;
 173
 174 /* Reserved KVA space and mutex for mmu_booke_copy_page. */
 175 static vm_offset_t copy_page_src_va;
 176 static vm_offset_t copy_page_dst_va;
 177 static struct mtx copy_page_mutex;
 178 #endif
 179
 180 static struct mtx tlbivax_mutex;
 181
 182 /**************************************************************************/
 183 /* PMAP */
 184 /**************************************************************************/
 185
 186 static int mmu_booke_enter_locked(pmap_t, vm_offset_t, vm_page_t,
 187     vm_prot_t, u_int flags, int8_t psind);
 188
 189 unsigned int kptbl_min;         /* Index of the first kernel ptbl. */
 190 static uma_zone_t ptbl_root_zone;
 191
 192 /*
 193  * If user pmap is processed with mmu_booke_remove and the resident count
 194  * drops to 0, there are no more pages to remove, so we need not continue.
 195  */
 196 #define PMAP_REMOVE_DONE(pmap) \
 197         ((pmap) != kernel_pmap && (pmap)->pm_stats.resident_count == 0)
 198
 199 #if defined(COMPAT_FREEBSD32) || !defined(__powerpc64__)
 200 extern int elf32_nxstack;
 201 #endif
 202
 203 /**************************************************************************/
 204 /* TLB and TID handling */
 205 /**************************************************************************/
 206
 207 /* Translation ID busy table */
 208 static volatile pmap_t tidbusy[MAXCPU][TID_MAX + 1];
 209
 210 /*
 211  * TLB0 capabilities (entry, way numbers etc.). These can vary between e500
 212  * core revisions and should be read from h/w registers during early config.
 213  */
 214 uint32_t tlb0_entries;
 215 uint32_t tlb0_ways;
 216 uint32_t tlb0_entries_per_way;
 217 uint32_t tlb1_entries;
 218
 219 #define TLB0_ENTRIES            (tlb0_entries)
 220 #define TLB0_WAYS               (tlb0_ways)
 221 #define TLB0_ENTRIES_PER_WAY    (tlb0_entries_per_way)
 222
 223 #define TLB1_ENTRIES (tlb1_entries)
 224
 225 static tlbtid_t tid_alloc(struct pmap *);
 226
 227 #ifdef DDB
 228 #ifdef __powerpc64__
 229 static void tlb_print_entry(int, uint32_t, uint64_t, uint32_t, uint32_t);
 230 #else
 231 static void tlb_print_entry(int, uint32_t, uint32_t, uint32_t, uint32_t);
 232 #endif
 233 #endif
 234
 235 static void tlb1_read_entry(tlb_entry_t *, unsigned int);
 236 static void tlb1_write_entry(tlb_entry_t *, unsigned int);
 237 static int tlb1_iomapped(int, vm_paddr_t, vm_size_t, vm_offset_t *);
 238 static vm_size_t tlb1_mapin_region(vm_offset_t, vm_paddr_t, vm_size_t, int);
 239
 240 static __inline uint32_t tlb_calc_wimg(vm_paddr_t pa, vm_memattr_t ma);
 241
 242 static vm_size_t tsize2size(unsigned int);
 243 static unsigned int size2tsize(vm_size_t);
 244 static unsigned long ilog2(unsigned long);
 245
 246 static void set_mas4_defaults(void);
 247
 248 static inline void tlb0_flush_entry(vm_offset_t);
 249 static inline unsigned int tlb0_tableidx(vm_offset_t, unsigned int);
 250
 251 /**************************************************************************/
 252 /* Page table management */
 253 /**************************************************************************/
 254
 255 static struct rwlock_padalign pvh_global_lock;
 256
 257 /* Data for the pv entry allocation mechanism */
 258 static uma_zone_t pvzone;
 259 static int pv_entry_count = 0, pv_entry_max = 0, pv_entry_high_water = 0;
 260
 261 #define PV_ENTRY_ZONE_MIN       2048    /* min pv entries in uma zone */
 262
 263 #ifndef PMAP_SHPGPERPROC
 264 #define PMAP_SHPGPERPROC        200
 265 #endif
 266
 267 static vm_paddr_t pte_vatopa(pmap_t, vm_offset_t);
 268 static int pte_enter(pmap_t, vm_page_t, vm_offset_t, uint32_t, boolean_t);
 269 static int pte_remove(pmap_t, vm_offset_t, uint8_t);
 270 static pte_t *pte_find(pmap_t, vm_offset_t);
 271 static void kernel_pte_alloc(vm_offset_t, vm_offset_t);
 272
 273 static pv_entry_t pv_alloc(void);
 274 static void pv_free(pv_entry_t);
 275 static void pv_insert(pmap_t, vm_offset_t, vm_page_t);
 276 static void pv_remove(pmap_t, vm_offset_t, vm_page_t);
 277
 278 static void booke_pmap_init_qpages(void);
 279
 280 static inline void tlb_miss_lock(void);
 281 static inline void tlb_miss_unlock(void);
 282
 283 #ifdef SMP
 284 extern tlb_entry_t __boot_tlb1[];
 285 void pmap_bootstrap_ap(volatile uint32_t *);
 286 #endif
 287
 288 /*
 289  * Kernel MMU interface
 290  */
 291 static void             mmu_booke_clear_modify(vm_page_t);
 292 static void             mmu_booke_copy(pmap_t, pmap_t, vm_offset_t,
 293     vm_size_t, vm_offset_t);
 294 static void             mmu_booke_copy_page(vm_page_t, vm_page_t);
 295 static void             mmu_booke_copy_pages(vm_page_t *,
 296     vm_offset_t, vm_page_t *, vm_offset_t, int);
 297 static int              mmu_booke_enter(pmap_t, vm_offset_t, vm_page_t,
 298     vm_prot_t, u_int flags, int8_t psind);
 299 static void             mmu_booke_enter_object(pmap_t, vm_offset_t, vm_offset_t,
 300     vm_page_t, vm_prot_t);
 301 static void             mmu_booke_enter_quick(pmap_t, vm_offset_t, vm_page_t,
 302     vm_prot_t);
 303 static vm_paddr_t       mmu_booke_extract(pmap_t, vm_offset_t);
 304 static vm_page_t        mmu_booke_extract_and_hold(pmap_t, vm_offset_t,
 305     vm_prot_t);
 306 static void             mmu_booke_init(void);
 307 static boolean_t        mmu_booke_is_modified(vm_page_t);
 308 static boolean_t        mmu_booke_is_prefaultable(pmap_t, vm_offset_t);
 309 static boolean_t        mmu_booke_is_referenced(vm_page_t);
 310 static int              mmu_booke_ts_referenced(vm_page_t);
 311 static vm_offset_t      mmu_booke_map(vm_offset_t *, vm_paddr_t, vm_paddr_t,
 312     int);
 313 static int              mmu_booke_mincore(pmap_t, vm_offset_t,
 314     vm_paddr_t *);
 315 static void             mmu_booke_object_init_pt(pmap_t, vm_offset_t,
 316     vm_object_t, vm_pindex_t, vm_size_t);
 317 static boolean_t        mmu_booke_page_exists_quick(pmap_t, vm_page_t);
 318 static void             mmu_booke_page_init(vm_page_t);
 319 static int              mmu_booke_page_wired_mappings(vm_page_t);
 320 static int              mmu_booke_pinit(pmap_t);
 321 static void             mmu_booke_pinit0(pmap_t);
 322 static void             mmu_booke_protect(pmap_t, vm_offset_t, vm_offset_t,
 323     vm_prot_t);
 324 static void             mmu_booke_qenter(vm_offset_t, vm_page_t *, int);
 325 static void             mmu_booke_qremove(vm_offset_t, int);
 326 static void             mmu_booke_release(pmap_t);
 327 static void             mmu_booke_remove(pmap_t, vm_offset_t, vm_offset_t);
 328 static void             mmu_booke_remove_all(vm_page_t);
 329 static void             mmu_booke_remove_write(vm_page_t);
 330 static void             mmu_booke_unwire(pmap_t, vm_offset_t, vm_offset_t);
 331 static void             mmu_booke_zero_page(vm_page_t);
 332 static void             mmu_booke_zero_page_area(vm_page_t, int, int);
 333 static void             mmu_booke_activate(struct thread *);
 334 static void             mmu_booke_deactivate(struct thread *);
 335 static void             mmu_booke_bootstrap(vm_offset_t, vm_offset_t);
 336 static void             *mmu_booke_mapdev(vm_paddr_t, vm_size_t);
 337 static void             *mmu_booke_mapdev_attr(vm_paddr_t, vm_size_t, vm_memattr_t);
 338 static void             mmu_booke_unmapdev(void *, vm_size_t);
 339 static vm_paddr_t       mmu_booke_kextract(vm_offset_t);
 340 static void             mmu_booke_kenter(vm_offset_t, vm_paddr_t);
 341 static void             mmu_booke_kenter_attr(vm_offset_t, vm_paddr_t, vm_memattr_t);
 342 static void             mmu_booke_kremove(vm_offset_t);
 343 static int              mmu_booke_dev_direct_mapped(vm_paddr_t, vm_size_t);
 344 static void             mmu_booke_sync_icache(pmap_t, vm_offset_t,
 345     vm_size_t);
 346 static void             mmu_booke_dumpsys_map(vm_paddr_t pa, size_t,
 347     void **);
 348 static void             mmu_booke_dumpsys_unmap(vm_paddr_t pa, size_t,
 349     void *);
 350 static void             mmu_booke_scan_init(void);
 351 static vm_offset_t      mmu_booke_quick_enter_page(vm_page_t m);
 352 static void             mmu_booke_quick_remove_page(vm_offset_t addr);
 353 static int              mmu_booke_change_attr(vm_offset_t addr,
 354     vm_size_t sz, vm_memattr_t mode);
 355 static int              mmu_booke_decode_kernel_ptr(vm_offset_t addr,
 356     int *is_user, vm_offset_t *decoded_addr);
 357 static void             mmu_booke_page_array_startup(long);
 358 static boolean_t mmu_booke_page_is_mapped(vm_page_t m);
 359 static bool mmu_booke_ps_enabled(pmap_t pmap);
 360
 361 static struct pmap_funcs mmu_booke_methods = {
 362         /* pmap dispatcher interface */
 363         .clear_modify = mmu_booke_clear_modify,
 364         .copy = mmu_booke_copy,
 365         .copy_page = mmu_booke_copy_page,
 366         .copy_pages = mmu_booke_copy_pages,
 367         .enter = mmu_booke_enter,
 368         .enter_object = mmu_booke_enter_object,
 369         .enter_quick = mmu_booke_enter_quick,
 370         .extract = mmu_booke_extract,
 371         .extract_and_hold = mmu_booke_extract_and_hold,
 372         .init = mmu_booke_init,
 373         .is_modified = mmu_booke_is_modified,
 374         .is_prefaultable = mmu_booke_is_prefaultable,
 375         .is_referenced = mmu_booke_is_referenced,
 376         .ts_referenced = mmu_booke_ts_referenced,
 377         .map = mmu_booke_map,
 378         .mincore = mmu_booke_mincore,
 379         .object_init_pt = mmu_booke_object_init_pt,
 380         .page_exists_quick = mmu_booke_page_exists_quick,
 381         .page_init = mmu_booke_page_init,
 382         .page_wired_mappings =  mmu_booke_page_wired_mappings,
 383         .pinit = mmu_booke_pinit,
 384         .pinit0 = mmu_booke_pinit0,
 385         .protect = mmu_booke_protect,
 386         .qenter = mmu_booke_qenter,
 387         .qremove = mmu_booke_qremove,
 388         .release = mmu_booke_release,
 389         .remove = mmu_booke_remove,
 390         .remove_all = mmu_booke_remove_all,
 391         .remove_write = mmu_booke_remove_write,
 392         .sync_icache = mmu_booke_sync_icache,
 393         .unwire = mmu_booke_unwire,
 394         .zero_page = mmu_booke_zero_page,
 395         .zero_page_area = mmu_booke_zero_page_area,
 396         .activate = mmu_booke_activate,
 397         .deactivate = mmu_booke_deactivate,
 398         .quick_enter_page =  mmu_booke_quick_enter_page,
 399         .quick_remove_page =  mmu_booke_quick_remove_page,
 400         .page_array_startup = mmu_booke_page_array_startup,
 401         .page_is_mapped = mmu_booke_page_is_mapped,
 402         .ps_enabled = mmu_booke_ps_enabled,
 403
 404         /* Internal interfaces */
 405         .bootstrap = mmu_booke_bootstrap,
 406         .dev_direct_mapped = mmu_booke_dev_direct_mapped,
 407         .mapdev = mmu_booke_mapdev,
 408         .mapdev_attr = mmu_booke_mapdev_attr,
 409         .kenter = mmu_booke_kenter,
 410         .kenter_attr = mmu_booke_kenter_attr,
 411         .kextract = mmu_booke_kextract,
 412         .kremove = mmu_booke_kremove,
 413         .unmapdev = mmu_booke_unmapdev,
 414         .change_attr = mmu_booke_change_attr,
 415         .decode_kernel_ptr =  mmu_booke_decode_kernel_ptr,
 416
 417         /* dumpsys() support */
 418         .dumpsys_map_chunk = mmu_booke_dumpsys_map,
 419         .dumpsys_unmap_chunk = mmu_booke_dumpsys_unmap,
 420         .dumpsys_pa_init = mmu_booke_scan_init,
 421 };
 422
 423 MMU_DEF(booke_mmu, MMU_TYPE_BOOKE, mmu_booke_methods);
 424
 425 #ifdef __powerpc64__
 426 #include "pmap_64.c"
 427 #else
 428 #include "pmap_32.c"
 429 #endif
 430
 431 static vm_offset_t tlb1_map_base = VM_MAPDEV_BASE;
 432
 433 static __inline uint32_t
 434 tlb_calc_wimg(vm_paddr_t pa, vm_memattr_t ma)
 435 {
 436         uint32_t attrib;
 437         int i;
 438
 439         if (ma != VM_MEMATTR_DEFAULT) {
 440                 switch (ma) {
 441                 case VM_MEMATTR_UNCACHEABLE:
 442                         return (MAS2_I | MAS2_G);
 443                 case VM_MEMATTR_WRITE_COMBINING:
 444                 case VM_MEMATTR_WRITE_BACK:
 445                 case VM_MEMATTR_PREFETCHABLE:
 446                         return (MAS2_I);
 447                 case VM_MEMATTR_WRITE_THROUGH:
 448                         return (MAS2_W | MAS2_M);
 449                 case VM_MEMATTR_CACHEABLE:
 450                         return (MAS2_M);
 451                 }
 452         }
 453
 454         /*
 455          * Assume the page is cache inhibited and access is guarded unless
 456          * it's in our available memory array.
 457          */
 458         attrib = _TLB_ENTRY_IO;
 459         for (i = 0; i < physmem_regions_sz; i++) {
 460                 if ((pa >= physmem_regions[i].mr_start) &&
 461                     (pa < (physmem_regions[i].mr_start +
 462                      physmem_regions[i].mr_size))) {
 463                         attrib = _TLB_ENTRY_MEM;
 464                         break;
 465                 }
 466         }
 467
 468         return (attrib);
 469 }
 470
 471 static inline void
 472 tlb_miss_lock(void)
 473 {
 474 #ifdef SMP
 475         struct pcpu *pc;
 476
 477         if (!smp_started)
 478                 return;
 479
 480         STAILQ_FOREACH(pc, &cpuhead, pc_allcpu) {
 481                 if (pc != pcpup) {
 482                         CTR3(KTR_PMAP, "%s: tlb miss LOCK of CPU=%d, "
 483                             "tlb_lock=%p", __func__, pc->pc_cpuid, pc->pc_booke.tlb_lock);
 484
 485                         KASSERT((pc->pc_cpuid != PCPU_GET(cpuid)),
 486                             ("tlb_miss_lock: tried to lock self"));
 487
 488                         tlb_lock(pc->pc_booke.tlb_lock);
 489
 490                         CTR1(KTR_PMAP, "%s: locked", __func__);
 491                 }
 492         }
 493 #endif
 494 }
 495
 496 static inline void
 497 tlb_miss_unlock(void)
 498 {
 499 #ifdef SMP
 500         struct pcpu *pc;
 501
 502         if (!smp_started)
 503                 return;
 504
 505         STAILQ_FOREACH(pc, &cpuhead, pc_allcpu) {
 506                 if (pc != pcpup) {
 507                         CTR2(KTR_PMAP, "%s: tlb miss UNLOCK of CPU=%d",
 508                             __func__, pc->pc_cpuid);
 509
 510                         tlb_unlock(pc->pc_booke.tlb_lock);
 511
 512                         CTR1(KTR_PMAP, "%s: unlocked", __func__);
 513                 }
 514         }
 515 #endif
 516 }
 517
 518 /* Return number of entries in TLB0. */
 519 static __inline void
 520 tlb0_get_tlbconf(void)
 521 {
 522         uint32_t tlb0_cfg;
 523
 524         tlb0_cfg = mfspr(SPR_TLB0CFG);
 525         tlb0_entries = tlb0_cfg & TLBCFG_NENTRY_MASK;
 526         tlb0_ways = (tlb0_cfg & TLBCFG_ASSOC_MASK) >> TLBCFG_ASSOC_SHIFT;
 527         tlb0_entries_per_way = tlb0_entries / tlb0_ways;
 528 }
 529
 530 /* Return number of entries in TLB1. */
 531 static __inline void
 532 tlb1_get_tlbconf(void)
 533 {
 534         uint32_t tlb1_cfg;
 535
 536         tlb1_cfg = mfspr(SPR_TLB1CFG);
 537         tlb1_entries = tlb1_cfg & TLBCFG_NENTRY_MASK;
 538 }
 539
 540 /**************************************************************************/
 541 /* Page table related */
 542 /**************************************************************************/
 543
 544 /* Allocate pv_entry structure. */
 545 pv_entry_t
 546 pv_alloc(void)
 547 {
 548         pv_entry_t pv;
 549
 550         pv_entry_count++;
 551         if (pv_entry_count > pv_entry_high_water)
 552                 pagedaemon_wakeup(0); /* XXX powerpc NUMA */
 553         pv = uma_zalloc(pvzone, M_NOWAIT);
 554
 555         return (pv);
 556 }
 557
 558 /* Free pv_entry structure. */
 559 static __inline void
 560 pv_free(pv_entry_t pve)
 561 {
 562
 563         pv_entry_count--;
 564         uma_zfree(pvzone, pve);
 565 }
 566
 567 /* Allocate and initialize pv_entry structure. */
 568 static void
 569 pv_insert(pmap_t pmap, vm_offset_t va, vm_page_t m)
 570 {
 571         pv_entry_t pve;
 572
 573         //int su = (pmap == kernel_pmap);
 574         //debugf("pv_insert: s (su = %d pmap = 0x%08x va = 0x%08x m = 0x%08x)\n", su,
 575         //      (u_int32_t)pmap, va, (u_int32_t)m);
 576
 577         pve = pv_alloc();
 578         if (pve == NULL)
 579                 panic("pv_insert: no pv entries!");
 580
 581         pve->pv_pmap = pmap;
 582         pve->pv_va = va;
 583
 584         /* add to pv_list */
 585         PMAP_LOCK_ASSERT(pmap, MA_OWNED);
 586         rw_assert(&pvh_global_lock, RA_WLOCKED);
 587
 588         TAILQ_INSERT_TAIL(&m->md.pv_list, pve, pv_link);
 589
 590         //debugf("pv_insert: e\n");
 591 }
 592
 593 /* Destroy pv entry. */
 594 static void
 595 pv_remove(pmap_t pmap, vm_offset_t va, vm_page_t m)
 596 {
 597         pv_entry_t pve;
 598
 599         //int su = (pmap == kernel_pmap);
 600         //debugf("pv_remove: s (su = %d pmap = 0x%08x va = 0x%08x)\n", su, (u_int32_t)pmap, va);
 601
 602         PMAP_LOCK_ASSERT(pmap, MA_OWNED);
 603         rw_assert(&pvh_global_lock, RA_WLOCKED);
 604
 605         /* find pv entry */
 606         TAILQ_FOREACH(pve, &m->md.pv_list, pv_link) {
 607                 if ((pmap == pve->pv_pmap) && (va == pve->pv_va)) {
 608                         /* remove from pv_list */
 609                         TAILQ_REMOVE(&m->md.pv_list, pve, pv_link);
 610                         if (TAILQ_EMPTY(&m->md.pv_list))
 611                                 vm_page_aflag_clear(m, PGA_WRITEABLE);
 612
 613                         /* free pv entry struct */
 614                         pv_free(pve);
 615                         break;
 616                 }
 617         }
 618
 619         //debugf("pv_remove: e\n");
 620 }
 621
 622 /**************************************************************************/
 623 /* PMAP related */
 624 /**************************************************************************/
 625
 626 /*
 627  * This is called during booke_init, before the system is really initialized.
 628  */
 629 static void
 630 mmu_booke_bootstrap(vm_offset_t start, vm_offset_t kernelend)
 631 {
 632         vm_paddr_t phys_kernelend;
 633         struct mem_region *mp, *mp1;
 634         int cnt, i, j;
 635         vm_paddr_t s, e, sz;
 636         vm_paddr_t physsz, hwphyssz;
 637         u_int phys_avail_count __debug_used;
 638         vm_size_t kstack0_sz;
 639         vm_paddr_t kstack0_phys;
 640         vm_offset_t kstack0;
 641         void *dpcpu;
 642
 643         debugf("mmu_booke_bootstrap: entered\n");
 644
 645         /* Set interesting system properties */
 646 #ifdef __powerpc64__
 647         hw_direct_map = 1;
 648 #else
 649         hw_direct_map = 0;
 650 #endif
 651 #if defined(COMPAT_FREEBSD32) || !defined(__powerpc64__)
 652         elf32_nxstack = 1;
 653 #endif
 654
 655         /* Initialize invalidation mutex */
 656         mtx_init(&tlbivax_mutex, "tlbivax", NULL, MTX_SPIN);
 657
 658         /* Read TLB0 size and associativity. */
 659         tlb0_get_tlbconf();
 660
 661         /*
 662          * Align kernel start and end address (kernel image).
 663          * Note that kernel end does not necessarily relate to kernsize.
 664          * kernsize is the size of the kernel that is actually mapped.
 665          */
 666         data_start = round_page(kernelend);
 667         data_end = data_start;
 668
 669         /* Allocate the dynamic per-cpu area. */
 670         dpcpu = (void *)data_end;
 671         data_end += DPCPU_SIZE;
 672
 673         /* Allocate space for the message buffer. */
 674         msgbufp = (struct msgbuf *)data_end;
 675         data_end += msgbufsize;
 676         debugf(" msgbufp at 0x%"PRI0ptrX" end = 0x%"PRI0ptrX"\n",
 677             (uintptr_t)msgbufp, data_end);
 678
 679         data_end = round_page(data_end);
 680         data_end = round_page(mmu_booke_alloc_kernel_pgtables(data_end));
 681
 682         /* Retrieve phys/avail mem regions */
 683         mem_regions(&physmem_regions, &physmem_regions_sz,
 684             &availmem_regions, &availmem_regions_sz);
 685
 686         if (PHYS_AVAIL_ENTRIES < availmem_regions_sz)
 687                 panic("mmu_booke_bootstrap: phys_avail too small");
 688
 689         data_end = round_page(data_end);
 690         vm_page_array = (vm_page_t)data_end;
 691         /*
 692          * Get a rough idea (upper bound) on the size of the page array.  The
 693          * vm_page_array will not handle any more pages than we have in the
 694          * avail_regions array, and most likely much less.
 695          */
 696         sz = 0;
 697         for (mp = availmem_regions; mp->mr_size; mp++) {
 698                 sz += mp->mr_size;
 699         }
 700         sz = (round_page(sz) / (PAGE_SIZE + sizeof(struct vm_page)));
 701         data_end += round_page(sz * sizeof(struct vm_page));
 702
 703         /* Pre-round up to 1MB.  This wastes some space, but saves TLB entries */
 704         data_end = roundup2(data_end, 1 << 20);
 705
 706         debugf(" data_end: 0x%"PRI0ptrX"\n", data_end);
 707         debugf(" kernstart: %#zx\n", kernstart);
 708         debugf(" kernsize: %#zx\n", kernsize);
 709
 710         if (data_end - kernstart > kernsize) {
 711                 kernsize += tlb1_mapin_region(kernstart + kernsize,
 712                     kernload + kernsize, (data_end - kernstart) - kernsize,
 713                     _TLB_ENTRY_MEM);
 714         }
 715         data_end = kernstart + kernsize;
 716         debugf(" updated data_end: 0x%"PRI0ptrX"\n", data_end);
 717
 718         /*
 719          * Clear the structures - note we can only do it safely after the
 720          * possible additional TLB1 translations are in place (above) so that
 721          * all range up to the currently calculated 'data_end' is covered.
 722          */
 723         bzero((void *)data_start, data_end - data_start);
 724         dpcpu_init(dpcpu, 0);
 725
 726         /*******************************************************/
 727         /* Set the start and end of kva. */
 728         /*******************************************************/
 729         virtual_avail = round_page(data_end);
 730         virtual_end = VM_MAX_KERNEL_ADDRESS;
 731
 732 #ifndef __powerpc64__
 733         /* Allocate KVA space for page zero/copy operations. */
 734         zero_page_va = virtual_avail;
 735         virtual_avail += PAGE_SIZE;
 736         copy_page_src_va = virtual_avail;
 737         virtual_avail += PAGE_SIZE;
 738         copy_page_dst_va = virtual_avail;
 739         virtual_avail += PAGE_SIZE;
 740         debugf("zero_page_va = 0x%"PRI0ptrX"\n", zero_page_va);
 741         debugf("copy_page_src_va = 0x%"PRI0ptrX"\n", copy_page_src_va);
 742         debugf("copy_page_dst_va = 0x%"PRI0ptrX"\n", copy_page_dst_va);
 743
 744         /* Initialize page zero/copy mutexes. */
 745         mtx_init(&zero_page_mutex, "mmu_booke_zero_page", NULL, MTX_DEF);
 746         mtx_init(&copy_page_mutex, "mmu_booke_copy_page", NULL, MTX_DEF);
 747
 748         /* Allocate KVA space for ptbl bufs. */
 749         ptbl_buf_pool_vabase = virtual_avail;
 750         virtual_avail += PTBL_BUFS * PTBL_PAGES * PAGE_SIZE;
 751         debugf("ptbl_buf_pool_vabase = 0x%"PRI0ptrX" end = 0x%"PRI0ptrX"\n",
 752             ptbl_buf_pool_vabase, virtual_avail);
 753 #endif
 754
 755         /* Calculate corresponding physical addresses for the kernel region. */
 756         phys_kernelend = kernload + kernsize;
 757         debugf("kernel image and allocated data:\n");
 758         debugf(" kernload    = 0x%09jx\n", (uintmax_t)kernload);
 759         debugf(" kernstart   = 0x%"PRI0ptrX"\n", kernstart);
 760         debugf(" kernsize    = 0x%"PRI0ptrX"\n", kernsize);
 761
 762         /*
 763          * Remove kernel physical address range from avail regions list. Page
 764          * align all regions.  Non-page aligned memory isn't very interesting
 765          * to us.  Also, sort the entries for ascending addresses.
 766          */
 767
 768         sz = 0;
 769         cnt = availmem_regions_sz;
 770         debugf("processing avail regions:\n");
 771         for (mp = availmem_regions; mp->mr_size; mp++) {
 772                 s = mp->mr_start;
 773                 e = mp->mr_start + mp->mr_size;
 774                 debugf(" %09jx-%09jx -> ", (uintmax_t)s, (uintmax_t)e);
 775                 /* Check whether this region holds all of the kernel. */
 776                 if (s < kernload && e > phys_kernelend) {
 777                         availmem_regions[cnt].mr_start = phys_kernelend;
 778                         availmem_regions[cnt++].mr_size = e - phys_kernelend;
 779                         e = kernload;
 780                 }
 781                 /* Look whether this regions starts within the kernel. */
 782                 if (s >= kernload && s < phys_kernelend) {
 783                         if (e <= phys_kernelend)
 784                                 goto empty;
 785                         s = phys_kernelend;
 786                 }
 787                 /* Now look whether this region ends within the kernel. */
 788                 if (e > kernload && e <= phys_kernelend) {
 789                         if (s >= kernload)
 790                                 goto empty;
 791                         e = kernload;
 792                 }
 793                 /* Now page align the start and size of the region. */
 794                 s = round_page(s);
 795                 e = trunc_page(e);
 796                 if (e < s)
 797                         e = s;
 798                 sz = e - s;
 799                 debugf("%09jx-%09jx = %jx\n",
 800                     (uintmax_t)s, (uintmax_t)e, (uintmax_t)sz);
 801
 802                 /* Check whether some memory is left here. */
 803                 if (sz == 0) {
 804                 empty:
 805                         memmove(mp, mp + 1,
 806                             (cnt - (mp - availmem_regions)) * sizeof(*mp));
 807                         cnt--;
 808                         mp--;
 809                         continue;
 810                 }
 811
 812                 /* Do an insertion sort. */
 813                 for (mp1 = availmem_regions; mp1 < mp; mp1++)
 814                         if (s < mp1->mr_start)
 815                                 break;
 816                 if (mp1 < mp) {
 817                         memmove(mp1 + 1, mp1, (char *)mp - (char *)mp1);
 818                         mp1->mr_start = s;
 819                         mp1->mr_size = sz;
 820                 } else {
 821                         mp->mr_start = s;
 822                         mp->mr_size = sz;
 823                 }
 824         }
 825         availmem_regions_sz = cnt;
 826
 827         /*******************************************************/
 828         /* Steal physical memory for kernel stack from the end */
 829         /* of the first avail region                           */
 830         /*******************************************************/
 831         kstack0_sz = kstack_pages * PAGE_SIZE;
 832         kstack0_phys = availmem_regions[0].mr_start +
 833             availmem_regions[0].mr_size;
 834         kstack0_phys -= kstack0_sz;
 835         availmem_regions[0].mr_size -= kstack0_sz;
 836
 837         /*******************************************************/
 838         /* Fill in phys_avail table, based on availmem_regions */
 839         /*******************************************************/
 840         phys_avail_count = 0;
 841         physsz = 0;
 842         hwphyssz = 0;
 843         TUNABLE_ULONG_FETCH("hw.physmem", (u_long *) &hwphyssz);
 844
 845         debugf("fill in phys_avail:\n");
 846         for (i = 0, j = 0; i < availmem_regions_sz; i++, j += 2) {
 847                 debugf(" region: 0x%jx - 0x%jx (0x%jx)\n",
 848                     (uintmax_t)availmem_regions[i].mr_start,
 849                     (uintmax_t)availmem_regions[i].mr_start +
 850                         availmem_regions[i].mr_size,
 851                     (uintmax_t)availmem_regions[i].mr_size);
 852
 853                 if (hwphyssz != 0 &&
 854                     (physsz + availmem_regions[i].mr_size) >= hwphyssz) {
 855                         debugf(" hw.physmem adjust\n");
 856                         if (physsz < hwphyssz) {
 857                                 phys_avail[j] = availmem_regions[i].mr_start;
 858                                 phys_avail[j + 1] =
 859                                     availmem_regions[i].mr_start +
 860                                     hwphyssz - physsz;
 861                                 physsz = hwphyssz;
 862                                 phys_avail_count++;
 863                                 dump_avail[j] = phys_avail[j];
 864                                 dump_avail[j + 1] = phys_avail[j + 1];
 865                         }
 866                         break;
 867                 }
 868
 869                 phys_avail[j] = availmem_regions[i].mr_start;
 870                 phys_avail[j + 1] = availmem_regions[i].mr_start +
 871                     availmem_regions[i].mr_size;
 872                 phys_avail_count++;
 873                 physsz += availmem_regions[i].mr_size;
 874                 dump_avail[j] = phys_avail[j];
 875                 dump_avail[j + 1] = phys_avail[j + 1];
 876         }
 877         physmem = btoc(physsz);
 878
 879         /* Calculate the last available physical address. */
 880         for (i = 0; phys_avail[i + 2] != 0; i += 2)
 881                 ;
 882         Maxmem = powerpc_btop(phys_avail[i + 1]);
 883
 884         debugf("Maxmem = 0x%08lx\n", Maxmem);
 885         debugf("phys_avail_count = %d\n", phys_avail_count);
 886         debugf("physsz = 0x%09jx physmem = %jd (0x%09jx)\n",
 887             (uintmax_t)physsz, (uintmax_t)physmem, (uintmax_t)physmem);
 888
 889 #ifdef __powerpc64__
 890         /*
 891          * Map the physical memory contiguously in TLB1.
 892          * Round so it fits into a single mapping.
 893          */
 894         tlb1_mapin_region(DMAP_BASE_ADDRESS, 0,
 895             phys_avail[i + 1], _TLB_ENTRY_MEM);
 896 #endif
 897
 898         /*******************************************************/
 899         /* Initialize (statically allocated) kernel pmap. */
 900         /*******************************************************/
 901         PMAP_LOCK_INIT(kernel_pmap);
 902
 903         debugf("kernel_pmap = 0x%"PRI0ptrX"\n", (uintptr_t)kernel_pmap);
 904         kernel_pte_alloc(virtual_avail, kernstart);
 905         for (i = 0; i < MAXCPU; i++) {
 906                 kernel_pmap->pm_tid[i] = TID_KERNEL;
 907
 908                 /* Initialize each CPU's tidbusy entry 0 with kernel_pmap */
 909                 tidbusy[i][TID_KERNEL] = kernel_pmap;
 910         }
 911
 912         /* Mark kernel_pmap active on all CPUs */
 913         CPU_FILL(&kernel_pmap->pm_active);
 914
 915         /*
 916          * Initialize the global pv list lock.
 917          */
 918         rw_init(&pvh_global_lock, "pmap pv global");
 919
 920         /*******************************************************/
 921         /* Final setup */
 922         /*******************************************************/
 923
 924         /* Enter kstack0 into kernel map, provide guard page */
 925         kstack0 = virtual_avail + KSTACK_GUARD_PAGES * PAGE_SIZE;
 926         thread0.td_kstack = kstack0;
 927         thread0.td_kstack_pages = kstack_pages;
 928
 929         debugf("kstack_sz = 0x%08jx\n", (uintmax_t)kstack0_sz);
 930         debugf("kstack0_phys at 0x%09jx - 0x%09jx\n",
 931             (uintmax_t)kstack0_phys, (uintmax_t)kstack0_phys + kstack0_sz);
 932         debugf("kstack0 at 0x%"PRI0ptrX" - 0x%"PRI0ptrX"\n",
 933             kstack0, kstack0 + kstack0_sz);
 934
 935         virtual_avail += KSTACK_GUARD_PAGES * PAGE_SIZE + kstack0_sz;
 936         for (i = 0; i < kstack_pages; i++) {
 937                 mmu_booke_kenter(kstack0, kstack0_phys);
 938                 kstack0 += PAGE_SIZE;
 939                 kstack0_phys += PAGE_SIZE;
 940         }
 941
 942         pmap_bootstrapped = 1;
 943
 944         debugf("virtual_avail = %"PRI0ptrX"\n", virtual_avail);
 945         debugf("virtual_end   = %"PRI0ptrX"\n", virtual_end);
 946
 947         debugf("mmu_booke_bootstrap: exit\n");
 948 }
 949
 950 #ifdef SMP
 951 void
 952 tlb1_ap_prep(void)
 953 {
 954         tlb_entry_t *e, tmp;
 955         unsigned int i;
 956
 957         /* Prepare TLB1 image for AP processors */
 958         e = __boot_tlb1;
 959         for (i = 0; i < TLB1_ENTRIES; i++) {
 960                 tlb1_read_entry(&tmp, i);
 961
 962                 if ((tmp.mas1 & MAS1_VALID) && (tmp.mas2 & _TLB_ENTRY_SHARED))
 963                         memcpy(e++, &tmp, sizeof(tmp));
 964         }
 965 }
 966
 967 void
 968 pmap_bootstrap_ap(volatile uint32_t *trcp __unused)
 969 {
 970         int i;
 971
 972         /*
 973          * Finish TLB1 configuration: the BSP already set up its TLB1 and we
 974          * have the snapshot of its contents in the s/w __boot_tlb1[] table
 975          * created by tlb1_ap_prep(), so use these values directly to
 976          * (re)program AP's TLB1 hardware.
 977          *
 978          * Start at index 1 because index 0 has the kernel map.
 979          */
 980         for (i = 1; i < TLB1_ENTRIES; i++) {
 981                 if (__boot_tlb1[i].mas1 & MAS1_VALID)
 982                         tlb1_write_entry(&__boot_tlb1[i], i);
 983         }
 984
 985         set_mas4_defaults();
 986 }
 987 #endif
 988
 989 static void
 990 booke_pmap_init_qpages(void)
 991 {
 992         struct pcpu *pc;
 993         int i;
 994
 995         CPU_FOREACH(i) {
 996                 pc = pcpu_find(i);
 997                 pc->pc_qmap_addr = kva_alloc(PAGE_SIZE);
 998                 if (pc->pc_qmap_addr == 0)
 999                         panic("pmap_init_qpages: unable to allocate KVA");
1000         }
1001 }
1002
1003 SYSINIT(qpages_init, SI_SUB_CPU, SI_ORDER_ANY, booke_pmap_init_qpages, NULL);
1004
1005 /*
1006  * Get the physical page address for the given pmap/virtual address.
1007  */
1008 static vm_paddr_t
1009 mmu_booke_extract(pmap_t pmap, vm_offset_t va)
1010 {
1011         vm_paddr_t pa;
1012
1013         PMAP_LOCK(pmap);
1014         pa = pte_vatopa(pmap, va);
1015         PMAP_UNLOCK(pmap);
1016
1017         return (pa);
1018 }
1019
1020 /*
1021  * Extract the physical page address associated with the given
1022  * kernel virtual address.
1023  */
1024 static vm_paddr_t
1025 mmu_booke_kextract(vm_offset_t va)
1026 {
1027         tlb_entry_t e;
1028         vm_paddr_t p = 0;
1029         int i;
1030
1031 #ifdef __powerpc64__
1032         if (va >= DMAP_BASE_ADDRESS && va <= DMAP_MAX_ADDRESS)
1033                 return (DMAP_TO_PHYS(va));
1034 #endif
1035
1036         if (va >= VM_MIN_KERNEL_ADDRESS && va <= VM_MAX_KERNEL_ADDRESS)
1037                 p = pte_vatopa(kernel_pmap, va);
1038
1039         if (p == 0) {
1040                 /* Check TLB1 mappings */
1041                 for (i = 0; i < TLB1_ENTRIES; i++) {
1042                         tlb1_read_entry(&e, i);
1043                         if (!(e.mas1 & MAS1_VALID))
1044                                 continue;
1045                         if (va >= e.virt && va < e.virt + e.size)
1046                                 return (e.phys + (va - e.virt));
1047                 }
1048         }
1049
1050         return (p);
1051 }
1052
1053 /*
1054  * Initialize the pmap module.
1055  * Called by vm_init, to initialize any structures that the pmap
1056  * system needs to map virtual memory.
1057  */
1058 static void
1059 mmu_booke_init(void)
1060 {
1061         int shpgperproc = PMAP_SHPGPERPROC;
1062
1063         /*
1064          * Initialize the address space (zone) for the pv entries.  Set a
1065          * high water mark so that the system can recover from excessive
1066          * numbers of pv entries.
1067          */
1068         pvzone = uma_zcreate("PV ENTRY", sizeof(struct pv_entry), NULL, NULL,
1069             NULL, NULL, UMA_ALIGN_PTR, UMA_ZONE_VM | UMA_ZONE_NOFREE);
1070
1071         TUNABLE_INT_FETCH("vm.pmap.shpgperproc", &shpgperproc);
1072         pv_entry_max = shpgperproc * maxproc + vm_cnt.v_page_count;
1073
1074         TUNABLE_INT_FETCH("vm.pmap.pv_entries", &pv_entry_max);
1075         pv_entry_high_water = 9 * (pv_entry_max / 10);
1076
1077         uma_zone_reserve_kva(pvzone, pv_entry_max);
1078
1079         /* Pre-fill pvzone with initial number of pv entries. */
1080         uma_prealloc(pvzone, PV_ENTRY_ZONE_MIN);
1081
1082         /* Create a UMA zone for page table roots. */
1083         ptbl_root_zone = uma_zcreate("pmap root", PMAP_ROOT_SIZE,
1084             NULL, NULL, NULL, NULL, UMA_ALIGN_CACHE, UMA_ZONE_VM);
1085
1086         /* Initialize ptbl allocation. */
1087         ptbl_init();
1088 }
1089
1090 /*
1091  * Map a list of wired pages into kernel virtual address space.  This is
1092  * intended for temporary mappings which do not need page modification or
1093  * references recorded.  Existing mappings in the region are overwritten.
1094  */
1095 static void
1096 mmu_booke_qenter(vm_offset_t sva, vm_page_t *m, int count)
1097 {
1098         vm_offset_t va;
1099
1100         va = sva;
1101         while (count-- > 0) {
1102                 mmu_booke_kenter(va, VM_PAGE_TO_PHYS(*m));
1103                 va += PAGE_SIZE;
1104                 m++;
1105         }
1106 }
1107
1108 /*
1109  * Remove page mappings from kernel virtual address space.  Intended for
1110  * temporary mappings entered by mmu_booke_qenter.
1111  */
1112 static void
1113 mmu_booke_qremove(vm_offset_t sva, int count)
1114 {
1115         vm_offset_t va;
1116
1117         va = sva;
1118         while (count-- > 0) {
1119                 mmu_booke_kremove(va);
1120                 va += PAGE_SIZE;
1121         }
1122 }
1123
1124 /*
1125  * Map a wired page into kernel virtual address space.
1126  */
1127 static void
1128 mmu_booke_kenter(vm_offset_t va, vm_paddr_t pa)
1129 {
1130
1131         mmu_booke_kenter_attr(va, pa, VM_MEMATTR_DEFAULT);
1132 }
1133
1134 static void
1135 mmu_booke_kenter_attr(vm_offset_t va, vm_paddr_t pa, vm_memattr_t ma)
1136 {
1137         uint32_t flags;
1138         pte_t *pte;
1139
1140         KASSERT(((va >= VM_MIN_KERNEL_ADDRESS) &&
1141             (va <= VM_MAX_KERNEL_ADDRESS)), ("mmu_booke_kenter: invalid va"));
1142
1143         flags = PTE_SR | PTE_SW | PTE_SX | PTE_WIRED | PTE_VALID;
1144         flags |= tlb_calc_wimg(pa, ma) << PTE_MAS2_SHIFT;
1145         flags |= PTE_PS_4KB;
1146
1147         pte = pte_find(kernel_pmap, va);
1148         KASSERT((pte != NULL), ("mmu_booke_kenter: invalid va.  NULL PTE"));
1149
1150         mtx_lock_spin(&tlbivax_mutex);
1151         tlb_miss_lock();
1152
1153         if (PTE_ISVALID(pte)) {
1154                 CTR1(KTR_PMAP, "%s: replacing entry!", __func__);
1155
1156                 /* Flush entry from TLB0 */
1157                 tlb0_flush_entry(va);
1158         }
1159
1160         *pte = PTE_RPN_FROM_PA(pa) | flags;
1161
1162         //debugf("mmu_booke_kenter: pdir_idx = %d ptbl_idx = %d va=0x%08x "
1163         //              "pa=0x%08x rpn=0x%08x flags=0x%08x\n",
1164         //              pdir_idx, ptbl_idx, va, pa, pte->rpn, pte->flags);
1165
1166         /* Flush the real memory from the instruction cache. */
1167         if ((flags & (PTE_I | PTE_G)) == 0)
1168                 __syncicache((void *)va, PAGE_SIZE);
1169
1170         tlb_miss_unlock();
1171         mtx_unlock_spin(&tlbivax_mutex);
1172 }
1173
1174 /*
1175  * Remove a page from kernel page table.
1176  */
1177 static void
1178 mmu_booke_kremove(vm_offset_t va)
1179 {
1180         pte_t *pte;
1181
1182         CTR2(KTR_PMAP,"%s: s (va = 0x%"PRI0ptrX")\n", __func__, va);
1183
1184         KASSERT(((va >= VM_MIN_KERNEL_ADDRESS) &&
1185             (va <= VM_MAX_KERNEL_ADDRESS)),
1186             ("mmu_booke_kremove: invalid va"));
1187
1188         pte = pte_find(kernel_pmap, va);
1189
1190         if (!PTE_ISVALID(pte)) {
1191                 CTR1(KTR_PMAP, "%s: invalid pte", __func__);
1192
1193                 return;
1194         }
1195
1196         mtx_lock_spin(&tlbivax_mutex);
1197         tlb_miss_lock();
1198
1199         /* Invalidate entry in TLB0, update PTE. */
1200         tlb0_flush_entry(va);
1201         *pte = 0;
1202
1203         tlb_miss_unlock();
1204         mtx_unlock_spin(&tlbivax_mutex);
1205 }
1206
1207 /*
1208  * Figure out where a given kernel pointer (usually in a fault) points
1209  * to from the VM's perspective, potentially remapping into userland's
1210  * address space.
1211  */
1212 static int
1213 mmu_booke_decode_kernel_ptr(vm_offset_t addr, int *is_user,
1214     vm_offset_t *decoded_addr)
1215 {
1216
1217         if (trunc_page(addr) <= VM_MAXUSER_ADDRESS)
1218                 *is_user = 1;
1219         else
1220                 *is_user = 0;
1221
1222         *decoded_addr = addr;
1223         return (0);
1224 }
1225
1226 static boolean_t
1227 mmu_booke_page_is_mapped(vm_page_t m)
1228 {
1229
1230         return (!TAILQ_EMPTY(&(m)->md.pv_list));
1231 }
1232
1233 static bool
1234 mmu_booke_ps_enabled(pmap_t pmap __unused)
1235 {
1236         return (false);
1237 }
1238
1239 /*
1240  * Initialize pmap associated with process 0.
1241  */
1242 static void
1243 mmu_booke_pinit0(pmap_t pmap)
1244 {
1245
1246         PMAP_LOCK_INIT(pmap);
1247         mmu_booke_pinit(pmap);
1248         PCPU_SET(curpmap, pmap);
1249 }
1250
1251 /*
1252  * Insert the given physical page at the specified virtual address in the
1253  * target physical map with the protection requested. If specified the page
1254  * will be wired down.
1255  */
1256 static int
1257 mmu_booke_enter(pmap_t pmap, vm_offset_t va, vm_page_t m,
1258     vm_prot_t prot, u_int flags, int8_t psind)
1259 {
1260         int error;
1261
1262         rw_wlock(&pvh_global_lock);
1263         PMAP_LOCK(pmap);
1264         error = mmu_booke_enter_locked(pmap, va, m, prot, flags, psind);
1265         PMAP_UNLOCK(pmap);
1266         rw_wunlock(&pvh_global_lock);
1267         return (error);
1268 }
1269
1270 static int
1271 mmu_booke_enter_locked(pmap_t pmap, vm_offset_t va, vm_page_t m,
1272     vm_prot_t prot, u_int pmap_flags, int8_t psind __unused)
1273 {
1274         pte_t *pte;
1275         vm_paddr_t pa;
1276         pte_t flags;
1277         int error, su, sync;
1278
1279         pa = VM_PAGE_TO_PHYS(m);
1280         su = (pmap == kernel_pmap);
1281         sync = 0;
1282
1283         //debugf("mmu_booke_enter_locked: s (pmap=0x%08x su=%d tid=%d m=0x%08x va=0x%08x "
1284         //              "pa=0x%08x prot=0x%08x flags=%#x)\n",
1285         //              (u_int32_t)pmap, su, pmap->pm_tid,
1286         //              (u_int32_t)m, va, pa, prot, flags);
1287
1288         if (su) {
1289                 KASSERT(((va >= virtual_avail) &&
1290                     (va <= VM_MAX_KERNEL_ADDRESS)),
1291                     ("mmu_booke_enter_locked: kernel pmap, non kernel va"));
1292         } else {
1293                 KASSERT((va <= VM_MAXUSER_ADDRESS),
1294                     ("mmu_booke_enter_locked: user pmap, non user va"));
1295         }
1296         if ((m->oflags & VPO_UNMANAGED) == 0) {
1297                 if ((pmap_flags & PMAP_ENTER_QUICK_LOCKED) == 0)
1298                         VM_PAGE_OBJECT_BUSY_ASSERT(m);
1299                 else
1300                         VM_OBJECT_ASSERT_LOCKED(m->object);
1301         }
1302
1303         PMAP_LOCK_ASSERT(pmap, MA_OWNED);
1304
1305         /*
1306          * If there is an existing mapping, and the physical address has not
1307          * changed, must be protection or wiring change.
1308          */
1309         if (((pte = pte_find(pmap, va)) != NULL) &&
1310             (PTE_ISVALID(pte)) && (PTE_PA(pte) == pa)) {
1311
1312                 /*
1313                  * Before actually updating pte->flags we calculate and
1314                  * prepare its new value in a helper var.
1315                  */
1316                 flags = *pte;
1317                 flags &= ~(PTE_UW | PTE_UX | PTE_SW | PTE_SX | PTE_MODIFIED);
1318
1319                 /* Wiring change, just update stats. */
1320                 if ((pmap_flags & PMAP_ENTER_WIRED) != 0) {
1321                         if (!PTE_ISWIRED(pte)) {
1322                                 flags |= PTE_WIRED;
1323                                 pmap->pm_stats.wired_count++;
1324                         }
1325                 } else {
1326                         if (PTE_ISWIRED(pte)) {
1327                                 flags &= ~PTE_WIRED;
1328                                 pmap->pm_stats.wired_count--;
1329                         }
1330                 }
1331
1332                 if (prot & VM_PROT_WRITE) {
1333                         /* Add write permissions. */
1334                         flags |= PTE_SW;
1335                         if (!su)
1336                                 flags |= PTE_UW;
1337
1338                         if ((flags & PTE_MANAGED) != 0)
1339                                 vm_page_aflag_set(m, PGA_WRITEABLE);
1340                 } else {
1341                         /* Handle modified pages, sense modify status. */
1342
1343                         /*
1344                          * The PTE_MODIFIED flag could be set by underlying
1345                          * TLB misses since we last read it (above), possibly
1346                          * other CPUs could update it so we check in the PTE
1347                          * directly rather than rely on that saved local flags
1348                          * copy.
1349                          */
1350                         if (PTE_ISMODIFIED(pte))
1351                                 vm_page_dirty(m);
1352                 }
1353
1354                 if (prot & VM_PROT_EXECUTE) {
1355                         flags |= PTE_SX;
1356                         if (!su)
1357                                 flags |= PTE_UX;
1358
1359                         /*
1360                          * Check existing flags for execute permissions: if we
1361                          * are turning execute permissions on, icache should
1362                          * be flushed.
1363                          */
1364                         if ((*pte & (PTE_UX | PTE_SX)) == 0)
1365                                 sync++;
1366                 }
1367
1368                 flags &= ~PTE_REFERENCED;
1369
1370                 /*
1371                  * The new flags value is all calculated -- only now actually
1372                  * update the PTE.
1373                  */
1374                 mtx_lock_spin(&tlbivax_mutex);
1375                 tlb_miss_lock();
1376
1377                 tlb0_flush_entry(va);
1378                 *pte &= ~PTE_FLAGS_MASK;
1379                 *pte |= flags;
1380
1381                 tlb_miss_unlock();
1382                 mtx_unlock_spin(&tlbivax_mutex);
1383
1384         } else {
1385                 /*
1386                  * If there is an existing mapping, but it's for a different
1387                  * physical address, pte_enter() will delete the old mapping.
1388                  */
1389                 //if ((pte != NULL) && PTE_ISVALID(pte))
1390                 //      debugf("mmu_booke_enter_locked: replace\n");
1391                 //else
1392                 //      debugf("mmu_booke_enter_locked: new\n");
1393
1394                 /* Now set up the flags and install the new mapping. */
1395                 flags = (PTE_SR | PTE_VALID);
1396                 flags |= PTE_M;
1397
1398                 if (!su)
1399                         flags |= PTE_UR;
1400
1401                 if (prot & VM_PROT_WRITE) {
1402                         flags |= PTE_SW;
1403                         if (!su)
1404                                 flags |= PTE_UW;
1405
1406                         if ((m->oflags & VPO_UNMANAGED) == 0)
1407                                 vm_page_aflag_set(m, PGA_WRITEABLE);
1408                 }
1409
1410                 if (prot & VM_PROT_EXECUTE) {
1411                         flags |= PTE_SX;
1412                         if (!su)
1413                                 flags |= PTE_UX;
1414                 }
1415
1416                 /* If its wired update stats. */
1417                 if ((pmap_flags & PMAP_ENTER_WIRED) != 0)
1418                         flags |= PTE_WIRED;
1419
1420                 error = pte_enter(pmap, m, va, flags,
1421                     (pmap_flags & PMAP_ENTER_NOSLEEP) != 0);
1422                 if (error != 0)
1423                         return (KERN_RESOURCE_SHORTAGE);
1424
1425                 if ((flags & PMAP_ENTER_WIRED) != 0)
1426                         pmap->pm_stats.wired_count++;
1427
1428                 /* Flush the real memory from the instruction cache. */
1429                 if (prot & VM_PROT_EXECUTE)
1430                         sync++;
1431         }
1432
1433         if (sync && (su || pmap == PCPU_GET(curpmap))) {
1434                 __syncicache((void *)va, PAGE_SIZE);
1435                 sync = 0;
1436         }
1437
1438         return (KERN_SUCCESS);
1439 }
1440
1441 /*
1442  * Maps a sequence of resident pages belonging to the same object.
1443  * The sequence begins with the given page m_start.  This page is
1444  * mapped at the given virtual address start.  Each subsequent page is
1445  * mapped at a virtual address that is offset from start by the same
1446  * amount as the page is offset from m_start within the object.  The
1447  * last page in the sequence is the page with the largest offset from
1448  * m_start that can be mapped at a virtual address less than the given
1449  * virtual address end.  Not every virtual page between start and end
1450  * is mapped; only those for which a resident page exists with the
1451  * corresponding offset from m_start are mapped.
1452  */
1453 static void
1454 mmu_booke_enter_object(pmap_t pmap, vm_offset_t start,
1455     vm_offset_t end, vm_page_t m_start, vm_prot_t prot)
1456 {
1457         vm_page_t m;
1458         vm_pindex_t diff, psize;
1459
1460         VM_OBJECT_ASSERT_LOCKED(m_start->object);
1461
1462         psize = atop(end - start);
1463         m = m_start;
1464         rw_wlock(&pvh_global_lock);
1465         PMAP_LOCK(pmap);
1466         while (m != NULL && (diff = m->pindex - m_start->pindex) < psize) {
1467                 mmu_booke_enter_locked(pmap, start + ptoa(diff), m,
1468                     prot & (VM_PROT_READ | VM_PROT_EXECUTE),
1469                     PMAP_ENTER_NOSLEEP | PMAP_ENTER_QUICK_LOCKED, 0);
1470                 m = TAILQ_NEXT(m, listq);
1471         }
1472         PMAP_UNLOCK(pmap);
1473         rw_wunlock(&pvh_global_lock);
1474 }
1475
1476 static void
1477 mmu_booke_enter_quick(pmap_t pmap, vm_offset_t va, vm_page_t m,
1478     vm_prot_t prot)
1479 {
1480
1481         rw_wlock(&pvh_global_lock);
1482         PMAP_LOCK(pmap);
1483         mmu_booke_enter_locked(pmap, va, m,
1484             prot & (VM_PROT_READ | VM_PROT_EXECUTE), PMAP_ENTER_NOSLEEP |
1485             PMAP_ENTER_QUICK_LOCKED, 0);
1486         PMAP_UNLOCK(pmap);
1487         rw_wunlock(&pvh_global_lock);
1488 }
1489
1490 /*
1491  * Remove the given range of addresses from the specified map.
1492  *
1493  * It is assumed that the start and end are properly rounded to the page size.
1494  */
1495 static void
1496 mmu_booke_remove(pmap_t pmap, vm_offset_t va, vm_offset_t endva)
1497 {
1498         pte_t *pte;
1499         uint8_t hold_flag;
1500
1501         int su = (pmap == kernel_pmap);
1502
1503         //debugf("mmu_booke_remove: s (su = %d pmap=0x%08x tid=%d va=0x%08x endva=0x%08x)\n",
1504         //              su, (u_int32_t)pmap, pmap->pm_tid, va, endva);
1505
1506         if (su) {
1507                 KASSERT(((va >= virtual_avail) &&
1508                     (va <= VM_MAX_KERNEL_ADDRESS)),
1509                     ("mmu_booke_remove: kernel pmap, non kernel va"));
1510         } else {
1511                 KASSERT((va <= VM_MAXUSER_ADDRESS),
1512                     ("mmu_booke_remove: user pmap, non user va"));
1513         }
1514
1515         if (PMAP_REMOVE_DONE(pmap)) {
1516                 //debugf("mmu_booke_remove: e (empty)\n");
1517                 return;
1518         }
1519
1520         hold_flag = PTBL_HOLD_FLAG(pmap);
1521         //debugf("mmu_booke_remove: hold_flag = %d\n", hold_flag);
1522
1523         rw_wlock(&pvh_global_lock);
1524         PMAP_LOCK(pmap);
1525         for (; va < endva; va += PAGE_SIZE) {
1526                 pte = pte_find_next(pmap, &va);
1527                 if ((pte == NULL) || !PTE_ISVALID(pte))
1528                         break;
1529                 if (va >= endva)
1530                         break;
1531                 pte_remove(pmap, va, hold_flag);
1532         }
1533         PMAP_UNLOCK(pmap);
1534         rw_wunlock(&pvh_global_lock);
1535
1536         //debugf("mmu_booke_remove: e\n");
1537 }
1538
1539 /*
1540  * Remove physical page from all pmaps in which it resides.
1541  */
1542 static void
1543 mmu_booke_remove_all(vm_page_t m)
1544 {
1545         pv_entry_t pv, pvn;
1546         uint8_t hold_flag;
1547
1548         rw_wlock(&pvh_global_lock);
1549         TAILQ_FOREACH_SAFE(pv, &m->md.pv_list, pv_link, pvn) {
1550                 PMAP_LOCK(pv->pv_pmap);
1551                 hold_flag = PTBL_HOLD_FLAG(pv->pv_pmap);
1552                 pte_remove(pv->pv_pmap, pv->pv_va, hold_flag);
1553                 PMAP_UNLOCK(pv->pv_pmap);
1554         }
1555         vm_page_aflag_clear(m, PGA_WRITEABLE);
1556         rw_wunlock(&pvh_global_lock);
1557 }
1558
1559 /*
1560  * Map a range of physical addresses into kernel virtual address space.
1561  */
1562 static vm_offset_t
1563 mmu_booke_map(vm_offset_t *virt, vm_paddr_t pa_start,
1564     vm_paddr_t pa_end, int prot)
1565 {
1566         vm_offset_t sva = *virt;
1567         vm_offset_t va = sva;
1568
1569 #ifdef __powerpc64__
1570         /* XXX: Handle memory not starting at 0x0. */
1571         if (pa_end < ctob(Maxmem))
1572                 return (PHYS_TO_DMAP(pa_start));
1573 #endif
1574
1575         while (pa_start < pa_end) {
1576                 mmu_booke_kenter(va, pa_start);
1577                 va += PAGE_SIZE;
1578                 pa_start += PAGE_SIZE;
1579         }
1580         *virt = va;
1581
1582         return (sva);
1583 }
1584
1585 /*
1586  * The pmap must be activated before it's address space can be accessed in any
1587  * way.
1588  */
1589 static void
1590 mmu_booke_activate(struct thread *td)
1591 {
1592         pmap_t pmap;
1593         u_int cpuid;
1594
1595         pmap = &td->td_proc->p_vmspace->vm_pmap;
1596
1597         CTR5(KTR_PMAP, "%s: s (td = %p, proc = '%s', id = %d, pmap = 0x%"PRI0ptrX")",
1598             __func__, td, td->td_proc->p_comm, td->td_proc->p_pid, pmap);
1599
1600         KASSERT((pmap != kernel_pmap), ("mmu_booke_activate: kernel_pmap!"));
1601
1602         sched_pin();
1603
1604         cpuid = PCPU_GET(cpuid);
1605         CPU_SET_ATOMIC(cpuid, &pmap->pm_active);
1606         PCPU_SET(curpmap, pmap);
1607
1608         if (pmap->pm_tid[cpuid] == TID_NONE)
1609                 tid_alloc(pmap);
1610
1611         /* Load PID0 register with pmap tid value. */
1612         mtspr(SPR_PID0, pmap->pm_tid[cpuid]);
1613         __asm __volatile("isync");
1614
1615         mtspr(SPR_DBCR0, td->td_pcb->pcb_cpu.booke.dbcr0);
1616
1617         sched_unpin();
1618
1619         CTR3(KTR_PMAP, "%s: e (tid = %d for '%s')", __func__,
1620             pmap->pm_tid[PCPU_GET(cpuid)], td->td_proc->p_comm);
1621 }
1622
1623 /*
1624  * Deactivate the specified process's address space.
1625  */
1626 static void
1627 mmu_booke_deactivate(struct thread *td)
1628 {
1629         pmap_t pmap;
1630
1631         pmap = &td->td_proc->p_vmspace->vm_pmap;
1632
1633         CTR5(KTR_PMAP, "%s: td=%p, proc = '%s', id = %d, pmap = 0x%"PRI0ptrX,
1634             __func__, td, td->td_proc->p_comm, td->td_proc->p_pid, pmap);
1635
1636         td->td_pcb->pcb_cpu.booke.dbcr0 = mfspr(SPR_DBCR0);
1637
1638         CPU_CLR_ATOMIC(PCPU_GET(cpuid), &pmap->pm_active);
1639         PCPU_SET(curpmap, NULL);
1640 }
1641
1642 /*
1643  * Copy the range specified by src_addr/len
1644  * from the source map to the range dst_addr/len
1645  * in the destination map.
1646  *
1647  * This routine is only advisory and need not do anything.
1648  */
1649 static void
1650 mmu_booke_copy(pmap_t dst_pmap, pmap_t src_pmap,
1651     vm_offset_t dst_addr, vm_size_t len, vm_offset_t src_addr)
1652 {
1653
1654 }
1655
1656 /*
1657  * Set the physical protection on the specified range of this map as requested.
1658  */
1659 static void
1660 mmu_booke_protect(pmap_t pmap, vm_offset_t sva, vm_offset_t eva,
1661     vm_prot_t prot)
1662 {
1663         vm_offset_t va;
1664         vm_page_t m;
1665         pte_t *pte;
1666
1667         if ((prot & VM_PROT_READ) == VM_PROT_NONE) {
1668                 mmu_booke_remove(pmap, sva, eva);
1669                 return;
1670         }
1671
1672         if (prot & VM_PROT_WRITE)
1673                 return;
1674
1675         PMAP_LOCK(pmap);
1676         for (va = sva; va < eva; va += PAGE_SIZE) {
1677                 if ((pte = pte_find(pmap, va)) != NULL) {
1678                         if (PTE_ISVALID(pte)) {
1679                                 m = PHYS_TO_VM_PAGE(PTE_PA(pte));
1680
1681                                 mtx_lock_spin(&tlbivax_mutex);
1682                                 tlb_miss_lock();
1683
1684                                 /* Handle modified pages. */
1685                                 if (PTE_ISMODIFIED(pte) && PTE_ISMANAGED(pte))
1686                                         vm_page_dirty(m);
1687
1688                                 tlb0_flush_entry(va);
1689                                 *pte &= ~(PTE_UW | PTE_SW | PTE_MODIFIED);
1690
1691                                 tlb_miss_unlock();
1692                                 mtx_unlock_spin(&tlbivax_mutex);
1693                         }
1694                 }
1695         }
1696         PMAP_UNLOCK(pmap);
1697 }
1698
1699 /*
1700  * Clear the write and modified bits in each of the given page's mappings.
1701  */
1702 static void
1703 mmu_booke_remove_write(vm_page_t m)
1704 {
1705         pv_entry_t pv;
1706         pte_t *pte;
1707
1708         KASSERT((m->oflags & VPO_UNMANAGED) == 0,
1709             ("mmu_booke_remove_write: page %p is not managed", m));
1710         vm_page_assert_busied(m);
1711
1712         if (!pmap_page_is_write_mapped(m))
1713                 return;
1714         rw_wlock(&pvh_global_lock);
1715         TAILQ_FOREACH(pv, &m->md.pv_list, pv_link) {
1716                 PMAP_LOCK(pv->pv_pmap);
1717                 if ((pte = pte_find(pv->pv_pmap, pv->pv_va)) != NULL) {
1718                         if (PTE_ISVALID(pte)) {
1719                                 m = PHYS_TO_VM_PAGE(PTE_PA(pte));
1720
1721                                 mtx_lock_spin(&tlbivax_mutex);
1722                                 tlb_miss_lock();
1723
1724                                 /* Handle modified pages. */
1725                                 if (PTE_ISMODIFIED(pte))
1726                                         vm_page_dirty(m);
1727
1728                                 /* Flush mapping from TLB0. */
1729                                 *pte &= ~(PTE_UW | PTE_SW | PTE_MODIFIED);
1730
1731                                 tlb_miss_unlock();
1732                                 mtx_unlock_spin(&tlbivax_mutex);
1733                         }
1734                 }
1735                 PMAP_UNLOCK(pv->pv_pmap);
1736         }
1737         vm_page_aflag_clear(m, PGA_WRITEABLE);
1738         rw_wunlock(&pvh_global_lock);
1739 }
1740
1741 /*
1742  * Atomically extract and hold the physical page with the given
1743  * pmap and virtual address pair if that mapping permits the given
1744  * protection.
1745  */
1746 static vm_page_t
1747 mmu_booke_extract_and_hold(pmap_t pmap, vm_offset_t va,
1748     vm_prot_t prot)
1749 {
1750         pte_t *pte;
1751         vm_page_t m;
1752         uint32_t pte_wbit;
1753
1754         m = NULL;
1755         PMAP_LOCK(pmap);
1756         pte = pte_find(pmap, va);
1757         if ((pte != NULL) && PTE_ISVALID(pte)) {
1758                 if (pmap == kernel_pmap)
1759                         pte_wbit = PTE_SW;
1760                 else
1761                         pte_wbit = PTE_UW;
1762
1763                 if ((*pte & pte_wbit) != 0 || (prot & VM_PROT_WRITE) == 0) {
1764                         m = PHYS_TO_VM_PAGE(PTE_PA(pte));
1765                         if (!vm_page_wire_mapped(m))
1766                                 m = NULL;
1767                 }
1768         }
1769         PMAP_UNLOCK(pmap);
1770         return (m);
1771 }
1772
1773 /*
1774  * Initialize a vm_page's machine-dependent fields.
1775  */
1776 static void
1777 mmu_booke_page_init(vm_page_t m)
1778 {
1779
1780         m->md.pv_tracked = 0;
1781         TAILQ_INIT(&m->md.pv_list);
1782 }
1783
1784 /*
1785  * Return whether or not the specified physical page was modified
1786  * in any of physical maps.
1787  */
1788 static boolean_t
1789 mmu_booke_is_modified(vm_page_t m)
1790 {
1791         pte_t *pte;
1792         pv_entry_t pv;
1793         boolean_t rv;
1794
1795         KASSERT((m->oflags & VPO_UNMANAGED) == 0,
1796             ("mmu_booke_is_modified: page %p is not managed", m));
1797         rv = FALSE;
1798
1799         /*
1800          * If the page is not busied then this check is racy.
1801          */
1802         if (!pmap_page_is_write_mapped(m))
1803                 return (FALSE);
1804
1805         rw_wlock(&pvh_global_lock);
1806         TAILQ_FOREACH(pv, &m->md.pv_list, pv_link) {
1807                 PMAP_LOCK(pv->pv_pmap);
1808                 if ((pte = pte_find(pv->pv_pmap, pv->pv_va)) != NULL &&
1809                     PTE_ISVALID(pte)) {
1810                         if (PTE_ISMODIFIED(pte))
1811                                 rv = TRUE;
1812                 }
1813                 PMAP_UNLOCK(pv->pv_pmap);
1814                 if (rv)
1815                         break;
1816         }
1817         rw_wunlock(&pvh_global_lock);
1818         return (rv);
1819 }
1820
1821 /*
1822  * Return whether or not the specified virtual address is eligible
1823  * for prefault.
1824  */
1825 static boolean_t
1826 mmu_booke_is_prefaultable(pmap_t pmap, vm_offset_t addr)
1827 {
1828
1829         return (FALSE);
1830 }
1831
1832 /*
1833  * Return whether or not the specified physical page was referenced
1834  * in any physical maps.
1835  */
1836 static boolean_t
1837 mmu_booke_is_referenced(vm_page_t m)
1838 {
1839         pte_t *pte;
1840         pv_entry_t pv;
1841         boolean_t rv;
1842
1843         KASSERT((m->oflags & VPO_UNMANAGED) == 0,
1844             ("mmu_booke_is_referenced: page %p is not managed", m));
1845         rv = FALSE;
1846         rw_wlock(&pvh_global_lock);
1847         TAILQ_FOREACH(pv, &m->md.pv_list, pv_link) {
1848                 PMAP_LOCK(pv->pv_pmap);
1849                 if ((pte = pte_find(pv->pv_pmap, pv->pv_va)) != NULL &&
1850                     PTE_ISVALID(pte)) {
1851                         if (PTE_ISREFERENCED(pte))
1852                                 rv = TRUE;
1853                 }
1854                 PMAP_UNLOCK(pv->pv_pmap);
1855                 if (rv)
1856                         break;
1857         }
1858         rw_wunlock(&pvh_global_lock);
1859         return (rv);
1860 }
1861
1862 /*
1863  * Clear the modify bits on the specified physical page.
1864  */
1865 static void
1866 mmu_booke_clear_modify(vm_page_t m)
1867 {
1868         pte_t *pte;
1869         pv_entry_t pv;
1870
1871         KASSERT((m->oflags & VPO_UNMANAGED) == 0,
1872             ("mmu_booke_clear_modify: page %p is not managed", m));
1873         vm_page_assert_busied(m);
1874
1875         if (!pmap_page_is_write_mapped(m))
1876                 return;
1877
1878         rw_wlock(&pvh_global_lock);
1879         TAILQ_FOREACH(pv, &m->md.pv_list, pv_link) {
1880                 PMAP_LOCK(pv->pv_pmap);
1881                 if ((pte = pte_find(pv->pv_pmap, pv->pv_va)) != NULL &&
1882                     PTE_ISVALID(pte)) {
1883                         mtx_lock_spin(&tlbivax_mutex);
1884                         tlb_miss_lock();
1885
1886                         if (*pte & (PTE_SW | PTE_UW | PTE_MODIFIED)) {
1887                                 tlb0_flush_entry(pv->pv_va);
1888                                 *pte &= ~(PTE_SW | PTE_UW | PTE_MODIFIED |
1889                                     PTE_REFERENCED);
1890                         }
1891
1892                         tlb_miss_unlock();
1893                         mtx_unlock_spin(&tlbivax_mutex);
1894                 }
1895                 PMAP_UNLOCK(pv->pv_pmap);
1896         }
1897         rw_wunlock(&pvh_global_lock);
1898 }
1899
1900 /*
1901  * Return a count of reference bits for a page, clearing those bits.
1902  * It is not necessary for every reference bit to be cleared, but it
1903  * is necessary that 0 only be returned when there are truly no
1904  * reference bits set.
1905  *
1906  * As an optimization, update the page's dirty field if a modified bit is
1907  * found while counting reference bits.  This opportunistic update can be
1908  * performed at low cost and can eliminate the need for some future calls
1909  * to pmap_is_modified().  However, since this function stops after
1910  * finding PMAP_TS_REFERENCED_MAX reference bits, it may not detect some
1911  * dirty pages.  Those dirty pages will only be detected by a future call
1912  * to pmap_is_modified().
1913  */
1914 static int
1915 mmu_booke_ts_referenced(vm_page_t m)
1916 {
1917         pte_t *pte;
1918         pv_entry_t pv;
1919         int count;
1920
1921         KASSERT((m->oflags & VPO_UNMANAGED) == 0,
1922             ("mmu_booke_ts_referenced: page %p is not managed", m));
1923         count = 0;
1924         rw_wlock(&pvh_global_lock);
1925         TAILQ_FOREACH(pv, &m->md.pv_list, pv_link) {
1926                 PMAP_LOCK(pv->pv_pmap);
1927                 if ((pte = pte_find(pv->pv_pmap, pv->pv_va)) != NULL &&
1928                     PTE_ISVALID(pte)) {
1929                         if (PTE_ISMODIFIED(pte))
1930                                 vm_page_dirty(m);
1931                         if (PTE_ISREFERENCED(pte)) {
1932                                 mtx_lock_spin(&tlbivax_mutex);
1933                                 tlb_miss_lock();
1934
1935                                 tlb0_flush_entry(pv->pv_va);
1936                                 *pte &= ~PTE_REFERENCED;
1937
1938                                 tlb_miss_unlock();
1939                                 mtx_unlock_spin(&tlbivax_mutex);
1940
1941                                 if (++count >= PMAP_TS_REFERENCED_MAX) {
1942                                         PMAP_UNLOCK(pv->pv_pmap);
1943                                         break;
1944                                 }
1945                         }
1946                 }
1947                 PMAP_UNLOCK(pv->pv_pmap);
1948         }
1949         rw_wunlock(&pvh_global_lock);
1950         return (count);
1951 }
1952
1953 /*
1954  * Clear the wired attribute from the mappings for the specified range of
1955  * addresses in the given pmap.  Every valid mapping within that range must
1956  * have the wired attribute set.  In contrast, invalid mappings cannot have
1957  * the wired attribute set, so they are ignored.
1958  *
1959  * The wired attribute of the page table entry is not a hardware feature, so
1960  * there is no need to invalidate any TLB entries.
1961  */
1962 static void
1963 mmu_booke_unwire(pmap_t pmap, vm_offset_t sva, vm_offset_t eva)
1964 {
1965         vm_offset_t va;
1966         pte_t *pte;
1967
1968         PMAP_LOCK(pmap);
1969         for (va = sva; va < eva; va += PAGE_SIZE) {
1970                 if ((pte = pte_find(pmap, va)) != NULL &&
1971                     PTE_ISVALID(pte)) {
1972                         if (!PTE_ISWIRED(pte))
1973                                 panic("mmu_booke_unwire: pte %p isn't wired",
1974                                     pte);
1975                         *pte &= ~PTE_WIRED;
1976                         pmap->pm_stats.wired_count--;
1977                 }
1978         }
1979         PMAP_UNLOCK(pmap);
1980
1981 }
1982
1983 /*
1984  * Return true if the pmap's pv is one of the first 16 pvs linked to from this
1985  * page.  This count may be changed upwards or downwards in the future; it is
1986  * only necessary that true be returned for a small subset of pmaps for proper
1987  * page aging.
1988  */
1989 static boolean_t
1990 mmu_booke_page_exists_quick(pmap_t pmap, vm_page_t m)
1991 {
1992         pv_entry_t pv;
1993         int loops;
1994         boolean_t rv;
1995
1996         KASSERT((m->oflags & VPO_UNMANAGED) == 0,
1997             ("mmu_booke_page_exists_quick: page %p is not managed", m));
1998         loops = 0;
1999         rv = FALSE;
2000         rw_wlock(&pvh_global_lock);
2001         TAILQ_FOREACH(pv, &m->md.pv_list, pv_link) {
2002                 if (pv->pv_pmap == pmap) {
2003                         rv = TRUE;
2004                         break;
2005                 }
2006                 if (++loops >= 16)
2007                         break;
2008         }
2009         rw_wunlock(&pvh_global_lock);
2010         return (rv);
2011 }
2012
2013 /*
2014  * Return the number of managed mappings to the given physical page that are
2015  * wired.
2016  */
2017 static int
2018 mmu_booke_page_wired_mappings(vm_page_t m)
2019 {
2020         pv_entry_t pv;
2021         pte_t *pte;
2022         int count = 0;
2023
2024         if ((m->oflags & VPO_UNMANAGED) != 0)
2025                 return (count);
2026         rw_wlock(&pvh_global_lock);
2027         TAILQ_FOREACH(pv, &m->md.pv_list, pv_link) {
2028                 PMAP_LOCK(pv->pv_pmap);
2029                 if ((pte = pte_find(pv->pv_pmap, pv->pv_va)) != NULL)
2030                         if (PTE_ISVALID(pte) && PTE_ISWIRED(pte))
2031                                 count++;
2032                 PMAP_UNLOCK(pv->pv_pmap);
2033         }
2034         rw_wunlock(&pvh_global_lock);
2035         return (count);
2036 }
2037
2038 static int
2039 mmu_booke_dev_direct_mapped(vm_paddr_t pa, vm_size_t size)
2040 {
2041         int i;
2042         vm_offset_t va;
2043
2044         /*
2045          * This currently does not work for entries that
2046          * overlap TLB1 entries.
2047          */
2048         for (i = 0; i < TLB1_ENTRIES; i ++) {
2049                 if (tlb1_iomapped(i, pa, size, &va) == 0)
2050                         return (0);
2051         }
2052
2053         return (EFAULT);
2054 }
2055
2056 void
2057 mmu_booke_dumpsys_map(vm_paddr_t pa, size_t sz, void **va)
2058 {
2059         vm_paddr_t ppa;
2060         vm_offset_t ofs;
2061         vm_size_t gran;
2062
2063         /* Minidumps are based on virtual memory addresses. */
2064         if (do_minidump) {
2065                 *va = (void *)(vm_offset_t)pa;
2066                 return;
2067         }
2068
2069         /* Raw physical memory dumps don't have a virtual address. */
2070         /* We always map a 256MB page at 256M. */
2071         gran = 256 * 1024 * 1024;
2072         ppa = rounddown2(pa, gran);
2073         ofs = pa - ppa;
2074         *va = (void *)gran;
2075         tlb1_set_entry((vm_offset_t)va, ppa, gran, _TLB_ENTRY_IO);
2076
2077         if (sz > (gran - ofs))
2078                 tlb1_set_entry((vm_offset_t)(va + gran), ppa + gran, gran,
2079                     _TLB_ENTRY_IO);
2080 }
2081
2082 void
2083 mmu_booke_dumpsys_unmap(vm_paddr_t pa, size_t sz, void *va)
2084 {
2085         vm_paddr_t ppa;
2086         vm_offset_t ofs;
2087         vm_size_t gran;
2088         tlb_entry_t e;
2089         int i;
2090
2091         /* Minidumps are based on virtual memory addresses. */
2092         /* Nothing to do... */
2093         if (do_minidump)
2094                 return;
2095
2096         for (i = 0; i < TLB1_ENTRIES; i++) {
2097                 tlb1_read_entry(&e, i);
2098                 if (!(e.mas1 & MAS1_VALID))
2099                         break;
2100         }
2101
2102         /* Raw physical memory dumps don't have a virtual address. */
2103         i--;
2104         e.mas1 = 0;
2105         e.mas2 = 0;
2106         e.mas3 = 0;
2107         tlb1_write_entry(&e, i);
2108
2109         gran = 256 * 1024 * 1024;
2110         ppa = rounddown2(pa, gran);
2111         ofs = pa - ppa;
2112         if (sz > (gran - ofs)) {
2113                 i--;
2114                 e.mas1 = 0;
2115                 e.mas2 = 0;
2116                 e.mas3 = 0;
2117                 tlb1_write_entry(&e, i);
2118         }
2119 }
2120
2121 extern struct dump_pa dump_map[PHYS_AVAIL_SZ + 1];
2122
2123 void
2124 mmu_booke_scan_init(void)
2125 {
2126         vm_offset_t va;
2127         pte_t *pte;
2128         int i;
2129
2130         if (!do_minidump) {
2131                 /* Initialize phys. segments for dumpsys(). */
2132                 memset(&dump_map, 0, sizeof(dump_map));
2133                 mem_regions(&physmem_regions, &physmem_regions_sz, &availmem_regions,
2134                     &availmem_regions_sz);
2135                 for (i = 0; i < physmem_regions_sz; i++) {
2136                         dump_map[i].pa_start = physmem_regions[i].mr_start;
2137                         dump_map[i].pa_size = physmem_regions[i].mr_size;
2138                 }
2139                 return;
2140         }
2141
2142         /* Virtual segments for minidumps: */
2143         memset(&dump_map, 0, sizeof(dump_map));
2144
2145         /* 1st: kernel .data and .bss. */
2146         dump_map[0].pa_start = trunc_page((uintptr_t)_etext);
2147         dump_map[0].pa_size =
2148             round_page((uintptr_t)_end) - dump_map[0].pa_start;
2149
2150         /* 2nd: msgbuf and tables (see pmap_bootstrap()). */
2151         dump_map[1].pa_start = data_start;
2152         dump_map[1].pa_size = data_end - data_start;
2153
2154         /* 3rd: kernel VM. */
2155         va = dump_map[1].pa_start + dump_map[1].pa_size;
2156         /* Find start of next chunk (from va). */
2157         while (va < virtual_end) {
2158                 /* Don't dump the buffer cache. */
2159                 if (va >= kmi.buffer_sva && va < kmi.buffer_eva) {
2160                         va = kmi.buffer_eva;
2161                         continue;
2162                 }
2163                 pte = pte_find(kernel_pmap, va);
2164                 if (pte != NULL && PTE_ISVALID(pte))
2165                         break;
2166                 va += PAGE_SIZE;
2167         }
2168         if (va < virtual_end) {
2169                 dump_map[2].pa_start = va;
2170                 va += PAGE_SIZE;
2171                 /* Find last page in chunk. */
2172                 while (va < virtual_end) {
2173                         /* Don't run into the buffer cache. */
2174                         if (va == kmi.buffer_sva)
2175                                 break;
2176                         pte = pte_find(kernel_pmap, va);
2177                         if (pte == NULL || !PTE_ISVALID(pte))
2178                                 break;
2179                         va += PAGE_SIZE;
2180                 }
2181                 dump_map[2].pa_size = va - dump_map[2].pa_start;
2182         }
2183 }
2184
2185 /*
2186  * Map a set of physical memory pages into the kernel virtual address space.
2187  * Return a pointer to where it is mapped. This routine is intended to be used
2188  * for mapping device memory, NOT real memory.
2189  */
2190 static void *
2191 mmu_booke_mapdev(vm_paddr_t pa, vm_size_t size)
2192 {
2193
2194         return (mmu_booke_mapdev_attr(pa, size, VM_MEMATTR_DEFAULT));
2195 }
2196
2197 static int
2198 tlb1_find_pa(vm_paddr_t pa, tlb_entry_t *e)
2199 {
2200         int i;
2201
2202         for (i = 0; i < TLB1_ENTRIES; i++) {
2203                 tlb1_read_entry(e, i);
2204                 if ((e->mas1 & MAS1_VALID) == 0)
2205                         continue;
2206                 if (e->phys == pa)
2207                         return (i);
2208         }
2209         return (-1);
2210 }
2211
2212 static void *
2213 mmu_booke_mapdev_attr(vm_paddr_t pa, vm_size_t size, vm_memattr_t ma)
2214 {
2215         tlb_entry_t e;
2216         vm_paddr_t tmppa;
2217 #ifndef __powerpc64__
2218         uintptr_t tmpva;
2219 #endif
2220         uintptr_t va, retva;
2221         vm_size_t sz;
2222         int i;
2223         int wimge;
2224
2225         /*
2226          * Check if this is premapped in TLB1.
2227          */
2228         sz = size;
2229         tmppa = pa;
2230         va = ~0;
2231         wimge = tlb_calc_wimg(pa, ma);
2232         for (i = 0; i < TLB1_ENTRIES; i++) {
2233                 tlb1_read_entry(&e, i);
2234                 if (!(e.mas1 & MAS1_VALID))
2235                         continue;
2236                 if (wimge != (e.mas2 & (MAS2_WIMGE_MASK & ~_TLB_ENTRY_SHARED)))
2237                         continue;
2238                 if (tmppa >= e.phys && tmppa < e.phys + e.size) {
2239                         va = e.virt + (pa - e.phys);
2240                         tmppa = e.phys + e.size;
2241                         sz -= MIN(sz, e.size - (pa - e.phys));
2242                         while (sz > 0 && (i = tlb1_find_pa(tmppa, &e)) != -1) {
2243                                 if (wimge != (e.mas2 & (MAS2_WIMGE_MASK & ~_TLB_ENTRY_SHARED)))
2244                                         break;
2245                                 sz -= MIN(sz, e.size);
2246                                 tmppa = e.phys + e.size;
2247                         }
2248                         if (sz != 0)
2249                                 break;
2250                         return ((void *)va);
2251                 }
2252         }
2253
2254         size = roundup(size, PAGE_SIZE);
2255
2256 #ifdef __powerpc64__
2257         KASSERT(pa < VM_MAPDEV_PA_MAX,
2258             ("Unsupported physical address! %lx", pa));
2259         va = VM_MAPDEV_BASE + pa;
2260         retva = va;
2261 #ifdef POW2_MAPPINGS
2262         /*
2263          * Align the mapping to a power of 2 size, taking into account that we
2264          * may need to increase the size multiple times to satisfy the size and
2265          * alignment requirements.
2266          *
2267          * This works in the general case because it's very rare (near never?)
2268          * to have different access properties (WIMG) within a single
2269          * power-of-two region.  If a design does call for that, POW2_MAPPINGS
2270          * can be undefined, and exact mappings will be used instead.
2271          */
2272         sz = size;
2273         size = roundup2(size, 1 << ilog2(size));
2274         while (rounddown2(va, size) + size < va + sz)
2275                 size <<= 1;
2276         va = rounddown2(va, size);
2277         pa = rounddown2(pa, size);
2278 #endif
2279 #else
2280         /*
2281          * The device mapping area is between VM_MAXUSER_ADDRESS and
2282          * VM_MIN_KERNEL_ADDRESS.  This gives 1GB of device addressing.
2283          */
2284 #ifdef SPARSE_MAPDEV
2285         /*
2286          * With a sparse mapdev, align to the largest starting region.  This
2287          * could feasibly be optimized for a 'best-fit' alignment, but that
2288          * calculation could be very costly.
2289          * Align to the smaller of:
2290          * - first set bit in overlap of (pa & size mask)
2291          * - largest size envelope
2292          *
2293          * It's possible the device mapping may start at a PA that's not larger
2294          * than the size mask, so we need to offset in to maximize the TLB entry
2295          * range and minimize the number of used TLB entries.
2296          */
2297         do {
2298             tmpva = tlb1_map_base;
2299             sz = ffsl((~((1 << flsl(size-1)) - 1)) & pa);
2300             sz = sz ? min(roundup(sz + 3, 4), flsl(size) - 1) : flsl(size) - 1;
2301             va = roundup(tlb1_map_base, 1 << sz) | (((1 << sz) - 1) & pa);
2302         } while (!atomic_cmpset_int(&tlb1_map_base, tmpva, va + size));
2303 #endif
2304         va = atomic_fetchadd_int(&tlb1_map_base, size);
2305         retva = va;
2306 #endif
2307
2308         if (tlb1_mapin_region(va, pa, size, tlb_calc_wimg(pa, ma)) != size)
2309                 return (NULL);
2310
2311         return ((void *)retva);
2312 }
2313
2314 /*
2315  * 'Unmap' a range mapped by mmu_booke_mapdev().
2316  */
2317 static void
2318 mmu_booke_unmapdev(void *p, vm_size_t size)
2319 {
2320 #ifdef SUPPORTS_SHRINKING_TLB1
2321         vm_offset_t base, offset, va;
2322
2323         /*
2324          * Unmap only if this is inside kernel virtual space.
2325          */
2326         va = (vm_offset_t)p;
2327         if ((va >= VM_MIN_KERNEL_ADDRESS) && (va <= VM_MAX_KERNEL_ADDRESS)) {
2328                 base = trunc_page(va);
2329                 offset = va & PAGE_MASK;
2330                 size = roundup(offset + size, PAGE_SIZE);
2331                 mmu_booke_qremove(base, atop(size));
2332                 kva_free(base, size);
2333         }
2334 #endif
2335 }
2336
2337 /*
2338  * mmu_booke_object_init_pt preloads the ptes for a given object into the
2339  * specified pmap. This eliminates the blast of soft faults on process startup
2340  * and immediately after an mmap.
2341  */
2342 static void
2343 mmu_booke_object_init_pt(pmap_t pmap, vm_offset_t addr,
2344     vm_object_t object, vm_pindex_t pindex, vm_size_t size)
2345 {
2346
2347         VM_OBJECT_ASSERT_WLOCKED(object);
2348         KASSERT(object->type == OBJT_DEVICE || object->type == OBJT_SG,
2349             ("mmu_booke_object_init_pt: non-device object"));
2350 }
2351
2352 /*
2353  * Perform the pmap work for mincore.
2354  */
2355 static int
2356 mmu_booke_mincore(pmap_t pmap, vm_offset_t addr, vm_paddr_t *pap)
2357 {
2358
2359         /* XXX: this should be implemented at some point */
2360         return (0);
2361 }
2362
2363 static int
2364 mmu_booke_change_attr(vm_offset_t addr, vm_size_t sz, vm_memattr_t mode)
2365 {
2366         vm_offset_t va;
2367         pte_t *pte;
2368         int i, j;
2369         tlb_entry_t e;
2370
2371         addr = trunc_page(addr);
2372
2373         /* Only allow changes to mapped kernel addresses.  This includes:
2374          * - KVA
2375          * - DMAP (powerpc64)
2376          * - Device mappings
2377          */
2378         if (addr <= VM_MAXUSER_ADDRESS ||
2379 #ifdef __powerpc64__
2380             (addr >= tlb1_map_base && addr < DMAP_BASE_ADDRESS) ||
2381             (addr > DMAP_MAX_ADDRESS && addr < VM_MIN_KERNEL_ADDRESS) ||
2382 #else
2383             (addr >= tlb1_map_base && addr < VM_MIN_KERNEL_ADDRESS) ||
2384 #endif
2385             (addr > VM_MAX_KERNEL_ADDRESS))
2386                 return (EINVAL);
2387
2388         /* Check TLB1 mappings */
2389         for (i = 0; i < TLB1_ENTRIES; i++) {
2390                 tlb1_read_entry(&e, i);
2391                 if (!(e.mas1 & MAS1_VALID))
2392                         continue;
2393                 if (addr >= e.virt && addr < e.virt + e.size)
2394                         break;
2395         }
2396         if (i < TLB1_ENTRIES) {
2397                 /* Only allow full mappings to be modified for now. */
2398                 /* Validate the range. */
2399                 for (j = i, va = addr; va < addr + sz; va += e.size, j++) {
2400                         tlb1_read_entry(&e, j);
2401                         if (va != e.virt || (sz - (va - addr) < e.size))
2402                                 return (EINVAL);
2403                 }
2404                 for (va = addr; va < addr + sz; va += e.size, i++) {
2405                         tlb1_read_entry(&e, i);
2406                         e.mas2 &= ~MAS2_WIMGE_MASK;
2407                         e.mas2 |= tlb_calc_wimg(e.phys, mode);
2408
2409                         /*
2410                          * Write it out to the TLB.  Should really re-sync with other
2411                          * cores.
2412                          */
2413                         tlb1_write_entry(&e, i);
2414                 }
2415                 return (0);
2416         }
2417
2418         /* Not in TLB1, try through pmap */
2419         /* First validate the range. */
2420         for (va = addr; va < addr + sz; va += PAGE_SIZE) {
2421                 pte = pte_find(kernel_pmap, va);
2422                 if (pte == NULL || !PTE_ISVALID(pte))
2423                         return (EINVAL);
2424         }
2425
2426         mtx_lock_spin(&tlbivax_mutex);
2427         tlb_miss_lock();
2428         for (va = addr; va < addr + sz; va += PAGE_SIZE) {
2429                 pte = pte_find(kernel_pmap, va);
2430                 *pte &= ~(PTE_MAS2_MASK << PTE_MAS2_SHIFT);
2431                 *pte |= tlb_calc_wimg(PTE_PA(pte), mode) << PTE_MAS2_SHIFT;
2432                 tlb0_flush_entry(va);
2433         }
2434         tlb_miss_unlock();
2435         mtx_unlock_spin(&tlbivax_mutex);
2436
2437         return (0);
2438 }
2439
2440 static void
2441 mmu_booke_page_array_startup(long pages)
2442 {
2443         vm_page_array_size = pages;
2444 }
2445
2446 /**************************************************************************/
2447 /* TID handling */
2448 /**************************************************************************/
2449
2450 /*
2451  * Allocate a TID. If necessary, steal one from someone else.
2452  * The new TID is flushed from the TLB before returning.
2453  */
2454 static tlbtid_t
2455 tid_alloc(pmap_t pmap)
2456 {
2457         tlbtid_t tid;
2458         int thiscpu;
2459
2460         KASSERT((pmap != kernel_pmap), ("tid_alloc: kernel pmap"));
2461
2462         CTR2(KTR_PMAP, "%s: s (pmap = %p)", __func__, pmap);
2463
2464         thiscpu = PCPU_GET(cpuid);
2465
2466         tid = PCPU_GET(booke.tid_next);
2467         if (tid > TID_MAX)
2468                 tid = TID_MIN;
2469         PCPU_SET(booke.tid_next, tid + 1);
2470
2471         /* If we are stealing TID then clear the relevant pmap's field */
2472         if (tidbusy[thiscpu][tid] != NULL) {
2473                 CTR2(KTR_PMAP, "%s: warning: stealing tid %d", __func__, tid);
2474
2475                 tidbusy[thiscpu][tid]->pm_tid[thiscpu] = TID_NONE;
2476
2477                 /* Flush all entries from TLB0 matching this TID. */
2478                 tid_flush(tid);
2479         }
2480
2481         tidbusy[thiscpu][tid] = pmap;
2482         pmap->pm_tid[thiscpu] = tid;
2483         __asm __volatile("msync; isync");
2484
2485         CTR3(KTR_PMAP, "%s: e (%02d next = %02d)", __func__, tid,
2486             PCPU_GET(booke.tid_next));
2487
2488         return (tid);
2489 }
2490
2491 /**************************************************************************/
2492 /* TLB0 handling */
2493 /**************************************************************************/
2494
2495 /* Convert TLB0 va and way number to tlb0[] table index. */
2496 static inline unsigned int
2497 tlb0_tableidx(vm_offset_t va, unsigned int way)
2498 {
2499         unsigned int idx;
2500
2501         idx = (way * TLB0_ENTRIES_PER_WAY);
2502         idx += (va & MAS2_TLB0_ENTRY_IDX_MASK) >> MAS2_TLB0_ENTRY_IDX_SHIFT;
2503         return (idx);
2504 }
2505
2506 /*
2507  * Invalidate TLB0 entry.
2508  */
2509 static inline void
2510 tlb0_flush_entry(vm_offset_t va)
2511 {
2512
2513         CTR2(KTR_PMAP, "%s: s va=0x%08x", __func__, va);
2514
2515         mtx_assert(&tlbivax_mutex, MA_OWNED);
2516
2517         __asm __volatile("tlbivax 0, %0" :: "r"(va & MAS2_EPN_MASK));
2518         __asm __volatile("isync; msync");
2519         __asm __volatile("tlbsync; msync");
2520
2521         CTR1(KTR_PMAP, "%s: e", __func__);
2522 }
2523
2524 /**************************************************************************/
2525 /* TLB1 handling */
2526 /**************************************************************************/
2527
2528 /*
2529  * TLB1 mapping notes:
2530  *
2531  * TLB1[0]      Kernel text and data.
2532  * TLB1[1-15]   Additional kernel text and data mappings (if required), PCI
2533  *              windows, other devices mappings.
2534  */
2535
2536  /*
2537  * Read an entry from given TLB1 slot.
2538  */
2539 void
2540 tlb1_read_entry(tlb_entry_t *entry, unsigned int slot)
2541 {
2542         register_t msr;
2543         uint32_t mas0;
2544
2545         KASSERT((entry != NULL), ("%s(): Entry is NULL!", __func__));
2546
2547         msr = mfmsr();
2548         __asm __volatile("wrteei 0");
2549
2550         mas0 = MAS0_TLBSEL(1) | MAS0_ESEL(slot);
2551         mtspr(SPR_MAS0, mas0);
2552         __asm __volatile("isync; tlbre");
2553
2554         entry->mas1 = mfspr(SPR_MAS1);
2555         entry->mas2 = mfspr(SPR_MAS2);
2556         entry->mas3 = mfspr(SPR_MAS3);
2557
2558         switch ((mfpvr() >> 16) & 0xFFFF) {
2559         case FSL_E500v2:
2560         case FSL_E500mc:
2561         case FSL_E5500:
2562         case FSL_E6500:
2563                 entry->mas7 = mfspr(SPR_MAS7);
2564                 break;
2565         default:
2566                 entry->mas7 = 0;
2567                 break;
2568         }
2569         __asm __volatile("wrtee %0" :: "r"(msr));
2570
2571         entry->virt = entry->mas2 & MAS2_EPN_MASK;
2572         entry->phys = ((vm_paddr_t)(entry->mas7 & MAS7_RPN) << 32) |
2573             (entry->mas3 & MAS3_RPN);
2574         entry->size =
2575             tsize2size((entry->mas1 & MAS1_TSIZE_MASK) >> MAS1_TSIZE_SHIFT);
2576 }
2577
2578 struct tlbwrite_args {
2579         tlb_entry_t *e;
2580         unsigned int idx;
2581 };
2582
2583 static uint32_t
2584 tlb1_find_free(void)
2585 {
2586         tlb_entry_t e;
2587         int i;
2588
2589         for (i = 0; i < TLB1_ENTRIES; i++) {
2590                 tlb1_read_entry(&e, i);
2591                 if ((e.mas1 & MAS1_VALID) == 0)
2592                         return (i);
2593         }
2594         return (-1);
2595 }
2596
2597 static void
2598 tlb1_purge_va_range(vm_offset_t va, vm_size_t size)
2599 {
2600         tlb_entry_t e;
2601         int i;
2602
2603         for (i = 0; i < TLB1_ENTRIES; i++) {
2604                 tlb1_read_entry(&e, i);
2605                 if ((e.mas1 & MAS1_VALID) == 0)
2606                         continue;
2607                 if ((e.mas2 & MAS2_EPN_MASK) >= va &&
2608                     (e.mas2 & MAS2_EPN_MASK) < va + size) {
2609                         mtspr(SPR_MAS1, e.mas1 & ~MAS1_VALID);
2610                         __asm __volatile("isync; tlbwe; isync; msync");
2611                 }
2612         }
2613 }
2614
2615 static void
2616 tlb1_write_entry_int(void *arg)
2617 {
2618         struct tlbwrite_args *args = arg;
2619         uint32_t idx, mas0;
2620
2621         idx = args->idx;
2622         if (idx == -1) {
2623                 tlb1_purge_va_range(args->e->virt, args->e->size);
2624                 idx = tlb1_find_free();
2625                 if (idx == -1)
2626                         panic("No free TLB1 entries!\n");
2627         }
2628         /* Select entry */
2629         mas0 = MAS0_TLBSEL(1) | MAS0_ESEL(idx);
2630
2631         mtspr(SPR_MAS0, mas0);
2632         mtspr(SPR_MAS1, args->e->mas1);
2633         mtspr(SPR_MAS2, args->e->mas2);
2634         mtspr(SPR_MAS3, args->e->mas3);
2635         switch ((mfpvr() >> 16) & 0xFFFF) {
2636         case FSL_E500mc:
2637         case FSL_E5500:
2638         case FSL_E6500:
2639                 mtspr(SPR_MAS8, 0);
2640                 /* FALLTHROUGH */
2641         case FSL_E500v2:
2642                 mtspr(SPR_MAS7, args->e->mas7);
2643                 break;
2644         default:
2645                 break;
2646         }
2647
2648         __asm __volatile("isync; tlbwe; isync; msync");
2649
2650 }
2651
2652 static void
2653 tlb1_write_entry_sync(void *arg)
2654 {
2655         /* Empty synchronization point for smp_rendezvous(). */
2656 }
2657
2658 /*
2659  * Write given entry to TLB1 hardware.
2660  */
2661 static void
2662 tlb1_write_entry(tlb_entry_t *e, unsigned int idx)
2663 {
2664         struct tlbwrite_args args;
2665
2666         args.e = e;
2667         args.idx = idx;
2668
2669 #ifdef SMP
2670         if ((e->mas2 & _TLB_ENTRY_SHARED) && smp_started) {
2671                 mb();
2672                 smp_rendezvous(tlb1_write_entry_sync,
2673                     tlb1_write_entry_int,
2674                     tlb1_write_entry_sync, &args);
2675         } else
2676 #endif
2677         {
2678                 register_t msr;
2679
2680                 msr = mfmsr();
2681                 __asm __volatile("wrteei 0");
2682                 tlb1_write_entry_int(&args);
2683                 __asm __volatile("wrtee %0" :: "r"(msr));
2684         }
2685 }
2686
2687 /*
2688  * Convert TLB TSIZE value to mapped region size.
2689  */
2690 static vm_size_t
2691 tsize2size(unsigned int tsize)
2692 {
2693
2694         /*
2695          * size = 4^tsize KB
2696          * size = 4^tsize * 2^10 = 2^(2 * tsize - 10)
2697          */
2698
2699         return ((1 << (2 * tsize)) * 1024);
2700 }
2701
2702 /*
2703  * Convert region size (must be power of 4) to TLB TSIZE value.
2704  */
2705 static unsigned int
2706 size2tsize(vm_size_t size)
2707 {
2708
2709         return (ilog2(size) / 2 - 5);
2710 }
2711
2712 /*
2713  * Register permanent kernel mapping in TLB1.
2714  *
2715  * Entries are created starting from index 0 (current free entry is
2716  * kept in tlb1_idx) and are not supposed to be invalidated.
2717  */
2718 int
2719 tlb1_set_entry(vm_offset_t va, vm_paddr_t pa, vm_size_t size,
2720     uint32_t flags)
2721 {
2722         tlb_entry_t e;
2723         uint32_t ts, tid;
2724         int tsize, index;
2725
2726         /* First try to update an existing entry. */
2727         for (index = 0; index < TLB1_ENTRIES; index++) {
2728                 tlb1_read_entry(&e, index);
2729                 /* Check if we're just updating the flags, and update them. */
2730                 if (e.phys == pa && e.virt == va && e.size == size) {
2731                         e.mas2 = (va & MAS2_EPN_MASK) | flags;
2732                         tlb1_write_entry(&e, index);
2733                         return (0);
2734                 }
2735         }
2736
2737         /* Convert size to TSIZE */
2738         tsize = size2tsize(size);
2739
2740         tid = (TID_KERNEL << MAS1_TID_SHIFT) & MAS1_TID_MASK;
2741         /* XXX TS is hard coded to 0 for now as we only use single address space */
2742         ts = (0 << MAS1_TS_SHIFT) & MAS1_TS_MASK;
2743
2744         e.phys = pa;
2745         e.virt = va;
2746         e.size = size;
2747         e.mas1 = MAS1_VALID | MAS1_IPROT | ts | tid;
2748         e.mas1 |= ((tsize << MAS1_TSIZE_SHIFT) & MAS1_TSIZE_MASK);
2749         e.mas2 = (va & MAS2_EPN_MASK) | flags;
2750
2751         /* Set supervisor RWX permission bits */
2752         e.mas3 = (pa & MAS3_RPN) | MAS3_SR | MAS3_SW | MAS3_SX;
2753         e.mas7 = (pa >> 32) & MAS7_RPN;
2754
2755         tlb1_write_entry(&e, -1);
2756
2757         return (0);
2758 }
2759
2760 /*
2761  * Map in contiguous RAM region into the TLB1.
2762  */
2763 static vm_size_t
2764 tlb1_mapin_region(vm_offset_t va, vm_paddr_t pa, vm_size_t size, int wimge)
2765 {
2766         vm_offset_t base;
2767         vm_size_t mapped, sz, ssize;
2768
2769         mapped = 0;
2770         base = va;
2771         ssize = size;
2772
2773         while (size > 0) {
2774                 sz = 1UL << (ilog2(size) & ~1);
2775                 /* Align size to PA */
2776                 if (pa % sz != 0) {
2777                         do {
2778                                 sz >>= 2;
2779                         } while (pa % sz != 0);
2780                 }
2781                 /* Now align from there to VA */
2782                 if (va % sz != 0) {
2783                         do {
2784                                 sz >>= 2;
2785                         } while (va % sz != 0);
2786                 }
2787 #ifdef __powerpc64__
2788                 /*
2789                  * Clamp TLB1 entries to 4G.
2790                  *
2791                  * While the e6500 supports up to 1TB mappings, the e5500
2792                  * only supports up to 4G mappings. (0b1011)
2793                  *
2794                  * If any e6500 machines capable of supporting a very
2795                  * large amount of memory appear in the future, we can
2796                  * revisit this.
2797                  *
2798                  * For now, though, since we have plenty of space in TLB1,
2799                  * always avoid creating entries larger than 4GB.
2800                  */
2801                 sz = MIN(sz, 1UL << 32);
2802 #endif
2803                 if (bootverbose)
2804                         printf("Wiring VA=%p to PA=%jx (size=%lx)\n",
2805                             (void *)va, (uintmax_t)pa, (long)sz);
2806                 if (tlb1_set_entry(va, pa, sz,
2807                     _TLB_ENTRY_SHARED | wimge) < 0)
2808                         return (mapped);
2809                 size -= sz;
2810                 pa += sz;
2811                 va += sz;
2812         }
2813
2814         mapped = (va - base);
2815         if (bootverbose)
2816                 printf("mapped size 0x%"PRIxPTR" (wasted space 0x%"PRIxPTR")\n",
2817                     mapped, mapped - ssize);
2818
2819         return (mapped);
2820 }
2821
2822 /*
2823  * TLB1 initialization routine, to be called after the very first
2824  * assembler level setup done in locore.S.
2825  */
2826 void
2827 tlb1_init(void)
2828 {
2829         vm_offset_t mas2;
2830         uint32_t mas0, mas1, mas3, mas7;
2831         uint32_t tsz;
2832
2833         tlb1_get_tlbconf();
2834
2835         mas0 = MAS0_TLBSEL(1) | MAS0_ESEL(0);
2836         mtspr(SPR_MAS0, mas0);
2837         __asm __volatile("isync; tlbre");
2838
2839         mas1 = mfspr(SPR_MAS1);
2840         mas2 = mfspr(SPR_MAS2);
2841         mas3 = mfspr(SPR_MAS3);
2842         mas7 = mfspr(SPR_MAS7);
2843
2844         kernload =  ((vm_paddr_t)(mas7 & MAS7_RPN) << 32) |
2845             (mas3 & MAS3_RPN);
2846
2847         tsz = (mas1 & MAS1_TSIZE_MASK) >> MAS1_TSIZE_SHIFT;
2848         kernsize += (tsz > 0) ? tsize2size(tsz) : 0;
2849         kernstart = trunc_page(mas2);
2850
2851         /* Setup TLB miss defaults */
2852         set_mas4_defaults();
2853 }
2854
2855 /*
2856  * pmap_early_io_unmap() should be used in short conjunction with
2857  * pmap_early_io_map(), as in the following snippet:
2858  *
2859  * x = pmap_early_io_map(...);
2860  * <do something with x>
2861  * pmap_early_io_unmap(x, size);
2862  *
2863  * And avoiding more allocations between.
2864  */
2865 void
2866 pmap_early_io_unmap(vm_offset_t va, vm_size_t size)
2867 {
2868         int i;
2869         tlb_entry_t e;
2870         vm_size_t isize;
2871
2872         size = roundup(size, PAGE_SIZE);
2873         isize = size;
2874         for (i = 0; i < TLB1_ENTRIES && size > 0; i++) {
2875                 tlb1_read_entry(&e, i);
2876                 if (!(e.mas1 & MAS1_VALID))
2877                         continue;
2878                 if (va <= e.virt && (va + isize) >= (e.virt + e.size)) {
2879                         size -= e.size;
2880                         e.mas1 &= ~MAS1_VALID;
2881                         tlb1_write_entry(&e, i);
2882                 }
2883         }
2884         if (tlb1_map_base == va + isize)
2885                 tlb1_map_base -= isize;
2886 }
2887
2888 vm_offset_t
2889 pmap_early_io_map(vm_paddr_t pa, vm_size_t size)
2890 {
2891         vm_paddr_t pa_base;
2892         vm_offset_t va, sz;
2893         int i;
2894         tlb_entry_t e;
2895
2896         KASSERT(!pmap_bootstrapped, ("Do not use after PMAP is up!"));
2897
2898         for (i = 0; i < TLB1_ENTRIES; i++) {
2899                 tlb1_read_entry(&e, i);
2900                 if (!(e.mas1 & MAS1_VALID))
2901                         continue;
2902                 if (pa >= e.phys && (pa + size) <=
2903                     (e.phys + e.size))
2904                         return (e.virt + (pa - e.phys));
2905         }
2906
2907         pa_base = rounddown(pa, PAGE_SIZE);
2908         size = roundup(size + (pa - pa_base), PAGE_SIZE);
2909         tlb1_map_base = roundup2(tlb1_map_base, 1 << (ilog2(size) & ~1));
2910         va = tlb1_map_base + (pa - pa_base);
2911
2912         do {
2913                 sz = 1 << (ilog2(size) & ~1);
2914                 tlb1_set_entry(tlb1_map_base, pa_base, sz,
2915                     _TLB_ENTRY_SHARED | _TLB_ENTRY_IO);
2916                 size -= sz;
2917                 pa_base += sz;
2918                 tlb1_map_base += sz;
2919         } while (size > 0);
2920
2921         return (va);
2922 }
2923
2924 void
2925 pmap_track_page(pmap_t pmap, vm_offset_t va)
2926 {
2927         vm_paddr_t pa;
2928         vm_page_t page;
2929         struct pv_entry *pve;
2930
2931         va = trunc_page(va);
2932         pa = pmap_kextract(va);
2933         page = PHYS_TO_VM_PAGE(pa);
2934
2935         rw_wlock(&pvh_global_lock);
2936         PMAP_LOCK(pmap);
2937
2938         TAILQ_FOREACH(pve, &page->md.pv_list, pv_link) {
2939                 if ((pmap == pve->pv_pmap) && (va == pve->pv_va)) {
2940                         goto out;
2941                 }
2942         }
2943         page->md.pv_tracked = true;
2944         pv_insert(pmap, va, page);
2945 out:
2946         PMAP_UNLOCK(pmap);
2947         rw_wunlock(&pvh_global_lock);
2948 }
2949
2950 /*
2951  * Setup MAS4 defaults.
2952  * These values are loaded to MAS0-2 on a TLB miss.
2953  */
2954 static void
2955 set_mas4_defaults(void)
2956 {
2957         uint32_t mas4;
2958
2959         /* Defaults: TLB0, PID0, TSIZED=4K */
2960         mas4 = MAS4_TLBSELD0;
2961         mas4 |= (TLB_SIZE_4K << MAS4_TSIZED_SHIFT) & MAS4_TSIZED_MASK;
2962 #ifdef SMP
2963         mas4 |= MAS4_MD;
2964 #endif
2965         mtspr(SPR_MAS4, mas4);
2966         __asm __volatile("isync");
2967 }
2968
2969 /*
2970  * Return 0 if the physical IO range is encompassed by one of the
2971  * the TLB1 entries, otherwise return related error code.
2972  */
2973 static int
2974 tlb1_iomapped(int i, vm_paddr_t pa, vm_size_t size, vm_offset_t *va)
2975 {
2976         uint32_t prot;
2977         vm_paddr_t pa_start;
2978         vm_paddr_t pa_end;
2979         unsigned int entry_tsize;
2980         vm_size_t entry_size;
2981         tlb_entry_t e;
2982
2983         *va = (vm_offset_t)NULL;
2984
2985         tlb1_read_entry(&e, i);
2986         /* Skip invalid entries */
2987         if (!(e.mas1 & MAS1_VALID))
2988                 return (EINVAL);
2989
2990         /*
2991          * The entry must be cache-inhibited, guarded, and r/w
2992          * so it can function as an i/o page
2993          */
2994         prot = e.mas2 & (MAS2_I | MAS2_G);
2995         if (prot != (MAS2_I | MAS2_G))
2996                 return (EPERM);
2997
2998         prot = e.mas3 & (MAS3_SR | MAS3_SW);
2999         if (prot != (MAS3_SR | MAS3_SW))
3000                 return (EPERM);
3001
3002         /* The address should be within the entry range. */
3003         entry_tsize = (e.mas1 & MAS1_TSIZE_MASK) >> MAS1_TSIZE_SHIFT;
3004         KASSERT((entry_tsize), ("tlb1_iomapped: invalid entry tsize"));
3005
3006         entry_size = tsize2size(entry_tsize);
3007         pa_start = (((vm_paddr_t)e.mas7 & MAS7_RPN) << 32) |
3008             (e.mas3 & MAS3_RPN);
3009         pa_end = pa_start + entry_size;
3010
3011         if ((pa < pa_start) || ((pa + size) > pa_end))
3012                 return (ERANGE);
3013
3014         /* Return virtual address of this mapping. */
3015         *va = (e.mas2 & MAS2_EPN_MASK) + (pa - pa_start);
3016         return (0);
3017 }
3018
3019 #ifdef DDB
3020 /* Print out contents of the MAS registers for each TLB0 entry */
3021 static void
3022 #ifdef __powerpc64__
3023 tlb_print_entry(int i, uint32_t mas1, uint64_t mas2, uint32_t mas3,
3024 #else
3025 tlb_print_entry(int i, uint32_t mas1, uint32_t mas2, uint32_t mas3,
3026 #endif
3027     uint32_t mas7)
3028 {
3029         int as;
3030         char desc[3];
3031         tlbtid_t tid;
3032         vm_size_t size;
3033         unsigned int tsize;
3034
3035         desc[2] = '\0';
3036         if (mas1 & MAS1_VALID)
3037                 desc[0] = 'V';
3038         else
3039                 desc[0] = ' ';
3040
3041         if (mas1 & MAS1_IPROT)
3042                 desc[1] = 'P';
3043         else
3044                 desc[1] = ' ';
3045
3046         as = (mas1 & MAS1_TS_MASK) ? 1 : 0;
3047         tid = MAS1_GETTID(mas1);
3048
3049         tsize = (mas1 & MAS1_TSIZE_MASK) >> MAS1_TSIZE_SHIFT;
3050         size = 0;
3051         if (tsize)
3052                 size = tsize2size(tsize);
3053
3054         printf("%3d: (%s) [AS=%d] "
3055             "sz = 0x%jx tsz = %d tid = %d mas1 = 0x%08x "
3056             "mas2(va) = 0x%"PRI0ptrX" mas3(pa) = 0x%08x mas7 = 0x%08x\n",
3057             i, desc, as, (uintmax_t)size, tsize, tid, mas1, mas2, mas3, mas7);
3058 }
3059
3060 DB_SHOW_COMMAND(tlb0, tlb0_print_tlbentries)
3061 {
3062         uint32_t mas0, mas1, mas3, mas7;
3063 #ifdef __powerpc64__
3064         uint64_t mas2;
3065 #else
3066         uint32_t mas2;
3067 #endif
3068         int entryidx, way, idx;
3069
3070         printf("TLB0 entries:\n");
3071         for (way = 0; way < TLB0_WAYS; way ++)
3072                 for (entryidx = 0; entryidx < TLB0_ENTRIES_PER_WAY; entryidx++) {
3073                         mas0 = MAS0_TLBSEL(0) | MAS0_ESEL(way);
3074                         mtspr(SPR_MAS0, mas0);
3075
3076                         mas2 = entryidx << MAS2_TLB0_ENTRY_IDX_SHIFT;
3077                         mtspr(SPR_MAS2, mas2);
3078
3079                         __asm __volatile("isync; tlbre");
3080
3081                         mas1 = mfspr(SPR_MAS1);
3082                         mas2 = mfspr(SPR_MAS2);
3083                         mas3 = mfspr(SPR_MAS3);
3084                         mas7 = mfspr(SPR_MAS7);
3085
3086                         idx = tlb0_tableidx(mas2, way);
3087                         tlb_print_entry(idx, mas1, mas2, mas3, mas7);
3088                 }
3089 }
3090
3091 /*
3092  * Print out contents of the MAS registers for each TLB1 entry
3093  */
3094 DB_SHOW_COMMAND(tlb1, tlb1_print_tlbentries)
3095 {
3096         uint32_t mas0, mas1, mas3, mas7;
3097 #ifdef __powerpc64__
3098         uint64_t mas2;
3099 #else
3100         uint32_t mas2;
3101 #endif
3102         int i;
3103
3104         printf("TLB1 entries:\n");
3105         for (i = 0; i < TLB1_ENTRIES; i++) {
3106                 mas0 = MAS0_TLBSEL(1) | MAS0_ESEL(i);
3107                 mtspr(SPR_MAS0, mas0);
3108
3109                 __asm __volatile("isync; tlbre");
3110
3111                 mas1 = mfspr(SPR_MAS1);
3112                 mas2 = mfspr(SPR_MAS2);
3113                 mas3 = mfspr(SPR_MAS3);
3114                 mas7 = mfspr(SPR_MAS7);
3115
3116                 tlb_print_entry(i, mas1, mas2, mas3, mas7);
3117         }
3118 }
3119 #endif