sys/powerpc/aim/slb.c

   1 /*-
   2  * Copyright (c) 2010 Nathan Whitehorn
   3  * All rights reserved.
   4  *
   5  * Redistribution and use in source and binary forms, with or without
   6  * modification, are permitted provided that the following conditions
   7  * are met:
   8  *
   9  * 1. Redistributions of source code must retain the above copyright
  10  *    notice, this list of conditions and the following disclaimer.
  11  * 2. Redistributions in binary form must reproduce the above copyright
  12  *    notice, this list of conditions and the following disclaimer in the
  13  *    documentation and/or other materials provided with the distribution.
  14  *
  15  * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
  16  * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
  17  * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
  18  * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT,
  19  * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
  20  * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
  21  * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
  22  * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
  23  * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
  24  * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
  25  *
  26  * $FreeBSD$
  27  */
  28
  29 #include <sys/param.h>
  30 #include <sys/kernel.h>
  31 #include <sys/lock.h>
  32 #include <sys/mutex.h>
  33 #include <sys/proc.h>
  34 #include <sys/systm.h>
  35
  36 #include <vm/vm.h>
  37 #include <vm/pmap.h>
  38 #include <vm/uma.h>
  39 #include <vm/vm.h>
  40 #include <vm/vm_map.h>
  41 #include <vm/vm_page.h>
  42 #include <vm/vm_pageout.h>
  43
  44 #include <machine/md_var.h>
  45 #include <machine/platform.h>
  46 #include <machine/pmap.h>
  47 #include <machine/vmparam.h>
  48
/*
 * VSID allocation hooks implemented outside this file; they hand out and
 * take back the VSIDs stored in the SLB tree below.
 */
uintptr_t moea64_get_unique_vsid(void);
void moea64_release_vsid(uint64_t vsid);
static void slb_zone_init(void *);

/* UMA zone that struct slbtnode tree nodes are allocated from. */
static uma_zone_t slbt_zone;
/* UMA zone for arrays of (n_slbs + 1) struct slb pointers. */
static uma_zone_t slb_cache_zone;
/* Number of SLB slots; bounds the slot searches in the insert routines. */
int n_slbs = 64;

/* Create both zones at the SI_SUB_KMEM stage of startup. */
SYSINIT(slb_zone_init, SI_SUB_KMEM, SI_ORDER_ANY, slb_zone_init, NULL);
  58
/*
 * One node of a per-pmap SLB tree.  Each node has 16 slots, selected by
 * one 4-bit group of the esid (see esid2idx()).  Nodes at UAD_LEAF_LEVEL
 * use the slots as SLB entries; all other nodes use them as child
 * pointers.
 */
struct slbtnode {
	/* Bitmap of the slots in use, maintained with setbit()/clrbit(). */
	uint16_t	ua_alloc;
	/* Level of this node; selects which esid bits index the slots. */
	uint8_t		ua_level;
	/*
	 * The esid bits common to everything below this node.
	 * Only 36 bits needed for full 64-bit address space.
	 */
	uint64_t	ua_base;
	union {
		/* Used when ua_level != UAD_LEAF_LEVEL. */
		struct slbtnode	*ua_child[16];
		/* Used when ua_level == UAD_LEAF_LEVEL. */
		struct slb	slb_entries[16];
	} u;
};
  69
/*
 * For a full 64-bit address space, there are 36 bits in play in an
 * esid.  Each level consumes 4 of them, so there are 9 levels, with
 * the root being at level 8 and the leaf being at level 0.
 *
 * |3333|3322|2222|2222|1111|1111|11  |    |    |  esid
 * |5432|1098|7654|3210|9876|5432|1098|7654|3210|  bits
 * +----+----+----+----+----+----+----+----+----+--------
 * | 8  | 7  | 6  | 5  | 4  | 3  | 2  | 1  | 0  | level
 */
#define UAD_ROOT_LEVEL  8
#define UAD_LEAF_LEVEL  0
  81
  82 static inline int
  83 esid2idx(uint64_t esid, int level)
  84 {
  85         int shift;
  86
  87         shift = level * 4;
  88         return ((esid >> shift) & 0xF);
  89 }
  90
  91 /*
  92  * The ua_base field should have 0 bits after the first 4*(level+1)
  93  * bits; i.e. only
  94  */
  95 #define uad_baseok(ua)                          \
  96         (esid2base(ua->ua_base, ua->ua_level) == ua->ua_base)
  97
  98
  99 static inline uint64_t
 100 esid2base(uint64_t esid, int level)
 101 {
 102         uint64_t mask;
 103         int shift;
 104
 105         shift = (level + 1) * 4;
 106         mask = ~((1ULL << shift) - 1);
 107         return (esid & mask);
 108 }
 109
 110 /*
 111  * Allocate a new leaf node for the specified esid/vmhandle from the
 112  * parent node.
 113  */
 114 static struct slb *
 115 make_new_leaf(uint64_t esid, uint64_t slbv, struct slbtnode *parent)
 116 {
 117         struct slbtnode *child;
 118         struct slb *retval;
 119         int idx;
 120
 121         idx = esid2idx(esid, parent->ua_level);
 122         KASSERT(parent->u.ua_child[idx] == NULL, ("Child already exists!"));
 123
 124         /* unlock and M_WAITOK and loop? */
 125         child = uma_zalloc(slbt_zone, M_NOWAIT | M_ZERO);
 126         KASSERT(child != NULL, ("unhandled NULL case"));
 127
 128         child->ua_level = UAD_LEAF_LEVEL;
 129         child->ua_base = esid2base(esid, child->ua_level);
 130         idx = esid2idx(esid, child->ua_level);
 131         child->u.slb_entries[idx].slbv = slbv;
 132         child->u.slb_entries[idx].slbe = (esid << SLBE_ESID_SHIFT) | SLBE_VALID;
 133         setbit(&child->ua_alloc, idx);
 134
 135         retval = &child->u.slb_entries[idx];
 136
 137         /*
 138          * The above stores must be visible before the next one, so
 139          * that a lockless searcher always sees a valid path through
 140          * the tree.
 141          */
 142         powerpc_sync();
 143
 144         idx = esid2idx(esid, parent->ua_level);
 145         parent->u.ua_child[idx] = child;
 146         setbit(&parent->ua_alloc, idx);
 147
 148         return (retval);
 149 }
 150
 151 /*
 152  * Allocate a new intermediate node to fit between the parent and
 153  * esid.
 154  */
 155 static struct slbtnode*
 156 make_intermediate(uint64_t esid, struct slbtnode *parent)
 157 {
 158         struct slbtnode *child, *inter;
 159         int idx, level;
 160
 161         idx = esid2idx(esid, parent->ua_level);
 162         child = parent->u.ua_child[idx];
 163         KASSERT(esid2base(esid, child->ua_level) != child->ua_base,
 164             ("No need for an intermediate node?"));
 165
 166         /*
 167          * Find the level where the existing child and our new esid
 168          * meet.  It must be lower than parent->ua_level or we would
 169          * have chosen a different index in parent.
 170          */
 171         level = child->ua_level + 1;
 172         while (esid2base(esid, level) !=
 173             esid2base(child->ua_base, level))
 174                 level++;
 175         KASSERT(level < parent->ua_level,
 176             ("Found splitting level %d for %09jx and %09jx, "
 177             "but it's the same as %p's",
 178             level, esid, child->ua_base, parent));
 179
 180         /* unlock and M_WAITOK and loop? */
 181         inter = uma_zalloc(slbt_zone, M_NOWAIT | M_ZERO);
 182         KASSERT(inter != NULL, ("unhandled NULL case"));
 183
 184         /* Set up intermediate node to point to child ... */
 185         inter->ua_level = level;
 186         inter->ua_base = esid2base(esid, inter->ua_level);
 187         idx = esid2idx(child->ua_base, inter->ua_level);
 188         inter->u.ua_child[idx] = child;
 189         setbit(&inter->ua_alloc, idx);
 190         powerpc_sync();
 191
 192         /* Set up parent to point to intermediate node ... */
 193         idx = esid2idx(inter->ua_base, parent->ua_level);
 194         parent->u.ua_child[idx] = inter;
 195         setbit(&parent->ua_alloc, idx);
 196
 197         return (inter);
 198 }
 199
 200 uint64_t
 201 kernel_va_to_slbv(vm_offset_t va)
 202 {
 203         uint64_t slbv;
 204
 205         /* Set kernel VSID to deterministic value */
 206         slbv = (KERNEL_VSID((uintptr_t)va >> ADDR_SR_SHFT)) << SLBV_VSID_SHIFT;
 207
 208         /* Figure out if this is a large-page mapping */
 209         if (hw_direct_map && va < VM_MIN_KERNEL_ADDRESS) {
 210                 /*
 211                  * XXX: If we have set up a direct map, assumes
 212                  * all physical memory is mapped with large pages.
 213                  */
 214                 if (mem_valid(va, 0) == 0)
 215                         slbv |= SLBV_L;
 216         }
 217
 218         return (slbv);
 219 }
 220
 221 struct slb *
 222 user_va_to_slb_entry(pmap_t pm, vm_offset_t va)
 223 {
 224         uint64_t esid = va >> ADDR_SR_SHFT;
 225         struct slbtnode *ua;
 226         int idx;
 227
 228         ua = pm->pm_slb_tree_root;
 229
 230         for (;;) {
 231                 KASSERT(uad_baseok(ua), ("uad base %016jx level %d bad!",
 232                     ua->ua_base, ua->ua_level));
 233                 idx = esid2idx(esid, ua->ua_level);
 234
 235                 /*
 236                  * This code is specific to ppc64 where a load is
 237                  * atomic, so no need for atomic_load macro.
 238                  */
 239                 if (ua->ua_level == UAD_LEAF_LEVEL)
 240                         return ((ua->u.slb_entries[idx].slbe & SLBE_VALID) ?
 241                             &ua->u.slb_entries[idx] : NULL);
 242
 243                 ua = ua->u.ua_child[idx];
 244                 if (ua == NULL ||
 245                     esid2base(esid, ua->ua_level) != ua->ua_base)
 246                         return (NULL);
 247         }
 248
 249         return (NULL);
 250 }
 251
 252 uint64_t
 253 va_to_vsid(pmap_t pm, vm_offset_t va)
 254 {
 255         struct slb *entry;
 256
 257         /* Shortcut kernel case */
 258         if (pm == kernel_pmap)
 259                 return (KERNEL_VSID((uintptr_t)va >> ADDR_SR_SHFT));
 260
 261         /*
 262          * If there is no vsid for this VA, we need to add a new entry
 263          * to the PMAP's segment table.
 264          */
 265
 266         entry = user_va_to_slb_entry(pm, va);
 267
 268         if (entry == NULL)
 269                 return (allocate_user_vsid(pm,
 270                     (uintptr_t)va >> ADDR_SR_SHFT, 0));
 271
 272         return ((entry->slbv & SLBV_VSID_MASK) >> SLBV_VSID_SHIFT);
 273 }
 274
 275 uint64_t
 276 allocate_user_vsid(pmap_t pm, uint64_t esid, int large)
 277 {
 278         uint64_t vsid, slbv;
 279         struct slbtnode *ua, *next, *inter;
 280         struct slb *slb;
 281         int idx;
 282
 283         KASSERT(pm != kernel_pmap, ("Attempting to allocate a kernel VSID"));
 284
 285         PMAP_LOCK_ASSERT(pm, MA_OWNED);
 286         vsid = moea64_get_unique_vsid();
 287
 288         slbv = vsid << SLBV_VSID_SHIFT;
 289         if (large)
 290                 slbv |= SLBV_L;
 291
 292         ua = pm->pm_slb_tree_root;
 293
 294         /* Descend to the correct leaf or NULL pointer. */
 295         for (;;) {
 296                 KASSERT(uad_baseok(ua),
 297                    ("uad base %09jx level %d bad!", ua->ua_base, ua->ua_level));
 298                 idx = esid2idx(esid, ua->ua_level);
 299
 300                 if (ua->ua_level == UAD_LEAF_LEVEL) {
 301                         ua->u.slb_entries[idx].slbv = slbv;
 302                         eieio();
 303                         ua->u.slb_entries[idx].slbe = (esid << SLBE_ESID_SHIFT)
 304                             | SLBE_VALID;
 305                         setbit(&ua->ua_alloc, idx);
 306                         slb = &ua->u.slb_entries[idx];
 307                         break;
 308                 }
 309
 310                 next = ua->u.ua_child[idx];
 311                 if (next == NULL) {
 312                         slb = make_new_leaf(esid, slbv, ua);
 313                         break;
 314                 }
 315
 316                 /*
 317                  * Check if the next item down has an okay ua_base.
 318                  * If not, we need to allocate an intermediate node.
 319                  */
 320                 if (esid2base(esid, next->ua_level) != next->ua_base) {
 321                         inter = make_intermediate(esid, ua);
 322                         slb = make_new_leaf(esid, slbv, inter);
 323                         break;
 324                 }
 325
 326                 ua = next;
 327         }
 328
 329         /*
 330          * Someone probably wants this soon, and it may be a wired
 331          * SLB mapping, so pre-spill this entry.
 332          */
 333         eieio();
 334         slb_insert_user(pm, slb);
 335
 336         return (vsid);
 337 }
 338
 339 void
 340 free_vsid(pmap_t pm, uint64_t esid, int large)
 341 {
 342         struct slbtnode *ua;
 343         int idx;
 344
 345         PMAP_LOCK_ASSERT(pm, MA_OWNED);
 346
 347         ua = pm->pm_slb_tree_root;
 348         /* Descend to the correct leaf. */
 349         for (;;) {
 350                 KASSERT(uad_baseok(ua),
 351                    ("uad base %09jx level %d bad!", ua->ua_base, ua->ua_level));
 352
 353                 idx = esid2idx(esid, ua->ua_level);
 354                 if (ua->ua_level == UAD_LEAF_LEVEL) {
 355                         ua->u.slb_entries[idx].slbv = 0;
 356                         eieio();
 357                         ua->u.slb_entries[idx].slbe = 0;
 358                         clrbit(&ua->ua_alloc, idx);
 359                         return;
 360                 }
 361
 362                 ua = ua->u.ua_child[idx];
 363                 if (ua == NULL ||
 364                     esid2base(esid, ua->ua_level) != ua->ua_base) {
 365                         /* Perhaps just return instead of assert? */
 366                         KASSERT(0,
 367                             ("Asked to remove an entry that was never inserted!"));
 368                         return;
 369                 }
 370         }
 371 }
 372
 373 static void
 374 free_slb_tree_node(struct slbtnode *ua)
 375 {
 376         int idx;
 377
 378         for (idx = 0; idx < 16; idx++) {
 379                 if (ua->ua_level != UAD_LEAF_LEVEL) {
 380                         if (ua->u.ua_child[idx] != NULL)
 381                                 free_slb_tree_node(ua->u.ua_child[idx]);
 382                 } else {
 383                         if (ua->u.slb_entries[idx].slbv != 0)
 384                                 moea64_release_vsid(ua->u.slb_entries[idx].slbv
 385                                     >> SLBV_VSID_SHIFT);
 386                 }
 387         }
 388
 389         uma_zfree(slbt_zone, ua);
 390 }
 391
 392 void
 393 slb_free_tree(pmap_t pm)
 394 {
 395
 396         free_slb_tree_node(pm->pm_slb_tree_root);
 397 }
 398
 399 struct slbtnode *
 400 slb_alloc_tree(void)
 401 {
 402         struct slbtnode *root;
 403
 404         root = uma_zalloc(slbt_zone, M_NOWAIT | M_ZERO);
 405         root->ua_level = UAD_ROOT_LEVEL;
 406
 407         return (root);
 408 }
 409
 410 /* Lock entries mapping kernel text and stacks */
 411
 412 void
 413 slb_insert_kernel(uint64_t slbe, uint64_t slbv)
 414 {
 415         struct slb *slbcache;
 416         int i;
 417
 418         /* We don't want to be preempted while modifying the kernel map */
 419         critical_enter();
 420
 421         slbcache = PCPU_GET(slb);
 422
 423         /* Check for an unused slot, abusing the user slot as a full flag */
 424         if (slbcache[USER_SLB_SLOT].slbe == 0) {
 425                 for (i = 0; i < n_slbs; i++) {
 426                         if (i == USER_SLB_SLOT)
 427                                 continue;
 428                         if (!(slbcache[i].slbe & SLBE_VALID))
 429                                 goto fillkernslb;
 430                 }
 431
 432                 if (i == n_slbs)
 433                         slbcache[USER_SLB_SLOT].slbe = 1;
 434         }
 435
 436         i = mftb() % n_slbs;
 437         if (i == USER_SLB_SLOT)
 438                         i = (i+1) % n_slbs;
 439
 440 fillkernslb:
 441         KASSERT(i != USER_SLB_SLOT,
 442             ("Filling user SLB slot with a kernel mapping"));
 443         slbcache[i].slbv = slbv;
 444         slbcache[i].slbe = slbe | (uint64_t)i;
 445
 446         /* If it is for this CPU, put it in the SLB right away */
 447         if (pmap_bootstrapped) {
 448                 /* slbie not required */
 449                 __asm __volatile ("slbmte %0, %1" ::
 450                     "r"(slbcache[i].slbv), "r"(slbcache[i].slbe));
 451         }
 452
 453         critical_exit();
 454 }
 455
 456 void
 457 slb_insert_user(pmap_t pm, struct slb *slb)
 458 {
 459         int i;
 460
 461         PMAP_LOCK_ASSERT(pm, MA_OWNED);
 462
 463         if (pm->pm_slb_len < n_slbs) {
 464                 i = pm->pm_slb_len;
 465                 pm->pm_slb_len++;
 466         } else {
 467                 i = mftb() % n_slbs;
 468         }
 469
 470         /* Note that this replacement is atomic with respect to trap_subr */
 471         pm->pm_slb[i] = slb;
 472 }
 473
 474 static void *
 475 slb_uma_real_alloc(uma_zone_t zone, int bytes, u_int8_t *flags, int wait)
 476 {
 477         static vm_offset_t realmax = 0;
 478         void *va;
 479         vm_page_t m;
 480         int pflags;
 481
 482         if (realmax == 0)
 483                 realmax = platform_real_maxaddr();
 484
 485         *flags = UMA_SLAB_PRIV;
 486         if ((wait & (M_NOWAIT | M_USE_RESERVE)) == M_NOWAIT)
 487                 pflags = VM_ALLOC_INTERRUPT | VM_ALLOC_NOOBJ | VM_ALLOC_WIRED;
 488         else
 489                 pflags = VM_ALLOC_SYSTEM | VM_ALLOC_NOOBJ | VM_ALLOC_WIRED;
 490         if (wait & M_ZERO)
 491                 pflags |= VM_ALLOC_ZERO;
 492
 493         for (;;) {
 494                 m = vm_page_alloc_contig(NULL, 0, pflags, 1, 0, realmax,
 495                     PAGE_SIZE, PAGE_SIZE, VM_MEMATTR_DEFAULT);
 496                 if (m == NULL) {
 497                         if (wait & M_NOWAIT)
 498                                 return (NULL);
 499                         VM_WAIT;
 500                 } else
 501                         break;
 502         }
 503
 504         va = (void *) VM_PAGE_TO_PHYS(m);
 505
 506         if (!hw_direct_map)
 507                 pmap_kenter((vm_offset_t)va, VM_PAGE_TO_PHYS(m));
 508
 509         if ((wait & M_ZERO) && (m->flags & PG_ZERO) == 0)
 510                 bzero(va, PAGE_SIZE);
 511
 512         return (va);
 513 }
 514
 515 static void
 516 slb_zone_init(void *dummy)
 517 {
 518
 519         slbt_zone = uma_zcreate("SLB tree node", sizeof(struct slbtnode),
 520             NULL, NULL, NULL, NULL, UMA_ALIGN_PTR, UMA_ZONE_VM);
 521         slb_cache_zone = uma_zcreate("SLB cache",
 522             (n_slbs + 1)*sizeof(struct slb *), NULL, NULL, NULL, NULL,
 523             UMA_ALIGN_PTR, UMA_ZONE_VM);
 524
 525         if (platform_real_maxaddr() != VM_MAX_ADDRESS) {
 526                 uma_zone_set_allocf(slb_cache_zone, slb_uma_real_alloc);
 527                 uma_zone_set_allocf(slbt_zone, slb_uma_real_alloc);
 528         }
 529 }
 530
 531 struct slb **
 532 slb_alloc_user_cache(void)
 533 {
 534         return (uma_zalloc(slb_cache_zone, M_ZERO));
 535 }
 536
 537 void
 538 slb_free_user_cache(struct slb **slb)
 539 {
 540         uma_zfree(slb_cache_zone, slb);
 541 }