/*-
 * SPDX-License-Identifier: BSD-2-Clause-FreeBSD
 *
 * Copyright (c) 2010 Nathan Whitehorn
 * All rights reserved.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 *
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 *
 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
 * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
 * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
 * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT,
 * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
 * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
 * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
 * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
 * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
 */
#include <sys/param.h>
#include <sys/kernel.h>
#include <sys/lock.h>
#include <sys/malloc.h>
#include <sys/mutex.h>
#include <sys/proc.h>
#include <sys/systm.h>

#include <vm/vm.h>
#include <vm/pmap.h>
#include <vm/uma.h>
#include <vm/vm_map.h>
#include <vm/vm_page.h>
#include <vm/vm_pageout.h>

#include <machine/md_var.h>
#include <machine/platform.h>
#include <machine/vmparam.h>
uintptr_t moea64_get_unique_vsid(void);
void moea64_release_vsid(uint64_t vsid);
static void slb_zone_init(void *);

static uma_zone_t slbt_zone;
static uma_zone_t slb_cache_zone;

SYSINIT(slb_zone_init, SI_SUB_KMEM, SI_ORDER_ANY, slb_zone_init, NULL);
struct slbtnode {
        uint16_t        ua_alloc;
        uint8_t         ua_level;
        /* Only 36 bits needed for full 64-bit address space. */
        uint64_t        ua_base;
        union {
                struct slbtnode *ua_child[16];
                struct slb      slb_entries[16];
        } u;
};

/*
 * For a full 64-bit address space, there are 36 bits in play in an
 * esid, so 8 levels, with the leaf being at level 0.
 *
 * |3333|3322|2222|2222|1111|1111|11  |    |    | esid
 * |5432|1098|7654|3210|9876|5432|1098|7654|3210| bits
 * +----+----+----+----+----+----+----+----+----+--------
 * |  8 |  7 |  6 |  5 |  4 |  3 |  2 |  1 |  0 | level
 */
#define UAD_ROOT_LEVEL  8
#define UAD_LEAF_LEVEL  0
static inline int
esid2idx(uint64_t esid, int level)
{
        int shift;

        shift = level * 4;
        return ((esid >> shift) & 0xF);
}

/*
 * The ua_base field should have 0 bits after the first 4*(level+1)
 * bits; i.e. it must itself be a valid base address for the node's
 * level.
 */
#define uad_baseok(ua)                                  \
        (esid2base(ua->ua_base, ua->ua_level) == ua->ua_base)

static inline uint64_t
esid2base(uint64_t esid, int level)
{
        uint64_t mask;
        int shift;

        shift = (level + 1) * 4;
        mask = ~((1ULL << shift) - 1);
        return (esid & mask);
}
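
/*
 * Worked example (illustrative values only): for esid 0x123456789,
 *
 *      esid2idx(0x123456789, 0)  == 0x9        (leaf slot index)
 *      esid2idx(0x123456789, 1)  == 0x8
 *      esid2base(0x123456789, 0) == 0x123456780
 *      esid2base(0x123456789, 3) == 0x123450000
 *
 * A node at level N therefore covers every esid sharing the prefix
 * esid2base(esid, N), and the nibble esid2idx(esid, N) selects which of
 * its 16 children (or leaf slb_entries) to follow.
 */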
/*
 * Allocate a new leaf node for the specified esid/vmhandle from the
 * parent node.
 */
static struct slb *
make_new_leaf(uint64_t esid, uint64_t slbv, struct slbtnode *parent)
{
        struct slbtnode *child;
        struct slb *retval;
        int idx;

        idx = esid2idx(esid, parent->ua_level);
        KASSERT(parent->u.ua_child[idx] == NULL, ("Child already exists!"));

        /* unlock and M_WAITOK and loop? */
        child = uma_zalloc(slbt_zone, M_NOWAIT | M_ZERO);
        KASSERT(child != NULL, ("unhandled NULL case"));

        child->ua_level = UAD_LEAF_LEVEL;
        child->ua_base = esid2base(esid, child->ua_level);
        idx = esid2idx(esid, child->ua_level);
        child->u.slb_entries[idx].slbv = slbv;
        child->u.slb_entries[idx].slbe = (esid << SLBE_ESID_SHIFT) | SLBE_VALID;
        setbit(&child->ua_alloc, idx);

        retval = &child->u.slb_entries[idx];

        /*
         * The above stores must be visible before the next one, so
         * that a lockless searcher always sees a valid path through
         * the tree.
         */
        powerpc_lwsync();

        idx = esid2idx(esid, parent->ua_level);
        parent->u.ua_child[idx] = child;
        setbit(&parent->ua_alloc, idx);

        return (retval);
}
/*
 * Allocate a new intermediate node to fit between the parent and
 * esid.
 */
static struct slbtnode*
make_intermediate(uint64_t esid, struct slbtnode *parent)
{
        struct slbtnode *child, *inter;
        int idx, level;

        idx = esid2idx(esid, parent->ua_level);
        child = parent->u.ua_child[idx];
        KASSERT(esid2base(esid, child->ua_level) != child->ua_base,
            ("No need for an intermediate node?"));

        /*
         * Find the level where the existing child and our new esid
         * meet.  It must be lower than parent->ua_level or we would
         * have chosen a different index in parent.
         */
        level = child->ua_level + 1;
        while (esid2base(esid, level) !=
            esid2base(child->ua_base, level))
                level++;
        KASSERT(level < parent->ua_level,
            ("Found splitting level %d for %09jx and %09jx, "
            "but it's the same as %p's",
            level, esid, child->ua_base, parent));

        /* unlock and M_WAITOK and loop? */
        inter = uma_zalloc(slbt_zone, M_NOWAIT | M_ZERO);
        KASSERT(inter != NULL, ("unhandled NULL case"));

        /* Set up intermediate node to point to child ... */
        inter->ua_level = level;
        inter->ua_base = esid2base(esid, inter->ua_level);
        idx = esid2idx(child->ua_base, inter->ua_level);
        inter->u.ua_child[idx] = child;
        setbit(&inter->ua_alloc, idx);
        powerpc_lwsync();

        /* Set up parent to point to intermediate node ... */
        idx = esid2idx(inter->ua_base, parent->ua_level);
        parent->u.ua_child[idx] = inter;
        setbit(&parent->ua_alloc, idx);

        return (inter);
}
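
/*
 * Illustrative case (made-up values): if the root (level 8) has a lone
 * leaf child for esid 0x123456789 and esid 0x123400000 is inserted next,
 * the two esids first share a base at level 4 (both give 0x123400000),
 * so make_intermediate() hangs a level-4 node off the root, the old leaf
 * moves under its slot 0x5, and the caller then attaches a new leaf for
 * 0x123400000 at slot 0x0.
 */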
uint64_t
kernel_va_to_slbv(vm_offset_t va)
{
        uint64_t slbv;

        /* Set kernel VSID to deterministic value */
        slbv = (KERNEL_VSID((uintptr_t)va >> ADDR_SR_SHFT)) << SLBV_VSID_SHIFT;

        /*
         * Figure out if this is a large-page mapping.
         */
        if (hw_direct_map && va > DMAP_BASE_ADDRESS && va < DMAP_MAX_ADDRESS) {
                /*
                 * XXX: If we have set up a direct map, assumes
                 * all physical memory is mapped with large pages.
                 */
                if (mem_valid(DMAP_TO_PHYS(va), 0) == 0)
                        slbv |= SLBV_L;
        }

        return (slbv);
}
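
/*
 * In other words: direct-map addresses whose backing physical address is
 * real memory (mem_valid() returns 0) get the SLBV_L large-page bit,
 * while direct-map addresses that do not correspond to RAM keep the base
 * page size.
 */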
struct slb *
user_va_to_slb_entry(pmap_t pm, vm_offset_t va)
{
        uint64_t esid = va >> ADDR_SR_SHFT;
        struct slbtnode *ua;
        int idx;

        ua = pm->pm_slb_tree_root;

        for (;;) {
                KASSERT(uad_baseok(ua), ("uad base %016jx level %d bad!",
                    ua->ua_base, ua->ua_level));
                idx = esid2idx(esid, ua->ua_level);

                /*
                 * This code is specific to ppc64 where a load is
                 * atomic, so no need for atomic_load macro.
                 */
                if (ua->ua_level == UAD_LEAF_LEVEL)
                        return ((ua->u.slb_entries[idx].slbe & SLBE_VALID) ?
                            &ua->u.slb_entries[idx] : NULL);

                /*
                 * The following accesses are implicitly ordered under the POWER
                 * ISA by load dependencies (the store ordering is provided by
                 * the powerpc_lwsync() calls elsewhere) and so are run without
                 * barriers.
                 */
                ua = ua->u.ua_child[idx];
                if (ua == NULL ||
                    esid2base(esid, ua->ua_level) != ua->ua_base)
                        return (NULL);
        }
}
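
/*
 * Summary of the lockless lookup protocol used above: writers fully
 * initialize a new node and issue powerpc_lwsync() before storing the
 * child pointer into its parent, while readers walk ua_child[] relying
 * on the POWER ISA's data-dependency ordering of the dependent loads.
 * A reader therefore sees either NULL, a node whose base does not match
 * (and bails out), or a fully formed node; never a half-built one.
 */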
uint64_t
va_to_vsid(pmap_t pm, vm_offset_t va)
{
        struct slb *entry;

        /* Shortcut kernel case */
        if (pm == kernel_pmap)
                return (KERNEL_VSID((uintptr_t)va >> ADDR_SR_SHFT));

        /*
         * If there is no vsid for this VA, we need to add a new entry
         * to the PMAP's segment table.
         */
        entry = user_va_to_slb_entry(pm, va);

        if (entry == NULL)
                return (allocate_user_vsid(pm,
                    (uintptr_t)va >> ADDR_SR_SHFT, 0));

        return ((entry->slbv & SLBV_VSID_MASK) >> SLBV_VSID_SHIFT);
}
uint64_t
allocate_user_vsid(pmap_t pm, uint64_t esid, int large)
{
        uint64_t vsid, slbv;
        struct slbtnode *ua, *next, *inter;
        struct slb *slb;
        int idx;

        KASSERT(pm != kernel_pmap, ("Attempting to allocate a kernel VSID"));

        PMAP_LOCK_ASSERT(pm, MA_OWNED);
        vsid = moea64_get_unique_vsid();

        slbv = vsid << SLBV_VSID_SHIFT;
        if (large)
                slbv |= SLBV_L;

        ua = pm->pm_slb_tree_root;

        /* Descend to the correct leaf or NULL pointer. */
        for (;;) {
                KASSERT(uad_baseok(ua),
                    ("uad base %09jx level %d bad!", ua->ua_base, ua->ua_level));
                idx = esid2idx(esid, ua->ua_level);

                if (ua->ua_level == UAD_LEAF_LEVEL) {
                        ua->u.slb_entries[idx].slbv = slbv;
                        powerpc_lwsync();
                        ua->u.slb_entries[idx].slbe = (esid << SLBE_ESID_SHIFT)
                            | SLBE_VALID;
                        setbit(&ua->ua_alloc, idx);
                        slb = &ua->u.slb_entries[idx];
                        break;
                }

                next = ua->u.ua_child[idx];
                if (next == NULL) {
                        slb = make_new_leaf(esid, slbv, ua);
                        break;
                }

                /*
                 * Check if the next item down has an okay ua_base.
                 * If not, we need to allocate an intermediate node.
                 */
                if (esid2base(esid, next->ua_level) != next->ua_base) {
                        inter = make_intermediate(esid, ua);
                        slb = make_new_leaf(esid, slbv, inter);
                        break;
                }

                ua = next;
        }

        /*
         * Someone probably wants this soon, and it may be a wired
         * SLB mapping, so pre-spill this entry.
         */
        slb_insert_user(pm, slb);

        return (vsid);
}
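
/*
 * The descent above ends in one of three ways: the walk reaches an
 * existing leaf and the entry is filled in place; a NULL child pointer
 * is found and make_new_leaf() hangs a fresh leaf off that node; or the
 * next node's ua_base does not cover the new esid, in which case a
 * splitting node from make_intermediate() is linked in first and the
 * leaf is attached below it.
 */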
void
free_vsid(pmap_t pm, uint64_t esid, int large)
{
        struct slbtnode *ua;
        int idx;

        PMAP_LOCK_ASSERT(pm, MA_OWNED);

        ua = pm->pm_slb_tree_root;
        /* Descend to the correct leaf. */
        for (;;) {
                KASSERT(uad_baseok(ua),
                    ("uad base %09jx level %d bad!", ua->ua_base, ua->ua_level));

                idx = esid2idx(esid, ua->ua_level);
                if (ua->ua_level == UAD_LEAF_LEVEL) {
                        ua->u.slb_entries[idx].slbv = 0;
                        powerpc_lwsync();
                        ua->u.slb_entries[idx].slbe = 0;
                        clrbit(&ua->ua_alloc, idx);
                        return;
                }

                ua = ua->u.ua_child[idx];
                if (ua == NULL ||
                    esid2base(esid, ua->ua_level) != ua->ua_base) {
                        /* Perhaps just return instead of assert? */
                        KASSERT(0,
                            ("Asked to remove an entry that was never inserted!"));
                        return;
                }
        }
}
static void
free_slb_tree_node(struct slbtnode *ua)
{
        int idx;

        for (idx = 0; idx < 16; idx++) {
                if (ua->ua_level != UAD_LEAF_LEVEL) {
                        if (ua->u.ua_child[idx] != NULL)
                                free_slb_tree_node(ua->u.ua_child[idx]);
                } else {
                        if (ua->u.slb_entries[idx].slbv != 0)
                                moea64_release_vsid(ua->u.slb_entries[idx].slbv
                                    >> SLBV_VSID_SHIFT);
                }
        }

        uma_zfree(slbt_zone, ua);
}

void
slb_free_tree(pmap_t pm)
{

        free_slb_tree_node(pm->pm_slb_tree_root);
}

struct slbtnode *
slb_alloc_tree(void)
{
        struct slbtnode *root;

        root = uma_zalloc(slbt_zone, M_NOWAIT | M_ZERO);
        root->ua_level = UAD_ROOT_LEVEL;

        return (root);
}
/* Lock entries mapping kernel text and stacks */

void
slb_insert_kernel(uint64_t slbe, uint64_t slbv)
{
        struct slb *slbcache;
        int i;

        /* We don't want to be preempted while modifying the kernel map */
        critical_enter();

        slbcache = PCPU_GET(aim.slb);

        /* Check for an unused slot, abusing the user slot as a full flag */
        if (slbcache[USER_SLB_SLOT].slbe == 0) {
                for (i = 0; i < n_slbs; i++) {
                        if (i == USER_SLB_SLOT)
                                continue;
                        if (!(slbcache[i].slbe & SLBE_VALID))
                                goto fillkernslb;
                }

                if (i == n_slbs)
                        slbcache[USER_SLB_SLOT].slbe = 1;
        }

        /* No unused slot: evict a pseudo-random victim (never the user slot) */
        i = mftb() % n_slbs;
        if (i == USER_SLB_SLOT)
                i = (i+1) % n_slbs;

fillkernslb:
        KASSERT(i != USER_SLB_SLOT,
            ("Filling user SLB slot with a kernel mapping"));
        slbcache[i].slbv = slbv;
        slbcache[i].slbe = slbe | (uint64_t)i;

        /* If it is for this CPU, put it in the SLB right away */
        if (pmap_bootstrapped) {
                /* slbie not required */
                __asm __volatile ("slbmte %0, %1" ::
                    "r"(slbcache[i].slbv), "r"(slbcache[i].slbe));
        }

        critical_exit();
}
void
slb_insert_user(pmap_t pm, struct slb *slb)
{
        int i;

        PMAP_LOCK_ASSERT(pm, MA_OWNED);

        if (pm->pm_slb_len < n_slbs) {
                i = pm->pm_slb_len;
                pm->pm_slb_len++;
        } else {
                i = mftb() % n_slbs;
        }

        /* Note that this replacement is atomic with respect to trap_subr */
        pm->pm_slb[i] = slb;
}
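
/*
 * pm->pm_slb[] is the per-pmap cache consulted from the SLB fault path
 * referenced above (trap_subr); each slot is a single pointer, so the
 * store above replaces an entry atomically and a concurrent fault
 * handler sees either the old or the new entry, never a torn mix.
 */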
static void *
slb_uma_real_alloc(uma_zone_t zone, vm_size_t bytes, int domain,
    u_int8_t *flags, int wait)
{
        static vm_offset_t realmax = 0;
        void *va;
        vm_page_t m;

        if (realmax == 0)
                realmax = platform_real_maxaddr();

        *flags = UMA_SLAB_PRIV;
        m = vm_page_alloc_contig_domain(NULL, 0, domain,
            malloc2vm_flags(wait) | VM_ALLOC_NOOBJ | VM_ALLOC_WIRED,
            1, 0, realmax, PAGE_SIZE, PAGE_SIZE, VM_MEMATTR_DEFAULT);
        if (m == NULL)
                return (NULL);

        va = (void *)PHYS_TO_DMAP(VM_PAGE_TO_PHYS(m));

        if (!hw_direct_map)
                pmap_kenter((vm_offset_t)va, VM_PAGE_TO_PHYS(m));

        if ((wait & M_ZERO) && (m->flags & PG_ZERO) == 0)
                bzero(va, PAGE_SIZE);

        return (va);
}
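
/*
 * This back-end allocator keeps zone memory at physical addresses below
 * platform_real_maxaddr(), i.e. memory the CPU can still reach with
 * translation disabled, so the SLB bookkeeping stays usable from
 * real-mode fault handling.  slb_zone_init() only installs it when that
 * limit differs from VM_MAX_ADDRESS.
 */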
static void
slb_zone_init(void *dummy)
{

        slbt_zone = uma_zcreate("SLB tree node", sizeof(struct slbtnode),
            NULL, NULL, NULL, NULL, UMA_ALIGN_PTR, UMA_ZONE_VM);
        slb_cache_zone = uma_zcreate("SLB cache",
            (n_slbs + 1)*sizeof(struct slb *), NULL, NULL, NULL, NULL,
            UMA_ALIGN_PTR, UMA_ZONE_VM);

        if (platform_real_maxaddr() != VM_MAX_ADDRESS) {
                uma_zone_set_allocf(slb_cache_zone, slb_uma_real_alloc);
                uma_zone_set_allocf(slbt_zone, slb_uma_real_alloc);
        }
}
struct slb **
slb_alloc_user_cache(void)
{
        return (uma_zalloc(slb_cache_zone, M_ZERO));
}

void
slb_free_user_cache(struct slb **slb)
{
        uma_zfree(slb_cache_zone, slb);
}
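
/*
 * Usage sketch (the pmap glue is assumed here, not defined in this
 * file): a user pmap would typically allocate its SLB cache and tree
 * root at creation time and release them together, e.g.
 *
 *      pm->pm_slb = slb_alloc_user_cache();
 *      pm->pm_slb_tree_root = slb_alloc_tree();
 *      ...
 *      slb_free_tree(pm);
 *      slb_free_user_cache(pm->pm_slb);
 */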