sys/vm/uma_core.c

   1 /*-
   2  * Copyright (c) 2004, 2005,
   3  *     Bosko Milekic <bmilekic@FreeBSD.org>.  All rights reserved.
   4  * Copyright (c) 2002, 2003, 2004, 2005,
   5  *     Jeffrey Roberson <jeff@FreeBSD.org>.  All rights reserved.
   6  *
   7  * Redistribution and use in source and binary forms, with or without
   8  * modification, are permitted provided that the following conditions
   9  * are met:
  10  * 1. Redistributions of source code must retain the above copyright
  11  *    notice unmodified, this list of conditions, and the following
  12  *    disclaimer.
  13  * 2. Redistributions in binary form must reproduce the above copyright
  14  *    notice, this list of conditions and the following disclaimer in the
  15  *    documentation and/or other materials provided with the distribution.
  16  *
  17  * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
  18  * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
  19  * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
  20  * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT,
  21  * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
  22  * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
  23  * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
  24  * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
  25  * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
  26  * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
  27  */
  28
  29 /*
  30  * uma_core.c  Implementation of the Universal Memory allocator
  31  *
  32  * This allocator is intended to replace the multitude of similar object caches
  33  * in the standard FreeBSD kernel.  The intent is to be flexible as well as
   34  * efficient.  A primary design goal is to return unused memory to the rest of
  35  * the system.  This will make the system as a whole more flexible due to the
  36  * ability to move memory to subsystems which most need it instead of leaving
  37  * pools of reserved memory unused.
  38  *
  39  * The basic ideas stem from similar slab/zone based allocators whose algorithms
  40  * are well known.
  41  *
  42  */
  43
  44 /*
  45  * TODO:
  46  *      - Improve memory usage for large allocations
  47  *      - Investigate cache size adjustments
  48  */
  49
  50 #include <sys/cdefs.h>
  51 __FBSDID("$FreeBSD$");
  52
  53 /* I should really use ktr.. */
  54 /*
  55 #define UMA_DEBUG 1
  56 #define UMA_DEBUG_ALLOC 1
  57 #define UMA_DEBUG_ALLOC_1 1
  58 */
  59
  60 #include "opt_param.h"
  61 #include <sys/param.h>
  62 #include <sys/systm.h>
  63 #include <sys/kernel.h>
  64 #include <sys/types.h>
  65 #include <sys/queue.h>
  66 #include <sys/malloc.h>
  67 #include <sys/ktr.h>
  68 #include <sys/lock.h>
  69 #include <sys/sysctl.h>
  70 #include <sys/mutex.h>
  71 #include <sys/proc.h>
  72 #include <sys/smp.h>
  73 #include <sys/vmmeter.h>
  74
  75 #include <vm/vm.h>
  76 #include <vm/vm_object.h>
  77 #include <vm/vm_page.h>
  78 #include <vm/vm_param.h>
  79 #include <vm/vm_map.h>
  80 #include <vm/vm_kern.h>
  81 #include <vm/vm_extern.h>
  82 #include <vm/uma.h>
  83 #include <vm/uma_int.h>
  84 #include <vm/uma_dbg.h>
  85
  86 #include <machine/vmparam.h>
  87
  88 /*
  89  * This is the zone and keg from which all zones are spawned.  The idea is that
  90  * even the zone & keg heads are allocated from the allocator, so we use the
  91  * bss section to bootstrap us.
  92  */
  93 static struct uma_keg masterkeg;
  94 static struct uma_zone masterzone_k;
  95 static struct uma_zone masterzone_z;
  96 static uma_zone_t kegs = &masterzone_k;
  97 static uma_zone_t zones = &masterzone_z;
  98
  99 /* This is the zone from which all of uma_slab_t's are allocated. */
 100 static uma_zone_t slabzone;
 101 static uma_zone_t slabrefzone;  /* With refcounters (for UMA_ZONE_REFCNT) */
 102
 103 /*
 104  * The initial hash tables come out of this zone so they can be allocated
 105  * prior to malloc coming up.
 106  */
 107 static uma_zone_t hashzone;
 108
 109 static MALLOC_DEFINE(M_UMAHASH, "UMAHash", "UMA Hash Buckets");
 110
 111 /*
 112  * Are we allowed to allocate buckets?
 113  */
 114 static int bucketdisable = 1;
 115
 116 /* Linked list of all kegs in the system */
 117 static LIST_HEAD(,uma_keg) uma_kegs = LIST_HEAD_INITIALIZER(&uma_kegs);
 118
 119 /* This mutex protects the keg list */
 120 static struct mtx uma_mtx;
 121
 122 /* These are the pcpu cache locks */
 123 static struct mtx uma_pcpu_mtx[MAXCPU];
 124
 125 /* Linked list of boot time pages */
 126 static LIST_HEAD(,uma_slab) uma_boot_pages =
 127     LIST_HEAD_INITIALIZER(&uma_boot_pages);
 128
 129 /* Count of free boottime pages */
 130 static int uma_boot_free = 0;
 131
 132 /* Is the VM done starting up? */
 133 static int booted = 0;
 134
 135 /* Maximum number of allowed items-per-slab if the slab header is OFFPAGE */
 136 static u_int uma_max_ipers;
 137 static u_int uma_max_ipers_ref;
 138
 139 /*
 140  * This is the handle used to schedule events that need to happen
 141  * outside of the allocation fast path.
 142  */
 143 static struct callout uma_callout;
 144 #define UMA_TIMEOUT     20              /* Seconds for callout interval. */
 145
/*
 * Argument bundle that is passed as the zone constructor's argument, so
 * that zones can be created through the normal allocation path instead of
 * needing a special allocation function just for zones.
 *
 * NOTE(review): the per-field notes below are inferred from the member
 * names and types only; confirm them against zone_ctor().
 */
struct uma_zctor_args {
        char *name;             /* presumably the zone's name */
        size_t size;            /* presumably the item size */
        uma_ctor ctor;          /* presumably the item constructor */
        uma_dtor dtor;          /* presumably the item destructor */
        uma_init uminit;        /* presumably the item init callback */
        uma_fini fini;          /* presumably the item fini callback */
        uma_keg_t keg;          /* presumably the keg backing the zone */
        int align;              /* presumably the alignment request */
        u_int16_t flags;        /* presumably the creation flags */
};
 161
/*
 * Argument bundle for keg construction.  The members mirror, in order, the
 * parameters of uma_kcreate() (zone, size, uminit, fini, align, flags).
 *
 * NOTE(review): presumably this is the constructor argument consumed by
 * keg_ctor(); confirm against that function.
 */
struct uma_kctor_args {
        uma_zone_t zone;
        size_t size;
        uma_init uminit;
        uma_fini fini;
        int align;
        u_int16_t flags;
};
 170
/*
 * Describes one zone from which buckets of a fixed capacity are allocated.
 * bucket_init() creates ubz_zone using ubz_name and a size derived from
 * ubz_entries; an entry with ubz_entries == 0 terminates a table of these.
 */
struct uma_bucket_zone {
        uma_zone_t      ubz_zone;       /* zone created by bucket_init() */
        char            *ubz_name;      /* name handed to uma_zcreate() */
        int             ubz_entries;    /* item pointers per bucket */
};
 176
 177 #define BUCKET_MAX      128
 178
 179 struct uma_bucket_zone bucket_zones[] = {
 180         { NULL, "16 Bucket", 16 },
 181         { NULL, "32 Bucket", 32 },
 182         { NULL, "64 Bucket", 64 },
 183         { NULL, "128 Bucket", 128 },
 184         { NULL, NULL, 0}
 185 };
 186
 187 #define BUCKET_SHIFT    4
 188 #define BUCKET_ZONES    ((BUCKET_MAX >> BUCKET_SHIFT) + 1)
 189
 190 /*
 191  * bucket_size[] maps requested bucket sizes to zones that allocate a bucket
 192  * of approximately the right size.
 193  */
 194 static uint8_t bucket_size[BUCKET_ZONES];
 195
 196 enum zfreeskip { SKIP_NONE, SKIP_DTOR, SKIP_FINI };
 197
 198 /* Prototypes.. */
 199
 200 static void *obj_alloc(uma_zone_t, int, u_int8_t *, int);
 201 static void *page_alloc(uma_zone_t, int, u_int8_t *, int);
 202 static void *startup_alloc(uma_zone_t, int, u_int8_t *, int);
 203 static void page_free(void *, int, u_int8_t);
 204 static uma_slab_t slab_zalloc(uma_zone_t, int);
 205 static void cache_drain(uma_zone_t);
 206 static void bucket_drain(uma_zone_t, uma_bucket_t);
 207 static void bucket_cache_drain(uma_zone_t zone);
 208 static int keg_ctor(void *, int, void *, int);
 209 static void keg_dtor(void *, int, void *);
 210 static int zone_ctor(void *, int, void *, int);
 211 static void zone_dtor(void *, int, void *);
 212 static int zero_init(void *, int, int);
 213 static void zone_small_init(uma_zone_t zone);
 214 static void zone_large_init(uma_zone_t zone);
 215 static void zone_foreach(void (*zfunc)(uma_zone_t));
 216 static void zone_timeout(uma_zone_t zone);
 217 static int hash_alloc(struct uma_hash *);
 218 static int hash_expand(struct uma_hash *, struct uma_hash *);
 219 static void hash_free(struct uma_hash *hash);
 220 static void uma_timeout(void *);
 221 static void uma_startup3(void);
 222 static void *uma_zalloc_internal(uma_zone_t, void *, int);
 223 static void uma_zfree_internal(uma_zone_t, void *, void *, enum zfreeskip);
 224 static void bucket_enable(void);
 225 static void bucket_init(void);
 226 static uma_bucket_t bucket_alloc(int, int);
 227 static void bucket_free(uma_bucket_t);
 228 static void bucket_zone_drain(void);
 229 static int uma_zalloc_bucket(uma_zone_t zone, int flags);
 230 static uma_slab_t uma_zone_slab(uma_zone_t zone, int flags);
 231 static void *uma_slab_alloc(uma_zone_t zone, uma_slab_t slab);
 232 static void zone_drain(uma_zone_t);
 233 static uma_zone_t uma_kcreate(uma_zone_t zone, size_t size, uma_init uminit,
 234     uma_fini fini, int align, u_int16_t flags);
 235
 236 void uma_print_zone(uma_zone_t);
 237 void uma_print_stats(void);
 238 static int sysctl_vm_zone(SYSCTL_HANDLER_ARGS);
 239
 240 #ifdef WITNESS
 241 static int nosleepwithlocks = 1;
 242 SYSCTL_INT(_debug, OID_AUTO, nosleepwithlocks, CTLFLAG_RW, &nosleepwithlocks,
 243     0, "Convert M_WAITOK to M_NOWAIT to avoid lock-held-across-sleep paths");
 244 #else
 245 static int nosleepwithlocks = 0;
 246 SYSCTL_INT(_debug, OID_AUTO, nosleepwithlocks, CTLFLAG_RW, &nosleepwithlocks,
 247     0, "Convert M_WAITOK to M_NOWAIT to avoid lock-held-across-sleep paths");
 248 #endif
 249 SYSCTL_OID(_vm, OID_AUTO, zone, CTLTYPE_STRING|CTLFLAG_RD,
 250     NULL, 0, sysctl_vm_zone, "A", "Zone Info");
 251 SYSINIT(uma_startup3, SI_SUB_VM_CONF, SI_ORDER_SECOND, uma_startup3, NULL);
 252
 253 /*
 254  * This routine checks to see whether or not it's safe to enable buckets.
 255  */
 256
 257 static void
 258 bucket_enable(void)
 259 {
 260         if (cnt.v_free_count < cnt.v_free_min)
 261                 bucketdisable = 1;
 262         else
 263                 bucketdisable = 0;
 264 }
 265
 266 /*
 267  * Initialize bucket_zones, the array of zones of buckets of various sizes.
 268  *
 269  * For each zone, calculate the memory required for each bucket, consisting
 270  * of the header and an array of pointers.  Initialize bucket_size[] to point
 271  * the range of appropriate bucket sizes at the zone.
 272  */
 273 static void
 274 bucket_init(void)
 275 {
 276         struct uma_bucket_zone *ubz;
 277         int i;
 278         int j;
 279
 280         for (i = 0, j = 0; bucket_zones[j].ubz_entries != 0; j++) {
 281                 int size;
 282
 283                 ubz = &bucket_zones[j];
 284                 size = roundup(sizeof(struct uma_bucket), sizeof(void *));
 285                 size += sizeof(void *) * ubz->ubz_entries;
 286                 ubz->ubz_zone = uma_zcreate(ubz->ubz_name, size,
 287                     NULL, NULL, NULL, NULL, UMA_ALIGN_PTR, UMA_ZFLAG_INTERNAL);
 288                 for (; i <= ubz->ubz_entries; i += (1 << BUCKET_SHIFT))
 289                         bucket_size[i >> BUCKET_SHIFT] = j;
 290         }
 291 }
 292
 293 /*
 294  * Given a desired number of entries for a bucket, return the zone from which
 295  * to allocate the bucket.
 296  */
 297 static struct uma_bucket_zone *
 298 bucket_zone_lookup(int entries)
 299 {
 300         int idx;
 301
 302         idx = howmany(entries, 1 << BUCKET_SHIFT);
 303         return (&bucket_zones[bucket_size[idx]]);
 304 }
 305
 306 static uma_bucket_t
 307 bucket_alloc(int entries, int bflags)
 308 {
 309         struct uma_bucket_zone *ubz;
 310         uma_bucket_t bucket;
 311
 312         /*
 313          * This is to stop us from allocating per cpu buckets while we're
 314          * running out of UMA_BOOT_PAGES.  Otherwise, we would exhaust the
 315          * boot pages.  This also prevents us from allocating buckets in
 316          * low memory situations.
 317          */
 318         if (bucketdisable)
 319                 return (NULL);
 320
 321         ubz = bucket_zone_lookup(entries);
 322         bucket = uma_zalloc_internal(ubz->ubz_zone, NULL, bflags);
 323         if (bucket) {
 324 #ifdef INVARIANTS
 325                 bzero(bucket->ub_bucket, sizeof(void *) * ubz->ubz_entries);
 326 #endif
 327                 bucket->ub_cnt = 0;
 328                 bucket->ub_entries = ubz->ubz_entries;
 329         }
 330
 331         return (bucket);
 332 }
 333
 334 static void
 335 bucket_free(uma_bucket_t bucket)
 336 {
 337         struct uma_bucket_zone *ubz;
 338
 339         ubz = bucket_zone_lookup(bucket->ub_entries);
 340         uma_zfree_internal(ubz->ubz_zone, bucket, NULL, SKIP_NONE);
 341 }
 342
 343 static void
 344 bucket_zone_drain(void)
 345 {
 346         struct uma_bucket_zone *ubz;
 347
 348         for (ubz = &bucket_zones[0]; ubz->ubz_entries != 0; ubz++)
 349                 zone_drain(ubz->ubz_zone);
 350 }
 351
 352
 353 /*
 354  * Routine called by timeout which is used to fire off some time interval
 355  * based calculations.  (stats, hash size, etc.)
 356  *
 357  * Arguments:
 358  *      arg   Unused
 359  *
 360  * Returns:
 361  *      Nothing
 362  */
 363 static void
 364 uma_timeout(void *unused)
 365 {
 366         bucket_enable();
 367         zone_foreach(zone_timeout);
 368
 369         /* Reschedule this event */
 370         callout_reset(&uma_callout, UMA_TIMEOUT * hz, uma_timeout, NULL);
 371 }
 372
 373 /*
 374  * Routine to perform timeout driven calculations.  This expands the
 375  * hashes and does per cpu statistics aggregation.
 376  *
 377  *  Arguments:
 378  *      zone  The zone to operate on
 379  *
 380  *  Returns:
 381  *      Nothing
 382  */
 383 static void
 384 zone_timeout(uma_zone_t zone)
 385 {
 386         uma_keg_t keg;
 387         uma_cache_t cache;
 388         u_int64_t alloc;
 389         int cpu;
 390
 391         keg = zone->uz_keg;
 392         alloc = 0;
 393
 394         /*
 395          * Aggregate per cpu cache statistics back to the zone.
 396          *
 397          * XXX This should be done in the sysctl handler.
 398          *
 399          * I may rewrite this to set a flag in the per cpu cache instead of
 400          * locking.  If the flag is not cleared on the next round I will have
 401          * to lock and do it here instead so that the statistics don't get too
 402          * far out of sync.
 403          */
 404         if (!(keg->uk_flags & UMA_ZFLAG_INTERNAL)) {
 405                 for (cpu = 0; cpu <= mp_maxid; cpu++) {
 406                         if (CPU_ABSENT(cpu))
 407                                 continue;
 408                         CPU_LOCK(cpu);
 409                         cache = &zone->uz_cpu[cpu];
 410                         /* Add them up, and reset */
 411                         alloc += cache->uc_allocs;
 412                         cache->uc_allocs = 0;
 413                         CPU_UNLOCK(cpu);
 414                 }
 415         }
 416
 417         /* Now push these stats back into the zone.. */
 418         ZONE_LOCK(zone);
 419         zone->uz_allocs += alloc;
 420
 421         /*
 422          * Expand the zone hash table.
 423          *
 424          * This is done if the number of slabs is larger than the hash size.
 425          * What I'm trying to do here is completely reduce collisions.  This
 426          * may be a little aggressive.  Should I allow for two collisions max?
 427          */
 428
 429         if (keg->uk_flags & UMA_ZONE_HASH &&
 430             keg->uk_pages / keg->uk_ppera >= keg->uk_hash.uh_hashsize) {
 431                 struct uma_hash newhash;
 432                 struct uma_hash oldhash;
 433                 int ret;
 434
 435                 /*
 436                  * This is so involved because allocating and freeing
 437                  * while the zone lock is held will lead to deadlock.
 438                  * I have to do everything in stages and check for
 439                  * races.
 440                  */
 441                 newhash = keg->uk_hash;
 442                 ZONE_UNLOCK(zone);
 443                 ret = hash_alloc(&newhash);
 444                 ZONE_LOCK(zone);
 445                 if (ret) {
 446                         if (hash_expand(&keg->uk_hash, &newhash)) {
 447                                 oldhash = keg->uk_hash;
 448                                 keg->uk_hash = newhash;
 449                         } else
 450                                 oldhash = newhash;
 451
 452                         ZONE_UNLOCK(zone);
 453                         hash_free(&oldhash);
 454                         ZONE_LOCK(zone);
 455                 }
 456         }
 457         ZONE_UNLOCK(zone);
 458 }
 459
 460 /*
 461  * Allocate and zero fill the next sized hash table from the appropriate
 462  * backing store.
 463  *
 464  * Arguments:
 465  *      hash  A new hash structure with the old hash size in uh_hashsize
 466  *
 467  * Returns:
 468  *      1 on sucess and 0 on failure.
 469  */
 470 static int
 471 hash_alloc(struct uma_hash *hash)
 472 {
 473         int oldsize;
 474         int alloc;
 475
 476         oldsize = hash->uh_hashsize;
 477
 478         /* We're just going to go to a power of two greater */
 479         if (oldsize)  {
 480                 hash->uh_hashsize = oldsize * 2;
 481                 alloc = sizeof(hash->uh_slab_hash[0]) * hash->uh_hashsize;
 482                 hash->uh_slab_hash = (struct slabhead *)malloc(alloc,
 483                     M_UMAHASH, M_NOWAIT);
 484         } else {
 485                 alloc = sizeof(hash->uh_slab_hash[0]) * UMA_HASH_SIZE_INIT;
 486                 hash->uh_slab_hash = uma_zalloc_internal(hashzone, NULL,
 487                     M_WAITOK);
 488                 hash->uh_hashsize = UMA_HASH_SIZE_INIT;
 489         }
 490         if (hash->uh_slab_hash) {
 491                 bzero(hash->uh_slab_hash, alloc);
 492                 hash->uh_hashmask = hash->uh_hashsize - 1;
 493                 return (1);
 494         }
 495
 496         return (0);
 497 }
 498
 499 /*
 500  * Expands the hash table for HASH zones.  This is done from zone_timeout
 501  * to reduce collisions.  This must not be done in the regular allocation
 502  * path, otherwise, we can recurse on the vm while allocating pages.
 503  *
 504  * Arguments:
 505  *      oldhash  The hash you want to expand
 506  *      newhash  The hash structure for the new table
 507  *
 508  * Returns:
 509  *      Nothing
 510  *
 511  * Discussion:
 512  */
 513 static int
 514 hash_expand(struct uma_hash *oldhash, struct uma_hash *newhash)
 515 {
 516         uma_slab_t slab;
 517         int hval;
 518         int i;
 519
 520         if (!newhash->uh_slab_hash)
 521                 return (0);
 522
 523         if (oldhash->uh_hashsize >= newhash->uh_hashsize)
 524                 return (0);
 525
 526         /*
 527          * I need to investigate hash algorithms for resizing without a
 528          * full rehash.
 529          */
 530
 531         for (i = 0; i < oldhash->uh_hashsize; i++)
 532                 while (!SLIST_EMPTY(&oldhash->uh_slab_hash[i])) {
 533                         slab = SLIST_FIRST(&oldhash->uh_slab_hash[i]);
 534                         SLIST_REMOVE_HEAD(&oldhash->uh_slab_hash[i], us_hlink);
 535                         hval = UMA_HASH(newhash, slab->us_data);
 536                         SLIST_INSERT_HEAD(&newhash->uh_slab_hash[hval],
 537                             slab, us_hlink);
 538                 }
 539
 540         return (1);
 541 }
 542
 543 /*
 544  * Free the hash bucket to the appropriate backing store.
 545  *
 546  * Arguments:
 547  *      slab_hash  The hash bucket we're freeing
 548  *      hashsize   The number of entries in that hash bucket
 549  *
 550  * Returns:
 551  *      Nothing
 552  */
 553 static void
 554 hash_free(struct uma_hash *hash)
 555 {
 556         if (hash->uh_slab_hash == NULL)
 557                 return;
 558         if (hash->uh_hashsize == UMA_HASH_SIZE_INIT)
 559                 uma_zfree_internal(hashzone,
 560                     hash->uh_slab_hash, NULL, SKIP_NONE);
 561         else
 562                 free(hash->uh_slab_hash, M_UMAHASH);
 563 }
 564
 565 /*
 566  * Frees all outstanding items in a bucket
 567  *
 568  * Arguments:
 569  *      zone   The zone to free to, must be unlocked.
 570  *      bucket The free/alloc bucket with items, cpu queue must be locked.
 571  *
 572  * Returns:
 573  *      Nothing
 574  */
 575
 576 static void
 577 bucket_drain(uma_zone_t zone, uma_bucket_t bucket)
 578 {
 579         uma_slab_t slab;
 580         int mzone;
 581         void *item;
 582
 583         if (bucket == NULL)
 584                 return;
 585
 586         slab = NULL;
 587         mzone = 0;
 588
 589         /* We have to lookup the slab again for malloc.. */
 590         if (zone->uz_keg->uk_flags & UMA_ZONE_MALLOC)
 591                 mzone = 1;
 592
 593         while (bucket->ub_cnt > 0)  {
 594                 bucket->ub_cnt--;
 595                 item = bucket->ub_bucket[bucket->ub_cnt];
 596 #ifdef INVARIANTS
 597                 bucket->ub_bucket[bucket->ub_cnt] = NULL;
 598                 KASSERT(item != NULL,
 599                     ("bucket_drain: botched ptr, item is NULL"));
 600 #endif
 601                 /*
 602                  * This is extremely inefficient.  The slab pointer was passed
 603                  * to uma_zfree_arg, but we lost it because the buckets don't
 604                  * hold them.  This will go away when free() gets a size passed
 605                  * to it.
 606                  */
 607                 if (mzone)
 608                         slab = vtoslab((vm_offset_t)item & (~UMA_SLAB_MASK));
 609                 uma_zfree_internal(zone, item, slab, SKIP_DTOR);
 610         }
 611 }
 612
 613 /*
 614  * Drains the per cpu caches for a zone.
 615  *
 616  * Arguments:
 617  *      zone     The zone to drain, must be unlocked.
 618  *
 619  * Returns:
 620  *      Nothing
 621  */
 622 static void
 623 cache_drain(uma_zone_t zone)
 624 {
 625         uma_cache_t cache;
 626         int cpu;
 627
 628         /*
 629          * We have to lock each cpu cache before locking the zone
 630          */
 631         for (cpu = 0; cpu <= mp_maxid; cpu++) {
 632                 if (CPU_ABSENT(cpu))
 633                         continue;
 634                 CPU_LOCK(cpu);
 635                 cache = &zone->uz_cpu[cpu];
 636                 bucket_drain(zone, cache->uc_allocbucket);
 637                 bucket_drain(zone, cache->uc_freebucket);
 638                 if (cache->uc_allocbucket != NULL)
 639                         bucket_free(cache->uc_allocbucket);
 640                 if (cache->uc_freebucket != NULL)
 641                         bucket_free(cache->uc_freebucket);
 642                 cache->uc_allocbucket = cache->uc_freebucket = NULL;
 643         }
 644         ZONE_LOCK(zone);
 645         bucket_cache_drain(zone);
 646         ZONE_UNLOCK(zone);
 647         for (cpu = 0; cpu <= mp_maxid; cpu++) {
 648                 if (CPU_ABSENT(cpu))
 649                         continue;
 650                 CPU_UNLOCK(cpu);
 651         }
 652 }
 653
 654 /*
 655  * Drain the cached buckets from a zone.  Expects a locked zone on entry.
 656  */
 657 static void
 658 bucket_cache_drain(uma_zone_t zone)
 659 {
 660         uma_bucket_t bucket;
 661
 662         /*
 663          * Drain the bucket queues and free the buckets, we just keep two per
 664          * cpu (alloc/free).
 665          */
 666         while ((bucket = LIST_FIRST(&zone->uz_full_bucket)) != NULL) {
 667                 LIST_REMOVE(bucket, ub_link);
 668                 ZONE_UNLOCK(zone);
 669                 bucket_drain(zone, bucket);
 670                 bucket_free(bucket);
 671                 ZONE_LOCK(zone);
 672         }
 673
 674         /* Now we do the free queue.. */
 675         while ((bucket = LIST_FIRST(&zone->uz_free_bucket)) != NULL) {
 676                 LIST_REMOVE(bucket, ub_link);
 677                 bucket_free(bucket);
 678         }
 679 }
 680
 681 /*
 682  * Frees pages from a zone back to the system.  This is done on demand from
 683  * the pageout daemon.
 684  *
 685  * Arguments:
 686  *      zone  The zone to free pages from
 687  *       all  Should we drain all items?
 688  *
 689  * Returns:
 690  *      Nothing.
 691  */
 692 static void
 693 zone_drain(uma_zone_t zone)
 694 {
 695         struct slabhead freeslabs = { 0 };
 696         uma_keg_t keg;
 697         uma_slab_t slab;
 698         uma_slab_t n;
 699         u_int8_t flags;
 700         u_int8_t *mem;
 701         int i;
 702
 703         keg = zone->uz_keg;
 704
 705         /*
 706          * We don't want to take pages from statically allocated zones at this
 707          * time
 708          */
 709         if (keg->uk_flags & UMA_ZONE_NOFREE || keg->uk_freef == NULL)
 710                 return;
 711
 712         ZONE_LOCK(zone);
 713
 714 #ifdef UMA_DEBUG
 715         printf("%s free items: %u\n", zone->uz_name, keg->uk_free);
 716 #endif
 717         bucket_cache_drain(zone);
 718         if (keg->uk_free == 0)
 719                 goto finished;
 720
 721         slab = LIST_FIRST(&keg->uk_free_slab);
 722         while (slab) {
 723                 n = LIST_NEXT(slab, us_link);
 724
 725                 /* We have no where to free these to */
 726                 if (slab->us_flags & UMA_SLAB_BOOT) {
 727                         slab = n;
 728                         continue;
 729                 }
 730
 731                 LIST_REMOVE(slab, us_link);
 732                 keg->uk_pages -= keg->uk_ppera;
 733                 keg->uk_free -= keg->uk_ipers;
 734
 735                 if (keg->uk_flags & UMA_ZONE_HASH)
 736                         UMA_HASH_REMOVE(&keg->uk_hash, slab, slab->us_data);
 737
 738                 SLIST_INSERT_HEAD(&freeslabs, slab, us_hlink);
 739
 740                 slab = n;
 741         }
 742 finished:
 743         ZONE_UNLOCK(zone);
 744
 745         while ((slab = SLIST_FIRST(&freeslabs)) != NULL) {
 746                 SLIST_REMOVE(&freeslabs, slab, uma_slab, us_hlink);
 747                 if (keg->uk_fini)
 748                         for (i = 0; i < keg->uk_ipers; i++)
 749                                 keg->uk_fini(
 750                                     slab->us_data + (keg->uk_rsize * i),
 751                                     keg->uk_size);
 752                 flags = slab->us_flags;
 753                 mem = slab->us_data;
 754
 755                 if ((keg->uk_flags & UMA_ZONE_MALLOC) ||
 756                     (keg->uk_flags & UMA_ZONE_REFCNT)) {
 757                         for (i = 0; i < keg->uk_ppera; i++)
 758                                 vsetobj((vm_offset_t)mem + (i * PAGE_SIZE),
 759                                     kmem_object);
 760                 }
 761                 if (keg->uk_flags & UMA_ZONE_OFFPAGE)
 762                         uma_zfree_internal(keg->uk_slabzone, slab, NULL,
 763                             SKIP_NONE);
 764 #ifdef UMA_DEBUG
 765                 printf("%s: Returning %d bytes.\n",
 766                     zone->uz_name, UMA_SLAB_SIZE * keg->uk_ppera);
 767 #endif
 768                 keg->uk_freef(mem, UMA_SLAB_SIZE * keg->uk_ppera, flags);
 769         }
 770 }
 771
 772 /*
 773  * Allocate a new slab for a zone.  This does not insert the slab onto a list.
 774  *
 775  * Arguments:
 776  *      zone  The zone to allocate slabs for
 777  *      wait  Shall we wait?
 778  *
 779  * Returns:
 780  *      The slab that was allocated or NULL if there is no memory and the
 781  *      caller specified M_NOWAIT.
 782  */
 783 static uma_slab_t
 784 slab_zalloc(uma_zone_t zone, int wait)
 785 {
 786         uma_slabrefcnt_t slabref;
 787         uma_slab_t slab;
 788         uma_keg_t keg;
 789         u_int8_t *mem;
 790         u_int8_t flags;
 791         int i;
 792
 793         slab = NULL;
 794         keg = zone->uz_keg;
 795
 796 #ifdef UMA_DEBUG
 797         printf("slab_zalloc:  Allocating a new slab for %s\n", zone->uz_name);
 798 #endif
 799         ZONE_UNLOCK(zone);
 800
 801         if (keg->uk_flags & UMA_ZONE_OFFPAGE) {
 802                 slab = uma_zalloc_internal(keg->uk_slabzone, NULL, wait);
 803                 if (slab == NULL) {
 804                         ZONE_LOCK(zone);
 805                         return NULL;
 806                 }
 807         }
 808
 809         /*
 810          * This reproduces the old vm_zone behavior of zero filling pages the
 811          * first time they are added to a zone.
 812          *
 813          * Malloced items are zeroed in uma_zalloc.
 814          */
 815
 816         if ((keg->uk_flags & UMA_ZONE_MALLOC) == 0)
 817                 wait |= M_ZERO;
 818         else
 819                 wait &= ~M_ZERO;
 820
 821         mem = keg->uk_allocf(zone, keg->uk_ppera * UMA_SLAB_SIZE,
 822             &flags, wait);
 823         if (mem == NULL) {
 824                 if (keg->uk_flags & UMA_ZONE_OFFPAGE)
 825                         uma_zfree_internal(keg->uk_slabzone, slab, NULL, 0);
 826                 ZONE_LOCK(zone);
 827                 return (NULL);
 828         }
 829
 830         /* Point the slab into the allocated memory */
 831         if (!(keg->uk_flags & UMA_ZONE_OFFPAGE))
 832                 slab = (uma_slab_t )(mem + keg->uk_pgoff);
 833
 834         if ((keg->uk_flags & UMA_ZONE_MALLOC) ||
 835             (keg->uk_flags & UMA_ZONE_REFCNT))
 836                 for (i = 0; i < keg->uk_ppera; i++)
 837                         vsetslab((vm_offset_t)mem + (i * PAGE_SIZE), slab);
 838
 839         slab->us_keg = keg;
 840         slab->us_data = mem;
 841         slab->us_freecount = keg->uk_ipers;
 842         slab->us_firstfree = 0;
 843         slab->us_flags = flags;
 844
 845         if (keg->uk_flags & UMA_ZONE_REFCNT) {
 846                 slabref = (uma_slabrefcnt_t)slab;
 847                 for (i = 0; i < keg->uk_ipers; i++) {
 848                         slabref->us_freelist[i].us_refcnt = 0;
 849                         slabref->us_freelist[i].us_item = i+1;
 850                 }
 851         } else {
 852                 for (i = 0; i < keg->uk_ipers; i++)
 853                         slab->us_freelist[i].us_item = i+1;
 854         }
 855
 856         if (keg->uk_init != NULL) {
 857                 for (i = 0; i < keg->uk_ipers; i++)
 858                         if (keg->uk_init(slab->us_data + (keg->uk_rsize * i),
 859                             keg->uk_size, wait) != 0)
 860                                 break;
 861                 if (i != keg->uk_ipers) {
 862                         if (keg->uk_fini != NULL) {
 863                                 for (i--; i > -1; i--)
 864                                         keg->uk_fini(slab->us_data +
 865                                             (keg->uk_rsize * i),
 866                                             keg->uk_size);
 867                         }
 868                         if ((keg->uk_flags & UMA_ZONE_MALLOC) ||
 869                             (keg->uk_flags & UMA_ZONE_REFCNT))
 870                                 for (i = 0; i < keg->uk_ppera; i++)
 871                                         vsetobj((vm_offset_t)mem +
 872                                             (i * PAGE_SIZE), kmem_object);
 873                         if (keg->uk_flags & UMA_ZONE_OFFPAGE)
 874                                 uma_zfree_internal(keg->uk_slabzone, slab,
 875                                     NULL, SKIP_NONE);
 876                         keg->uk_freef(mem, UMA_SLAB_SIZE * keg->uk_ppera,
 877                             flags);
 878                         ZONE_LOCK(zone);
 879                         return (NULL);
 880                 }
 881         }
 882         ZONE_LOCK(zone);
 883
 884         if (keg->uk_flags & UMA_ZONE_HASH)
 885                 UMA_HASH_INSERT(&keg->uk_hash, slab, mem);
 886
 887         keg->uk_pages += keg->uk_ppera;
 888         keg->uk_free += keg->uk_ipers;
 889
 890         return (slab);
 891 }
 892
 893 /*
 894  * This function is intended to be used early on in place of page_alloc() so
 895  * that we may use the boot time page cache to satisfy allocations before
 896  * the VM is ready.
 897  */
 898 static void *
 899 startup_alloc(uma_zone_t zone, int bytes, u_int8_t *pflag, int wait)
 900 {
 901         uma_keg_t keg;
 902
 903         keg = zone->uz_keg;
 904
 905         /*
 906          * Check our small startup cache to see if it has pages remaining.
 907          */
 908         mtx_lock(&uma_mtx);
 909         if (uma_boot_free != 0) {
 910                 uma_slab_t tmps;
 911
 912                 tmps = LIST_FIRST(&uma_boot_pages);
 913                 LIST_REMOVE(tmps, us_link);
 914                 uma_boot_free--;
 915                 mtx_unlock(&uma_mtx);
 916                 *pflag = tmps->us_flags;
 917                 return (tmps->us_data);
 918         }
 919         mtx_unlock(&uma_mtx);
 920         if (booted == 0)
 921                 panic("UMA: Increase UMA_BOOT_PAGES");
 922         /*
 923          * Now that we've booted reset these users to their real allocator.
 924          */
 925 #ifdef UMA_MD_SMALL_ALLOC
 926         keg->uk_allocf = uma_small_alloc;
 927 #else
 928         keg->uk_allocf = page_alloc;
 929 #endif
 930         return keg->uk_allocf(zone, bytes, pflag, wait);
 931 }
 932
 933 /*
 934  * Allocates a number of pages from the system
 935  *
 936  * Arguments:
 937  *      zone  Unused
 938  *      bytes  The number of bytes requested
 939  *      wait  Shall we wait?
 940  *
 941  * Returns:
 942  *      A pointer to the alloced memory or possibly
 943  *      NULL if M_NOWAIT is set.
 944  */
 945 static void *
 946 page_alloc(uma_zone_t zone, int bytes, u_int8_t *pflag, int wait)
 947 {
 948         void *p;        /* Returned page */
 949
 950         *pflag = UMA_SLAB_KMEM;
 951         p = (void *) kmem_malloc(kmem_map, bytes, wait);
 952
 953         return (p);
 954 }
 955
 956 /*
 957  * Allocates a number of pages from within an object
 958  *
 959  * Arguments:
 960  *      zone   Unused
 961  *      bytes  The number of bytes requested
 962  *      wait   Shall we wait?
 963  *
 964  * Returns:
 965  *      A pointer to the alloced memory or possibly
 966  *      NULL if M_NOWAIT is set.
 967  */
 968 static void *
 969 obj_alloc(uma_zone_t zone, int bytes, u_int8_t *flags, int wait)
 970 {
 971         vm_object_t object;
 972         vm_offset_t retkva, zkva;
 973         vm_page_t p;
 974         int pages, startpages;
 975
 976         object = zone->uz_keg->uk_obj;
 977         retkva = 0;
 978
 979         /*
 980          * This looks a little weird since we're getting one page at a time.
 981          */
 982         VM_OBJECT_LOCK(object);
 983         p = TAILQ_LAST(&object->memq, pglist);
 984         pages = p != NULL ? p->pindex + 1 : 0;
 985         startpages = pages;
 986         zkva = zone->uz_keg->uk_kva + pages * PAGE_SIZE;
 987         for (; bytes > 0; bytes -= PAGE_SIZE) {
 988                 p = vm_page_alloc(object, pages,
 989                     VM_ALLOC_INTERRUPT | VM_ALLOC_WIRED);
 990                 if (p == NULL) {
 991                         if (pages != startpages)
 992                                 pmap_qremove(retkva, pages - startpages);
 993                         while (pages != startpages) {
 994                                 pages--;
 995                                 p = TAILQ_LAST(&object->memq, pglist);
 996                                 vm_page_lock_queues();
 997                                 vm_page_unwire(p, 0);
 998                                 vm_page_free(p);
 999                                 vm_page_unlock_queues();
1000                         }
1001                         retkva = 0;
1002                         goto done;
1003                 }
1004                 pmap_qenter(zkva, &p, 1);
1005                 if (retkva == 0)
1006                         retkva = zkva;
1007                 zkva += PAGE_SIZE;
1008                 pages += 1;
1009         }
1010 done:
1011         VM_OBJECT_UNLOCK(object);
1012         *flags = UMA_SLAB_PRIV;
1013
1014         return ((void *)retkva);
1015 }
1016
1017 /*
1018  * Frees a number of pages to the system
1019  *
1020  * Arguments:
1021  *      mem   A pointer to the memory to be freed
1022  *      size  The size of the memory being freed
1023  *      flags The original p->us_flags field
1024  *
1025  * Returns:
1026  *      Nothing
1027  */
1028 static void
1029 page_free(void *mem, int size, u_int8_t flags)
1030 {
1031         vm_map_t map;
1032
1033         if (flags & UMA_SLAB_KMEM)
1034                 map = kmem_map;
1035         else
1036                 panic("UMA: page_free used with invalid flags %d\n", flags);
1037
1038         kmem_free(map, (vm_offset_t)mem, size);
1039 }
1040
/*
 * Zero fill initializer: clears 'size' bytes at 'mem'.
 *
 * Arguments/Returns follow uma_init specifications
 *	flags  Unused
 *
 * Always returns 0.
 */
static int
zero_init(void *mem, int size, int flags)
{

	bzero(mem, size);

	return (0);
}
1052
1053 /*
1054  * Finish creating a small uma zone.  This calculates ipers, and the zone size.
1055  *
1056  * Arguments
1057  *      zone  The zone we should initialize
1058  *
1059  * Returns
1060  *      Nothing
1061  */
1062 static void
1063 zone_small_init(uma_zone_t zone)
1064 {
1065         uma_keg_t keg;
1066         u_int rsize;
1067         u_int memused;
1068         u_int wastedspace;
1069         u_int shsize;
1070
1071         keg = zone->uz_keg;
1072         KASSERT(keg != NULL, ("Keg is null in zone_small_init"));
1073         rsize = keg->uk_size;
1074
1075         if (rsize < UMA_SMALLEST_UNIT)
1076                 rsize = UMA_SMALLEST_UNIT;
1077         if (rsize & keg->uk_align)
1078                 rsize = (rsize & ~keg->uk_align) + (keg->uk_align + 1);
1079
1080         keg->uk_rsize = rsize;
1081         keg->uk_ppera = 1;
1082
1083         if (keg->uk_flags & UMA_ZONE_REFCNT) {
1084                 rsize += UMA_FRITMREF_SZ;       /* linkage & refcnt */
1085                 shsize = sizeof(struct uma_slab_refcnt);
1086         } else {
1087                 rsize += UMA_FRITM_SZ;  /* Account for linkage */
1088                 shsize = sizeof(struct uma_slab);
1089         }
1090
1091         keg->uk_ipers = (UMA_SLAB_SIZE - shsize) / rsize;
1092         KASSERT(keg->uk_ipers != 0, ("zone_small_init: ipers is 0"));
1093         memused = keg->uk_ipers * rsize + shsize;
1094         wastedspace = UMA_SLAB_SIZE - memused;
1095
1096         /*
1097          * We can't do OFFPAGE if we're internal or if we've been
1098          * asked to not go to the VM for buckets.  If we do this we
1099          * may end up going to the VM (kmem_map) for slabs which we
1100          * do not want to do if we're UMA_ZFLAG_CACHEONLY as a
1101          * result of UMA_ZONE_VM, which clearly forbids it.
1102          */
1103         if ((keg->uk_flags & UMA_ZFLAG_INTERNAL) ||
1104             (keg->uk_flags & UMA_ZFLAG_CACHEONLY))
1105                 return;
1106
1107         if ((wastedspace >= UMA_MAX_WASTE) &&
1108             (keg->uk_ipers < (UMA_SLAB_SIZE / keg->uk_rsize))) {
1109                 keg->uk_ipers = UMA_SLAB_SIZE / keg->uk_rsize;
1110                 KASSERT(keg->uk_ipers <= 255,
1111                     ("zone_small_init: keg->uk_ipers too high!"));
1112 #ifdef UMA_DEBUG
1113                 printf("UMA decided we need offpage slab headers for "
1114                     "zone: %s, calculated wastedspace = %d, "
1115                     "maximum wasted space allowed = %d, "
1116                     "calculated ipers = %d, "
1117                     "new wasted space = %d\n", zone->uz_name, wastedspace,
1118                     UMA_MAX_WASTE, keg->uk_ipers,
1119                     UMA_SLAB_SIZE - keg->uk_ipers * keg->uk_rsize);
1120 #endif
1121                 keg->uk_flags |= UMA_ZONE_OFFPAGE;
1122                 if ((keg->uk_flags & UMA_ZONE_MALLOC) == 0)
1123                         keg->uk_flags |= UMA_ZONE_HASH;
1124         }
1125 }
1126
1127 /*
1128  * Finish creating a large (> UMA_SLAB_SIZE) uma zone.  Just give in and do
1129  * OFFPAGE for now.  When I can allow for more dynamic slab sizes this will be
1130  * more complicated.
1131  *
1132  * Arguments
1133  *      zone  The zone we should initialize
1134  *
1135  * Returns
1136  *      Nothing
1137  */
1138 static void
1139 zone_large_init(uma_zone_t zone)
1140 {
1141         uma_keg_t keg;
1142         int pages;
1143
1144         keg = zone->uz_keg;
1145
1146         KASSERT(keg != NULL, ("Keg is null in zone_large_init"));
1147         KASSERT((keg->uk_flags & UMA_ZFLAG_CACHEONLY) == 0,
1148             ("zone_large_init: Cannot large-init a UMA_ZFLAG_CACHEONLY zone"));
1149
1150         pages = keg->uk_size / UMA_SLAB_SIZE;
1151
1152         /* Account for remainder */
1153         if ((pages * UMA_SLAB_SIZE) < keg->uk_size)
1154                 pages++;
1155
1156         keg->uk_ppera = pages;
1157         keg->uk_ipers = 1;
1158
1159         keg->uk_flags |= UMA_ZONE_OFFPAGE;
1160         if ((keg->uk_flags & UMA_ZONE_MALLOC) == 0)
1161                 keg->uk_flags |= UMA_ZONE_HASH;
1162
1163         keg->uk_rsize = keg->uk_size;
1164 }
1165
1166 /*
1167  * Keg header ctor.  This initializes all fields, locks, etc.  And inserts
1168  * the keg onto the global keg list.
1169  *
1170  * Arguments/Returns follow uma_ctor specifications
1171  *      udata  Actually uma_kctor_args
1172  */
1173 static int
1174 keg_ctor(void *mem, int size, void *udata, int flags)
1175 {
1176         struct uma_kctor_args *arg = udata;
1177         uma_keg_t keg = mem;
1178         uma_zone_t zone;
1179
1180         bzero(keg, size);
1181         keg->uk_size = arg->size;
1182         keg->uk_init = arg->uminit;
1183         keg->uk_fini = arg->fini;
1184         keg->uk_align = arg->align;
1185         keg->uk_free = 0;
1186         keg->uk_pages = 0;
1187         keg->uk_flags = arg->flags;
1188         keg->uk_allocf = page_alloc;
1189         keg->uk_freef = page_free;
1190         keg->uk_recurse = 0;
1191         keg->uk_slabzone = NULL;
1192
1193         /*
1194          * The master zone is passed to us at keg-creation time.
1195          */
1196         zone = arg->zone;
1197         zone->uz_keg = keg;
1198
1199         if (arg->flags & UMA_ZONE_VM)
1200                 keg->uk_flags |= UMA_ZFLAG_CACHEONLY;
1201
1202         if (arg->flags & UMA_ZONE_ZINIT)
1203                 keg->uk_init = zero_init;
1204
1205         /*
1206          * The +UMA_FRITM_SZ added to uk_size is to account for the
1207          * linkage that is added to the size in zone_small_init().  If
1208          * we don't account for this here then we may end up in
1209          * zone_small_init() with a calculated 'ipers' of 0.
1210          */
1211         if (keg->uk_flags & UMA_ZONE_REFCNT) {
1212                 if ((keg->uk_size+UMA_FRITMREF_SZ) >
1213                     (UMA_SLAB_SIZE - sizeof(struct uma_slab_refcnt)))
1214                         zone_large_init(zone);
1215                 else
1216                         zone_small_init(zone);
1217         } else {
1218                 if ((keg->uk_size+UMA_FRITM_SZ) >
1219                     (UMA_SLAB_SIZE - sizeof(struct uma_slab)))
1220                         zone_large_init(zone);
1221                 else
1222                         zone_small_init(zone);
1223         }
1224
1225         if (keg->uk_flags & UMA_ZONE_OFFPAGE) {
1226                 if (keg->uk_flags & UMA_ZONE_REFCNT)
1227                         keg->uk_slabzone = slabrefzone;
1228                 else
1229                         keg->uk_slabzone = slabzone;
1230         }
1231
1232         /*
1233          * If we haven't booted yet we need allocations to go through the
1234          * startup cache until the vm is ready.
1235          */
1236         if (keg->uk_ppera == 1) {
1237 #ifdef UMA_MD_SMALL_ALLOC
1238                 keg->uk_allocf = uma_small_alloc;
1239                 keg->uk_freef = uma_small_free;
1240 #endif
1241                 if (booted == 0)
1242                         keg->uk_allocf = startup_alloc;
1243         }
1244
1245         /*
1246          * Initialize keg's lock (shared among zones) through
1247          * Master zone
1248          */
1249         zone->uz_lock = &keg->uk_lock;
1250         if (arg->flags & UMA_ZONE_MTXCLASS)
1251                 ZONE_LOCK_INIT(zone, 1);
1252         else
1253                 ZONE_LOCK_INIT(zone, 0);
1254
1255         /*
1256          * If we're putting the slab header in the actual page we need to
1257          * figure out where in each page it goes.  This calculates a right
1258          * justified offset into the memory on an ALIGN_PTR boundary.
1259          */
1260         if (!(keg->uk_flags & UMA_ZONE_OFFPAGE)) {
1261                 u_int totsize;
1262
1263                 /* Size of the slab struct and free list */
1264                 if (keg->uk_flags & UMA_ZONE_REFCNT)
1265                         totsize = sizeof(struct uma_slab_refcnt) +
1266                             keg->uk_ipers * UMA_FRITMREF_SZ;
1267                 else
1268                         totsize = sizeof(struct uma_slab) +
1269                             keg->uk_ipers * UMA_FRITM_SZ;
1270
1271                 if (totsize & UMA_ALIGN_PTR)
1272                         totsize = (totsize & ~UMA_ALIGN_PTR) +
1273                             (UMA_ALIGN_PTR + 1);
1274                 keg->uk_pgoff = UMA_SLAB_SIZE - totsize;
1275
1276                 if (keg->uk_flags & UMA_ZONE_REFCNT)
1277                         totsize = keg->uk_pgoff + sizeof(struct uma_slab_refcnt)
1278                             + keg->uk_ipers * UMA_FRITMREF_SZ;
1279                 else
1280                         totsize = keg->uk_pgoff + sizeof(struct uma_slab)
1281                             + keg->uk_ipers * UMA_FRITM_SZ;
1282
1283                 /*
1284                  * The only way the following is possible is if with our
1285                  * UMA_ALIGN_PTR adjustments we are now bigger than
1286                  * UMA_SLAB_SIZE.  I haven't checked whether this is
1287                  * mathematically possible for all cases, so we make
1288                  * sure here anyway.
1289                  */
1290                 if (totsize > UMA_SLAB_SIZE) {
1291                         printf("zone %s ipers %d rsize %d size %d\n",
1292                             zone->uz_name, keg->uk_ipers, keg->uk_rsize,
1293                             keg->uk_size);
1294                         panic("UMA slab won't fit.\n");
1295                 }
1296         }
1297
1298         if (keg->uk_flags & UMA_ZONE_HASH)
1299                 hash_alloc(&keg->uk_hash);
1300
1301 #ifdef UMA_DEBUG
1302         printf("%s(%p) size = %d ipers = %d ppera = %d pgoff = %d\n",
1303             zone->uz_name, zone,
1304             keg->uk_size, keg->uk_ipers,
1305             keg->uk_ppera, keg->uk_pgoff);
1306 #endif
1307
1308         LIST_INSERT_HEAD(&keg->uk_zones, zone, uz_link);
1309
1310         mtx_lock(&uma_mtx);
1311         LIST_INSERT_HEAD(&uma_kegs, keg, uk_link);
1312         mtx_unlock(&uma_mtx);
1313         return (0);
1314 }
1315
1316 /*
1317  * Zone header ctor.  This initializes all fields, locks, etc.
1318  *
1319  * Arguments/Returns follow uma_ctor specifications
1320  *      udata  Actually uma_zctor_args
1321  */
1322
1323 static int
1324 zone_ctor(void *mem, int size, void *udata, int flags)
1325 {
1326         struct uma_zctor_args *arg = udata;
1327         uma_zone_t zone = mem;
1328         uma_zone_t z;
1329         uma_keg_t keg;
1330
1331         bzero(zone, size);
1332         zone->uz_name = arg->name;
1333         zone->uz_ctor = arg->ctor;
1334         zone->uz_dtor = arg->dtor;
1335         zone->uz_init = NULL;
1336         zone->uz_fini = NULL;
1337         zone->uz_allocs = 0;
1338         zone->uz_fills = zone->uz_count = 0;
1339
1340         if (arg->flags & UMA_ZONE_SECONDARY) {
1341                 KASSERT(arg->keg != NULL, ("Secondary zone on zero'd keg"));
1342                 keg = arg->keg;
1343                 zone->uz_keg = keg;
1344                 zone->uz_init = arg->uminit;
1345                 zone->uz_fini = arg->fini;
1346                 zone->uz_lock = &keg->uk_lock;
1347                 mtx_lock(&uma_mtx);
1348                 ZONE_LOCK(zone);
1349                 keg->uk_flags |= UMA_ZONE_SECONDARY;
1350                 LIST_FOREACH(z, &keg->uk_zones, uz_link) {
1351                         if (LIST_NEXT(z, uz_link) == NULL) {
1352                                 LIST_INSERT_AFTER(z, zone, uz_link);
1353                                 break;
1354                         }
1355                 }
1356                 ZONE_UNLOCK(zone);
1357                 mtx_unlock(&uma_mtx);
1358         } else if (arg->keg == NULL) {
1359                 if (uma_kcreate(zone, arg->size, arg->uminit, arg->fini,
1360                     arg->align, arg->flags) == NULL)
1361                         return (ENOMEM);
1362         } else {
1363                 struct uma_kctor_args karg;
1364                 int error;
1365
1366                 /* We should only be here from uma_startup() */
1367                 karg.size = arg->size;
1368                 karg.uminit = arg->uminit;
1369                 karg.fini = arg->fini;
1370                 karg.align = arg->align;
1371                 karg.flags = arg->flags;
1372                 karg.zone = zone;
1373                 error = keg_ctor(arg->keg, sizeof(struct uma_keg), &karg,
1374                     flags);
1375                 if (error)
1376                         return (error);
1377         }
1378         keg = zone->uz_keg;
1379         zone->uz_lock = &keg->uk_lock;
1380
1381         /*
1382          * Some internal zones don't have room allocated for the per cpu
1383          * caches.  If we're internal, bail out here.
1384          */
1385         if (keg->uk_flags & UMA_ZFLAG_INTERNAL) {
1386                 KASSERT((keg->uk_flags & UMA_ZONE_SECONDARY) == 0,
1387                     ("Secondary zone requested UMA_ZFLAG_INTERNAL"));
1388                 return (0);
1389         }
1390
1391         if (keg->uk_flags & UMA_ZONE_MAXBUCKET)
1392                 zone->uz_count = BUCKET_MAX;
1393         else if (keg->uk_ipers <= BUCKET_MAX)
1394                 zone->uz_count = keg->uk_ipers;
1395         else
1396                 zone->uz_count = BUCKET_MAX;
1397         return (0);
1398 }
1399
1400 /*
1401  * Keg header dtor.  This frees all data, destroys locks, frees the hash
1402  * table and removes the keg from the global list.
1403  *
1404  * Arguments/Returns follow uma_dtor specifications
1405  *      udata  unused
1406  */
1407 static void
1408 keg_dtor(void *arg, int size, void *udata)
1409 {
1410         uma_keg_t keg;
1411
1412         keg = (uma_keg_t)arg;
1413         mtx_lock(&keg->uk_lock);
1414         if (keg->uk_free != 0) {
1415                 printf("Freed UMA keg was not empty (%d items). "
1416                     " Lost %d pages of memory.\n",
1417                     keg->uk_free, keg->uk_pages);
1418         }
1419         mtx_unlock(&keg->uk_lock);
1420
1421         if (keg->uk_flags & UMA_ZONE_HASH)
1422                 hash_free(&keg->uk_hash);
1423
1424         mtx_destroy(&keg->uk_lock);
1425 }
1426
1427 /*
1428  * Zone header dtor.
1429  *
1430  * Arguments/Returns follow uma_dtor specifications
1431  *      udata  unused
1432  */
1433 static void
1434 zone_dtor(void *arg, int size, void *udata)
1435 {
1436         uma_zone_t zone;
1437         uma_keg_t keg;
1438
1439         zone = (uma_zone_t)arg;
1440         keg = zone->uz_keg;
1441
1442         if (!(keg->uk_flags & UMA_ZFLAG_INTERNAL))
1443                 cache_drain(zone);
1444
1445         mtx_lock(&uma_mtx);
1446         zone_drain(zone);
1447         if (keg->uk_flags & UMA_ZONE_SECONDARY) {
1448                 LIST_REMOVE(zone, uz_link);
1449                 /*
1450                  * XXX there are some races here where
1451                  * the zone can be drained but zone lock
1452                  * released and then refilled before we
1453                  * remove it... we dont care for now
1454                  */
1455                 ZONE_LOCK(zone);
1456                 if (LIST_EMPTY(&keg->uk_zones))
1457                         keg->uk_flags &= ~UMA_ZONE_SECONDARY;
1458                 ZONE_UNLOCK(zone);
1459                 mtx_unlock(&uma_mtx);
1460         } else {
1461                 LIST_REMOVE(keg, uk_link);
1462                 LIST_REMOVE(zone, uz_link);
1463                 mtx_unlock(&uma_mtx);
1464                 uma_zfree_internal(kegs, keg, NULL, SKIP_NONE);
1465         }
1466         zone->uz_keg = NULL;
1467 }
1468
1469 /*
1470  * Traverses every zone in the system and calls a callback
1471  *
1472  * Arguments:
1473  *      zfunc  A pointer to a function which accepts a zone
1474  *              as an argument.
1475  *
1476  * Returns:
1477  *      Nothing
1478  */
1479 static void
1480 zone_foreach(void (*zfunc)(uma_zone_t))
1481 {
1482         uma_keg_t keg;
1483         uma_zone_t zone;
1484
1485         mtx_lock(&uma_mtx);
1486         LIST_FOREACH(keg, &uma_kegs, uk_link) {
1487                 LIST_FOREACH(zone, &keg->uk_zones, uz_link)
1488                         zfunc(zone);
1489         }
1490         mtx_unlock(&uma_mtx);
1491 }
1492
1493 /* Public functions */
1494 /* See uma.h */
1495 void
1496 uma_startup(void *bootmem)
1497 {
1498         struct uma_zctor_args args;
1499         uma_slab_t slab;
1500         u_int slabsize;
1501         u_int objsize, totsize, wsize;
1502         int i;
1503
1504 #ifdef UMA_DEBUG
1505         printf("Creating uma keg headers zone and keg.\n");
1506 #endif
1507         /*
1508          * The general UMA lock is a recursion-allowed lock because
1509          * there is a code path where, while we're still configured
1510          * to use startup_alloc() for backend page allocations, we
1511          * may end up in uma_reclaim() which calls zone_foreach(zone_drain),
1512          * which grabs uma_mtx, only to later call into startup_alloc()
1513          * because while freeing we needed to allocate a bucket.  Since
1514          * startup_alloc() also takes uma_mtx, we need to be able to
1515          * recurse on it.
1516          */
1517         mtx_init(&uma_mtx, "UMA lock", NULL, MTX_DEF | MTX_RECURSE);
1518
1519         /*
1520          * Figure out the maximum number of items-per-slab we'll have if
1521          * we're using the OFFPAGE slab header to track free items, given
1522          * all possible object sizes and the maximum desired wastage
1523          * (UMA_MAX_WASTE).
1524          *
1525          * We iterate until we find an object size for
1526          * which the calculated wastage in zone_small_init() will be
1527          * enough to warrant OFFPAGE.  Since wastedspace versus objsize
1528          * is an overall increasing see-saw function, we find the smallest
1529          * objsize such that the wastage is always acceptable for objects
1530          * with that objsize or smaller.  Since a smaller objsize always
1531          * generates a larger possible uma_max_ipers, we use this computed
1532          * objsize to calculate the largest ipers possible.  Since the
1533          * ipers calculated for OFFPAGE slab headers is always larger than
1534          * the ipers initially calculated in zone_small_init(), we use
1535          * the former's equation (UMA_SLAB_SIZE / keg->uk_rsize) to
1536          * obtain the maximum ipers possible for offpage slab headers.
1537          *
1538          * It should be noted that ipers versus objsize is an inversly
1539          * proportional function which drops off rather quickly so as
1540          * long as our UMA_MAX_WASTE is such that the objsize we calculate
1541          * falls into the portion of the inverse relation AFTER the steep
1542          * falloff, then uma_max_ipers shouldn't be too high (~10 on i386).
1543          *
1544          * Note that we have 8-bits (1 byte) to use as a freelist index
1545          * inside the actual slab header itself and this is enough to
1546          * accomodate us.  In the worst case, a UMA_SMALLEST_UNIT sized
1547          * object with offpage slab header would have ipers =
1548          * UMA_SLAB_SIZE / UMA_SMALLEST_UNIT (currently = 256), which is
1549          * 1 greater than what our byte-integer freelist index can
1550          * accomodate, but we know that this situation never occurs as
1551          * for UMA_SMALLEST_UNIT-sized objects, we will never calculate
1552          * that we need to go to offpage slab headers.  Or, if we do,
1553          * then we trap that condition below and panic in the INVARIANTS case.
1554          */
1555         wsize = UMA_SLAB_SIZE - sizeof(struct uma_slab) - UMA_MAX_WASTE;
1556         totsize = wsize;
1557         objsize = UMA_SMALLEST_UNIT;
1558         while (totsize >= wsize) {
1559                 totsize = (UMA_SLAB_SIZE - sizeof(struct uma_slab)) /
1560                     (objsize + UMA_FRITM_SZ);
1561                 totsize *= (UMA_FRITM_SZ + objsize);
1562                 objsize++;
1563         }
1564         if (objsize > UMA_SMALLEST_UNIT)
1565                 objsize--;
1566         uma_max_ipers = UMA_SLAB_SIZE / objsize;
1567
1568         wsize = UMA_SLAB_SIZE - sizeof(struct uma_slab_refcnt) - UMA_MAX_WASTE;
1569         totsize = wsize;
1570         objsize = UMA_SMALLEST_UNIT;
1571         while (totsize >= wsize) {
1572                 totsize = (UMA_SLAB_SIZE - sizeof(struct uma_slab_refcnt)) /
1573                     (objsize + UMA_FRITMREF_SZ);
1574                 totsize *= (UMA_FRITMREF_SZ + objsize);
1575                 objsize++;
1576         }
1577         if (objsize > UMA_SMALLEST_UNIT)
1578                 objsize--;
1579         uma_max_ipers_ref = UMA_SLAB_SIZE / objsize;
1580
1581         KASSERT((uma_max_ipers_ref <= 255) && (uma_max_ipers <= 255),
1582             ("uma_startup: calculated uma_max_ipers values too large!"));
1583
1584 #ifdef UMA_DEBUG
1585         printf("Calculated uma_max_ipers (for OFFPAGE) is %d\n", uma_max_ipers);
1586         printf("Calculated uma_max_ipers_slab (for OFFPAGE) is %d\n",
1587             uma_max_ipers_ref);
1588 #endif
1589
1590         /* "manually" create the initial zone */
1591         args.name = "UMA Kegs";
1592         args.size = sizeof(struct uma_keg);
1593         args.ctor = keg_ctor;
1594         args.dtor = keg_dtor;
1595         args.uminit = zero_init;
1596         args.fini = NULL;
1597         args.keg = &masterkeg;
1598         args.align = 32 - 1;
1599         args.flags = UMA_ZFLAG_INTERNAL;
1600         /* The initial zone has no Per cpu queues so it's smaller */
1601         zone_ctor(kegs, sizeof(struct uma_zone), &args, M_WAITOK);
1602
1603 #ifdef UMA_DEBUG
1604         printf("Filling boot free list.\n");
1605 #endif
1606         for (i = 0; i < UMA_BOOT_PAGES; i++) {
1607                 slab = (uma_slab_t)((u_int8_t *)bootmem + (i * UMA_SLAB_SIZE));
1608                 slab->us_data = (u_int8_t *)slab;
1609                 slab->us_flags = UMA_SLAB_BOOT;
1610                 LIST_INSERT_HEAD(&uma_boot_pages, slab, us_link);
1611                 uma_boot_free++;
1612         }
1613
1614 #ifdef UMA_DEBUG
1615         printf("Creating uma zone headers zone and keg.\n");
1616 #endif
1617         args.name = "UMA Zones";
1618         args.size = sizeof(struct uma_zone) +
1619             (sizeof(struct uma_cache) * (mp_maxid + 1));
1620         args.ctor = zone_ctor;
1621         args.dtor = zone_dtor;
1622         args.uminit = zero_init;
1623         args.fini = NULL;
1624         args.keg = NULL;
1625         args.align = 32 - 1;
1626         args.flags = UMA_ZFLAG_INTERNAL;
1627         /* The initial zone has no Per cpu queues so it's smaller */
1628         zone_ctor(zones, sizeof(struct uma_zone), &args, M_WAITOK);
1629
1630 #ifdef UMA_DEBUG
1631         printf("Initializing pcpu cache locks.\n");
1632 #endif
1633         /* Initialize the pcpu cache lock set once and for all */
1634         for (i = 0; i <= mp_maxid; i++)
1635                 CPU_LOCK_INIT(i);
1636
1637 #ifdef UMA_DEBUG
1638         printf("Creating slab and hash zones.\n");
1639 #endif
1640
1641         /*
1642          * This is the max number of free list items we'll have with
1643          * offpage slabs.
1644          */
1645         slabsize = uma_max_ipers * UMA_FRITM_SZ;
1646         slabsize += sizeof(struct uma_slab);
1647
1648         /* Now make a zone for slab headers */
1649         slabzone = uma_zcreate("UMA Slabs",
1650                                 slabsize,
1651                                 NULL, NULL, NULL, NULL,
1652                                 UMA_ALIGN_PTR, UMA_ZFLAG_INTERNAL);
1653
1654         /*
1655          * We also create a zone for the bigger slabs with reference
1656          * counts in them, to accomodate UMA_ZONE_REFCNT zones.
1657          */
1658         slabsize = uma_max_ipers_ref * UMA_FRITMREF_SZ;
1659         slabsize += sizeof(struct uma_slab_refcnt);
1660         slabrefzone = uma_zcreate("UMA RCntSlabs",
1661                                   slabsize,
1662                                   NULL, NULL, NULL, NULL,
1663                                   UMA_ALIGN_PTR,
1664                                   UMA_ZFLAG_INTERNAL);
1665
1666         hashzone = uma_zcreate("UMA Hash",
1667             sizeof(struct slabhead *) * UMA_HASH_SIZE_INIT,
1668             NULL, NULL, NULL, NULL,
1669             UMA_ALIGN_PTR, UMA_ZFLAG_INTERNAL);
1670
1671         bucket_init();
1672
1673 #ifdef UMA_MD_SMALL_ALLOC
1674         booted = 1;
1675 #endif
1676
1677 #ifdef UMA_DEBUG
1678         printf("UMA startup complete.\n");
1679 #endif
1680 }
1681
1682 /* see uma.h */
1683 void
1684 uma_startup2(void)
1685 {
1686         booted = 1;
1687         bucket_enable();
1688 #ifdef UMA_DEBUG
1689         printf("UMA startup2 complete.\n");
1690 #endif
1691 }
1692
1693 /*
1694  * Initialize our callout handle
1695  *
1696  */
1697
1698 static void
1699 uma_startup3(void)
1700 {
1701 #ifdef UMA_DEBUG
1702         printf("Starting callout.\n");
1703 #endif
1704         callout_init(&uma_callout, CALLOUT_MPSAFE);
1705         callout_reset(&uma_callout, UMA_TIMEOUT * hz, uma_timeout, NULL);
1706 #ifdef UMA_DEBUG
1707         printf("UMA startup3 complete.\n");
1708 #endif
1709 }
1710
1711 static uma_zone_t
1712 uma_kcreate(uma_zone_t zone, size_t size, uma_init uminit, uma_fini fini,
1713                 int align, u_int16_t flags)
1714 {
1715         struct uma_kctor_args args;
1716
1717         args.size = size;
1718         args.uminit = uminit;
1719         args.fini = fini;
1720         args.align = align;
1721         args.flags = flags;
1722         args.zone = zone;
1723         return (uma_zalloc_internal(kegs, &args, M_WAITOK));
1724 }
1725
1726 /* See uma.h */
1727 uma_zone_t
1728 uma_zcreate(char *name, size_t size, uma_ctor ctor, uma_dtor dtor,
1729                 uma_init uminit, uma_fini fini, int align, u_int16_t flags)
1730
1731 {
1732         struct uma_zctor_args args;
1733
1734         /* This stuff is essential for the zone ctor */
1735         args.name = name;
1736         args.size = size;
1737         args.ctor = ctor;
1738         args.dtor = dtor;
1739         args.uminit = uminit;
1740         args.fini = fini;
1741         args.align = align;
1742         args.flags = flags;
1743         args.keg = NULL;
1744
1745         return (uma_zalloc_internal(zones, &args, M_WAITOK));
1746 }
1747
1748 /* See uma.h */
1749 uma_zone_t
1750 uma_zsecond_create(char *name, uma_ctor ctor, uma_dtor dtor,
1751                     uma_init zinit, uma_fini zfini, uma_zone_t master)
1752 {
1753         struct uma_zctor_args args;
1754
1755         args.name = name;
1756         args.size = master->uz_keg->uk_size;
1757         args.ctor = ctor;
1758         args.dtor = dtor;
1759         args.uminit = zinit;
1760         args.fini = zfini;
1761         args.align = master->uz_keg->uk_align;
1762         args.flags = master->uz_keg->uk_flags | UMA_ZONE_SECONDARY;
1763         args.keg = master->uz_keg;
1764
1765         return (uma_zalloc_internal(zones, &args, M_WAITOK));
1766 }
1767
/*
 * See uma.h
 *
 * Destroy a zone by freeing the zone structure itself back to the master
 * 'zones' zone.  SKIP_NONE is passed, so uma_zfree_internal() is not asked
 * to skip the dtor or fini of 'zones' for this item.
 */
void
uma_zdestroy(uma_zone_t zone)
{
        uma_zfree_internal(zones, zone, NULL, SKIP_NONE);
}
1774
/*
 * See uma.h
 *
 * Allocate one item from 'zone'.
 *
 * The attempts are made in this order:
 *   1. Pop an item from the current CPU's alloc bucket.
 *   2. If the alloc bucket is empty but the CPU's free bucket has items,
 *      swap the two buckets and retry.
 *   3. Take a bucket from the zone's uz_full_bucket list and retry.
 *   4. Ask uma_zalloc_bucket() to fill a bucket and restart from scratch.
 *   5. Fall back to uma_zalloc_internal() for a single item.
 *
 * Arguments:
 *      zone   The zone to allocate from.
 *      udata  Passed through to the zone's ctor.
 *      flags  M_WAITOK/M_NOWAIT, optionally M_ZERO.
 *
 * Returns:
 *      The item, or NULL if the zone's ctor reports failure or the
 *      uma_zalloc_internal() fallback returns NULL.
 */
void *
uma_zalloc_arg(uma_zone_t zone, void *udata, int flags)
{
        void *item;
        uma_cache_t cache;
        uma_bucket_t bucket;
        int cpu;
        int badness;

        /* This is the fast path allocation */
#ifdef UMA_DEBUG_ALLOC_1
        printf("Allocating one item from %s(%p)\n", zone->uz_name, zone);
#endif
        CTR3(KTR_UMA, "uma_zalloc_arg thread %x zone %s flags %d", curthread,
            zone->uz_name, flags);

        /*
         * The caller did not pass M_NOWAIT.  Assert we are not in interrupt
         * context and, when 'nosleepwithlocks' is set, decide whether the
         * request has to be downgraded to M_NOWAIT: with WITNESS the
         * decision is the result of WITNESS_WARN(), without WITNESS the
         * request is always downgraded.
         */
        if (!(flags & M_NOWAIT)) {
                KASSERT(curthread->td_intr_nesting_level == 0,
                   ("malloc(M_WAITOK) in interrupt context"));
                if (nosleepwithlocks) {
#ifdef WITNESS
                        badness = WITNESS_WARN(WARN_GIANTOK | WARN_SLEEPOK,
                            NULL,
                            "malloc(M_WAITOK) of \"%s\", forcing M_NOWAIT",
                            zone->uz_name);
#else
                        badness = 1;
#endif
                } else {
                        /* Only warn; the flags are left untouched. */
                        badness = 0;
#ifdef WITNESS
                        WITNESS_WARN(WARN_GIANTOK | WARN_SLEEPOK, NULL,
                            "malloc(M_WAITOK) of \"%s\"", zone->uz_name);
#endif
                }
                if (badness) {
                        flags &= ~M_WAITOK;
                        flags |= M_NOWAIT;
                }
        }

        /*
         * Full restart: re-read the current CPU id, since we may have been
         * running without the CPU lock since the last attempt.
         */
zalloc_restart:
        cpu = PCPU_GET(cpuid);
        CPU_LOCK(cpu);
        cache = &zone->uz_cpu[cpu];

        /* Retry with the CPU lock still held and 'cache' still valid. */
zalloc_start:
        bucket = cache->uc_allocbucket;

        if (bucket) {
                if (bucket->ub_cnt > 0) {
                        /* Pop the most recently stored item. */
                        bucket->ub_cnt--;
                        item = bucket->ub_bucket[bucket->ub_cnt];
#ifdef INVARIANTS
                        bucket->ub_bucket[bucket->ub_cnt] = NULL;
#endif
                        KASSERT(item != NULL,
                            ("uma_zalloc: Bucket pointer mangled."));
                        cache->uc_allocs++;
#ifdef INVARIANTS
                        ZONE_LOCK(zone);
                        uma_dbg_alloc(zone, NULL, item);
                        ZONE_UNLOCK(zone);
#endif
                        CPU_UNLOCK(cpu);
                        /*
                         * The ctor runs with no locks held.  On failure the
                         * item goes straight back to the keg without
                         * running the dtor.
                         */
                        if (zone->uz_ctor != NULL) {
                                if (zone->uz_ctor(item, zone->uz_keg->uk_size,
                                    udata, flags) != 0) {
                                        uma_zfree_internal(zone, item, udata,
                                            SKIP_DTOR);
                                        return (NULL);
                                }
                        }
                        /* Note: zeroing happens after the ctor has run. */
                        if (flags & M_ZERO)
                                bzero(item, zone->uz_keg->uk_size);
                        return (item);
                } else if (cache->uc_freebucket) {
                        /*
                         * We have run out of items in our allocbucket.
                         * See if we can switch with our free bucket.
                         */
                        if (cache->uc_freebucket->ub_cnt > 0) {
#ifdef UMA_DEBUG_ALLOC
                                printf("uma_zalloc: Swapping empty with"
                                    " alloc.\n");
#endif
                                bucket = cache->uc_freebucket;
                                cache->uc_freebucket = cache->uc_allocbucket;
                                cache->uc_allocbucket = bucket;

                                goto zalloc_start;
                        }
                }
        }
        /* Slow path: the per-CPU cache could not satisfy the request. */
        ZONE_LOCK(zone);
        /* Since we have locked the zone we may as well send back our stats */
        zone->uz_allocs += cache->uc_allocs;
        cache->uc_allocs = 0;

        /* Our old one is now a free bucket */
        if (cache->uc_allocbucket) {
                KASSERT(cache->uc_allocbucket->ub_cnt == 0,
                    ("uma_zalloc_arg: Freeing a non free bucket."));
                LIST_INSERT_HEAD(&zone->uz_free_bucket,
                    cache->uc_allocbucket, ub_link);
                cache->uc_allocbucket = NULL;
        }

        /* Check the zone's full-bucket list for a new alloc bucket */
        if ((bucket = LIST_FIRST(&zone->uz_full_bucket)) != NULL) {
                KASSERT(bucket->ub_cnt != 0,
                    ("uma_zalloc_arg: Returning an empty bucket."));

                LIST_REMOVE(bucket, ub_link);
                cache->uc_allocbucket = bucket;
                ZONE_UNLOCK(zone);
                goto zalloc_start;
        }
        /* We are no longer associated with this cpu!!! */
        CPU_UNLOCK(cpu);

        /* Bump up our uz_count so we get here less */
        if (zone->uz_count < BUCKET_MAX)
                zone->uz_count++;

        /*
         * Now let's fill a bucket; on success uma_zalloc_bucket() puts it on
         * the zone's full-bucket list and we restart the allocation from
         * the beginning.  The zone lock is still held across the call.
         */
        if (uma_zalloc_bucket(zone, flags)) {
                ZONE_UNLOCK(zone);
                goto zalloc_restart;
        }
        ZONE_UNLOCK(zone);
        /*
         * We may not be able to get a bucket so return an actual item.
         */
#ifdef UMA_DEBUG
        printf("uma_zalloc_arg: Bucketzone returned NULL\n");
#endif

        return (uma_zalloc_internal(zone, udata, flags));
}
1919
/*
 * Find, or allocate, a slab of the zone's keg that has at least one free
 * item.
 *
 * Callers in this file invoke this with ZONE_LOCK(zone) held; the msleep()
 * below is passed &keg->uk_lock as its interlock.
 *
 * Arguments:
 *      zone   The zone whose keg is searched.
 *      flags  M_WAITOK/M_NOWAIT, optionally M_NOVM.
 *
 * Returns:
 *      A slab that is on the keg's uk_part_slab list, or NULL when no slab
 *      is available (recursion guard hit, M_NOVM set, page limit reached
 *      with M_NOWAIT, or slab_zalloc() failed with M_NOWAIT).
 */
static uma_slab_t
uma_zone_slab(uma_zone_t zone, int flags)
{
        uma_slab_t slab;
        uma_keg_t keg;

        keg = zone->uz_keg;

        /*
         * This is to prevent us from recursively trying to allocate
         * buckets.  The problem is that if an allocation forces us to
         * grab a new bucket we will call page_alloc, which will go off
         * and cause the vm to allocate vm_map_entries.  If we need new
         * buckets there too we will recurse in kmem_alloc and bad
         * things happen.  So instead we return a NULL bucket, and make
         * the code that allocates buckets smart enough to deal with it
         *
         * XXX: While we want this protection for the bucket zones so that
         * recursion from the VM is handled (and the calling code that
         * allocates buckets knows how to deal with it), we do not want
         * to prevent allocation from the slab header zones (slabzone
         * and slabrefzone) if uk_recurse is not zero for them.  The
         * reason is that it could lead to NULL being returned for
         * slab header allocations even in the M_WAITOK case, and the
         * caller can't handle that.
         */
        if (keg->uk_flags & UMA_ZFLAG_INTERNAL && keg->uk_recurse != 0)
                if ((zone != slabzone) && (zone != slabrefzone))
                        return (NULL);

        slab = NULL;

        for (;;) {
                /*
                 * Find a slab with some space.  Prefer slabs that are partially
                 * used over those that are totally free.  This helps to reduce
                 * fragmentation.
                 */
                if (keg->uk_free != 0) {
                        if (!LIST_EMPTY(&keg->uk_part_slab)) {
                                slab = LIST_FIRST(&keg->uk_part_slab);
                        } else {
                                /*
                                 * Promote a completely free slab to the
                                 * partial list; the caller is expected to
                                 * take an item from it.
                                 */
                                slab = LIST_FIRST(&keg->uk_free_slab);
                                LIST_REMOVE(slab, us_link);
                                LIST_INSERT_HEAD(&keg->uk_part_slab, slab,
                                    us_link);
                        }
                        return (slab);
                }

                /*
                 * M_NOVM means don't ask at all!
                 */
                if (flags & M_NOVM)
                        break;

                /*
                 * The keg has a page limit and has reached it.  Mark it
                 * full; a non-sleeping caller gives up, a sleeping caller
                 * waits on the keg (uma_zfree_internal() issues the
                 * wakeup) and then re-evaluates from the top of the loop.
                 */
                if (keg->uk_maxpages &&
                    keg->uk_pages >= keg->uk_maxpages) {
                        keg->uk_flags |= UMA_ZFLAG_FULL;

                        if (flags & M_NOWAIT)
                                break;
                        else
                                msleep(keg, &keg->uk_lock, PVM,
                                    "zonelimit", 0);
                        continue;
                }
                /* uk_recurse brackets the call for the guard at the top. */
                keg->uk_recurse++;
                slab = slab_zalloc(zone, flags);
                keg->uk_recurse--;

                /*
                 * If we got a slab here it's safe to mark it partially used
                 * and return.  We assume that the caller is going to remove
                 * at least one item.
                 */
                if (slab) {
                        LIST_INSERT_HEAD(&keg->uk_part_slab, slab, us_link);
                        return (slab);
                }
                /*
                 * We might not have been able to get a slab but another cpu
                 * could have while we were unlocked.  Check again before we
                 * fail.  Setting M_NOVM makes the next iteration stop right
                 * after the uk_free check.
                 */
                if (flags & M_NOWAIT)
                        flags |= M_NOVM;
        }
        return (slab);
}
2010
2011 static void *
2012 uma_slab_alloc(uma_zone_t zone, uma_slab_t slab)
2013 {
2014         uma_keg_t keg;
2015         uma_slabrefcnt_t slabref;
2016         void *item;
2017         u_int8_t freei;
2018
2019         keg = zone->uz_keg;
2020
2021         freei = slab->us_firstfree;
2022         if (keg->uk_flags & UMA_ZONE_REFCNT) {
2023                 slabref = (uma_slabrefcnt_t)slab;
2024                 slab->us_firstfree = slabref->us_freelist[freei].us_item;
2025         } else {
2026                 slab->us_firstfree = slab->us_freelist[freei].us_item;
2027         }
2028         item = slab->us_data + (keg->uk_rsize * freei);
2029
2030         slab->us_freecount--;
2031         keg->uk_free--;
2032 #ifdef INVARIANTS
2033         uma_dbg_alloc(zone, slab, item);
2034 #endif
2035         /* Move this slab to the full list */
2036         if (slab->us_freecount == 0) {
2037                 LIST_REMOVE(slab, us_link);
2038                 LIST_INSERT_HEAD(&keg->uk_full_slab, slab, us_link);
2039         }
2040
2041         return (item);
2042 }
2043
/*
 * Fill a bucket with items from the zone's keg and put it on the zone's
 * uz_full_bucket list.
 *
 * The caller in this file holds ZONE_LOCK(zone).  The lock is dropped
 * around bucket_alloc() and around the zone init callbacks, and is held
 * again on return.
 *
 * Arguments:
 *      zone   The zone to fill a bucket for.
 *      flags  Allocation flags (M_WAITOK/M_NOWAIT, ...).
 *
 * Returns:
 *      1 if a non-empty bucket was added to uz_full_bucket, 0 otherwise.
 */
static int
uma_zalloc_bucket(uma_zone_t zone, int flags)
{
        uma_bucket_t bucket;
        uma_slab_t slab;
        int16_t saved;
        int max, origflags = flags;

        /*
         * Try this zone's free list first so we don't allocate extra buckets.
         */
        if ((bucket = LIST_FIRST(&zone->uz_free_bucket)) != NULL) {
                KASSERT(bucket->ub_cnt == 0,
                    ("uma_zalloc_bucket: Bucket on free list is not empty."));
                LIST_REMOVE(bucket, ub_link);
        } else {
                int bflags;

                /* The bucket itself never needs zeroing. */
                bflags = (flags & ~M_ZERO);
                if (zone->uz_keg->uk_flags & UMA_ZFLAG_CACHEONLY)
                        bflags |= M_NOVM;

                /* bucket_alloc() is called without the zone lock. */
                ZONE_UNLOCK(zone);
                bucket = bucket_alloc(zone->uz_count, bflags);
                ZONE_LOCK(zone);
        }

        if (bucket == NULL)
                return (0);

#ifdef SMP
        /*
         * This code is here to limit the number of simultaneous bucket fills
         * for any given zone to the number of per cpu caches in this zone. This
         * is done so that we don't allocate more memory than we really need.
         */
        if (zone->uz_fills >= mp_ncpus)
                goto done;

#endif
        zone->uz_fills++;

        max = MIN(bucket->ub_entries, zone->uz_count);
        /* Try to keep the buckets totally full */
        saved = bucket->ub_cnt;
        while (bucket->ub_cnt < max &&
            (slab = uma_zone_slab(zone, flags)) != NULL) {
                while (slab->us_freecount && bucket->ub_cnt < max) {
                        bucket->ub_bucket[bucket->ub_cnt++] =
                            uma_slab_alloc(zone, slab);
                }

                /* Don't block on the next fill */
                flags |= M_NOWAIT;
        }

        /*
         * We unlock here because we need to call the zone's init.
         * It should be safe to unlock because the slab dealt with
         * above is already on the appropriate list within the keg
         * and the bucket we filled is not yet on any list, so we
         * own it.
         */
        if (zone->uz_init != NULL) {
                int i;

                ZONE_UNLOCK(zone);
                /*
                 * Only the items added by this call (index >= saved) are
                 * initialized, using the caller's original flags rather
                 * than the M_NOWAIT-augmented ones.
                 */
                for (i = saved; i < bucket->ub_cnt; i++)
                        if (zone->uz_init(bucket->ub_bucket[i],
                            zone->uz_keg->uk_size, origflags) != 0)
                                break;
                /*
                 * If we couldn't initialize the whole bucket, put the
                 * rest back onto the freelist.
                 */
                if (i != bucket->ub_cnt) {
                        int j;

                        for (j = i; j < bucket->ub_cnt; j++) {
                                uma_zfree_internal(zone, bucket->ub_bucket[j],
                                    NULL, SKIP_FINI);
#ifdef INVARIANTS
                                bucket->ub_bucket[j] = NULL;
#endif
                        }
                        bucket->ub_cnt = i;
                }
                ZONE_LOCK(zone);
        }

        zone->uz_fills--;
        if (bucket->ub_cnt != 0) {
                LIST_INSERT_HEAD(&zone->uz_full_bucket,
                    bucket, ub_link);
                return (1);
        }
        /* Nothing could be put in the bucket (or fills are capped). */
#ifdef SMP
done:
#endif
        bucket_free(bucket);

        return (0);
}
2147 /*
2148  * Allocates an item for an internal zone
2149  *
2150  * Arguments
2151  *      zone   The zone to alloc for.
2152  *      udata  The data to be passed to the constructor.
2153  *      flags  M_WAITOK, M_NOWAIT, M_ZERO.
2154  *
2155  * Returns
2156  *      NULL if there is no memory and M_NOWAIT is set
2157  *      An item if successful
2158  */
2159
2160 static void *
2161 uma_zalloc_internal(uma_zone_t zone, void *udata, int flags)
2162 {
2163         uma_keg_t keg;
2164         uma_slab_t slab;
2165         void *item;
2166
2167         item = NULL;
2168         keg = zone->uz_keg;
2169
2170 #ifdef UMA_DEBUG_ALLOC
2171         printf("INTERNAL: Allocating one item from %s(%p)\n", zone->uz_name, zone);
2172 #endif
2173         ZONE_LOCK(zone);
2174
2175         slab = uma_zone_slab(zone, flags);
2176         if (slab == NULL) {
2177                 ZONE_UNLOCK(zone);
2178                 return (NULL);
2179         }
2180
2181         item = uma_slab_alloc(zone, slab);
2182
2183         ZONE_UNLOCK(zone);
2184
2185         /*
2186          * We have to call both the zone's init (not the keg's init)
2187          * and the zone's ctor.  This is because the item is going from
2188          * a keg slab directly to the user, and the user is expecting it
2189          * to be both zone-init'd as well as zone-ctor'd.
2190          */
2191         if (zone->uz_init != NULL) {
2192                 if (zone->uz_init(item, keg->uk_size, flags) != 0) {
2193                         uma_zfree_internal(zone, item, udata, SKIP_FINI);
2194                         return (NULL);
2195                 }
2196         }
2197         if (zone->uz_ctor != NULL) {
2198                 if (zone->uz_ctor(item, keg->uk_size, udata, flags) != 0) {
2199                         uma_zfree_internal(zone, item, udata, SKIP_DTOR);
2200                         return (NULL);
2201                 }
2202         }
2203         if (flags & M_ZERO)
2204                 bzero(item, keg->uk_size);
2205
2206         return (item);
2207 }
2208
/*
 * See uma.h
 *
 * Free 'item' back to 'zone'.
 *
 * The attempts are made in this order:
 *   1. If the keg is flagged UMA_ZFLAG_FULL, bypass the caches and free
 *      directly to the keg via uma_zfree_internal().
 *   2. Run the zone's dtor (if any), then store the item in the current
 *      CPU's free bucket.
 *   3. If the free bucket is full, swap it with the alloc bucket when the
 *      latter holds fewer items, and retry.
 *   4. Move the free bucket to the zone's uz_full_bucket list, take a
 *      bucket from uz_free_bucket and retry.
 *   5. Allocate a fresh bucket (M_NOWAIT), put it on uz_free_bucket and
 *      restart.
 *   6. If all of that fails, free directly to the keg.
 *
 * Arguments:
 *      zone   The zone to free to.
 *      item   The item being freed.
 *      udata  Passed to the dtor; for UMA_ZONE_MALLOC kegs it is also
 *             forwarded to uma_dbg_free() and uma_zfree_internal().
 */
void
uma_zfree_arg(uma_zone_t zone, void *item, void *udata)
{
        uma_keg_t keg;
        uma_cache_t cache;
        uma_bucket_t bucket;
        int bflags;
        int cpu;
        enum zfreeskip skip;

        /* This is the fast path free */
        skip = SKIP_NONE;
        keg = zone->uz_keg;

#ifdef UMA_DEBUG_ALLOC_1
        printf("Freeing item %p to %s(%p)\n", item, zone->uz_name, zone);
#endif
        CTR2(KTR_UMA, "uma_zfree_arg thread %x zone %s", curthread,
            zone->uz_name);

        /*
         * The race here is acceptable.  If we miss it we'll just have to wait
         * a little longer for the limits to be reset.
         */

        if (keg->uk_flags & UMA_ZFLAG_FULL)
                goto zfree_internal;

        /*
         * Run the dtor once, up front, and remember that it has been run so
         * that a later fallback to uma_zfree_internal() does not repeat it.
         */
        if (zone->uz_dtor) {
                zone->uz_dtor(item, keg->uk_size, udata);
                skip = SKIP_DTOR;
        }

        /* Full restart: re-read the CPU id and take that CPU's lock. */
zfree_restart:
        cpu = PCPU_GET(cpuid);
        CPU_LOCK(cpu);
        cache = &zone->uz_cpu[cpu];

        /* Retry with the CPU lock still held. */
zfree_start:
        bucket = cache->uc_freebucket;

        if (bucket) {
                /*
                 * Do we have room in our bucket? It is OK for this uz count
                 * check to be slightly out of sync.
                 */

                if (bucket->ub_cnt < bucket->ub_entries) {
                        KASSERT(bucket->ub_bucket[bucket->ub_cnt] == NULL,
                            ("uma_zfree: Freeing to non free bucket index."));
                        bucket->ub_bucket[bucket->ub_cnt] = item;
                        bucket->ub_cnt++;
#ifdef INVARIANTS
                        ZONE_LOCK(zone);
                        if (keg->uk_flags & UMA_ZONE_MALLOC)
                                uma_dbg_free(zone, udata, item);
                        else
                                uma_dbg_free(zone, NULL, item);
                        ZONE_UNLOCK(zone);
#endif
                        CPU_UNLOCK(cpu);
                        return;
                } else if (cache->uc_allocbucket) {
#ifdef UMA_DEBUG_ALLOC
                        printf("uma_zfree: Swapping buckets.\n");
#endif
                        /*
                         * We have run out of space in our freebucket.
                         * See if we can switch with our alloc bucket.
                         */
                        if (cache->uc_allocbucket->ub_cnt <
                            cache->uc_freebucket->ub_cnt) {
                                bucket = cache->uc_freebucket;
                                cache->uc_freebucket = cache->uc_allocbucket;
                                cache->uc_allocbucket = bucket;
                                goto zfree_start;
                        }
                }
        }
        /*
         * We can get here for two reasons:
         *
         * 1) The buckets are NULL
         * 2) The alloc and free buckets are both somewhat full.
         */

        ZONE_LOCK(zone);

        bucket = cache->uc_freebucket;
        cache->uc_freebucket = NULL;

        /* Can we throw this on the zone full list? */
        if (bucket != NULL) {
#ifdef UMA_DEBUG_ALLOC
                printf("uma_zfree: Putting old bucket on the free list.\n");
#endif
                /* ub_cnt is pointing to the last free item */
                KASSERT(bucket->ub_cnt != 0,
                    ("uma_zfree: Attempting to insert an empty bucket onto the full list.\n"));
                LIST_INSERT_HEAD(&zone->uz_full_bucket,
                    bucket, ub_link);
        }
        /* Reuse an empty bucket from the zone if one is available. */
        if ((bucket = LIST_FIRST(&zone->uz_free_bucket)) != NULL) {
                LIST_REMOVE(bucket, ub_link);
                ZONE_UNLOCK(zone);
                cache->uc_freebucket = bucket;
                goto zfree_start;
        }
        /* We're done with this CPU now */
        CPU_UNLOCK(cpu);

        /* And the zone.. */
        ZONE_UNLOCK(zone);

#ifdef UMA_DEBUG_ALLOC
        printf("uma_zfree: Allocating new free bucket.\n");
#endif
        /* The bucket allocation is done without any lock held. */
        bflags = M_NOWAIT;

        if (keg->uk_flags & UMA_ZFLAG_CACHEONLY)
                bflags |= M_NOVM;
        bucket = bucket_alloc(zone->uz_count, bflags);
        if (bucket) {
                ZONE_LOCK(zone);
                LIST_INSERT_HEAD(&zone->uz_free_bucket,
                    bucket, ub_link);
                ZONE_UNLOCK(zone);
                goto zfree_restart;
        }

        /*
         * If nothing else caught this, we'll just do an internal free.
         */

zfree_internal:

#ifdef INVARIANTS
        /*
         * If we need to skip the dtor and the uma_dbg_free in
         * uma_zfree_internal because we've already called the dtor
         * above, but we ended up here, then we need to make sure
         * that we take care of the uma_dbg_free immediately.
         */
        if (skip) {
                ZONE_LOCK(zone);
                if (keg->uk_flags & UMA_ZONE_MALLOC)
                        uma_dbg_free(zone, udata, item);
                else
                        uma_dbg_free(zone, NULL, item);
                ZONE_UNLOCK(zone);
        }
#endif
        uma_zfree_internal(zone, item, udata, skip);

        return;
}
2366
/*
 * Frees an item to an INTERNAL zone or allocates a free bucket
 *
 * The item is returned directly to its slab in the zone's keg, bypassing
 * the per-CPU caches.  The dtor and fini are run first, outside the zone
 * lock, unless 'skip' says otherwise.
 *
 * Arguments:
 *      zone   The zone to free to
 *      item   The item we're freeing
 *      udata  User supplied data for the dtor; for UMA_ZONE_MALLOC kegs it
 *             is also used as the slab pointer
 *      skip   Skip dtors and finis: the dtor runs only when
 *             skip < SKIP_DTOR, the fini only when skip < SKIP_FINI
 */
static void
uma_zfree_internal(uma_zone_t zone, void *item, void *udata,
    enum zfreeskip skip)
{
        uma_slab_t slab;
        uma_slabrefcnt_t slabref;
        uma_keg_t keg;
        u_int8_t *mem;
        u_int8_t freei;

        keg = zone->uz_keg;

        if (skip < SKIP_DTOR && zone->uz_dtor)
                zone->uz_dtor(item, keg->uk_size, udata);
        if (skip < SKIP_FINI && zone->uz_fini)
                zone->uz_fini(item, keg->uk_size);

        ZONE_LOCK(zone);

        /*
         * Locate the slab header: mask the item address down to its slab
         * base, then either look the base up in the keg's hash
         * (UMA_ZONE_HASH) or add uk_pgoff to reach the header.  For
         * UMA_ZONE_MALLOC kegs the caller passes the slab in udata.
         */
        if (!(keg->uk_flags & UMA_ZONE_MALLOC)) {
                mem = (u_int8_t *)((unsigned long)item & (~UMA_SLAB_MASK));
                if (keg->uk_flags & UMA_ZONE_HASH)
                        slab = hash_sfind(&keg->uk_hash, mem);
                else {
                        mem += keg->uk_pgoff;
                        slab = (uma_slab_t)mem;
                }
        } else {
                slab = (uma_slab_t)udata;
        }

        /*
         * Do we need to remove from any lists?  A slab about to become
         * completely free moves to uk_free_slab; a slab that was full
         * moves to uk_part_slab.
         */
        if (slab->us_freecount+1 == keg->uk_ipers) {
                LIST_REMOVE(slab, us_link);
                LIST_INSERT_HEAD(&keg->uk_free_slab, slab, us_link);
        } else if (slab->us_freecount == 0) {
                LIST_REMOVE(slab, us_link);
                LIST_INSERT_HEAD(&keg->uk_part_slab, slab, us_link);
        }

        /* Slab management stuff: index of the item within the slab */
        freei = ((unsigned long)item - (unsigned long)slab->us_data)
                / keg->uk_rsize;

#ifdef INVARIANTS
        if (!skip)
                uma_dbg_free(zone, slab, item);
#endif

        /* Push the item onto the head of the slab's free list. */
        if (keg->uk_flags & UMA_ZONE_REFCNT) {
                slabref = (uma_slabrefcnt_t)slab;
                slabref->us_freelist[freei].us_item = slab->us_firstfree;
        } else {
                slab->us_freelist[freei].us_item = slab->us_firstfree;
        }
        slab->us_firstfree = freei;
        slab->us_freecount++;

        /* Zone statistics */
        keg->uk_free++;

        /*
         * The keg was marked full by uma_zone_slab().  Clear the mark if
         * the page count is below the limit, and wake one thread sleeping
         * on the keg.
         */
        if (keg->uk_flags & UMA_ZFLAG_FULL) {
                if (keg->uk_pages < keg->uk_maxpages)
                        keg->uk_flags &= ~UMA_ZFLAG_FULL;

                /* We can handle one more allocation */
                wakeup_one(keg);
        }

        ZONE_UNLOCK(zone);
}
2447
2448 /* See uma.h */
2449 void
2450 uma_zone_set_max(uma_zone_t zone, int nitems)
2451 {
2452         uma_keg_t keg;
2453
2454         keg = zone->uz_keg;
2455         ZONE_LOCK(zone);
2456         if (keg->uk_ppera > 1)
2457                 keg->uk_maxpages = nitems * keg->uk_ppera;
2458         else
2459                 keg->uk_maxpages = nitems / keg->uk_ipers;
2460
2461         if (keg->uk_maxpages * keg->uk_ipers < nitems)
2462                 keg->uk_maxpages++;
2463
2464         ZONE_UNLOCK(zone);
2465 }
2466
/*
 * See uma.h
 *
 * Replace the init callback of the zone's keg (uk_init).  Done under the
 * zone lock; asserts that the keg has no pages yet (uk_pages == 0).
 */
void
uma_zone_set_init(uma_zone_t zone, uma_init uminit)
{
        ZONE_LOCK(zone);
        KASSERT(zone->uz_keg->uk_pages == 0,
            ("uma_zone_set_init on non-empty keg"));
        zone->uz_keg->uk_init = uminit;
        ZONE_UNLOCK(zone);
}
2477
/*
 * See uma.h
 *
 * Replace the fini callback of the zone's keg (uk_fini).  Done under the
 * zone lock; asserts that the keg has no pages yet (uk_pages == 0).
 */
void
uma_zone_set_fini(uma_zone_t zone, uma_fini fini)
{
        ZONE_LOCK(zone);
        KASSERT(zone->uz_keg->uk_pages == 0,
            ("uma_zone_set_fini on non-empty keg"));
        zone->uz_keg->uk_fini = fini;
        ZONE_UNLOCK(zone);
}
2488
/*
 * See uma.h
 *
 * Replace the zone-level init callback (uz_init), as opposed to the keg's
 * uk_init.  Done under the zone lock; asserts that the zone's keg has no
 * pages yet (uk_pages == 0).
 */
void
uma_zone_set_zinit(uma_zone_t zone, uma_init zinit)
{
        ZONE_LOCK(zone);
        KASSERT(zone->uz_keg->uk_pages == 0,
            ("uma_zone_set_zinit on non-empty keg"));
        zone->uz_init = zinit;
        ZONE_UNLOCK(zone);
}
2499
/*
 * See uma.h
 *
 * Replace the zone-level fini callback (uz_fini), as opposed to the keg's
 * uk_fini.  Done under the zone lock; asserts that the zone's keg has no
 * pages yet (uk_pages == 0).
 */
void
uma_zone_set_zfini(uma_zone_t zone, uma_fini zfini)
{
        ZONE_LOCK(zone);
        KASSERT(zone->uz_keg->uk_pages == 0,
            ("uma_zone_set_zfini on non-empty keg"));
        zone->uz_fini = zfini;
        ZONE_UNLOCK(zone);
}
2510
/*
 * See uma.h
 *
 * Install 'freef' as the keg's uk_freef routine.  The store is made under
 * the zone lock.
 */
/* XXX uk_freef is not actually used with the zone locked */
void
uma_zone_set_freef(uma_zone_t zone, uma_free freef)
{
        ZONE_LOCK(zone);
        zone->uz_keg->uk_freef = freef;
        ZONE_UNLOCK(zone);
}
2520
/*
 * See uma.h
 *
 * Install 'allocf' as the keg's uk_allocf routine and set
 * UMA_ZFLAG_PRIVALLOC in the keg flags.  Both stores are made under the
 * zone lock.
 */
/* XXX uk_allocf is not actually used with the zone locked */
void
uma_zone_set_allocf(uma_zone_t zone, uma_alloc allocf)
{
        ZONE_LOCK(zone);
        zone->uz_keg->uk_flags |= UMA_ZFLAG_PRIVALLOC;
        zone->uz_keg->uk_allocf = allocf;
        ZONE_UNLOCK(zone);
}
2531
2532 /* See uma.h */
2533 int
2534 uma_zone_set_obj(uma_zone_t zone, struct vm_object *obj, int count)
2535 {
2536         uma_keg_t keg;
2537         vm_offset_t kva;
2538         int pages;
2539
2540         keg = zone->uz_keg;
2541         pages = count / keg->uk_ipers;
2542
2543         if (pages * keg->uk_ipers < count)
2544                 pages++;
2545
2546         kva = kmem_alloc_nofault(kernel_map, pages * UMA_SLAB_SIZE);
2547
2548         if (kva == 0)
2549                 return (0);
2550         if (obj == NULL) {
2551                 obj = vm_object_allocate(OBJT_DEFAULT,
2552                     pages);
2553         } else {
2554                 VM_OBJECT_LOCK_INIT(obj, "uma object");
2555                 _vm_object_allocate(OBJT_DEFAULT,
2556                     pages, obj);
2557         }
2558         ZONE_LOCK(zone);
2559         keg->uk_kva = kva;
2560         keg->uk_obj = obj;
2561         keg->uk_maxpages = pages;
2562         keg->uk_allocf = obj_alloc;
2563         keg->uk_flags |= UMA_ZONE_NOFREE | UMA_ZFLAG_PRIVALLOC;
2564         ZONE_UNLOCK(zone);
2565         return (1);
2566 }
2567
2568 /* See uma.h */
2569 void
2570 uma_prealloc(uma_zone_t zone, int items)
2571 {
2572         int slabs;
2573         uma_slab_t slab;
2574         uma_keg_t keg;
2575
2576         keg = zone->uz_keg;
2577         ZONE_LOCK(zone);
2578         slabs = items / keg->uk_ipers;
2579         if (slabs * keg->uk_ipers < items)
2580                 slabs++;
2581         while (slabs > 0) {
2582                 slab = slab_zalloc(zone, M_WAITOK);
2583                 LIST_INSERT_HEAD(&keg->uk_free_slab, slab, us_link);
2584                 slabs--;
2585         }
2586         ZONE_UNLOCK(zone);
2587 }
2588
2589 /* See uma.h */
2590 u_int32_t *
2591 uma_find_refcnt(uma_zone_t zone, void *item)
2592 {
2593         uma_slabrefcnt_t slabref;
2594         uma_keg_t keg;
2595         u_int32_t *refcnt;
2596         int idx;
2597
2598         keg = zone->uz_keg;
2599         slabref = (uma_slabrefcnt_t)vtoslab((vm_offset_t)item &
2600             (~UMA_SLAB_MASK));
2601         KASSERT(slabref != NULL && slabref->us_keg->uk_flags & UMA_ZONE_REFCNT,
2602             ("uma_find_refcnt(): zone possibly not UMA_ZONE_REFCNT"));
2603         idx = ((unsigned long)item - (unsigned long)slabref->us_data)
2604             / keg->uk_rsize;
2605         refcnt = &slabref->us_freelist[idx].us_refcnt;
2606         return refcnt;
2607 }
2608
/*
 * See uma.h
 *
 * Release cached memory: calls bucket_enable(), drains every zone via
 * zone_foreach(zone_drain), and then drains the slab header zones and the
 * bucket zones once more.
 */
void
uma_reclaim(void)
{
#ifdef UMA_DEBUG
        printf("UMA: vm asked us to release pages!\n");
#endif
        bucket_enable();
        zone_foreach(zone_drain);
        /*
         * Draining the other zones may have freed slab headers, but the
         * slab header zones are visited early in the pass above.  Visit
         * them again so that pages which became empty once the other zones
         * were drained can be freed.  We have to do the same for buckets.
         */
        zone_drain(slabzone);
        zone_drain(slabrefzone);
        bucket_zone_drain();
}
2627
2628 void *
2629 uma_large_malloc(int size, int wait)
2630 {
2631         void *mem;
2632         uma_slab_t slab;
2633         u_int8_t flags;
2634
2635         slab = uma_zalloc_internal(slabzone, NULL, wait);
2636         if (slab == NULL)
2637                 return (NULL);
2638         mem = page_alloc(NULL, size, &flags, wait);
2639         if (mem) {
2640                 vsetslab((vm_offset_t)mem, slab);
2641                 slab->us_data = mem;
2642                 slab->us_flags = flags | UMA_SLAB_MALLOC;
2643                 slab->us_size = size;
2644         } else {
2645                 uma_zfree_internal(slabzone, slab, NULL, 0);
2646         }
2647
2648         return (mem);
2649 }
2650
2651 void
2652 uma_large_free(uma_slab_t slab)
2653 {
2654         vsetobj((vm_offset_t)slab->us_data, kmem_object);
2655         page_free(slab->us_data, slab->us_size, slab->us_flags);
2656         uma_zfree_internal(slabzone, slab, NULL, 0);
2657 }
2658
2659 void
2660 uma_print_stats(void)
2661 {
2662         zone_foreach(uma_print_zone);
2663 }
2664
2665 static void
2666 slab_print(uma_slab_t slab)
2667 {
2668         printf("slab: keg %p, data %p, freecount %d, firstfree %d\n",
2669                 slab->us_keg, slab->us_data, slab->us_freecount,
2670                 slab->us_firstfree);
2671 }
2672
2673 static void
2674 cache_print(uma_cache_t cache)
2675 {
2676         printf("alloc: %p(%d), free: %p(%d)\n",
2677                 cache->uc_allocbucket,
2678                 cache->uc_allocbucket?cache->uc_allocbucket->ub_cnt:0,
2679                 cache->uc_freebucket,
2680                 cache->uc_freebucket?cache->uc_freebucket->ub_cnt:0);
2681 }
2682
2683 void
2684 uma_print_zone(uma_zone_t zone)
2685 {
2686         uma_cache_t cache;
2687         uma_keg_t keg;
2688         uma_slab_t slab;
2689         int i;
2690
2691         keg = zone->uz_keg;
2692         printf("%s(%p) size %d(%d) flags %d ipers %d ppera %d out %d free %d\n",
2693             zone->uz_name, zone, keg->uk_size, keg->uk_rsize, keg->uk_flags,
2694             keg->uk_ipers, keg->uk_ppera,
2695             (keg->uk_ipers * keg->uk_pages) - keg->uk_free, keg->uk_free);
2696         printf("Part slabs:\n");
2697         LIST_FOREACH(slab, &keg->uk_part_slab, us_link)
2698                 slab_print(slab);
2699         printf("Free slabs:\n");
2700         LIST_FOREACH(slab, &keg->uk_free_slab, us_link)
2701                 slab_print(slab);
2702         printf("Full slabs:\n");
2703         LIST_FOREACH(slab, &keg->uk_full_slab, us_link)
2704                 slab_print(slab);
2705         for (i = 0; i <= mp_maxid; i++) {
2706                 if (CPU_ABSENT(i))
2707                         continue;
2708                 cache = &zone->uz_cpu[i];
2709                 printf("CPU %d Cache:\n", i);
2710                 cache_print(cache);
2711         }
2712 }
2713
2714 /*
2715  * Sysctl handler for vm.zone
2716  *
2717  * stolen from vm_zone.c
2718  */
2719 static int
2720 sysctl_vm_zone(SYSCTL_HANDLER_ARGS)
2721 {
2722         int error, len, cnt;
2723         const int linesize = 128;       /* conservative */
2724         int totalfree;
2725         char *tmpbuf, *offset;
2726         uma_zone_t z;
2727         uma_keg_t zk;
2728         char *p;
2729         int cpu;
2730         int cachefree;
2731         uma_bucket_t bucket;
2732         uma_cache_t cache;
2733
2734         cnt = 0;
2735         mtx_lock(&uma_mtx);
2736         LIST_FOREACH(zk, &uma_kegs, uk_link) {
2737                 LIST_FOREACH(z, &zk->uk_zones, uz_link)
2738                         cnt++;
2739         }
2740         mtx_unlock(&uma_mtx);
2741         MALLOC(tmpbuf, char *, (cnt == 0 ? 1 : cnt) * linesize,
2742                         M_TEMP, M_WAITOK);
2743         len = snprintf(tmpbuf, linesize,
2744             "\nITEM            SIZE     LIMIT     USED    FREE  REQUESTS\n\n");
2745         if (cnt == 0)
2746                 tmpbuf[len - 1] = '\0';
2747         error = SYSCTL_OUT(req, tmpbuf, cnt == 0 ? len-1 : len);
2748         if (error || cnt == 0)
2749                 goto out;
2750         offset = tmpbuf;
2751         mtx_lock(&uma_mtx);
2752         LIST_FOREACH(zk, &uma_kegs, uk_link) {
2753           LIST_FOREACH(z, &zk->uk_zones, uz_link) {
2754                 if (cnt == 0)   /* list may have changed size */
2755                         break;
2756                 if (!(zk->uk_flags & UMA_ZFLAG_INTERNAL)) {
2757                         for (cpu = 0; cpu <= mp_maxid; cpu++) {
2758                                 if (CPU_ABSENT(cpu))
2759                                         continue;
2760                                 CPU_LOCK(cpu);
2761                         }
2762                 }
2763                 ZONE_LOCK(z);
2764                 cachefree = 0;
2765                 if (!(zk->uk_flags & UMA_ZFLAG_INTERNAL)) {
2766                         for (cpu = 0; cpu <= mp_maxid; cpu++) {
2767                                 if (CPU_ABSENT(cpu))
2768                                         continue;
2769                                 cache = &z->uz_cpu[cpu];
2770                                 if (cache->uc_allocbucket != NULL)
2771                                         cachefree += cache->uc_allocbucket->ub_cnt;
2772                                 if (cache->uc_freebucket != NULL)
2773                                         cachefree += cache->uc_freebucket->ub_cnt;
2774                                 CPU_UNLOCK(cpu);
2775                         }
2776                 }
2777                 LIST_FOREACH(bucket, &z->uz_full_bucket, ub_link) {
2778                         cachefree += bucket->ub_cnt;
2779                 }
2780                 totalfree = zk->uk_free + cachefree;
2781                 len = snprintf(offset, linesize,
2782                     "%-12.12s  %6.6u, %8.8u, %6.6u, %6.6u, %8.8llu\n",
2783                     z->uz_name, zk->uk_size,
2784                     zk->uk_maxpages * zk->uk_ipers,
2785                     (zk->uk_ipers * (zk->uk_pages / zk->uk_ppera)) - totalfree,
2786                     totalfree,
2787                     (unsigned long long)z->uz_allocs);
2788                 ZONE_UNLOCK(z);
2789                 for (p = offset + 12; p > offset && *p == ' '; --p)
2790                         /* nothing */ ;
2791                 p[1] = ':';
2792                 cnt--;
2793                 offset += len;
2794           }
2795         }
2796         mtx_unlock(&uma_mtx);
2797         *offset++ = '\0';
2798         error = SYSCTL_OUT(req, tmpbuf, offset - tmpbuf);
2799 out:
2800         FREE(tmpbuf, M_TEMP);
2801         return (error);
2802 }