sys/vm/uma_core.c

   1 /*-
   2  * Copyright (c) 2004, 2005,
   3  *     Bosko Milekic <bmilekic@freebsd.org>
   4  * Copyright (c) 2002, 2003, 2004, 2005,
   5  *     Jeffrey Roberson <jeff@freebsd.org>
   6  *
   7  * Redistribution and use in source and binary forms, with or without
   8  * modification, are permitted provided that the following conditions
   9  * are met:
  10  * 1. Redistributions of source code must retain the above copyright
  11  *    notice unmodified, this list of conditions, and the following
  12  *    disclaimer.
  13  * 2. Redistributions in binary form must reproduce the above copyright
  14  *    notice, this list of conditions and the following disclaimer in the
  15  *    documentation and/or other materials provided with the distribution.
  16  *
  17  * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
  18  * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
  19  * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
  20  * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT,
  21  * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
  22  * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
  23  * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
  24  * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
  25  * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
  26  * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
  27  */
  28
  29 /*
  30  * uma_core.c  Implementation of the Universal Memory allocator
  31  *
  32  * This allocator is intended to replace the multitude of similar object caches
  33  * in the standard FreeBSD kernel.  The intent is to be flexible as well as
   34  * efficient.  A primary design goal is to return unused memory to the rest of
  35  * the system.  This will make the system as a whole more flexible due to the
  36  * ability to move memory to subsystems which most need it instead of leaving
  37  * pools of reserved memory unused.
  38  *
  39  * The basic ideas stem from similar slab/zone based allocators whose algorithms
  40  * are well known.
  41  *
  42  */
  43
  44 /*
  45  * TODO:
  46  *      - Improve memory usage for large allocations
  47  *      - Investigate cache size adjustments
  48  */
  49
  50 #include <sys/cdefs.h>
  51 __FBSDID("$FreeBSD$");
  52
  53 /* I should really use ktr.. */
  54 /*
  55 #define UMA_DEBUG 1
  56 #define UMA_DEBUG_ALLOC 1
  57 #define UMA_DEBUG_ALLOC_1 1
  58 */
  59
  60 #include "opt_param.h"
  61 #include <sys/param.h>
  62 #include <sys/systm.h>
  63 #include <sys/kernel.h>
  64 #include <sys/types.h>
  65 #include <sys/queue.h>
  66 #include <sys/malloc.h>
  67 #include <sys/ktr.h>
  68 #include <sys/lock.h>
  69 #include <sys/sysctl.h>
  70 #include <sys/mutex.h>
  71 #include <sys/proc.h>
  72 #include <sys/smp.h>
  73 #include <sys/vmmeter.h>
  74
  75 #include <vm/vm.h>
  76 #include <vm/vm_object.h>
  77 #include <vm/vm_page.h>
  78 #include <vm/vm_param.h>
  79 #include <vm/vm_map.h>
  80 #include <vm/vm_kern.h>
  81 #include <vm/vm_extern.h>
  82 #include <vm/uma.h>
  83 #include <vm/uma_int.h>
  84 #include <vm/uma_dbg.h>
  85
  86 #include <machine/vmparam.h>
  87
   88 /*
   89  * This is the zone and keg from which all zones are spawned.  The idea is that
   90  * even the zone & keg heads are allocated from the allocator, so we use the
   91  * bss section to bootstrap us.
   92  */
   93 static struct uma_keg masterkeg;
   94 static struct uma_zone masterzone_k;
   95 static struct uma_zone masterzone_z;
   96 static uma_zone_t kegs = &masterzone_k;
   97 static uma_zone_t zones = &masterzone_z;
   98
   99 /* This is the zone from which all of the uma_slab_t's are allocated. */
  100 static uma_zone_t slabzone;
  101 static uma_zone_t slabrefzone;  /* With refcounters (for UMA_ZONE_REFCNT) */
  102
  103 /*
  104  * The initial hash tables (UMA_HASH_SIZE_INIT entries) come out of this zone
  105  * so they can be allocated prior to malloc coming up; larger ones use malloc.
  106  */
  107 static uma_zone_t hashzone;
  108
  109 static MALLOC_DEFINE(M_UMAHASH, "UMAHash", "UMA Hash Buckets");
  110
  111 /*
  112  * Non-zero while bucket allocation is disabled; see bucket_enable().
  113  */
  114 static int bucketdisable = 1;
  115
  116 /* Linked list of all kegs in the system */
  117 static LIST_HEAD(,uma_keg) uma_kegs = LIST_HEAD_INITIALIZER(&uma_kegs);
  118
  119 /* This mutex protects the keg list and the boot time page list/count */
  120 static struct mtx uma_mtx;
  121
  122 /* These are the per-CPU cache locks, one slot for each of MAXCPU CPUs */
  123 static struct mtx uma_pcpu_mtx[MAXCPU];
  124
  125 /* Linked list of boot time pages */
  126 static LIST_HEAD(,uma_slab) uma_boot_pages =
  127     LIST_HEAD_INITIALIZER(&uma_boot_pages);
  128
  129 /* Count of free boot time pages still on uma_boot_pages */
  130 static int uma_boot_free = 0;
  131
  132 /* Is the VM done starting up?  While 0, running out of boot pages panics. */
  133 static int booted = 0;
  134
  135 /* Maximum number of allowed items-per-slab if the slab header is OFFPAGE */
  136 static u_int uma_max_ipers;
  137 static u_int uma_max_ipers_ref; /* presumably the UMA_ZONE_REFCNT variant -- confirm */
  138
  139 /*
  140  * This is the handle used to schedule events that need to happen
  141  * outside of the allocation fast path (it re-arms itself in uma_timeout()).
  142  */
  143 static struct callout uma_callout;
  144 #define UMA_TIMEOUT     20              /* Seconds for callout interval. */
 145
  146 /*
  147  * Bundle of zone creation parameters, passed as the zone ctor arg so that I
  148  * don't have to create a special allocation function just for zones.
  149  */
  150 struct uma_zctor_args {
  151         char *name;             /* zone name */
  152         size_t size;            /* presumably the item size -- confirm in zone_ctor() */
  153         uma_ctor ctor;          /* item constructor callback */
  154         uma_dtor dtor;          /* item destructor callback */
  155         uma_init uminit;        /* item init callback */
  156         uma_fini fini;          /* item fini callback */
  157         uma_keg_t keg;          /* NOTE(review): looks like an existing keg to attach to -- confirm */
  158         int align;              /* alignment request */
  159         u_int16_t flags;        /* zone flags */
  160 };
 161
  162 struct uma_kctor_args {         /* fields mirror the parameters of uma_kcreate() */
  163         uma_zone_t zone;        /* zone the keg is created for */
  164         size_t size;            /* presumably the item size -- confirm in keg_ctor() */
  165         uma_init uminit;        /* item init callback */
  166         uma_fini fini;          /* item fini callback */
  167         int align;              /* alignment request */
  168         u_int16_t flags;        /* keg/zone flags */
  169 };
 170
  171 struct uma_bucket_zone {        /* one element of bucket_zones[] */
  172         uma_zone_t      ubz_zone;       /* zone created for this size by bucket_init() */
  173         char            *ubz_name;      /* name handed to uma_zcreate() */
  174         int             ubz_entries;    /* item pointers per bucket; 0 ends the table */
  175 };
 176
  177 #define BUCKET_MAX      128     /* largest ubz_entries listed in bucket_zones[] */
  178
  179 struct uma_bucket_zone bucket_zones[] = {      /* ascending sizes, zero-terminated */
  180         { NULL, "16 Bucket", 16 },
  181         { NULL, "32 Bucket", 32 },
  182         { NULL, "64 Bucket", 64 },
  183         { NULL, "128 Bucket", 128 },
  184         { NULL, NULL, 0}
  185 };
  186
  187 #define BUCKET_SHIFT    4       /* bucket_size[] has one slot per 1 << BUCKET_SHIFT entries */
  188 #define BUCKET_ZONES    ((BUCKET_MAX >> BUCKET_SHIFT) + 1)     /* slots in bucket_size[] */
  189
  190 /*
  191  * bucket_size[] maps requested bucket sizes to zones that allocate a bucket
  192  * of approximately the right size.  Values are indices into bucket_zones[].
  193  */
  194 static uint8_t bucket_size[BUCKET_ZONES];
  195
  196 enum zfreeskip { SKIP_NONE, SKIP_DTOR, SKIP_FINI };     /* last argument of uma_zfree_internal() */
 197
  198 /* Forward declarations. */
  199
  200 static void *obj_alloc(uma_zone_t, int, u_int8_t *, int);
  201 static void *page_alloc(uma_zone_t, int, u_int8_t *, int);
  202 static void *startup_alloc(uma_zone_t, int, u_int8_t *, int);
  203 static void page_free(void *, int, u_int8_t);
  204 static uma_slab_t slab_zalloc(uma_zone_t, int);
  205 static void cache_drain(uma_zone_t);
  206 static void bucket_drain(uma_zone_t, uma_bucket_t);
  207 static void bucket_cache_drain(uma_zone_t zone);
  208 static int keg_ctor(void *, int, void *, int);
  209 static void keg_dtor(void *, int, void *);
  210 static int zone_ctor(void *, int, void *, int);
  211 static void zone_dtor(void *, int, void *);
  212 static int zero_init(void *, int, int);
  213 static void zone_small_init(uma_zone_t zone);
  214 static void zone_large_init(uma_zone_t zone);
  215 static void zone_foreach(void (*zfunc)(uma_zone_t));
  216 static void zone_timeout(uma_zone_t zone);
  217 static int hash_alloc(struct uma_hash *);
  218 static int hash_expand(struct uma_hash *, struct uma_hash *);
  219 static void hash_free(struct uma_hash *hash);
  220 static void uma_timeout(void *);
  221 static void uma_startup3(void);
  222 static void *uma_zalloc_internal(uma_zone_t, void *, int);
  223 static void uma_zfree_internal(uma_zone_t, void *, void *, enum zfreeskip);
  224 static void bucket_enable(void);
  225 static void bucket_init(void);
  226 static uma_bucket_t bucket_alloc(int, int);
  227 static void bucket_free(uma_bucket_t);
  228 static void bucket_zone_drain(void);
  229 static int uma_zalloc_bucket(uma_zone_t zone, int flags);
  230 static uma_slab_t uma_zone_slab(uma_zone_t zone, int flags);
  231 static void *uma_slab_alloc(uma_zone_t zone, uma_slab_t slab);
  232 static void zone_drain(uma_zone_t);
  233 static uma_zone_t uma_kcreate(uma_zone_t zone, size_t size, uma_init uminit,
  234     uma_fini fini, int align, u_int16_t flags);
  235
  236 void uma_print_zone(uma_zone_t);        /* not static: has external linkage */
  237 void uma_print_stats(void);             /* not static: has external linkage */
  238 static int sysctl_vm_zone(SYSCTL_HANDLER_ARGS);
 239
 240 #ifdef WITNESS
 241 static int nosleepwithlocks = 1;
 242 SYSCTL_INT(_debug, OID_AUTO, nosleepwithlocks, CTLFLAG_RW, &nosleepwithlocks,
 243     0, "Convert M_WAITOK to M_NOWAIT to avoid lock-held-across-sleep paths");
 244 #else
 245 static int nosleepwithlocks = 0;
 246 SYSCTL_INT(_debug, OID_AUTO, nosleepwithlocks, CTLFLAG_RW, &nosleepwithlocks,
 247     0, "Convert M_WAITOK to M_NOWAIT to avoid lock-held-across-sleep paths");
 248 #endif
 249 SYSCTL_OID(_vm, OID_AUTO, zone, CTLTYPE_STRING|CTLFLAG_RD,
 250     NULL, 0, sysctl_vm_zone, "A", "Zone Info");
 251 SYSINIT(uma_startup3, SI_SUB_VM_CONF, SI_ORDER_SECOND, uma_startup3, NULL);
 252
 253 /*
 254  * This routine checks to see whether or not it's safe to enable buckets.
 255  */
 256
 257 static void
 258 bucket_enable(void)
 259 {
 260         if (cnt.v_free_count < cnt.v_free_min)
 261                 bucketdisable = 1;
 262         else
 263                 bucketdisable = 0;
 264 }
 265
 266 /*
 267  * Initialize bucket_zones, the array of zones of buckets of various sizes.
 268  *
 269  * For each zone, calculate the memory required for each bucket, consisting
 270  * of the header and an array of pointers.  Initialize bucket_size[] to point
 271  * the range of appropriate bucket sizes at the zone.
 272  */
 273 static void
 274 bucket_init(void)
 275 {
 276         struct uma_bucket_zone *ubz;
 277         int i;
 278         int j;
 279
 280         for (i = 0, j = 0; bucket_zones[j].ubz_entries != 0; j++) {
 281                 int size;
 282
 283                 ubz = &bucket_zones[j];
 284                 size = roundup(sizeof(struct uma_bucket), sizeof(void *));
 285                 size += sizeof(void *) * ubz->ubz_entries;
 286                 ubz->ubz_zone = uma_zcreate(ubz->ubz_name, size,
 287                     NULL, NULL, NULL, NULL, UMA_ALIGN_PTR, UMA_ZFLAG_INTERNAL);
 288                 for (; i <= ubz->ubz_entries; i += (1 << BUCKET_SHIFT))
 289                         bucket_size[i >> BUCKET_SHIFT] = j;
 290         }
 291 }
 292
 293 /*
 294  * Given a desired number of entries for a bucket, return the zone from which
 295  * to allocate the bucket.
 296  */
 297 static struct uma_bucket_zone *
 298 bucket_zone_lookup(int entries)
 299 {
 300         int idx;
 301
 302         idx = howmany(entries, 1 << BUCKET_SHIFT);
 303         return (&bucket_zones[bucket_size[idx]]);
 304 }
 305
 306 static uma_bucket_t
 307 bucket_alloc(int entries, int bflags)
 308 {
 309         struct uma_bucket_zone *ubz;
 310         uma_bucket_t bucket;
 311
 312         /*
 313          * This is to stop us from allocating per cpu buckets while we're
 314          * running out of UMA_BOOT_PAGES.  Otherwise, we would exhaust the
 315          * boot pages.  This also prevents us from allocating buckets in
 316          * low memory situations.
 317          */
 318         if (bucketdisable)
 319                 return (NULL);
 320
 321         ubz = bucket_zone_lookup(entries);
 322         bucket = uma_zalloc_internal(ubz->ubz_zone, NULL, bflags);
 323         if (bucket) {
 324 #ifdef INVARIANTS
 325                 bzero(bucket->ub_bucket, sizeof(void *) * ubz->ubz_entries);
 326 #endif
 327                 bucket->ub_cnt = 0;
 328                 bucket->ub_entries = ubz->ubz_entries;
 329         }
 330
 331         return (bucket);
 332 }
 333
 334 static void
 335 bucket_free(uma_bucket_t bucket)
 336 {
 337         struct uma_bucket_zone *ubz;
 338
 339         ubz = bucket_zone_lookup(bucket->ub_entries);
 340         uma_zfree_internal(ubz->ubz_zone, bucket, NULL, SKIP_NONE);
 341 }
 342
 343 static void
 344 bucket_zone_drain(void)
 345 {
 346         struct uma_bucket_zone *ubz;
 347
 348         for (ubz = &bucket_zones[0]; ubz->ubz_entries != 0; ubz++)
 349                 zone_drain(ubz->ubz_zone);
 350 }
 351
 352
 353 /*
 354  * Routine called by timeout which is used to fire off some time interval
 355  * based calculations.  (stats, hash size, etc.)
 356  *
 357  * Arguments:
 358  *      arg   Unused
 359  *
 360  * Returns:
 361  *      Nothing
 362  */
 363 static void
 364 uma_timeout(void *unused)
 365 {
 366         bucket_enable();
 367         zone_foreach(zone_timeout);
 368
 369         /* Reschedule this event */
 370         callout_reset(&uma_callout, UMA_TIMEOUT * hz, uma_timeout, NULL);
 371 }
 372
 373 /*
 374  * Routine to perform timeout driven calculations.  This expands the
 375  * hashes and does per cpu statistics aggregation.
 376  *
 377  *  Arguments:
 378  *      zone  The zone to operate on
 379  *
 380  *  Returns:
 381  *      Nothing
 382  */
 383 static void
 384 zone_timeout(uma_zone_t zone)
 385 {
 386         uma_keg_t keg;
 387         uma_cache_t cache;
 388         u_int64_t alloc;
 389         int cpu;
 390
 391         keg = zone->uz_keg;
 392         alloc = 0;
 393
 394         /*
 395          * Aggregate per cpu cache statistics back to the zone.
 396          *
 397          * XXX This should be done in the sysctl handler.
 398          *
 399          * I may rewrite this to set a flag in the per cpu cache instead of
 400          * locking.  If the flag is not cleared on the next round I will have
 401          * to lock and do it here instead so that the statistics don't get too
 402          * far out of sync.
 403          */
 404         if (!(keg->uk_flags & UMA_ZFLAG_INTERNAL)) {
 405                 for (cpu = 0; cpu <= mp_maxid; cpu++) {
 406                         if (CPU_ABSENT(cpu))
 407                                 continue;
 408                         CPU_LOCK(cpu);
 409                         cache = &zone->uz_cpu[cpu];
 410                         /* Add them up, and reset */
 411                         alloc += cache->uc_allocs;
 412                         cache->uc_allocs = 0;
 413                         CPU_UNLOCK(cpu);
 414                 }
 415         }
 416
 417         /* Now push these stats back into the zone.. */
 418         ZONE_LOCK(zone);
 419         zone->uz_allocs += alloc;
 420
 421         /*
 422          * Expand the zone hash table.
 423          *
 424          * This is done if the number of slabs is larger than the hash size.
 425          * What I'm trying to do here is completely reduce collisions.  This
 426          * may be a little aggressive.  Should I allow for two collisions max?
 427          */
 428
 429         if (keg->uk_flags & UMA_ZONE_HASH &&
 430             keg->uk_pages / keg->uk_ppera >= keg->uk_hash.uh_hashsize) {
 431                 struct uma_hash newhash;
 432                 struct uma_hash oldhash;
 433                 int ret;
 434
 435                 /*
 436                  * This is so involved because allocating and freeing
 437                  * while the zone lock is held will lead to deadlock.
 438                  * I have to do everything in stages and check for
 439                  * races.
 440                  */
 441                 newhash = keg->uk_hash;
 442                 ZONE_UNLOCK(zone);
 443                 ret = hash_alloc(&newhash);
 444                 ZONE_LOCK(zone);
 445                 if (ret) {
 446                         if (hash_expand(&keg->uk_hash, &newhash)) {
 447                                 oldhash = keg->uk_hash;
 448                                 keg->uk_hash = newhash;
 449                         } else
 450                                 oldhash = newhash;
 451
 452                         ZONE_UNLOCK(zone);
 453                         hash_free(&oldhash);
 454                         ZONE_LOCK(zone);
 455                 }
 456         }
 457         ZONE_UNLOCK(zone);
 458 }
 459
 460 /*
 461  * Allocate and zero fill the next sized hash table from the appropriate
 462  * backing store.
 463  *
 464  * Arguments:
 465  *      hash  A new hash structure with the old hash size in uh_hashsize
 466  *
 467  * Returns:
 468  *      1 on sucess and 0 on failure.
 469  */
 470 static int
 471 hash_alloc(struct uma_hash *hash)
 472 {
 473         int oldsize;
 474         int alloc;
 475
 476         oldsize = hash->uh_hashsize;
 477
 478         /* We're just going to go to a power of two greater */
 479         if (oldsize)  {
 480                 hash->uh_hashsize = oldsize * 2;
 481                 alloc = sizeof(hash->uh_slab_hash[0]) * hash->uh_hashsize;
 482                 hash->uh_slab_hash = (struct slabhead *)malloc(alloc,
 483                     M_UMAHASH, M_NOWAIT);
 484         } else {
 485                 alloc = sizeof(hash->uh_slab_hash[0]) * UMA_HASH_SIZE_INIT;
 486                 hash->uh_slab_hash = uma_zalloc_internal(hashzone, NULL,
 487                     M_WAITOK);
 488                 hash->uh_hashsize = UMA_HASH_SIZE_INIT;
 489         }
 490         if (hash->uh_slab_hash) {
 491                 bzero(hash->uh_slab_hash, alloc);
 492                 hash->uh_hashmask = hash->uh_hashsize - 1;
 493                 return (1);
 494         }
 495
 496         return (0);
 497 }
 498
 499 /*
 500  * Expands the hash table for HASH zones.  This is done from zone_timeout
 501  * to reduce collisions.  This must not be done in the regular allocation
 502  * path, otherwise, we can recurse on the vm while allocating pages.
 503  *
 504  * Arguments:
 505  *      oldhash  The hash you want to expand
 506  *      newhash  The hash structure for the new table
 507  *
 508  * Returns:
 509  *      Nothing
 510  *
 511  * Discussion:
 512  */
 513 static int
 514 hash_expand(struct uma_hash *oldhash, struct uma_hash *newhash)
 515 {
 516         uma_slab_t slab;
 517         int hval;
 518         int i;
 519
 520         if (!newhash->uh_slab_hash)
 521                 return (0);
 522
 523         if (oldhash->uh_hashsize >= newhash->uh_hashsize)
 524                 return (0);
 525
 526         /*
 527          * I need to investigate hash algorithms for resizing without a
 528          * full rehash.
 529          */
 530
 531         for (i = 0; i < oldhash->uh_hashsize; i++)
 532                 while (!SLIST_EMPTY(&oldhash->uh_slab_hash[i])) {
 533                         slab = SLIST_FIRST(&oldhash->uh_slab_hash[i]);
 534                         SLIST_REMOVE_HEAD(&oldhash->uh_slab_hash[i], us_hlink);
 535                         hval = UMA_HASH(newhash, slab->us_data);
 536                         SLIST_INSERT_HEAD(&newhash->uh_slab_hash[hval],
 537                             slab, us_hlink);
 538                 }
 539
 540         return (1);
 541 }
 542
 543 /*
 544  * Free the hash bucket to the appropriate backing store.
 545  *
 546  * Arguments:
 547  *      slab_hash  The hash bucket we're freeing
 548  *      hashsize   The number of entries in that hash bucket
 549  *
 550  * Returns:
 551  *      Nothing
 552  */
 553 static void
 554 hash_free(struct uma_hash *hash)
 555 {
 556         if (hash->uh_slab_hash == NULL)
 557                 return;
 558         if (hash->uh_hashsize == UMA_HASH_SIZE_INIT)
 559                 uma_zfree_internal(hashzone,
 560                     hash->uh_slab_hash, NULL, SKIP_NONE);
 561         else
 562                 free(hash->uh_slab_hash, M_UMAHASH);
 563 }
 564
 565 /*
 566  * Frees all outstanding items in a bucket
 567  *
 568  * Arguments:
 569  *      zone   The zone to free to, must be unlocked.
 570  *      bucket The free/alloc bucket with items, cpu queue must be locked.
 571  *
 572  * Returns:
 573  *      Nothing
 574  */
 575
 576 static void
 577 bucket_drain(uma_zone_t zone, uma_bucket_t bucket)
 578 {
 579         uma_slab_t slab;
 580         int mzone;
 581         void *item;
 582
 583         if (bucket == NULL)
 584                 return;
 585
 586         slab = NULL;
 587         mzone = 0;
 588
 589         /* We have to lookup the slab again for malloc.. */
 590         if (zone->uz_keg->uk_flags & UMA_ZONE_MALLOC)
 591                 mzone = 1;
 592
 593         while (bucket->ub_cnt > 0)  {
 594                 bucket->ub_cnt--;
 595                 item = bucket->ub_bucket[bucket->ub_cnt];
 596 #ifdef INVARIANTS
 597                 bucket->ub_bucket[bucket->ub_cnt] = NULL;
 598                 KASSERT(item != NULL,
 599                     ("bucket_drain: botched ptr, item is NULL"));
 600 #endif
 601                 /*
 602                  * This is extremely inefficient.  The slab pointer was passed
 603                  * to uma_zfree_arg, but we lost it because the buckets don't
 604                  * hold them.  This will go away when free() gets a size passed
 605                  * to it.
 606                  */
 607                 if (mzone)
 608                         slab = vtoslab((vm_offset_t)item & (~UMA_SLAB_MASK));
 609                 uma_zfree_internal(zone, item, slab, SKIP_DTOR);
 610         }
 611 }
 612
 613 /*
 614  * Drains the per cpu caches for a zone.
 615  *
 616  * Arguments:
 617  *      zone     The zone to drain, must be unlocked.
 618  *
 619  * Returns:
 620  *      Nothing
 621  */
 622 static void
 623 cache_drain(uma_zone_t zone)
 624 {
 625         uma_cache_t cache;
 626         int cpu;
 627
 628         /*
 629          * We have to lock each cpu cache before locking the zone
 630          */
 631         for (cpu = 0; cpu <= mp_maxid; cpu++) {
 632                 if (CPU_ABSENT(cpu))
 633                         continue;
 634                 CPU_LOCK(cpu);
 635                 cache = &zone->uz_cpu[cpu];
 636                 bucket_drain(zone, cache->uc_allocbucket);
 637                 bucket_drain(zone, cache->uc_freebucket);
 638                 if (cache->uc_allocbucket != NULL)
 639                         bucket_free(cache->uc_allocbucket);
 640                 if (cache->uc_freebucket != NULL)
 641                         bucket_free(cache->uc_freebucket);
 642                 cache->uc_allocbucket = cache->uc_freebucket = NULL;
 643         }
 644         ZONE_LOCK(zone);
 645         bucket_cache_drain(zone);
 646         ZONE_UNLOCK(zone);
 647         for (cpu = 0; cpu <= mp_maxid; cpu++) {
 648                 if (CPU_ABSENT(cpu))
 649                         continue;
 650                 CPU_UNLOCK(cpu);
 651         }
 652 }
 653
 654 /*
 655  * Drain the cached buckets from a zone.  Expects a locked zone on entry.
 656  */
 657 static void
 658 bucket_cache_drain(uma_zone_t zone)
 659 {
 660         uma_bucket_t bucket;
 661
 662         /*
 663          * Drain the bucket queues and free the buckets, we just keep two per
 664          * cpu (alloc/free).
 665          */
 666         while ((bucket = LIST_FIRST(&zone->uz_full_bucket)) != NULL) {
 667                 LIST_REMOVE(bucket, ub_link);
 668                 ZONE_UNLOCK(zone);
 669                 bucket_drain(zone, bucket);
 670                 bucket_free(bucket);
 671                 ZONE_LOCK(zone);
 672         }
 673
 674         /* Now we do the free queue.. */
 675         while ((bucket = LIST_FIRST(&zone->uz_free_bucket)) != NULL) {
 676                 LIST_REMOVE(bucket, ub_link);
 677                 bucket_free(bucket);
 678         }
 679 }
 680
 681 /*
 682  * Frees pages from a zone back to the system.  This is done on demand from
 683  * the pageout daemon.
 684  *
 685  * Arguments:
 686  *      zone  The zone to free pages from
 687  *       all  Should we drain all items?
 688  *
 689  * Returns:
 690  *      Nothing.
 691  */
 692 static void
 693 zone_drain(uma_zone_t zone)
 694 {
 695         struct slabhead freeslabs = { 0 };
 696         uma_keg_t keg;
 697         uma_slab_t slab;
 698         uma_slab_t n;
 699         u_int8_t flags;
 700         u_int8_t *mem;
 701         int i;
 702
 703         keg = zone->uz_keg;
 704
 705         /*
 706          * We don't want to take pages from statically allocated zones at this
 707          * time
 708          */
 709         if (keg->uk_flags & UMA_ZONE_NOFREE || keg->uk_freef == NULL)
 710                 return;
 711
 712         ZONE_LOCK(zone);
 713
 714 #ifdef UMA_DEBUG
 715         printf("%s free items: %u\n", zone->uz_name, keg->uk_free);
 716 #endif
 717         bucket_cache_drain(zone);
 718         if (keg->uk_free == 0)
 719                 goto finished;
 720
 721         slab = LIST_FIRST(&keg->uk_free_slab);
 722         while (slab) {
 723                 n = LIST_NEXT(slab, us_link);
 724
 725                 /* We have no where to free these to */
 726                 if (slab->us_flags & UMA_SLAB_BOOT) {
 727                         slab = n;
 728                         continue;
 729                 }
 730
 731                 LIST_REMOVE(slab, us_link);
 732                 keg->uk_pages -= keg->uk_ppera;
 733                 keg->uk_free -= keg->uk_ipers;
 734
 735                 if (keg->uk_flags & UMA_ZONE_HASH)
 736                         UMA_HASH_REMOVE(&keg->uk_hash, slab, slab->us_data);
 737
 738                 SLIST_INSERT_HEAD(&freeslabs, slab, us_hlink);
 739
 740                 slab = n;
 741         }
 742 finished:
 743         ZONE_UNLOCK(zone);
 744
 745         while ((slab = SLIST_FIRST(&freeslabs)) != NULL) {
 746                 SLIST_REMOVE(&freeslabs, slab, uma_slab, us_hlink);
 747                 if (keg->uk_fini)
 748                         for (i = 0; i < keg->uk_ipers; i++)
 749                                 keg->uk_fini(
 750                                     slab->us_data + (keg->uk_rsize * i),
 751                                     keg->uk_size);
 752                 flags = slab->us_flags;
 753                 mem = slab->us_data;
 754
 755                 if ((keg->uk_flags & UMA_ZONE_MALLOC) ||
 756                     (keg->uk_flags & UMA_ZONE_REFCNT)) {
 757                         vm_object_t obj;
 758
 759                         if (flags & UMA_SLAB_KMEM)
 760                                 obj = kmem_object;
 761                         else
 762                                 obj = NULL;
 763                         for (i = 0; i < keg->uk_ppera; i++)
 764                                 vsetobj((vm_offset_t)mem + (i * PAGE_SIZE),
 765                                     obj);
 766                 }
 767                 if (keg->uk_flags & UMA_ZONE_OFFPAGE)
 768                         uma_zfree_internal(keg->uk_slabzone, slab, NULL,
 769                             SKIP_NONE);
 770 #ifdef UMA_DEBUG
 771                 printf("%s: Returning %d bytes.\n",
 772                     zone->uz_name, UMA_SLAB_SIZE * keg->uk_ppera);
 773 #endif
 774                 keg->uk_freef(mem, UMA_SLAB_SIZE * keg->uk_ppera, flags);
 775         }
 776 }
 777
 778 /*
 779  * Allocate a new slab for a zone.  This does not insert the slab onto a list.
 780  *
 781  * Arguments:
 782  *      zone  The zone to allocate slabs for
 783  *      wait  Shall we wait?
 784  *
 785  * Returns:
 786  *      The slab that was allocated or NULL if there is no memory and the
 787  *      caller specified M_NOWAIT.
 788  */
 789 static uma_slab_t
 790 slab_zalloc(uma_zone_t zone, int wait)
 791 {
 792         uma_slabrefcnt_t slabref;
 793         uma_slab_t slab;
 794         uma_keg_t keg;
 795         u_int8_t *mem;
 796         u_int8_t flags;
 797         int i;
 798
 799         slab = NULL;
 800         keg = zone->uz_keg;
 801
 802 #ifdef UMA_DEBUG
 803         printf("slab_zalloc:  Allocating a new slab for %s\n", zone->uz_name);
 804 #endif
 805         ZONE_UNLOCK(zone);
 806
 807         if (keg->uk_flags & UMA_ZONE_OFFPAGE) {
 808                 slab = uma_zalloc_internal(keg->uk_slabzone, NULL, wait);
 809                 if (slab == NULL) {
 810                         ZONE_LOCK(zone);
 811                         return NULL;
 812                 }
 813         }
 814
 815         /*
 816          * This reproduces the old vm_zone behavior of zero filling pages the
 817          * first time they are added to a zone.
 818          *
 819          * Malloced items are zeroed in uma_zalloc.
 820          */
 821
 822         if ((keg->uk_flags & UMA_ZONE_MALLOC) == 0)
 823                 wait |= M_ZERO;
 824         else
 825                 wait &= ~M_ZERO;
 826
 827         mem = keg->uk_allocf(zone, keg->uk_ppera * UMA_SLAB_SIZE,
 828             &flags, wait);
 829         if (mem == NULL) {
 830                 if (keg->uk_flags & UMA_ZONE_OFFPAGE)
 831                         uma_zfree_internal(keg->uk_slabzone, slab, NULL, 0);
 832                 ZONE_LOCK(zone);
 833                 return (NULL);
 834         }
 835
 836         /* Point the slab into the allocated memory */
 837         if (!(keg->uk_flags & UMA_ZONE_OFFPAGE))
 838                 slab = (uma_slab_t )(mem + keg->uk_pgoff);
 839
 840         if ((keg->uk_flags & UMA_ZONE_MALLOC) ||
 841             (keg->uk_flags & UMA_ZONE_REFCNT))
 842                 for (i = 0; i < keg->uk_ppera; i++)
 843                         vsetslab((vm_offset_t)mem + (i * PAGE_SIZE), slab);
 844
 845         slab->us_keg = keg;
 846         slab->us_data = mem;
 847         slab->us_freecount = keg->uk_ipers;
 848         slab->us_firstfree = 0;
 849         slab->us_flags = flags;
 850
 851         if (keg->uk_flags & UMA_ZONE_REFCNT) {
 852                 slabref = (uma_slabrefcnt_t)slab;
 853                 for (i = 0; i < keg->uk_ipers; i++) {
 854                         slabref->us_freelist[i].us_refcnt = 0;
 855                         slabref->us_freelist[i].us_item = i+1;
 856                 }
 857         } else {
 858                 for (i = 0; i < keg->uk_ipers; i++)
 859                         slab->us_freelist[i].us_item = i+1;
 860         }
 861
 862         if (keg->uk_init != NULL) {
 863                 for (i = 0; i < keg->uk_ipers; i++)
 864                         if (keg->uk_init(slab->us_data + (keg->uk_rsize * i),
 865                             keg->uk_size, wait) != 0)
 866                                 break;
 867                 if (i != keg->uk_ipers) {
 868                         if (keg->uk_fini != NULL) {
 869                                 for (i--; i > -1; i--)
 870                                         keg->uk_fini(slab->us_data +
 871                                             (keg->uk_rsize * i),
 872                                             keg->uk_size);
 873                         }
 874                         if ((keg->uk_flags & UMA_ZONE_MALLOC) ||
 875                             (keg->uk_flags & UMA_ZONE_REFCNT))
 876                                 for (i = 0; i < keg->uk_ppera; i++)
 877                                         vsetobj((vm_offset_t)mem +
 878                                             (i * PAGE_SIZE), NULL);
 879                         if (keg->uk_flags & UMA_ZONE_OFFPAGE)
 880                                 uma_zfree_internal(keg->uk_slabzone, slab,
 881                                     NULL, SKIP_NONE);
 882                         keg->uk_freef(mem, UMA_SLAB_SIZE * keg->uk_ppera,
 883                             flags);
 884                         ZONE_LOCK(zone);
 885                         return (NULL);
 886                 }
 887         }
 888         ZONE_LOCK(zone);
 889
 890         if (keg->uk_flags & UMA_ZONE_HASH)
 891                 UMA_HASH_INSERT(&keg->uk_hash, slab, mem);
 892
 893         keg->uk_pages += keg->uk_ppera;
 894         keg->uk_free += keg->uk_ipers;
 895
 896         return (slab);
 897 }
 898
 899 /*
 900  * This function is intended to be used early on in place of page_alloc() so
 901  * that we may use the boot time page cache to satisfy allocations before
 902  * the VM is ready.
 903  */
 904 static void *
 905 startup_alloc(uma_zone_t zone, int bytes, u_int8_t *pflag, int wait)
 906 {
 907         uma_keg_t keg;
 908
 909         keg = zone->uz_keg;
 910
 911         /*
 912          * Check our small startup cache to see if it has pages remaining.
 913          */
 914         mtx_lock(&uma_mtx);
 915         if (uma_boot_free != 0) {
 916                 uma_slab_t tmps;
 917
 918                 tmps = LIST_FIRST(&uma_boot_pages);
 919                 LIST_REMOVE(tmps, us_link);
 920                 uma_boot_free--;
 921                 mtx_unlock(&uma_mtx);
 922                 *pflag = tmps->us_flags;
 923                 return (tmps->us_data);
 924         }
 925         mtx_unlock(&uma_mtx);
 926         if (booted == 0)
 927                 panic("UMA: Increase UMA_BOOT_PAGES");
 928         /*
 929          * Now that we've booted reset these users to their real allocator.
 930          */
 931 #ifdef UMA_MD_SMALL_ALLOC
 932         keg->uk_allocf = uma_small_alloc;
 933 #else
 934         keg->uk_allocf = page_alloc;
 935 #endif
 936         return keg->uk_allocf(zone, bytes, pflag, wait);
 937 }
 938
 939 /*
 940  * Allocates a number of pages from the system
 941  *
 942  * Arguments:
 943  *      zone  Unused
 944  *      bytes  The number of bytes requested
 945  *      wait  Shall we wait?
 946  *
 947  * Returns:
 948  *      A pointer to the alloced memory or possibly
 949  *      NULL if M_NOWAIT is set.
 950  */
 951 static void *
 952 page_alloc(uma_zone_t zone, int bytes, u_int8_t *pflag, int wait)
 953 {
 954         void *p;        /* Returned page */
 955
 956         *pflag = UMA_SLAB_KMEM;
 957         p = (void *) kmem_malloc(kmem_map, bytes, wait);
 958
 959         return (p);
 960 }
 961
 962 /*
 963  * Allocates a number of pages from within an object
 964  *
 965  * Arguments:
 966  *      zone   Unused
 967  *      bytes  The number of bytes requested
 968  *      wait   Shall we wait?
 969  *
 970  * Returns:
 971  *      A pointer to the alloced memory or possibly
 972  *      NULL if M_NOWAIT is set.
 973  */
 974 static void *
 975 obj_alloc(uma_zone_t zone, int bytes, u_int8_t *flags, int wait)
 976 {
 977         vm_object_t object;
 978         vm_offset_t retkva, zkva;
 979         vm_page_t p;
 980         int pages, startpages;
 981
 982         object = zone->uz_keg->uk_obj;
 983         retkva = 0;
 984
 985         /*
 986          * This looks a little weird since we're getting one page at a time.
 987          */
 988         VM_OBJECT_LOCK(object);
 989         p = TAILQ_LAST(&object->memq, pglist);
 990         pages = p != NULL ? p->pindex + 1 : 0;
 991         startpages = pages;
 992         zkva = zone->uz_keg->uk_kva + pages * PAGE_SIZE;
 993         for (; bytes > 0; bytes -= PAGE_SIZE) {
 994                 p = vm_page_alloc(object, pages,
 995                     VM_ALLOC_INTERRUPT | VM_ALLOC_WIRED);
 996                 if (p == NULL) {
 997                         if (pages != startpages)
 998                                 pmap_qremove(retkva, pages - startpages);
 999                         while (pages != startpages) {
1000                                 pages--;
1001                                 p = TAILQ_LAST(&object->memq, pglist);
1002                                 vm_page_lock_queues();
1003                                 vm_page_unwire(p, 0);
1004                                 vm_page_free(p);
1005                                 vm_page_unlock_queues();
1006                         }
1007                         retkva = 0;
1008                         goto done;
1009                 }
1010                 pmap_qenter(zkva, &p, 1);
1011                 if (retkva == 0)
1012                         retkva = zkva;
1013                 zkva += PAGE_SIZE;
1014                 pages += 1;
1015         }
1016 done:
1017         VM_OBJECT_UNLOCK(object);
1018         *flags = UMA_SLAB_PRIV;
1019
1020         return ((void *)retkva);
1021 }
1022
1023 /*
1024  * Frees a number of pages to the system
1025  *
1026  * Arguments:
1027  *      mem   A pointer to the memory to be freed
1028  *      size  The size of the memory being freed
1029  *      flags The original p->us_flags field
1030  *
1031  * Returns:
1032  *      Nothing
1033  */
1034 static void
1035 page_free(void *mem, int size, u_int8_t flags)
1036 {
1037         vm_map_t map;
1038
1039         if (flags & UMA_SLAB_KMEM)
1040                 map = kmem_map;
1041         else
1042                 panic("UMA: page_free used with invalid flags %d\n", flags);
1043
1044         kmem_free(map, (vm_offset_t)mem, size);
1045 }
1046
/*
 * Zero fill initializer: clears 'size' bytes starting at 'mem'.
 *
 * Arguments/Returns follow uma_init specifications.  The 'flags'
 * argument is not consulted and the return value is always 0.
 */
static int
zero_init(void *mem, int size, int flags)
{

        bzero(mem, size);

        return (0);
}
1058
1059 /*
1060  * Finish creating a small uma zone.  This calculates ipers, and the zone size.
1061  *
1062  * Arguments
1063  *      zone  The zone we should initialize
1064  *
1065  * Returns
1066  *      Nothing
1067  */
1068 static void
1069 zone_small_init(uma_zone_t zone)
1070 {
1071         uma_keg_t keg;
1072         u_int rsize;
1073         u_int memused;
1074         u_int wastedspace;
1075         u_int shsize;
1076
1077         keg = zone->uz_keg;
1078         KASSERT(keg != NULL, ("Keg is null in zone_small_init"));
1079         rsize = keg->uk_size;
1080
1081         if (rsize < UMA_SMALLEST_UNIT)
1082                 rsize = UMA_SMALLEST_UNIT;
1083         if (rsize & keg->uk_align)
1084                 rsize = (rsize & ~keg->uk_align) + (keg->uk_align + 1);
1085
1086         keg->uk_rsize = rsize;
1087         keg->uk_ppera = 1;
1088
1089         if (keg->uk_flags & UMA_ZONE_REFCNT) {
1090                 rsize += UMA_FRITMREF_SZ;       /* linkage & refcnt */
1091                 shsize = sizeof(struct uma_slab_refcnt);
1092         } else {
1093                 rsize += UMA_FRITM_SZ;  /* Account for linkage */
1094                 shsize = sizeof(struct uma_slab);
1095         }
1096
1097         keg->uk_ipers = (UMA_SLAB_SIZE - shsize) / rsize;
1098         KASSERT(keg->uk_ipers != 0, ("zone_small_init: ipers is 0"));
1099         memused = keg->uk_ipers * rsize + shsize;
1100         wastedspace = UMA_SLAB_SIZE - memused;
1101
1102         /*
1103          * We can't do OFFPAGE if we're internal or if we've been
1104          * asked to not go to the VM for buckets.  If we do this we
1105          * may end up going to the VM (kmem_map) for slabs which we
1106          * do not want to do if we're UMA_ZFLAG_CACHEONLY as a
1107          * result of UMA_ZONE_VM, which clearly forbids it.
1108          */
1109         if ((keg->uk_flags & UMA_ZFLAG_INTERNAL) ||
1110             (keg->uk_flags & UMA_ZFLAG_CACHEONLY))
1111                 return;
1112
1113         if ((wastedspace >= UMA_MAX_WASTE) &&
1114             (keg->uk_ipers < (UMA_SLAB_SIZE / keg->uk_rsize))) {
1115                 keg->uk_ipers = UMA_SLAB_SIZE / keg->uk_rsize;
1116                 KASSERT(keg->uk_ipers <= 255,
1117                     ("zone_small_init: keg->uk_ipers too high!"));
1118 #ifdef UMA_DEBUG
1119                 printf("UMA decided we need offpage slab headers for "
1120                     "zone: %s, calculated wastedspace = %d, "
1121                     "maximum wasted space allowed = %d, "
1122                     "calculated ipers = %d, "
1123                     "new wasted space = %d\n", zone->uz_name, wastedspace,
1124                     UMA_MAX_WASTE, keg->uk_ipers,
1125                     UMA_SLAB_SIZE - keg->uk_ipers * keg->uk_rsize);
1126 #endif
1127                 keg->uk_flags |= UMA_ZONE_OFFPAGE;
1128                 if ((keg->uk_flags & UMA_ZONE_MALLOC) == 0)
1129                         keg->uk_flags |= UMA_ZONE_HASH;
1130         }
1131 }
1132
1133 /*
1134  * Finish creating a large (> UMA_SLAB_SIZE) uma zone.  Just give in and do
1135  * OFFPAGE for now.  When I can allow for more dynamic slab sizes this will be
1136  * more complicated.
1137  *
1138  * Arguments
1139  *      zone  The zone we should initialize
1140  *
1141  * Returns
1142  *      Nothing
1143  */
1144 static void
1145 zone_large_init(uma_zone_t zone)
1146 {
1147         uma_keg_t keg;
1148         int pages;
1149
1150         keg = zone->uz_keg;
1151
1152         KASSERT(keg != NULL, ("Keg is null in zone_large_init"));
1153         KASSERT((keg->uk_flags & UMA_ZFLAG_CACHEONLY) == 0,
1154             ("zone_large_init: Cannot large-init a UMA_ZFLAG_CACHEONLY zone"));
1155
1156         pages = keg->uk_size / UMA_SLAB_SIZE;
1157
1158         /* Account for remainder */
1159         if ((pages * UMA_SLAB_SIZE) < keg->uk_size)
1160                 pages++;
1161
1162         keg->uk_ppera = pages;
1163         keg->uk_ipers = 1;
1164
1165         keg->uk_flags |= UMA_ZONE_OFFPAGE;
1166         if ((keg->uk_flags & UMA_ZONE_MALLOC) == 0)
1167                 keg->uk_flags |= UMA_ZONE_HASH;
1168
1169         keg->uk_rsize = keg->uk_size;
1170 }
1171
1172 /*
1173  * Keg header ctor.  This initializes all fields, locks, etc.  And inserts
1174  * the keg onto the global keg list.
1175  *
1176  * Arguments/Returns follow uma_ctor specifications
1177  *      udata  Actually uma_kctor_args
1178  */
1179 static int
1180 keg_ctor(void *mem, int size, void *udata, int flags)
1181 {
1182         struct uma_kctor_args *arg = udata;
1183         uma_keg_t keg = mem;
1184         uma_zone_t zone;
1185
1186         bzero(keg, size);
1187         keg->uk_size = arg->size;
1188         keg->uk_init = arg->uminit;
1189         keg->uk_fini = arg->fini;
1190         keg->uk_align = arg->align;
1191         keg->uk_free = 0;
1192         keg->uk_pages = 0;
1193         keg->uk_flags = arg->flags;
1194         keg->uk_allocf = page_alloc;
1195         keg->uk_freef = page_free;
1196         keg->uk_recurse = 0;
1197         keg->uk_slabzone = NULL;
1198
1199         /*
1200          * The master zone is passed to us at keg-creation time.
1201          */
1202         zone = arg->zone;
1203         zone->uz_keg = keg;
1204
1205         if (arg->flags & UMA_ZONE_VM)
1206                 keg->uk_flags |= UMA_ZFLAG_CACHEONLY;
1207
1208         if (arg->flags & UMA_ZONE_ZINIT)
1209                 keg->uk_init = zero_init;
1210
1211         /*
1212          * The +UMA_FRITM_SZ added to uk_size is to account for the
1213          * linkage that is added to the size in zone_small_init().  If
1214          * we don't account for this here then we may end up in
1215          * zone_small_init() with a calculated 'ipers' of 0.
1216          */
1217         if (keg->uk_flags & UMA_ZONE_REFCNT) {
1218                 if ((keg->uk_size+UMA_FRITMREF_SZ) >
1219                     (UMA_SLAB_SIZE - sizeof(struct uma_slab_refcnt)))
1220                         zone_large_init(zone);
1221                 else
1222                         zone_small_init(zone);
1223         } else {
1224                 if ((keg->uk_size+UMA_FRITM_SZ) >
1225                     (UMA_SLAB_SIZE - sizeof(struct uma_slab)))
1226                         zone_large_init(zone);
1227                 else
1228                         zone_small_init(zone);
1229         }
1230
1231         if (keg->uk_flags & UMA_ZONE_OFFPAGE) {
1232                 if (keg->uk_flags & UMA_ZONE_REFCNT)
1233                         keg->uk_slabzone = slabrefzone;
1234                 else
1235                         keg->uk_slabzone = slabzone;
1236         }
1237
1238         /*
1239          * If we haven't booted yet we need allocations to go through the
1240          * startup cache until the vm is ready.
1241          */
1242         if (keg->uk_ppera == 1) {
1243 #ifdef UMA_MD_SMALL_ALLOC
1244                 keg->uk_allocf = uma_small_alloc;
1245                 keg->uk_freef = uma_small_free;
1246 #endif
1247                 if (booted == 0)
1248                         keg->uk_allocf = startup_alloc;
1249         }
1250
1251         /*
1252          * Initialize keg's lock (shared among zones) through
1253          * Master zone
1254          */
1255         zone->uz_lock = &keg->uk_lock;
1256         if (arg->flags & UMA_ZONE_MTXCLASS)
1257                 ZONE_LOCK_INIT(zone, 1);
1258         else
1259                 ZONE_LOCK_INIT(zone, 0);
1260
1261         /*
1262          * If we're putting the slab header in the actual page we need to
1263          * figure out where in each page it goes.  This calculates a right
1264          * justified offset into the memory on an ALIGN_PTR boundary.
1265          */
1266         if (!(keg->uk_flags & UMA_ZONE_OFFPAGE)) {
1267                 u_int totsize;
1268
1269                 /* Size of the slab struct and free list */
1270                 if (keg->uk_flags & UMA_ZONE_REFCNT)
1271                         totsize = sizeof(struct uma_slab_refcnt) +
1272                             keg->uk_ipers * UMA_FRITMREF_SZ;
1273                 else
1274                         totsize = sizeof(struct uma_slab) +
1275                             keg->uk_ipers * UMA_FRITM_SZ;
1276
1277                 if (totsize & UMA_ALIGN_PTR)
1278                         totsize = (totsize & ~UMA_ALIGN_PTR) +
1279                             (UMA_ALIGN_PTR + 1);
1280                 keg->uk_pgoff = UMA_SLAB_SIZE - totsize;
1281
1282                 if (keg->uk_flags & UMA_ZONE_REFCNT)
1283                         totsize = keg->uk_pgoff + sizeof(struct uma_slab_refcnt)
1284                             + keg->uk_ipers * UMA_FRITMREF_SZ;
1285                 else
1286                         totsize = keg->uk_pgoff + sizeof(struct uma_slab)
1287                             + keg->uk_ipers * UMA_FRITM_SZ;
1288
1289                 /*
1290                  * The only way the following is possible is if with our
1291                  * UMA_ALIGN_PTR adjustments we are now bigger than
1292                  * UMA_SLAB_SIZE.  I haven't checked whether this is
1293                  * mathematically possible for all cases, so we make
1294                  * sure here anyway.
1295                  */
1296                 if (totsize > UMA_SLAB_SIZE) {
1297                         printf("zone %s ipers %d rsize %d size %d\n",
1298                             zone->uz_name, keg->uk_ipers, keg->uk_rsize,
1299                             keg->uk_size);
1300                         panic("UMA slab won't fit.\n");
1301                 }
1302         }
1303
1304         if (keg->uk_flags & UMA_ZONE_HASH)
1305                 hash_alloc(&keg->uk_hash);
1306
1307 #ifdef UMA_DEBUG
1308         printf("%s(%p) size = %d ipers = %d ppera = %d pgoff = %d\n",
1309             zone->uz_name, zone,
1310             keg->uk_size, keg->uk_ipers,
1311             keg->uk_ppera, keg->uk_pgoff);
1312 #endif
1313
1314         LIST_INSERT_HEAD(&keg->uk_zones, zone, uz_link);
1315
1316         mtx_lock(&uma_mtx);
1317         LIST_INSERT_HEAD(&uma_kegs, keg, uk_link);
1318         mtx_unlock(&uma_mtx);
1319         return (0);
1320 }
1321
1322 /*
1323  * Zone header ctor.  This initializes all fields, locks, etc.
1324  *
1325  * Arguments/Returns follow uma_ctor specifications
1326  *      udata  Actually uma_zctor_args
1327  */
1328
1329 static int
1330 zone_ctor(void *mem, int size, void *udata, int flags)
1331 {
1332         struct uma_zctor_args *arg = udata;
1333         uma_zone_t zone = mem;
1334         uma_zone_t z;
1335         uma_keg_t keg;
1336
1337         bzero(zone, size);
1338         zone->uz_name = arg->name;
1339         zone->uz_ctor = arg->ctor;
1340         zone->uz_dtor = arg->dtor;
1341         zone->uz_init = NULL;
1342         zone->uz_fini = NULL;
1343         zone->uz_allocs = 0;
1344         zone->uz_fills = zone->uz_count = 0;
1345
1346         if (arg->flags & UMA_ZONE_SECONDARY) {
1347                 KASSERT(arg->keg != NULL, ("Secondary zone on zero'd keg"));
1348                 keg = arg->keg;
1349                 zone->uz_keg = keg;
1350                 zone->uz_init = arg->uminit;
1351                 zone->uz_fini = arg->fini;
1352                 zone->uz_lock = &keg->uk_lock;
1353                 mtx_lock(&uma_mtx);
1354                 ZONE_LOCK(zone);
1355                 keg->uk_flags |= UMA_ZONE_SECONDARY;
1356                 LIST_FOREACH(z, &keg->uk_zones, uz_link) {
1357                         if (LIST_NEXT(z, uz_link) == NULL) {
1358                                 LIST_INSERT_AFTER(z, zone, uz_link);
1359                                 break;
1360                         }
1361                 }
1362                 ZONE_UNLOCK(zone);
1363                 mtx_unlock(&uma_mtx);
1364         } else if (arg->keg == NULL) {
1365                 if (uma_kcreate(zone, arg->size, arg->uminit, arg->fini,
1366                     arg->align, arg->flags) == NULL)
1367                         return (ENOMEM);
1368         } else {
1369                 struct uma_kctor_args karg;
1370                 int error;
1371
1372                 /* We should only be here from uma_startup() */
1373                 karg.size = arg->size;
1374                 karg.uminit = arg->uminit;
1375                 karg.fini = arg->fini;
1376                 karg.align = arg->align;
1377                 karg.flags = arg->flags;
1378                 karg.zone = zone;
1379                 error = keg_ctor(arg->keg, sizeof(struct uma_keg), &karg,
1380                     flags);
1381                 if (error)
1382                         return (error);
1383         }
1384         keg = zone->uz_keg;
1385         zone->uz_lock = &keg->uk_lock;
1386
1387         /*
1388          * Some internal zones don't have room allocated for the per cpu
1389          * caches.  If we're internal, bail out here.
1390          */
1391         if (keg->uk_flags & UMA_ZFLAG_INTERNAL) {
1392                 KASSERT((keg->uk_flags & UMA_ZONE_SECONDARY) == 0,
1393                     ("Secondary zone requested UMA_ZFLAG_INTERNAL"));
1394                 return (0);
1395         }
1396
1397         if (keg->uk_flags & UMA_ZONE_MAXBUCKET)
1398                 zone->uz_count = BUCKET_MAX;
1399         else if (keg->uk_ipers <= BUCKET_MAX)
1400                 zone->uz_count = keg->uk_ipers;
1401         else
1402                 zone->uz_count = BUCKET_MAX;
1403         return (0);
1404 }
1405
1406 /*
1407  * Keg header dtor.  This frees all data, destroys locks, frees the hash
1408  * table and removes the keg from the global list.
1409  *
1410  * Arguments/Returns follow uma_dtor specifications
1411  *      udata  unused
1412  */
1413 static void
1414 keg_dtor(void *arg, int size, void *udata)
1415 {
1416         uma_keg_t keg;
1417
1418         keg = (uma_keg_t)arg;
1419         mtx_lock(&keg->uk_lock);
1420         if (keg->uk_free != 0) {
1421                 printf("Freed UMA keg was not empty (%d items). "
1422                     " Lost %d pages of memory.\n",
1423                     keg->uk_free, keg->uk_pages);
1424         }
1425         mtx_unlock(&keg->uk_lock);
1426
1427         if (keg->uk_flags & UMA_ZONE_HASH)
1428                 hash_free(&keg->uk_hash);
1429
1430         mtx_destroy(&keg->uk_lock);
1431 }
1432
1433 /*
1434  * Zone header dtor.
1435  *
1436  * Arguments/Returns follow uma_dtor specifications
1437  *      udata  unused
1438  */
1439 static void
1440 zone_dtor(void *arg, int size, void *udata)
1441 {
1442         uma_zone_t zone;
1443         uma_keg_t keg;
1444
1445         zone = (uma_zone_t)arg;
1446         keg = zone->uz_keg;
1447
1448         if (!(keg->uk_flags & UMA_ZFLAG_INTERNAL))
1449                 cache_drain(zone);
1450
1451         mtx_lock(&uma_mtx);
1452         zone_drain(zone);
1453         if (keg->uk_flags & UMA_ZONE_SECONDARY) {
1454                 LIST_REMOVE(zone, uz_link);
1455                 /*
1456                  * XXX there are some races here where
1457                  * the zone can be drained but zone lock
1458                  * released and then refilled before we
1459                  * remove it... we dont care for now
1460                  */
1461                 ZONE_LOCK(zone);
1462                 if (LIST_EMPTY(&keg->uk_zones))
1463                         keg->uk_flags &= ~UMA_ZONE_SECONDARY;
1464                 ZONE_UNLOCK(zone);
1465                 mtx_unlock(&uma_mtx);
1466         } else {
1467                 LIST_REMOVE(keg, uk_link);
1468                 LIST_REMOVE(zone, uz_link);
1469                 mtx_unlock(&uma_mtx);
1470                 uma_zfree_internal(kegs, keg, NULL, SKIP_NONE);
1471         }
1472         zone->uz_keg = NULL;
1473 }
1474
1475 /*
1476  * Traverses every zone in the system and calls a callback
1477  *
1478  * Arguments:
1479  *      zfunc  A pointer to a function which accepts a zone
1480  *              as an argument.
1481  *
1482  * Returns:
1483  *      Nothing
1484  */
1485 static void
1486 zone_foreach(void (*zfunc)(uma_zone_t))
1487 {
1488         uma_keg_t keg;
1489         uma_zone_t zone;
1490
1491         mtx_lock(&uma_mtx);
1492         LIST_FOREACH(keg, &uma_kegs, uk_link) {
1493                 LIST_FOREACH(zone, &keg->uk_zones, uz_link)
1494                         zfunc(zone);
1495         }
1496         mtx_unlock(&uma_mtx);
1497 }
1498
1499 /* Public functions */
1500 /* See uma.h */
1501 void
1502 uma_startup(void *bootmem)
1503 {
1504         struct uma_zctor_args args;
1505         uma_slab_t slab;
1506         u_int slabsize;
1507         u_int objsize, totsize, wsize;
1508         int i;
1509
1510 #ifdef UMA_DEBUG
1511         printf("Creating uma keg headers zone and keg.\n");
1512 #endif
1513         /*
1514          * The general UMA lock is a recursion-allowed lock because
1515          * there is a code path where, while we're still configured
1516          * to use startup_alloc() for backend page allocations, we
1517          * may end up in uma_reclaim() which calls zone_foreach(zone_drain),
1518          * which grabs uma_mtx, only to later call into startup_alloc()
1519          * because while freeing we needed to allocate a bucket.  Since
1520          * startup_alloc() also takes uma_mtx, we need to be able to
1521          * recurse on it.
1522          */
1523         mtx_init(&uma_mtx, "UMA lock", NULL, MTX_DEF | MTX_RECURSE);
1524
1525         /*
1526          * Figure out the maximum number of items-per-slab we'll have if
1527          * we're using the OFFPAGE slab header to track free items, given
1528          * all possible object sizes and the maximum desired wastage
1529          * (UMA_MAX_WASTE).
1530          *
1531          * We iterate until we find an object size for
1532          * which the calculated wastage in zone_small_init() will be
1533          * enough to warrant OFFPAGE.  Since wastedspace versus objsize
1534          * is an overall increasing see-saw function, we find the smallest
1535          * objsize such that the wastage is always acceptable for objects
1536          * with that objsize or smaller.  Since a smaller objsize always
1537          * generates a larger possible uma_max_ipers, we use this computed
1538          * objsize to calculate the largest ipers possible.  Since the
1539          * ipers calculated for OFFPAGE slab headers is always larger than
1540          * the ipers initially calculated in zone_small_init(), we use
1541          * the former's equation (UMA_SLAB_SIZE / keg->uk_rsize) to
1542          * obtain the maximum ipers possible for offpage slab headers.
1543          *
1544          * It should be noted that ipers versus objsize is an inversly
1545          * proportional function which drops off rather quickly so as
1546          * long as our UMA_MAX_WASTE is such that the objsize we calculate
1547          * falls into the portion of the inverse relation AFTER the steep
1548          * falloff, then uma_max_ipers shouldn't be too high (~10 on i386).
1549          *
1550          * Note that we have 8-bits (1 byte) to use as a freelist index
1551          * inside the actual slab header itself and this is enough to
1552          * accomodate us.  In the worst case, a UMA_SMALLEST_UNIT sized
1553          * object with offpage slab header would have ipers =
1554          * UMA_SLAB_SIZE / UMA_SMALLEST_UNIT (currently = 256), which is
1555          * 1 greater than what our byte-integer freelist index can
1556          * accomodate, but we know that this situation never occurs as
1557          * for UMA_SMALLEST_UNIT-sized objects, we will never calculate
1558          * that we need to go to offpage slab headers.  Or, if we do,
1559          * then we trap that condition below and panic in the INVARIANTS case.
1560          */
1561         wsize = UMA_SLAB_SIZE - sizeof(struct uma_slab) - UMA_MAX_WASTE;
1562         totsize = wsize;
1563         objsize = UMA_SMALLEST_UNIT;
1564         while (totsize >= wsize) {
1565                 totsize = (UMA_SLAB_SIZE - sizeof(struct uma_slab)) /
1566                     (objsize + UMA_FRITM_SZ);
1567                 totsize *= (UMA_FRITM_SZ + objsize);
1568                 objsize++;
1569         }
1570         if (objsize > UMA_SMALLEST_UNIT)
1571                 objsize--;
1572         uma_max_ipers = UMA_SLAB_SIZE / objsize;
1573
1574         wsize = UMA_SLAB_SIZE - sizeof(struct uma_slab_refcnt) - UMA_MAX_WASTE;
1575         totsize = wsize;
1576         objsize = UMA_SMALLEST_UNIT;
1577         while (totsize >= wsize) {
1578                 totsize = (UMA_SLAB_SIZE - sizeof(struct uma_slab_refcnt)) /
1579                     (objsize + UMA_FRITMREF_SZ);
1580                 totsize *= (UMA_FRITMREF_SZ + objsize);
1581                 objsize++;
1582         }
1583         if (objsize > UMA_SMALLEST_UNIT)
1584                 objsize--;
1585         uma_max_ipers_ref = UMA_SLAB_SIZE / objsize;
1586
1587         KASSERT((uma_max_ipers_ref <= 255) && (uma_max_ipers <= 255),
1588             ("uma_startup: calculated uma_max_ipers values too large!"));
1589
1590 #ifdef UMA_DEBUG
1591         printf("Calculated uma_max_ipers (for OFFPAGE) is %d\n", uma_max_ipers);
1592         printf("Calculated uma_max_ipers_slab (for OFFPAGE) is %d\n",
1593             uma_max_ipers_ref);
1594 #endif
1595
1596         /* "manually" create the initial zone */
1597         args.name = "UMA Kegs";
1598         args.size = sizeof(struct uma_keg);
1599         args.ctor = keg_ctor;
1600         args.dtor = keg_dtor;
1601         args.uminit = zero_init;
1602         args.fini = NULL;
1603         args.keg = &masterkeg;
1604         args.align = 32 - 1;
1605         args.flags = UMA_ZFLAG_INTERNAL;
1606         /* The initial zone has no Per cpu queues so it's smaller */
1607         zone_ctor(kegs, sizeof(struct uma_zone), &args, M_WAITOK);
1608
1609 #ifdef UMA_DEBUG
1610         printf("Filling boot free list.\n");
1611 #endif
1612         for (i = 0; i < UMA_BOOT_PAGES; i++) {
1613                 slab = (uma_slab_t)((u_int8_t *)bootmem + (i * UMA_SLAB_SIZE));
1614                 slab->us_data = (u_int8_t *)slab;
1615                 slab->us_flags = UMA_SLAB_BOOT;
1616                 LIST_INSERT_HEAD(&uma_boot_pages, slab, us_link);
1617                 uma_boot_free++;
1618         }
1619
1620 #ifdef UMA_DEBUG
1621         printf("Creating uma zone headers zone and keg.\n");
1622 #endif
1623         args.name = "UMA Zones";
1624         args.size = sizeof(struct uma_zone) +
1625             (sizeof(struct uma_cache) * (mp_maxid + 1));
1626         args.ctor = zone_ctor;
1627         args.dtor = zone_dtor;
1628         args.uminit = zero_init;
1629         args.fini = NULL;
1630         args.keg = NULL;
1631         args.align = 32 - 1;
1632         args.flags = UMA_ZFLAG_INTERNAL;
1633         /* The initial zone has no Per cpu queues so it's smaller */
1634         zone_ctor(zones, sizeof(struct uma_zone), &args, M_WAITOK);
1635
1636 #ifdef UMA_DEBUG
1637         printf("Initializing pcpu cache locks.\n");
1638 #endif
1639         /* Initialize the pcpu cache lock set once and for all */
1640         for (i = 0; i <= mp_maxid; i++)
1641                 CPU_LOCK_INIT(i);
1642
1643 #ifdef UMA_DEBUG
1644         printf("Creating slab and hash zones.\n");
1645 #endif
1646
1647         /*
1648          * This is the max number of free list items we'll have with
1649          * offpage slabs.
1650          */
1651         slabsize = uma_max_ipers * UMA_FRITM_SZ;
1652         slabsize += sizeof(struct uma_slab);
1653
1654         /* Now make a zone for slab headers */
1655         slabzone = uma_zcreate("UMA Slabs",
1656                                 slabsize,
1657                                 NULL, NULL, NULL, NULL,
1658                                 UMA_ALIGN_PTR, UMA_ZFLAG_INTERNAL);
1659
1660         /*
1661          * We also create a zone for the bigger slabs with reference
1662          * counts in them, to accomodate UMA_ZONE_REFCNT zones.
1663          */
1664         slabsize = uma_max_ipers_ref * UMA_FRITMREF_SZ;
1665         slabsize += sizeof(struct uma_slab_refcnt);
1666         slabrefzone = uma_zcreate("UMA RCntSlabs",
1667                                   slabsize,
1668                                   NULL, NULL, NULL, NULL,
1669                                   UMA_ALIGN_PTR,
1670                                   UMA_ZFLAG_INTERNAL);
1671
1672         hashzone = uma_zcreate("UMA Hash",
1673             sizeof(struct slabhead *) * UMA_HASH_SIZE_INIT,
1674             NULL, NULL, NULL, NULL,
1675             UMA_ALIGN_PTR, UMA_ZFLAG_INTERNAL);
1676
1677         bucket_init();
1678
1679 #ifdef UMA_MD_SMALL_ALLOC
1680         booted = 1;
1681 #endif
1682
1683 #ifdef UMA_DEBUG
1684         printf("UMA startup complete.\n");
1685 #endif
1686 }
1687
1688 /* see uma.h */
1689 void
1690 uma_startup2(void)
1691 {
1692         booted = 1;
1693         bucket_enable();
1694 #ifdef UMA_DEBUG
1695         printf("UMA startup2 complete.\n");
1696 #endif
1697 }
1698
1699 /*
1700  * Initialize our callout handle
1701  *
1702  */
1703
1704 static void
1705 uma_startup3(void)
1706 {
1707 #ifdef UMA_DEBUG
1708         printf("Starting callout.\n");
1709 #endif
1710         callout_init(&uma_callout, CALLOUT_MPSAFE);
1711         callout_reset(&uma_callout, UMA_TIMEOUT * hz, uma_timeout, NULL);
1712 #ifdef UMA_DEBUG
1713         printf("UMA startup3 complete.\n");
1714 #endif
1715 }
1716
1717 static uma_zone_t
1718 uma_kcreate(uma_zone_t zone, size_t size, uma_init uminit, uma_fini fini,
1719                 int align, u_int16_t flags)
1720 {
1721         struct uma_kctor_args args;
1722
1723         args.size = size;
1724         args.uminit = uminit;
1725         args.fini = fini;
1726         args.align = align;
1727         args.flags = flags;
1728         args.zone = zone;
1729         return (uma_zalloc_internal(kegs, &args, M_WAITOK));
1730 }
1731
1732 /* See uma.h */
1733 uma_zone_t
1734 uma_zcreate(char *name, size_t size, uma_ctor ctor, uma_dtor dtor,
1735                 uma_init uminit, uma_fini fini, int align, u_int16_t flags)
1736
1737 {
1738         struct uma_zctor_args args;
1739
1740         /* This stuff is essential for the zone ctor */
1741         args.name = name;
1742         args.size = size;
1743         args.ctor = ctor;
1744         args.dtor = dtor;
1745         args.uminit = uminit;
1746         args.fini = fini;
1747         args.align = align;
1748         args.flags = flags;
1749         args.keg = NULL;
1750
1751         return (uma_zalloc_internal(zones, &args, M_WAITOK));
1752 }
1753
1754 /* See uma.h */
1755 uma_zone_t
1756 uma_zsecond_create(char *name, uma_ctor ctor, uma_dtor dtor,
1757                     uma_init zinit, uma_fini zfini, uma_zone_t master)
1758 {
1759         struct uma_zctor_args args;
1760
1761         args.name = name;
1762         args.size = master->uz_keg->uk_size;
1763         args.ctor = ctor;
1764         args.dtor = dtor;
1765         args.uminit = zinit;
1766         args.fini = zfini;
1767         args.align = master->uz_keg->uk_align;
1768         args.flags = master->uz_keg->uk_flags | UMA_ZONE_SECONDARY;
1769         args.keg = master->uz_keg;
1770
1771         return (uma_zalloc_internal(zones, &args, M_WAITOK));
1772 }
1773
1774 /* See uma.h */
1775 void
1776 uma_zdestroy(uma_zone_t zone)
1777 {
1778         uma_zfree_internal(zones, zone, NULL, SKIP_NONE);
1779 }
1780
1781 /* See uma.h */
1782 void *
1783 uma_zalloc_arg(uma_zone_t zone, void *udata, int flags)
1784 {
1785         void *item;
1786         uma_cache_t cache;
1787         uma_bucket_t bucket;
1788         int cpu;
1789         int badness;
1790
1791         /* This is the fast path allocation */
1792 #ifdef UMA_DEBUG_ALLOC_1
1793         printf("Allocating one item from %s(%p)\n", zone->uz_name, zone);
1794 #endif
1795         CTR3(KTR_UMA, "uma_zalloc_arg thread %x zone %s flags %d", curthread,
1796             zone->uz_name, flags);
1797
1798         if (!(flags & M_NOWAIT)) {
1799                 KASSERT(curthread->td_intr_nesting_level == 0,
1800                    ("malloc(M_WAITOK) in interrupt context"));
1801                 if (nosleepwithlocks) {
1802 #ifdef WITNESS
1803                         badness = WITNESS_WARN(WARN_GIANTOK | WARN_SLEEPOK,
1804                             NULL,
1805                             "malloc(M_WAITOK) of \"%s\", forcing M_NOWAIT",
1806                             zone->uz_name);
1807 #else
1808                         badness = 1;
1809 #endif
1810                 } else {
1811                         badness = 0;
1812 #ifdef WITNESS
1813                         WITNESS_WARN(WARN_GIANTOK | WARN_SLEEPOK, NULL,
1814                             "malloc(M_WAITOK) of \"%s\"", zone->uz_name);
1815 #endif
1816                 }
1817                 if (badness) {
1818                         flags &= ~M_WAITOK;
1819                         flags |= M_NOWAIT;
1820                 }
1821         }
1822
1823 zalloc_restart:
1824         cpu = PCPU_GET(cpuid);
1825         CPU_LOCK(cpu);
1826         cache = &zone->uz_cpu[cpu];
1827
1828 zalloc_start:
1829         bucket = cache->uc_allocbucket;
1830
1831         if (bucket) {
1832                 if (bucket->ub_cnt > 0) {
1833                         bucket->ub_cnt--;
1834                         item = bucket->ub_bucket[bucket->ub_cnt];
1835 #ifdef INVARIANTS
1836                         bucket->ub_bucket[bucket->ub_cnt] = NULL;
1837 #endif
1838                         KASSERT(item != NULL,
1839                             ("uma_zalloc: Bucket pointer mangled."));
1840                         cache->uc_allocs++;
1841 #ifdef INVARIANTS
1842                         ZONE_LOCK(zone);
1843                         uma_dbg_alloc(zone, NULL, item);
1844                         ZONE_UNLOCK(zone);
1845 #endif
1846                         CPU_UNLOCK(cpu);
1847                         if (zone->uz_ctor != NULL) {
1848                                 if (zone->uz_ctor(item, zone->uz_keg->uk_size,
1849                                     udata, flags) != 0) {
1850                                         uma_zfree_internal(zone, item, udata,
1851                                             SKIP_DTOR);
1852                                         return (NULL);
1853                                 }
1854                         }
1855                         if (flags & M_ZERO)
1856                                 bzero(item, zone->uz_keg->uk_size);
1857                         return (item);
1858                 } else if (cache->uc_freebucket) {
1859                         /*
1860                          * We have run out of items in our allocbucket.
1861                          * See if we can switch with our free bucket.
1862                          */
1863                         if (cache->uc_freebucket->ub_cnt > 0) {
1864 #ifdef UMA_DEBUG_ALLOC
1865                                 printf("uma_zalloc: Swapping empty with"
1866                                     " alloc.\n");
1867 #endif
1868                                 bucket = cache->uc_freebucket;
1869                                 cache->uc_freebucket = cache->uc_allocbucket;
1870                                 cache->uc_allocbucket = bucket;
1871
1872                                 goto zalloc_start;
1873                         }
1874                 }
1875         }
1876         ZONE_LOCK(zone);
1877         /* Since we have locked the zone we may as well send back our stats */
1878         zone->uz_allocs += cache->uc_allocs;
1879         cache->uc_allocs = 0;
1880
1881         /* Our old one is now a free bucket */
1882         if (cache->uc_allocbucket) {
1883                 KASSERT(cache->uc_allocbucket->ub_cnt == 0,
1884                     ("uma_zalloc_arg: Freeing a non free bucket."));
1885                 LIST_INSERT_HEAD(&zone->uz_free_bucket,
1886                     cache->uc_allocbucket, ub_link);
1887                 cache->uc_allocbucket = NULL;
1888         }
1889
1890         /* Check the free list for a new alloc bucket */
1891         if ((bucket = LIST_FIRST(&zone->uz_full_bucket)) != NULL) {
1892                 KASSERT(bucket->ub_cnt != 0,
1893                     ("uma_zalloc_arg: Returning an empty bucket."));
1894
1895                 LIST_REMOVE(bucket, ub_link);
1896                 cache->uc_allocbucket = bucket;
1897                 ZONE_UNLOCK(zone);
1898                 goto zalloc_start;
1899         }
1900         /* We are no longer associated with this cpu!!! */
1901         CPU_UNLOCK(cpu);
1902
1903         /* Bump up our uz_count so we get here less */
1904         if (zone->uz_count < BUCKET_MAX)
1905                 zone->uz_count++;
1906
1907         /*
1908          * Now lets just fill a bucket and put it on the free list.  If that
1909          * works we'll restart the allocation from the begining.
1910          */
1911         if (uma_zalloc_bucket(zone, flags)) {
1912                 ZONE_UNLOCK(zone);
1913                 goto zalloc_restart;
1914         }
1915         ZONE_UNLOCK(zone);
1916         /*
1917          * We may not be able to get a bucket so return an actual item.
1918          */
1919 #ifdef UMA_DEBUG
1920         printf("uma_zalloc_arg: Bucketzone returned NULL\n");
1921 #endif
1922
1923         return (uma_zalloc_internal(zone, udata, flags));
1924 }
1925
1926 static uma_slab_t
1927 uma_zone_slab(uma_zone_t zone, int flags)
1928 {
1929         uma_slab_t slab;
1930         uma_keg_t keg;
1931
1932         keg = zone->uz_keg;
1933
1934         /*
1935          * This is to prevent us from recursively trying to allocate
1936          * buckets.  The problem is that if an allocation forces us to
1937          * grab a new bucket we will call page_alloc, which will go off
1938          * and cause the vm to allocate vm_map_entries.  If we need new
1939          * buckets there too we will recurse in kmem_alloc and bad
1940          * things happen.  So instead we return a NULL bucket, and make
1941          * the code that allocates buckets smart enough to deal with it
1942          *
1943          * XXX: While we want this protection for the bucket zones so that
1944          * recursion from the VM is handled (and the calling code that
1945          * allocates buckets knows how to deal with it), we do not want
1946          * to prevent allocation from the slab header zones (slabzone
1947          * and slabrefzone) if uk_recurse is not zero for them.  The
1948          * reason is that it could lead to NULL being returned for
1949          * slab header allocations even in the M_WAITOK case, and the
1950          * caller can't handle that.
1951          */
1952         if (keg->uk_flags & UMA_ZFLAG_INTERNAL && keg->uk_recurse != 0)
1953                 if ((zone != slabzone) && (zone != slabrefzone))
1954                         return (NULL);
1955
1956         slab = NULL;
1957
1958         for (;;) {
1959                 /*
1960                  * Find a slab with some space.  Prefer slabs that are partially
1961                  * used over those that are totally full.  This helps to reduce
1962                  * fragmentation.
1963                  */
1964                 if (keg->uk_free != 0) {
1965                         if (!LIST_EMPTY(&keg->uk_part_slab)) {
1966                                 slab = LIST_FIRST(&keg->uk_part_slab);
1967                         } else {
1968                                 slab = LIST_FIRST(&keg->uk_free_slab);
1969                                 LIST_REMOVE(slab, us_link);
1970                                 LIST_INSERT_HEAD(&keg->uk_part_slab, slab,
1971                                     us_link);
1972                         }
1973                         return (slab);
1974                 }
1975
1976                 /*
1977                  * M_NOVM means don't ask at all!
1978                  */
1979                 if (flags & M_NOVM)
1980                         break;
1981
1982                 if (keg->uk_maxpages &&
1983                     keg->uk_pages >= keg->uk_maxpages) {
1984                         keg->uk_flags |= UMA_ZFLAG_FULL;
1985
1986                         if (flags & M_NOWAIT)
1987                                 break;
1988                         else
1989                                 msleep(keg, &keg->uk_lock, PVM,
1990                                     "zonelimit", 0);
1991                         continue;
1992                 }
1993                 keg->uk_recurse++;
1994                 slab = slab_zalloc(zone, flags);
1995                 keg->uk_recurse--;
1996
1997                 /*
1998                  * If we got a slab here it's safe to mark it partially used
1999                  * and return.  We assume that the caller is going to remove
2000                  * at least one item.
2001                  */
2002                 if (slab) {
2003                         LIST_INSERT_HEAD(&keg->uk_part_slab, slab, us_link);
2004                         return (slab);
2005                 }
2006                 /*
2007                  * We might not have been able to get a slab but another cpu
2008                  * could have while we were unlocked.  Check again before we
2009                  * fail.
2010                  */
2011                 if (flags & M_NOWAIT)
2012                         flags |= M_NOVM;
2013         }
2014         return (slab);
2015 }
2016
2017 static void *
2018 uma_slab_alloc(uma_zone_t zone, uma_slab_t slab)
2019 {
2020         uma_keg_t keg;
2021         uma_slabrefcnt_t slabref;
2022         void *item;
2023         u_int8_t freei;
2024
2025         keg = zone->uz_keg;
2026
2027         freei = slab->us_firstfree;
2028         if (keg->uk_flags & UMA_ZONE_REFCNT) {
2029                 slabref = (uma_slabrefcnt_t)slab;
2030                 slab->us_firstfree = slabref->us_freelist[freei].us_item;
2031         } else {
2032                 slab->us_firstfree = slab->us_freelist[freei].us_item;
2033         }
2034         item = slab->us_data + (keg->uk_rsize * freei);
2035
2036         slab->us_freecount--;
2037         keg->uk_free--;
2038 #ifdef INVARIANTS
2039         uma_dbg_alloc(zone, slab, item);
2040 #endif
2041         /* Move this slab to the full list */
2042         if (slab->us_freecount == 0) {
2043                 LIST_REMOVE(slab, us_link);
2044                 LIST_INSERT_HEAD(&keg->uk_full_slab, slab, us_link);
2045         }
2046
2047         return (item);
2048 }
2049
2050 static int
2051 uma_zalloc_bucket(uma_zone_t zone, int flags)
2052 {
2053         uma_bucket_t bucket;
2054         uma_slab_t slab;
2055         int16_t saved;
2056         int max, origflags = flags;
2057
2058         /*
2059          * Try this zone's free list first so we don't allocate extra buckets.
2060          */
2061         if ((bucket = LIST_FIRST(&zone->uz_free_bucket)) != NULL) {
2062                 KASSERT(bucket->ub_cnt == 0,
2063                     ("uma_zalloc_bucket: Bucket on free list is not empty."));
2064                 LIST_REMOVE(bucket, ub_link);
2065         } else {
2066                 int bflags;
2067
2068                 bflags = (flags & ~M_ZERO);
2069                 if (zone->uz_keg->uk_flags & UMA_ZFLAG_CACHEONLY)
2070                         bflags |= M_NOVM;
2071
2072                 ZONE_UNLOCK(zone);
2073                 bucket = bucket_alloc(zone->uz_count, bflags);
2074                 ZONE_LOCK(zone);
2075         }
2076
2077         if (bucket == NULL)
2078                 return (0);
2079
2080 #ifdef SMP
2081         /*
2082          * This code is here to limit the number of simultaneous bucket fills
2083          * for any given zone to the number of per cpu caches in this zone. This
2084          * is done so that we don't allocate more memory than we really need.
2085          */
2086         if (zone->uz_fills >= mp_ncpus)
2087                 goto done;
2088
2089 #endif
2090         zone->uz_fills++;
2091
2092         max = MIN(bucket->ub_entries, zone->uz_count);
2093         /* Try to keep the buckets totally full */
2094         saved = bucket->ub_cnt;
2095         while (bucket->ub_cnt < max &&
2096             (slab = uma_zone_slab(zone, flags)) != NULL) {
2097                 while (slab->us_freecount && bucket->ub_cnt < max) {
2098                         bucket->ub_bucket[bucket->ub_cnt++] =
2099                             uma_slab_alloc(zone, slab);
2100                 }
2101
2102                 /* Don't block on the next fill */
2103                 flags |= M_NOWAIT;
2104         }
2105
2106         /*
2107          * We unlock here because we need to call the zone's init.
2108          * It should be safe to unlock because the slab dealt with
2109          * above is already on the appropriate list within the keg
2110          * and the bucket we filled is not yet on any list, so we
2111          * own it.
2112          */
2113         if (zone->uz_init != NULL) {
2114                 int i;
2115
2116                 ZONE_UNLOCK(zone);
2117                 for (i = saved; i < bucket->ub_cnt; i++)
2118                         if (zone->uz_init(bucket->ub_bucket[i],
2119                             zone->uz_keg->uk_size, origflags) != 0)
2120                                 break;
2121                 /*
2122                  * If we couldn't initialize the whole bucket, put the
2123                  * rest back onto the freelist.
2124                  */
2125                 if (i != bucket->ub_cnt) {
2126                         int j;
2127
2128                         for (j = i; j < bucket->ub_cnt; j++) {
2129                                 uma_zfree_internal(zone, bucket->ub_bucket[j],
2130                                     NULL, SKIP_FINI);
2131 #ifdef INVARIANTS
2132                                 bucket->ub_bucket[j] = NULL;
2133 #endif
2134                         }
2135                         bucket->ub_cnt = i;
2136                 }
2137                 ZONE_LOCK(zone);
2138         }
2139
2140         zone->uz_fills--;
2141         if (bucket->ub_cnt != 0) {
2142                 LIST_INSERT_HEAD(&zone->uz_full_bucket,
2143                     bucket, ub_link);
2144                 return (1);
2145         }
2146 #ifdef SMP
2147 done:
2148 #endif
2149         bucket_free(bucket);
2150
2151         return (0);
2152 }
2153 /*
2154  * Allocates an item for an internal zone
2155  *
2156  * Arguments
2157  *      zone   The zone to alloc for.
2158  *      udata  The data to be passed to the constructor.
2159  *      flags  M_WAITOK, M_NOWAIT, M_ZERO.
2160  *
2161  * Returns
2162  *      NULL if there is no memory and M_NOWAIT is set
2163  *      An item if successful
2164  */
2165
2166 static void *
2167 uma_zalloc_internal(uma_zone_t zone, void *udata, int flags)
2168 {
2169         uma_keg_t keg;
2170         uma_slab_t slab;
2171         void *item;
2172
2173         item = NULL;
2174         keg = zone->uz_keg;
2175
2176 #ifdef UMA_DEBUG_ALLOC
2177         printf("INTERNAL: Allocating one item from %s(%p)\n", zone->uz_name, zone);
2178 #endif
2179         ZONE_LOCK(zone);
2180
2181         slab = uma_zone_slab(zone, flags);
2182         if (slab == NULL) {
2183                 ZONE_UNLOCK(zone);
2184                 return (NULL);
2185         }
2186
2187         item = uma_slab_alloc(zone, slab);
2188
2189         ZONE_UNLOCK(zone);
2190
2191         /*
2192          * We have to call both the zone's init (not the keg's init)
2193          * and the zone's ctor.  This is because the item is going from
2194          * a keg slab directly to the user, and the user is expecting it
2195          * to be both zone-init'd as well as zone-ctor'd.
2196          */
2197         if (zone->uz_init != NULL) {
2198                 if (zone->uz_init(item, keg->uk_size, flags) != 0) {
2199                         uma_zfree_internal(zone, item, udata, SKIP_FINI);
2200                         return (NULL);
2201                 }
2202         }
2203         if (zone->uz_ctor != NULL) {
2204                 if (zone->uz_ctor(item, keg->uk_size, udata, flags) != 0) {
2205                         uma_zfree_internal(zone, item, udata, SKIP_DTOR);
2206                         return (NULL);
2207                 }
2208         }
2209         if (flags & M_ZERO)
2210                 bzero(item, keg->uk_size);
2211
2212         return (item);
2213 }
2214
2215 /* See uma.h */
2216 void
2217 uma_zfree_arg(uma_zone_t zone, void *item, void *udata)
2218 {
2219         uma_keg_t keg;
2220         uma_cache_t cache;
2221         uma_bucket_t bucket;
2222         int bflags;
2223         int cpu;
2224         enum zfreeskip skip;
2225
2226         /* This is the fast path free */
2227         skip = SKIP_NONE;
2228         keg = zone->uz_keg;
2229
2230 #ifdef UMA_DEBUG_ALLOC_1
2231         printf("Freeing item %p to %s(%p)\n", item, zone->uz_name, zone);
2232 #endif
2233         CTR2(KTR_UMA, "uma_zfree_arg thread %x zone %s", curthread,
2234             zone->uz_name);
2235
2236         /*
2237          * The race here is acceptable.  If we miss it we'll just have to wait
2238          * a little longer for the limits to be reset.
2239          */
2240
2241         if (keg->uk_flags & UMA_ZFLAG_FULL)
2242                 goto zfree_internal;
2243
2244         if (zone->uz_dtor) {
2245                 zone->uz_dtor(item, keg->uk_size, udata);
2246                 skip = SKIP_DTOR;
2247         }
2248
2249 zfree_restart:
2250         cpu = PCPU_GET(cpuid);
2251         CPU_LOCK(cpu);
2252         cache = &zone->uz_cpu[cpu];
2253
2254 zfree_start:
2255         bucket = cache->uc_freebucket;
2256
2257         if (bucket) {
2258                 /*
2259                  * Do we have room in our bucket? It is OK for this uz count
2260                  * check to be slightly out of sync.
2261                  */
2262
2263                 if (bucket->ub_cnt < bucket->ub_entries) {
2264                         KASSERT(bucket->ub_bucket[bucket->ub_cnt] == NULL,
2265                             ("uma_zfree: Freeing to non free bucket index."));
2266                         bucket->ub_bucket[bucket->ub_cnt] = item;
2267                         bucket->ub_cnt++;
2268 #ifdef INVARIANTS
2269                         ZONE_LOCK(zone);
2270                         if (keg->uk_flags & UMA_ZONE_MALLOC)
2271                                 uma_dbg_free(zone, udata, item);
2272                         else
2273                                 uma_dbg_free(zone, NULL, item);
2274                         ZONE_UNLOCK(zone);
2275 #endif
2276                         CPU_UNLOCK(cpu);
2277                         return;
2278                 } else if (cache->uc_allocbucket) {
2279 #ifdef UMA_DEBUG_ALLOC
2280                         printf("uma_zfree: Swapping buckets.\n");
2281 #endif
2282                         /*
2283                          * We have run out of space in our freebucket.
2284                          * See if we can switch with our alloc bucket.
2285                          */
2286                         if (cache->uc_allocbucket->ub_cnt <
2287                             cache->uc_freebucket->ub_cnt) {
2288                                 bucket = cache->uc_freebucket;
2289                                 cache->uc_freebucket = cache->uc_allocbucket;
2290                                 cache->uc_allocbucket = bucket;
2291                                 goto zfree_start;
2292                         }
2293                 }
2294         }
2295         /*
2296          * We can get here for two reasons:
2297          *
2298          * 1) The buckets are NULL
2299          * 2) The alloc and free buckets are both somewhat full.
2300          */
2301
2302         ZONE_LOCK(zone);
2303
2304         bucket = cache->uc_freebucket;
2305         cache->uc_freebucket = NULL;
2306
2307         /* Can we throw this on the zone full list? */
2308         if (bucket != NULL) {
2309 #ifdef UMA_DEBUG_ALLOC
2310                 printf("uma_zfree: Putting old bucket on the free list.\n");
2311 #endif
2312                 /* ub_cnt is pointing to the last free item */
2313                 KASSERT(bucket->ub_cnt != 0,
2314                     ("uma_zfree: Attempting to insert an empty bucket onto the full list.\n"));
2315                 LIST_INSERT_HEAD(&zone->uz_full_bucket,
2316                     bucket, ub_link);
2317         }
2318         if ((bucket = LIST_FIRST(&zone->uz_free_bucket)) != NULL) {
2319                 LIST_REMOVE(bucket, ub_link);
2320                 ZONE_UNLOCK(zone);
2321                 cache->uc_freebucket = bucket;
2322                 goto zfree_start;
2323         }
2324         /* We're done with this CPU now */
2325         CPU_UNLOCK(cpu);
2326
2327         /* And the zone.. */
2328         ZONE_UNLOCK(zone);
2329
2330 #ifdef UMA_DEBUG_ALLOC
2331         printf("uma_zfree: Allocating new free bucket.\n");
2332 #endif
2333         bflags = M_NOWAIT;
2334
2335         if (keg->uk_flags & UMA_ZFLAG_CACHEONLY)
2336                 bflags |= M_NOVM;
2337         bucket = bucket_alloc(zone->uz_count, bflags);
2338         if (bucket) {
2339                 ZONE_LOCK(zone);
2340                 LIST_INSERT_HEAD(&zone->uz_free_bucket,
2341                     bucket, ub_link);
2342                 ZONE_UNLOCK(zone);
2343                 goto zfree_restart;
2344         }
2345
2346         /*
2347          * If nothing else caught this, we'll just do an internal free.
2348          */
2349
2350 zfree_internal:
2351
2352 #ifdef INVARIANTS
2353         /*
2354          * If we need to skip the dtor and the uma_dbg_free in
2355          * uma_zfree_internal because we've already called the dtor
2356          * above, but we ended up here, then we need to make sure
2357          * that we take care of the uma_dbg_free immediately.
2358          */
2359         if (skip) {
2360                 ZONE_LOCK(zone);
2361                 if (keg->uk_flags & UMA_ZONE_MALLOC)
2362                         uma_dbg_free(zone, udata, item);
2363                 else
2364                         uma_dbg_free(zone, NULL, item);
2365                 ZONE_UNLOCK(zone);
2366         }
2367 #endif
2368         uma_zfree_internal(zone, item, udata, skip);
2369
2370         return;
2371 }
2372
2373 /*
2374  * Frees an item to an INTERNAL zone or allocates a free bucket
2375  *
2376  * Arguments:
2377  *      zone   The zone to free to
2378  *      item   The item we're freeing
2379  *      udata  User supplied data for the dtor
2380  *      skip   Skip dtors and finis
2381  */
2382 static void
2383 uma_zfree_internal(uma_zone_t zone, void *item, void *udata,
2384     enum zfreeskip skip)
2385 {
2386         uma_slab_t slab;
2387         uma_slabrefcnt_t slabref;
2388         uma_keg_t keg;
2389         u_int8_t *mem;
2390         u_int8_t freei;
2391
2392         keg = zone->uz_keg;
2393
2394         if (skip < SKIP_DTOR && zone->uz_dtor)
2395                 zone->uz_dtor(item, keg->uk_size, udata);
2396         if (skip < SKIP_FINI && zone->uz_fini)
2397                 zone->uz_fini(item, keg->uk_size);
2398
2399         ZONE_LOCK(zone);
2400
2401         if (!(keg->uk_flags & UMA_ZONE_MALLOC)) {
2402                 mem = (u_int8_t *)((unsigned long)item & (~UMA_SLAB_MASK));
2403                 if (keg->uk_flags & UMA_ZONE_HASH)
2404                         slab = hash_sfind(&keg->uk_hash, mem);
2405                 else {
2406                         mem += keg->uk_pgoff;
2407                         slab = (uma_slab_t)mem;
2408                 }
2409         } else {
2410                 slab = (uma_slab_t)udata;
2411         }
2412
2413         /* Do we need to remove from any lists? */
2414         if (slab->us_freecount+1 == keg->uk_ipers) {
2415                 LIST_REMOVE(slab, us_link);
2416                 LIST_INSERT_HEAD(&keg->uk_free_slab, slab, us_link);
2417         } else if (slab->us_freecount == 0) {
2418                 LIST_REMOVE(slab, us_link);
2419                 LIST_INSERT_HEAD(&keg->uk_part_slab, slab, us_link);
2420         }
2421
2422         /* Slab management stuff */
2423         freei = ((unsigned long)item - (unsigned long)slab->us_data)
2424                 / keg->uk_rsize;
2425
2426 #ifdef INVARIANTS
2427         if (!skip)
2428                 uma_dbg_free(zone, slab, item);
2429 #endif
2430
2431         if (keg->uk_flags & UMA_ZONE_REFCNT) {
2432                 slabref = (uma_slabrefcnt_t)slab;
2433                 slabref->us_freelist[freei].us_item = slab->us_firstfree;
2434         } else {
2435                 slab->us_freelist[freei].us_item = slab->us_firstfree;
2436         }
2437         slab->us_firstfree = freei;
2438         slab->us_freecount++;
2439
2440         /* Zone statistics */
2441         keg->uk_free++;
2442
2443         if (keg->uk_flags & UMA_ZFLAG_FULL) {
2444                 if (keg->uk_pages < keg->uk_maxpages)
2445                         keg->uk_flags &= ~UMA_ZFLAG_FULL;
2446
2447                 /* We can handle one more allocation */
2448                 wakeup_one(keg);
2449         }
2450
2451         ZONE_UNLOCK(zone);
2452 }
2453
2454 /* See uma.h */
2455 void
2456 uma_zone_set_max(uma_zone_t zone, int nitems)
2457 {
2458         uma_keg_t keg;
2459
2460         keg = zone->uz_keg;
2461         ZONE_LOCK(zone);
2462         if (keg->uk_ppera > 1)
2463                 keg->uk_maxpages = nitems * keg->uk_ppera;
2464         else
2465                 keg->uk_maxpages = nitems / keg->uk_ipers;
2466
2467         if (keg->uk_maxpages * keg->uk_ipers < nitems)
2468                 keg->uk_maxpages++;
2469
2470         ZONE_UNLOCK(zone);
2471 }
2472
2473 /* See uma.h */
2474 void
2475 uma_zone_set_init(uma_zone_t zone, uma_init uminit)
2476 {
2477         ZONE_LOCK(zone);
2478         KASSERT(zone->uz_keg->uk_pages == 0,
2479             ("uma_zone_set_init on non-empty keg"));
2480         zone->uz_keg->uk_init = uminit;
2481         ZONE_UNLOCK(zone);
2482 }
2483
2484 /* See uma.h */
2485 void
2486 uma_zone_set_fini(uma_zone_t zone, uma_fini fini)
2487 {
2488         ZONE_LOCK(zone);
2489         KASSERT(zone->uz_keg->uk_pages == 0,
2490             ("uma_zone_set_fini on non-empty keg"));
2491         zone->uz_keg->uk_fini = fini;
2492         ZONE_UNLOCK(zone);
2493 }
2494
2495 /* See uma.h */
2496 void
2497 uma_zone_set_zinit(uma_zone_t zone, uma_init zinit)
2498 {
2499         ZONE_LOCK(zone);
2500         KASSERT(zone->uz_keg->uk_pages == 0,
2501             ("uma_zone_set_zinit on non-empty keg"));
2502         zone->uz_init = zinit;
2503         ZONE_UNLOCK(zone);
2504 }
2505
2506 /* See uma.h */
2507 void
2508 uma_zone_set_zfini(uma_zone_t zone, uma_fini zfini)
2509 {
2510         ZONE_LOCK(zone);
2511         KASSERT(zone->uz_keg->uk_pages == 0,
2512             ("uma_zone_set_zfini on non-empty keg"));
2513         zone->uz_fini = zfini;
2514         ZONE_UNLOCK(zone);
2515 }
2516
2517 /* See uma.h */
2518 /* XXX uk_freef is not actually used with the zone locked */
2519 void
2520 uma_zone_set_freef(uma_zone_t zone, uma_free freef)
2521 {
2522         ZONE_LOCK(zone);
2523         zone->uz_keg->uk_freef = freef;
2524         ZONE_UNLOCK(zone);
2525 }
2526
2527 /* See uma.h */
2528 /* XXX uk_allocf is not actually used with the zone locked */
2529 void
2530 uma_zone_set_allocf(uma_zone_t zone, uma_alloc allocf)
2531 {
2532         ZONE_LOCK(zone);
2533         zone->uz_keg->uk_flags |= UMA_ZFLAG_PRIVALLOC;
2534         zone->uz_keg->uk_allocf = allocf;
2535         ZONE_UNLOCK(zone);
2536 }
2537
2538 /* See uma.h */
2539 int
2540 uma_zone_set_obj(uma_zone_t zone, struct vm_object *obj, int count)
2541 {
2542         uma_keg_t keg;
2543         vm_offset_t kva;
2544         int pages;
2545
2546         keg = zone->uz_keg;
2547         pages = count / keg->uk_ipers;
2548
2549         if (pages * keg->uk_ipers < count)
2550                 pages++;
2551
2552         kva = kmem_alloc_nofault(kernel_map, pages * UMA_SLAB_SIZE);
2553
2554         if (kva == 0)
2555                 return (0);
2556         if (obj == NULL) {
2557                 obj = vm_object_allocate(OBJT_DEFAULT,
2558                     pages);
2559         } else {
2560                 VM_OBJECT_LOCK_INIT(obj, "uma object");
2561                 _vm_object_allocate(OBJT_DEFAULT,
2562                     pages, obj);
2563         }
2564         ZONE_LOCK(zone);
2565         keg->uk_kva = kva;
2566         keg->uk_obj = obj;
2567         keg->uk_maxpages = pages;
2568         keg->uk_allocf = obj_alloc;
2569         keg->uk_flags |= UMA_ZONE_NOFREE | UMA_ZFLAG_PRIVALLOC;
2570         ZONE_UNLOCK(zone);
2571         return (1);
2572 }
2573
2574 /* See uma.h */
2575 void
2576 uma_prealloc(uma_zone_t zone, int items)
2577 {
2578         int slabs;
2579         uma_slab_t slab;
2580         uma_keg_t keg;
2581
2582         keg = zone->uz_keg;
2583         ZONE_LOCK(zone);
2584         slabs = items / keg->uk_ipers;
2585         if (slabs * keg->uk_ipers < items)
2586                 slabs++;
2587         while (slabs > 0) {
2588                 slab = slab_zalloc(zone, M_WAITOK);
2589                 LIST_INSERT_HEAD(&keg->uk_free_slab, slab, us_link);
2590                 slabs--;
2591         }
2592         ZONE_UNLOCK(zone);
2593 }
2594
2595 /* See uma.h */
2596 u_int32_t *
2597 uma_find_refcnt(uma_zone_t zone, void *item)
2598 {
2599         uma_slabrefcnt_t slabref;
2600         uma_keg_t keg;
2601         u_int32_t *refcnt;
2602         int idx;
2603
2604         keg = zone->uz_keg;
2605         slabref = (uma_slabrefcnt_t)vtoslab((vm_offset_t)item &
2606             (~UMA_SLAB_MASK));
2607         KASSERT(slabref != NULL && slabref->us_keg->uk_flags & UMA_ZONE_REFCNT,
2608             ("uma_find_refcnt(): zone possibly not UMA_ZONE_REFCNT"));
2609         idx = ((unsigned long)item - (unsigned long)slabref->us_data)
2610             / keg->uk_rsize;
2611         refcnt = &slabref->us_freelist[idx].us_refcnt;
2612         return refcnt;
2613 }
2614
2615 /* See uma.h */
2616 void
2617 uma_reclaim(void)
2618 {
2619 #ifdef UMA_DEBUG
2620         printf("UMA: vm asked us to release pages!\n");
2621 #endif
2622         bucket_enable();
2623         zone_foreach(zone_drain);
2624         /*
2625          * Some slabs may have been freed but this zone will be visited early
2626          * we visit again so that we can free pages that are empty once other
2627          * zones are drained.  We have to do the same for buckets.
2628          */
2629         zone_drain(slabzone);
2630         zone_drain(slabrefzone);
2631         bucket_zone_drain();
2632 }
2633
2634 void *
2635 uma_large_malloc(int size, int wait)
2636 {
2637         void *mem;
2638         uma_slab_t slab;
2639         u_int8_t flags;
2640
2641         slab = uma_zalloc_internal(slabzone, NULL, wait);
2642         if (slab == NULL)
2643                 return (NULL);
2644         mem = page_alloc(NULL, size, &flags, wait);
2645         if (mem) {
2646                 vsetslab((vm_offset_t)mem, slab);
2647                 slab->us_data = mem;
2648                 slab->us_flags = flags | UMA_SLAB_MALLOC;
2649                 slab->us_size = size;
2650         } else {
2651                 uma_zfree_internal(slabzone, slab, NULL, 0);
2652         }
2653
2654         return (mem);
2655 }
2656
2657 void
2658 uma_large_free(uma_slab_t slab)
2659 {
2660         vsetobj((vm_offset_t)slab->us_data, kmem_object);
2661         page_free(slab->us_data, slab->us_size, slab->us_flags);
2662         uma_zfree_internal(slabzone, slab, NULL, 0);
2663 }
2664
2665 void
2666 uma_print_stats(void)
2667 {
2668         zone_foreach(uma_print_zone);
2669 }
2670
2671 static void
2672 slab_print(uma_slab_t slab)
2673 {
2674         printf("slab: keg %p, data %p, freecount %d, firstfree %d\n",
2675                 slab->us_keg, slab->us_data, slab->us_freecount,
2676                 slab->us_firstfree);
2677 }
2678
2679 static void
2680 cache_print(uma_cache_t cache)
2681 {
2682         printf("alloc: %p(%d), free: %p(%d)\n",
2683                 cache->uc_allocbucket,
2684                 cache->uc_allocbucket?cache->uc_allocbucket->ub_cnt:0,
2685                 cache->uc_freebucket,
2686                 cache->uc_freebucket?cache->uc_freebucket->ub_cnt:0);
2687 }
2688
2689 void
2690 uma_print_zone(uma_zone_t zone)
2691 {
2692         uma_cache_t cache;
2693         uma_keg_t keg;
2694         uma_slab_t slab;
2695         int i;
2696
2697         keg = zone->uz_keg;
2698         printf("%s(%p) size %d(%d) flags %d ipers %d ppera %d out %d free %d\n",
2699             zone->uz_name, zone, keg->uk_size, keg->uk_rsize, keg->uk_flags,
2700             keg->uk_ipers, keg->uk_ppera,
2701             (keg->uk_ipers * keg->uk_pages) - keg->uk_free, keg->uk_free);
2702         printf("Part slabs:\n");
2703         LIST_FOREACH(slab, &keg->uk_part_slab, us_link)
2704                 slab_print(slab);
2705         printf("Free slabs:\n");
2706         LIST_FOREACH(slab, &keg->uk_free_slab, us_link)
2707                 slab_print(slab);
2708         printf("Full slabs:\n");
2709         LIST_FOREACH(slab, &keg->uk_full_slab, us_link)
2710                 slab_print(slab);
2711         for (i = 0; i <= mp_maxid; i++) {
2712                 if (CPU_ABSENT(i))
2713                         continue;
2714                 cache = &zone->uz_cpu[i];
2715                 printf("CPU %d Cache:\n", i);
2716                 cache_print(cache);
2717         }
2718 }
2719
2720 /*
2721  * Sysctl handler for vm.zone
2722  *
2723  * stolen from vm_zone.c
2724  */
2725 static int
2726 sysctl_vm_zone(SYSCTL_HANDLER_ARGS)
2727 {
2728         int error, len, cnt;
2729         const int linesize = 128;       /* conservative */
2730         int totalfree;
2731         char *tmpbuf, *offset;
2732         uma_zone_t z;
2733         uma_keg_t zk;
2734         char *p;
2735         int cpu;
2736         int cachefree;
2737         uma_bucket_t bucket;
2738         uma_cache_t cache;
2739
2740         cnt = 0;
2741         mtx_lock(&uma_mtx);
2742         LIST_FOREACH(zk, &uma_kegs, uk_link) {
2743                 LIST_FOREACH(z, &zk->uk_zones, uz_link)
2744                         cnt++;
2745         }
2746         mtx_unlock(&uma_mtx);
2747         MALLOC(tmpbuf, char *, (cnt == 0 ? 1 : cnt) * linesize,
2748                         M_TEMP, M_WAITOK);
2749         len = snprintf(tmpbuf, linesize,
2750             "\nITEM            SIZE     LIMIT     USED    FREE  REQUESTS\n\n");
2751         if (cnt == 0)
2752                 tmpbuf[len - 1] = '\0';
2753         error = SYSCTL_OUT(req, tmpbuf, cnt == 0 ? len-1 : len);
2754         if (error || cnt == 0)
2755                 goto out;
2756         offset = tmpbuf;
2757         mtx_lock(&uma_mtx);
2758         LIST_FOREACH(zk, &uma_kegs, uk_link) {
2759           LIST_FOREACH(z, &zk->uk_zones, uz_link) {
2760                 if (cnt == 0)   /* list may have changed size */
2761                         break;
2762                 if (!(zk->uk_flags & UMA_ZFLAG_INTERNAL)) {
2763                         for (cpu = 0; cpu <= mp_maxid; cpu++) {
2764                                 if (CPU_ABSENT(cpu))
2765                                         continue;
2766                                 CPU_LOCK(cpu);
2767                         }
2768                 }
2769                 ZONE_LOCK(z);
2770                 cachefree = 0;
2771                 if (!(zk->uk_flags & UMA_ZFLAG_INTERNAL)) {
2772                         for (cpu = 0; cpu <= mp_maxid; cpu++) {
2773                                 if (CPU_ABSENT(cpu))
2774                                         continue;
2775                                 cache = &z->uz_cpu[cpu];
2776                                 if (cache->uc_allocbucket != NULL)
2777                                         cachefree += cache->uc_allocbucket->ub_cnt;
2778                                 if (cache->uc_freebucket != NULL)
2779                                         cachefree += cache->uc_freebucket->ub_cnt;
2780                                 CPU_UNLOCK(cpu);
2781                         }
2782                 }
2783                 LIST_FOREACH(bucket, &z->uz_full_bucket, ub_link) {
2784                         cachefree += bucket->ub_cnt;
2785                 }
2786                 totalfree = zk->uk_free + cachefree;
2787                 len = snprintf(offset, linesize,
2788                     "%-12.12s  %6.6u, %8.8u, %6.6u, %6.6u, %8.8llu\n",
2789                     z->uz_name, zk->uk_size,
2790                     zk->uk_maxpages * zk->uk_ipers,
2791                     (zk->uk_ipers * (zk->uk_pages / zk->uk_ppera)) - totalfree,
2792                     totalfree,
2793                     (unsigned long long)z->uz_allocs);
2794                 ZONE_UNLOCK(z);
2795                 for (p = offset + 12; p > offset && *p == ' '; --p)
2796                         /* nothing */ ;
2797                 p[1] = ':';
2798                 cnt--;
2799                 offset += len;
2800           }
2801         }
2802         mtx_unlock(&uma_mtx);
2803         *offset++ = '\0';
2804         error = SYSCTL_OUT(req, tmpbuf, offset - tmpbuf);
2805 out:
2806         FREE(tmpbuf, M_TEMP);
2807         return (error);
2808 }