2 * SPDX-License-Identifier: BSD-2-Clause-FreeBSD
4 * Copyright (c) 2002-2019 Jeffrey Roberson <jeff@FreeBSD.org>
5 * Copyright (c) 2004, 2005 Bosko Milekic <bmilekic@FreeBSD.org>
6 * Copyright (c) 2004-2006 Robert N. M. Watson
9 * Redistribution and use in source and binary forms, with or without
10 * modification, are permitted provided that the following conditions
12 * 1. Redistributions of source code must retain the above copyright
13 * notice unmodified, this list of conditions, and the following
15 * 2. Redistributions in binary form must reproduce the above copyright
16 * notice, this list of conditions and the following disclaimer in the
17 * documentation and/or other materials provided with the distribution.
19 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
20 * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
21 * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
22 * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT,
23 * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
24 * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
25 * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
26 * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
27 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
28 * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
32 * uma_core.c Implementation of the Universal Memory allocator
34 * This allocator is intended to replace the multitude of similar object caches
35 * in the standard FreeBSD kernel. The intent is to be flexible as well as
36 * efficient. A primary design goal is to return unused memory to the rest of
37 * the system. This will make the system as a whole more flexible due to the
38 * ability to move memory to subsystems which most need it instead of leaving
39 * pools of reserved memory unused.
41 * The basic ideas stem from similar slab/zone based allocators whose algorithms
48 * - Improve memory usage for large allocations
49 * - Investigate cache size adjustments
52 #include <sys/cdefs.h>
53 __FBSDID("$FreeBSD$");
56 #include "opt_param.h"
59 #include <sys/param.h>
60 #include <sys/systm.h>
61 #include <sys/bitset.h>
62 #include <sys/domainset.h>
63 #include <sys/eventhandler.h>
64 #include <sys/kernel.h>
65 #include <sys/types.h>
66 #include <sys/limits.h>
67 #include <sys/queue.h>
68 #include <sys/malloc.h>
71 #include <sys/sysctl.h>
72 #include <sys/mutex.h>
74 #include <sys/random.h>
75 #include <sys/rwlock.h>
77 #include <sys/sched.h>
78 #include <sys/sleepqueue.h>
80 #include <sys/taskqueue.h>
81 #include <sys/vmmeter.h>
84 #include <vm/vm_domainset.h>
85 #include <vm/vm_object.h>
86 #include <vm/vm_page.h>
87 #include <vm/vm_pageout.h>
88 #include <vm/vm_param.h>
89 #include <vm/vm_phys.h>
90 #include <vm/vm_pagequeue.h>
91 #include <vm/vm_map.h>
92 #include <vm/vm_kern.h>
93 #include <vm/vm_extern.h>
95 #include <vm/uma_int.h>
96 #include <vm/uma_dbg.h>
100 #ifdef DEBUG_MEMGUARD
101 #include <vm/memguard.h>
105 * This is the zone and keg from which all zones are spawned.
107 static uma_zone_t kegs;
108 static uma_zone_t zones;
110 /* This is the zone from which all offpage uma_slab_ts are allocated. */
111 static uma_zone_t slabzone;
114 * The initial hash tables come out of this zone so they can be allocated
115 * prior to malloc coming up.
117 static uma_zone_t hashzone;
119 /* The boot-time adjusted value for cache line alignment. */
120 int uma_align_cache = 64 - 1;
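/*
 * Note that UMA alignment arguments are masks: 64 - 1 requests 64-byte
 * alignment, and machine-dependent startup code may later override this
 * once the real cache line size is known.
 */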
122 static MALLOC_DEFINE(M_UMAHASH, "UMAHash", "UMA Hash Buckets");
123 static MALLOC_DEFINE(M_UMA, "UMA", "UMA Misc");
126 * Are we allowed to allocate buckets?
128 static int bucketdisable = 1;
130 /* Linked list of all kegs in the system */
131 static LIST_HEAD(,uma_keg) uma_kegs = LIST_HEAD_INITIALIZER(uma_kegs);
133 /* Linked list of all cache-only zones in the system */
134 static LIST_HEAD(,uma_zone) uma_cachezones =
135 LIST_HEAD_INITIALIZER(uma_cachezones);
137 /* This RW lock protects the keg list */
138 static struct rwlock_padalign __exclusive_cache_line uma_rwlock;
 * Pointer to and count of the pool of pages preallocated at startup to
 * bootstrap UMA.
 */
144 static char *bootmem;
145 static int boot_pages;
147 static struct sx uma_reclaim_lock;
150 * kmem soft limit, initialized by uma_set_limit(). Ensure that early
151 * allocations don't trigger a wakeup of the reclaim thread.
153 unsigned long uma_kmem_limit = LONG_MAX;
154 SYSCTL_ULONG(_vm, OID_AUTO, uma_kmem_limit, CTLFLAG_RD, &uma_kmem_limit, 0,
155 "UMA kernel memory soft limit");
156 unsigned long uma_kmem_total;
157 SYSCTL_ULONG(_vm, OID_AUTO, uma_kmem_total, CTLFLAG_RD, &uma_kmem_total, 0,
158 "UMA kernel memory usage");
160 /* Is the VM done starting up? */
161 static enum { BOOT_COLD = 0, BOOT_STRAPPED, BOOT_PAGEALLOC, BOOT_BUCKETS,
162 BOOT_RUNNING } booted = BOOT_COLD;
165 * This is the handle used to schedule events that need to happen
166 * outside of the allocation fast path.
168 static struct callout uma_callout;
169 #define UMA_TIMEOUT 20 /* Seconds for callout interval. */
172 * This structure is passed as the zone ctor arg so that I don't have to create
173 * a special allocation function just for zones.
175 struct uma_zctor_args {
190 struct uma_kctor_args {
199 struct uma_bucket_zone {
202 int ubz_entries; /* Number of items it can hold. */
203 int ubz_maxsize; /* Maximum allocation size per-item. */
207 * Compute the actual number of bucket entries to pack them in power
208 * of two sizes for more efficient space utilization.
210 #define BUCKET_SIZE(n) \
211 (((sizeof(void *) * (n)) - sizeof(struct uma_bucket)) / sizeof(void *))
213 #define BUCKET_MAX BUCKET_SIZE(256)
214 #define BUCKET_MIN BUCKET_SIZE(4)
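/*
 * Illustrative sizing (header size assumed, see struct uma_bucket in
 * uma_int.h): with 8-byte pointers and a 24-byte bucket header,
 * BUCKET_SIZE(32) = (32 * 8 - 24) / 8 = 29 slots, so the header plus the
 * slot array occupy exactly 32 pointer-sized words (256 bytes) and pack
 * cleanly into a power-of-two allocation size.
 */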
216 struct uma_bucket_zone bucket_zones[] = {
217 { NULL, "4 Bucket", BUCKET_SIZE(4), 4096 },
218 { NULL, "6 Bucket", BUCKET_SIZE(6), 3072 },
219 { NULL, "8 Bucket", BUCKET_SIZE(8), 2048 },
220 { NULL, "12 Bucket", BUCKET_SIZE(12), 1536 },
221 { NULL, "16 Bucket", BUCKET_SIZE(16), 1024 },
222 { NULL, "32 Bucket", BUCKET_SIZE(32), 512 },
223 { NULL, "64 Bucket", BUCKET_SIZE(64), 256 },
224 { NULL, "128 Bucket", BUCKET_SIZE(128), 128 },
225 { NULL, "256 Bucket", BUCKET_SIZE(256), 64 },
230 * Flags and enumerations to be passed to internal functions.
234 SKIP_CNT = 0x00000001,
235 SKIP_DTOR = 0x00010000,
236 SKIP_FINI = 0x00020000,
241 int uma_startup_count(int);
242 void uma_startup(void *, int);
243 void uma_startup1(void);
244 void uma_startup2(void);
246 static void *noobj_alloc(uma_zone_t, vm_size_t, int, uint8_t *, int);
247 static void *page_alloc(uma_zone_t, vm_size_t, int, uint8_t *, int);
248 static void *pcpu_page_alloc(uma_zone_t, vm_size_t, int, uint8_t *, int);
249 static void *startup_alloc(uma_zone_t, vm_size_t, int, uint8_t *, int);
250 static void page_free(void *, vm_size_t, uint8_t);
251 static void pcpu_page_free(void *, vm_size_t, uint8_t);
252 static uma_slab_t keg_alloc_slab(uma_keg_t, uma_zone_t, int, int, int);
253 static void cache_drain(uma_zone_t);
254 static void bucket_drain(uma_zone_t, uma_bucket_t);
255 static void bucket_cache_reclaim(uma_zone_t zone, bool);
256 static int keg_ctor(void *, int, void *, int);
257 static void keg_dtor(void *, int, void *);
258 static int zone_ctor(void *, int, void *, int);
259 static void zone_dtor(void *, int, void *);
260 static int zero_init(void *, int, int);
261 static void keg_small_init(uma_keg_t keg);
262 static void keg_large_init(uma_keg_t keg);
263 static void zone_foreach(void (*zfunc)(uma_zone_t, void *), void *);
264 static void zone_timeout(uma_zone_t zone, void *);
265 static int hash_alloc(struct uma_hash *, u_int);
266 static int hash_expand(struct uma_hash *, struct uma_hash *);
267 static void hash_free(struct uma_hash *hash);
268 static void uma_timeout(void *);
269 static void uma_startup3(void);
270 static void *zone_alloc_item(uma_zone_t, void *, int, int);
271 static void zone_free_item(uma_zone_t, void *, void *, enum zfreeskip);
272 static int zone_alloc_limit(uma_zone_t zone, int count, int flags);
273 static void zone_free_limit(uma_zone_t zone, int count);
274 static void bucket_enable(void);
275 static void bucket_init(void);
276 static uma_bucket_t bucket_alloc(uma_zone_t zone, void *, int);
277 static void bucket_free(uma_zone_t zone, uma_bucket_t, void *);
278 static void bucket_zone_drain(void);
279 static uma_bucket_t zone_alloc_bucket(uma_zone_t, void *, int, int);
280 static void *slab_alloc_item(uma_keg_t keg, uma_slab_t slab);
281 static void slab_free_item(uma_zone_t zone, uma_slab_t slab, void *item);
282 static uma_keg_t uma_kcreate(uma_zone_t zone, size_t size, uma_init uminit,
283 uma_fini fini, int align, uint32_t flags);
284 static int zone_import(void *, void **, int, int, int);
285 static void zone_release(void *, void **, int);
286 static bool cache_alloc(uma_zone_t, uma_cache_t, void *, int);
287 static bool cache_free(uma_zone_t, uma_cache_t, void *, void *, int);
289 static int sysctl_vm_zone_count(SYSCTL_HANDLER_ARGS);
290 static int sysctl_vm_zone_stats(SYSCTL_HANDLER_ARGS);
291 static int sysctl_handle_uma_zone_allocs(SYSCTL_HANDLER_ARGS);
292 static int sysctl_handle_uma_zone_frees(SYSCTL_HANDLER_ARGS);
293 static int sysctl_handle_uma_zone_flags(SYSCTL_HANDLER_ARGS);
294 static int sysctl_handle_uma_slab_efficiency(SYSCTL_HANDLER_ARGS);
295 static int sysctl_handle_uma_zone_items(SYSCTL_HANDLER_ARGS);
298 static inline struct noslabbits *slab_dbg_bits(uma_slab_t slab, uma_keg_t keg);
300 static bool uma_dbg_kskip(uma_keg_t keg, void *mem);
301 static bool uma_dbg_zskip(uma_zone_t zone, void *mem);
302 static void uma_dbg_free(uma_zone_t zone, uma_slab_t slab, void *item);
303 static void uma_dbg_alloc(uma_zone_t zone, uma_slab_t slab, void *item);
305 static SYSCTL_NODE(_vm, OID_AUTO, debug, CTLFLAG_RD, 0,
306 "Memory allocation debugging");
308 static u_int dbg_divisor = 1;
309 SYSCTL_UINT(_vm_debug, OID_AUTO, divisor,
310 CTLFLAG_RDTUN | CTLFLAG_NOFETCH, &dbg_divisor, 0,
311 "Debug & thrash every this item in memory allocator");
313 static counter_u64_t uma_dbg_cnt = EARLY_COUNTER;
314 static counter_u64_t uma_skip_cnt = EARLY_COUNTER;
315 SYSCTL_COUNTER_U64(_vm_debug, OID_AUTO, trashed, CTLFLAG_RD,
316 &uma_dbg_cnt, "memory items debugged");
317 SYSCTL_COUNTER_U64(_vm_debug, OID_AUTO, skipped, CTLFLAG_RD,
318 &uma_skip_cnt, "memory items skipped, not debugged");
321 SYSINIT(uma_startup3, SI_SUB_VM_CONF, SI_ORDER_SECOND, uma_startup3, NULL);
323 SYSCTL_NODE(_vm, OID_AUTO, uma, CTLFLAG_RW, 0, "Universal Memory Allocator");
325 SYSCTL_PROC(_vm, OID_AUTO, zone_count, CTLFLAG_RD|CTLTYPE_INT,
326 0, 0, sysctl_vm_zone_count, "I", "Number of UMA zones");
328 SYSCTL_PROC(_vm, OID_AUTO, zone_stats, CTLFLAG_RD|CTLTYPE_STRUCT,
329 0, 0, sysctl_vm_zone_stats, "s,struct uma_type_header", "Zone Stats");
331 static int zone_warnings = 1;
332 SYSCTL_INT(_vm, OID_AUTO, zone_warnings, CTLFLAG_RWTUN, &zone_warnings, 0,
333 "Warn when UMA zones becomes full");
336 * This routine checks to see whether or not it's safe to enable buckets.
342 KASSERT(booted >= BOOT_BUCKETS, ("Bucket enable before init"));
343 bucketdisable = vm_page_count_min();
347 * Initialize bucket_zones, the array of zones of buckets of various sizes.
349 * For each zone, calculate the memory required for each bucket, consisting
350 * of the header and an array of pointers.
355 struct uma_bucket_zone *ubz;
358 for (ubz = &bucket_zones[0]; ubz->ubz_entries != 0; ubz++) {
359 size = roundup(sizeof(struct uma_bucket), sizeof(void *));
360 size += sizeof(void *) * ubz->ubz_entries;
361 ubz->ubz_zone = uma_zcreate(ubz->ubz_name, size,
362 NULL, NULL, NULL, NULL, UMA_ALIGN_PTR,
363 UMA_ZONE_MTXCLASS | UMA_ZFLAG_BUCKET | UMA_ZONE_NUMA);
368 * Given a desired number of entries for a bucket, return the zone from which
369 * to allocate the bucket.
371 static struct uma_bucket_zone *
372 bucket_zone_lookup(int entries)
374 struct uma_bucket_zone *ubz;
376 for (ubz = &bucket_zones[0]; ubz->ubz_entries != 0; ubz++)
377 if (ubz->ubz_entries >= entries)
383 static struct uma_bucket_zone *
384 bucket_zone_max(uma_zone_t zone, int nitems)
386 struct uma_bucket_zone *ubz;
391 if ((zone->uz_flags & UMA_ZONE_NUMA) != 0)
392 /* Count the cross-domain bucket. */
396 for (ubz = &bucket_zones[0]; ubz->ubz_entries != 0; ubz++)
397 if (ubz->ubz_entries * bpcpu * mp_ncpus > nitems)
399 if (ubz == &bucket_zones[0])
407 bucket_select(int size)
409 struct uma_bucket_zone *ubz;
411 ubz = &bucket_zones[0];
412 if (size > ubz->ubz_maxsize)
413 return MAX((ubz->ubz_maxsize * ubz->ubz_entries) / size, 1);
	for (; ubz->ubz_entries != 0; ubz++)
		if (ubz->ubz_maxsize < size)
			break;
	ubz--;
	return (ubz->ubz_entries);
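/*
 * For the bucket_zones[] table above, bucket_select() walks until an
 * entry's ubz_maxsize no longer covers the item size: e.g. a 700-byte item
 * passes the 1024-byte "16 Bucket" entry, fails at the 512-byte "32 Bucket"
 * entry, steps back one, and so returns BUCKET_SIZE(16) entries.
 */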
423 bucket_alloc(uma_zone_t zone, void *udata, int flags)
425 struct uma_bucket_zone *ubz;
429 * This is to stop us from allocating per cpu buckets while we're
430 * running out of vm.boot_pages. Otherwise, we would exhaust the
431 * boot pages. This also prevents us from allocating buckets in
432 * low memory situations.
437 * To limit bucket recursion we store the original zone flags
438 * in a cookie passed via zalloc_arg/zfree_arg. This allows the
439 * NOVM flag to persist even through deep recursions. We also
440 * store ZFLAG_BUCKET once we have recursed attempting to allocate
441 * a bucket for a bucket zone so we do not allow infinite bucket
442 * recursion. This cookie will even persist to frees of unused
	 * buckets via the allocation path or bucket allocations in the
	 * free path.
	 */
446 if ((zone->uz_flags & UMA_ZFLAG_BUCKET) == 0)
447 udata = (void *)(uintptr_t)zone->uz_flags;
449 if ((uintptr_t)udata & UMA_ZFLAG_BUCKET)
451 udata = (void *)((uintptr_t)udata | UMA_ZFLAG_BUCKET);
453 if ((uintptr_t)udata & UMA_ZFLAG_CACHEONLY)
455 ubz = bucket_zone_lookup(zone->uz_bucket_size);
456 if (ubz->ubz_zone == zone && (ubz + 1)->ubz_entries != 0)
458 bucket = uma_zalloc_arg(ubz->ubz_zone, udata, flags);
461 bzero(bucket->ub_bucket, sizeof(void *) * ubz->ubz_entries);
464 bucket->ub_entries = ubz->ubz_entries;
471 bucket_free(uma_zone_t zone, uma_bucket_t bucket, void *udata)
473 struct uma_bucket_zone *ubz;
475 KASSERT(bucket->ub_cnt == 0,
476 ("bucket_free: Freeing a non free bucket."));
477 if ((zone->uz_flags & UMA_ZFLAG_BUCKET) == 0)
478 udata = (void *)(uintptr_t)zone->uz_flags;
479 ubz = bucket_zone_lookup(bucket->ub_entries);
480 uma_zfree_arg(ubz->ubz_zone, bucket, udata);
484 bucket_zone_drain(void)
486 struct uma_bucket_zone *ubz;
488 for (ubz = &bucket_zones[0]; ubz->ubz_entries != 0; ubz++)
489 uma_zone_reclaim(ubz->ubz_zone, UMA_RECLAIM_DRAIN);
 * Attempt to satisfy an allocation by retrieving a full bucket from one of the
 * per-domain caches.
 */
497 zone_fetch_bucket(uma_zone_t zone, uma_zone_domain_t zdom)
501 ZONE_LOCK_ASSERT(zone);
503 if ((bucket = TAILQ_FIRST(&zdom->uzd_buckets)) != NULL) {
504 MPASS(zdom->uzd_nitems >= bucket->ub_cnt);
505 TAILQ_REMOVE(&zdom->uzd_buckets, bucket, ub_link);
506 zdom->uzd_nitems -= bucket->ub_cnt;
507 if (zdom->uzd_imin > zdom->uzd_nitems)
508 zdom->uzd_imin = zdom->uzd_nitems;
509 zone->uz_bkt_count -= bucket->ub_cnt;
515 * Insert a full bucket into the specified cache. The "ws" parameter indicates
 * whether the bucket's contents should be counted as part of the zone's
 * working set.
 */
520 zone_put_bucket(uma_zone_t zone, uma_zone_domain_t zdom, uma_bucket_t bucket,
524 ZONE_LOCK_ASSERT(zone);
525 KASSERT(!ws || zone->uz_bkt_count < zone->uz_bkt_max,
526 ("%s: zone %p overflow", __func__, zone));
529 TAILQ_INSERT_HEAD(&zdom->uzd_buckets, bucket, ub_link);
531 TAILQ_INSERT_TAIL(&zdom->uzd_buckets, bucket, ub_link);
532 zdom->uzd_nitems += bucket->ub_cnt;
533 if (ws && zdom->uzd_imax < zdom->uzd_nitems)
534 zdom->uzd_imax = zdom->uzd_nitems;
535 zone->uz_bkt_count += bucket->ub_cnt;
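/*
 * Example of the watermark bookkeeping in zone_put_bucket() and
 * zone_fetch_bucket(): a domain holding 100 cached items that receives a
 * 30-item bucket has uzd_nitems raised to 130 and, when "ws" is true,
 * uzd_imax raised to 130; fetching that bucket back drops uzd_nitems to
 * 100 and only lowers uzd_imin if the count falls below it.
 */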
538 /* Pops an item out of a per-cpu cache bucket. */
540 cache_bucket_pop(uma_cache_t cache, uma_cache_bucket_t bucket)
544 CRITICAL_ASSERT(curthread);
547 item = bucket->ucb_bucket->ub_bucket[bucket->ucb_cnt];
549 bucket->ucb_bucket->ub_bucket[bucket->ucb_cnt] = NULL;
550 KASSERT(item != NULL, ("uma_zalloc: Bucket pointer mangled."));
557 /* Pushes an item into a per-cpu cache bucket. */
559 cache_bucket_push(uma_cache_t cache, uma_cache_bucket_t bucket, void *item)
562 CRITICAL_ASSERT(curthread);
563 KASSERT(bucket->ucb_bucket->ub_bucket[bucket->ucb_cnt] == NULL,
564 ("uma_zfree: Freeing to non free bucket index."));
566 bucket->ucb_bucket->ub_bucket[bucket->ucb_cnt] = item;
572 * Unload a UMA bucket from a per-cpu cache.
574 static inline uma_bucket_t
575 cache_bucket_unload(uma_cache_bucket_t bucket)
579 b = bucket->ucb_bucket;
581 MPASS(b->ub_entries == bucket->ucb_entries);
582 b->ub_cnt = bucket->ucb_cnt;
583 bucket->ucb_bucket = NULL;
584 bucket->ucb_entries = bucket->ucb_cnt = 0;
590 static inline uma_bucket_t
591 cache_bucket_unload_alloc(uma_cache_t cache)
594 return (cache_bucket_unload(&cache->uc_allocbucket));
597 static inline uma_bucket_t
598 cache_bucket_unload_free(uma_cache_t cache)
601 return (cache_bucket_unload(&cache->uc_freebucket));
604 static inline uma_bucket_t
605 cache_bucket_unload_cross(uma_cache_t cache)
608 return (cache_bucket_unload(&cache->uc_crossbucket));
612 * Load a bucket into a per-cpu cache bucket.
615 cache_bucket_load(uma_cache_bucket_t bucket, uma_bucket_t b)
618 CRITICAL_ASSERT(curthread);
619 MPASS(bucket->ucb_bucket == NULL);
621 bucket->ucb_bucket = b;
622 bucket->ucb_cnt = b->ub_cnt;
623 bucket->ucb_entries = b->ub_entries;
627 cache_bucket_load_alloc(uma_cache_t cache, uma_bucket_t b)
630 cache_bucket_load(&cache->uc_allocbucket, b);
634 cache_bucket_load_free(uma_cache_t cache, uma_bucket_t b)
637 cache_bucket_load(&cache->uc_freebucket, b);
642 cache_bucket_load_cross(uma_cache_t cache, uma_bucket_t b)
645 cache_bucket_load(&cache->uc_crossbucket, b);
650 * Copy and preserve ucb_spare.
653 cache_bucket_copy(uma_cache_bucket_t b1, uma_cache_bucket_t b2)
656 b1->ucb_bucket = b2->ucb_bucket;
657 b1->ucb_entries = b2->ucb_entries;
658 b1->ucb_cnt = b2->ucb_cnt;
662 * Swap two cache buckets.
665 cache_bucket_swap(uma_cache_bucket_t b1, uma_cache_bucket_t b2)
667 struct uma_cache_bucket b3;
669 CRITICAL_ASSERT(curthread);
671 cache_bucket_copy(&b3, b1);
672 cache_bucket_copy(b1, b2);
673 cache_bucket_copy(b2, &b3);
677 zone_log_warning(uma_zone_t zone)
679 static const struct timeval warninterval = { 300, 0 };
681 if (!zone_warnings || zone->uz_warning == NULL)
684 if (ratecheck(&zone->uz_ratecheck, &warninterval))
685 printf("[zone: %s] %s\n", zone->uz_name, zone->uz_warning);
689 zone_maxaction(uma_zone_t zone)
692 if (zone->uz_maxaction.ta_func != NULL)
693 taskqueue_enqueue(taskqueue_thread, &zone->uz_maxaction);
 * Routine called from the callout to fire off time-interval based
 * calculations (stats, hash size, etc.).
707 uma_timeout(void *unused)
710 zone_foreach(zone_timeout, NULL);
712 /* Reschedule this event */
713 callout_reset(&uma_callout, UMA_TIMEOUT * hz, uma_timeout, NULL);
 * Update the working set size estimate for the zone's bucket cache.
 * The constants chosen here are somewhat arbitrary. With an update period of
 * 20s (UMA_TIMEOUT), this estimate is dominated by zone activity over the
 * most recent few intervals.
 */
723 zone_domain_update_wss(uma_zone_domain_t zdom)
727 MPASS(zdom->uzd_imax >= zdom->uzd_imin);
728 wss = zdom->uzd_imax - zdom->uzd_imin;
729 zdom->uzd_imax = zdom->uzd_imin = zdom->uzd_nitems;
730 zdom->uzd_wss = (4 * wss + zdom->uzd_wss) / 5;
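/*
 * The update above is an exponentially weighted moving average that gives a
 * 4/5 weight to the most recent interval: a previous estimate of 100 items
 * and an interval delta (imax - imin) of 50 yields (4 * 50 + 100) / 5 = 60.
 */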
734 * Routine to perform timeout driven calculations. This expands the
735 * hashes and does per cpu statistics aggregation.
740 zone_timeout(uma_zone_t zone, void *unused)
745 if ((zone->uz_flags & UMA_ZONE_HASH) == 0)
751 * Hash zones are non-numa by definition so the first domain
752 * is the only one present.
755 pages = keg->uk_domain[0].ud_pages;
758 * Expand the keg hash table.
760 * This is done if the number of slabs is larger than the hash size.
761 * What I'm trying to do here is completely reduce collisions. This
762 * may be a little aggressive. Should I allow for two collisions max?
764 if ((slabs = pages / keg->uk_ppera) > keg->uk_hash.uh_hashsize) {
765 struct uma_hash newhash;
766 struct uma_hash oldhash;
770 * This is so involved because allocating and freeing
771 * while the keg lock is held will lead to deadlock.
		 * I have to do everything in stages and check for
		 * errors.
		 */
776 ret = hash_alloc(&newhash, 1 << fls(slabs));
779 if (hash_expand(&keg->uk_hash, &newhash)) {
780 oldhash = keg->uk_hash;
781 keg->uk_hash = newhash;
794 for (int i = 0; i < vm_ndomains; i++)
795 zone_domain_update_wss(&zone->uz_domain[i]);
 * Allocate and zero fill the next sized hash table from the appropriate
 * backing store.
 *
 * Arguments:
 *	hash  A new hash structure with the old hash size in uh_hashsize
 *
 * Returns:
 *	1 on success and 0 on failure.
 */
810 hash_alloc(struct uma_hash *hash, u_int size)
814 KASSERT(powerof2(size), ("hash size must be power of 2"));
815 if (size > UMA_HASH_SIZE_INIT) {
816 hash->uh_hashsize = size;
817 alloc = sizeof(hash->uh_slab_hash[0]) * hash->uh_hashsize;
818 hash->uh_slab_hash = malloc(alloc, M_UMAHASH, M_NOWAIT);
820 alloc = sizeof(hash->uh_slab_hash[0]) * UMA_HASH_SIZE_INIT;
821 hash->uh_slab_hash = zone_alloc_item(hashzone, NULL,
822 UMA_ANYDOMAIN, M_WAITOK);
823 hash->uh_hashsize = UMA_HASH_SIZE_INIT;
825 if (hash->uh_slab_hash) {
826 bzero(hash->uh_slab_hash, alloc);
827 hash->uh_hashmask = hash->uh_hashsize - 1;
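		/*
		 * uh_hashmask lets UMA_HASH() reduce an address to a bucket
		 * index with a single AND, e.g. a 64-entry table uses a mask
		 * of 0x3f.
		 */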
835 * Expands the hash table for HASH zones. This is done from zone_timeout
836 * to reduce collisions. This must not be done in the regular allocation
837 * path, otherwise, we can recurse on the vm while allocating pages.
840 * oldhash The hash you want to expand
841 * newhash The hash structure for the new table
849 hash_expand(struct uma_hash *oldhash, struct uma_hash *newhash)
851 uma_hash_slab_t slab;
855 if (!newhash->uh_slab_hash)
858 if (oldhash->uh_hashsize >= newhash->uh_hashsize)
	 * I need to investigate hash algorithms for resizing without a
	 * full rehash.
	 */
866 for (idx = 0; idx < oldhash->uh_hashsize; idx++)
867 while (!LIST_EMPTY(&oldhash->uh_slab_hash[idx])) {
868 slab = LIST_FIRST(&oldhash->uh_slab_hash[idx]);
869 LIST_REMOVE(slab, uhs_hlink);
870 hval = UMA_HASH(newhash, slab->uhs_data);
871 LIST_INSERT_HEAD(&newhash->uh_slab_hash[hval],
879 * Free the hash bucket to the appropriate backing store.
882 * slab_hash The hash bucket we're freeing
883 * hashsize The number of entries in that hash bucket
889 hash_free(struct uma_hash *hash)
891 if (hash->uh_slab_hash == NULL)
893 if (hash->uh_hashsize == UMA_HASH_SIZE_INIT)
894 zone_free_item(hashzone, hash->uh_slab_hash, NULL, SKIP_NONE);
896 free(hash->uh_slab_hash, M_UMAHASH);
900 * Frees all outstanding items in a bucket
903 * zone The zone to free to, must be unlocked.
904 * bucket The free/alloc bucket with items.
911 bucket_drain(uma_zone_t zone, uma_bucket_t bucket)
915 if (bucket == NULL || bucket->ub_cnt == 0)
919 for (i = 0; i < bucket->ub_cnt; i++)
920 zone->uz_fini(bucket->ub_bucket[i], zone->uz_size);
921 zone->uz_release(zone->uz_arg, bucket->ub_bucket, bucket->ub_cnt);
922 if (zone->uz_max_items > 0)
923 zone_free_limit(zone, bucket->ub_cnt);
928 * Drains the per cpu caches for a zone.
930 * NOTE: This may only be called while the zone is being torn down, and not
931 * during normal operation. This is necessary in order that we do not have
932 * to migrate CPUs to drain the per-CPU caches.
935 * zone The zone to drain, must be unlocked.
941 cache_drain(uma_zone_t zone)
948 * XXX: It is safe to not lock the per-CPU caches, because we're
949 * tearing down the zone anyway. I.e., there will be no further use
950 * of the caches at this point.
	 * XXX: It would be good to be able to assert that the zone is being
953 * torn down to prevent improper use of cache_drain().
955 * XXX: We lock the zone before passing into bucket_cache_reclaim() as
956 * it is used elsewhere. Should the tear-down path be made special
957 * there in some form?
960 cache = &zone->uz_cpu[cpu];
961 bucket = cache_bucket_unload_alloc(cache);
962 if (bucket != NULL) {
963 bucket_drain(zone, bucket);
964 bucket_free(zone, bucket, NULL);
966 bucket = cache_bucket_unload_free(cache);
967 if (bucket != NULL) {
968 bucket_drain(zone, bucket);
969 bucket_free(zone, bucket, NULL);
971 bucket = cache_bucket_unload_cross(cache);
972 if (bucket != NULL) {
973 bucket_drain(zone, bucket);
974 bucket_free(zone, bucket, NULL);
978 bucket_cache_reclaim(zone, true);
983 cache_shrink(uma_zone_t zone, void *unused)
986 if (zone->uz_flags & UMA_ZFLAG_INTERNAL)
990 zone->uz_bucket_size =
991 (zone->uz_bucket_size_min + zone->uz_bucket_size) / 2;
996 cache_drain_safe_cpu(uma_zone_t zone, void *unused)
999 uma_bucket_t b1, b2, b3;
1002 if (zone->uz_flags & UMA_ZFLAG_INTERNAL)
1005 b1 = b2 = b3 = NULL;
1008 if (zone->uz_flags & UMA_ZONE_NUMA)
		domain = PCPU_GET(domain);
	else
		domain = 0;
1012 cache = &zone->uz_cpu[curcpu];
1013 b1 = cache_bucket_unload_alloc(cache);
1014 if (b1 != NULL && b1->ub_cnt != 0) {
1015 zone_put_bucket(zone, &zone->uz_domain[domain], b1, false);
1018 b2 = cache_bucket_unload_free(cache);
1019 if (b2 != NULL && b2->ub_cnt != 0) {
1020 zone_put_bucket(zone, &zone->uz_domain[domain], b2, false);
1023 b3 = cache_bucket_unload_cross(cache);
1027 bucket_free(zone, b1, NULL);
1029 bucket_free(zone, b2, NULL);
1031 bucket_drain(zone, b3);
1032 bucket_free(zone, b3, NULL);
 * Safely drain the per-CPU caches of a zone into the zone's bucket cache.
 * This is an expensive call because it needs to bind to all CPUs
 * one by one and enter a critical section on each of them in order
 * to safely access their cache buckets.
 * The zone lock must not be held when calling this function.
1044 pcpu_cache_drain_safe(uma_zone_t zone)
	 * Polite bucket-size shrinking was not enough; shrink aggressively.
	 */
1052 cache_shrink(zone, NULL);
1054 zone_foreach(cache_shrink, NULL);
1057 thread_lock(curthread);
1058 sched_bind(curthread, cpu);
1059 thread_unlock(curthread);
1062 cache_drain_safe_cpu(zone, NULL);
1064 zone_foreach(cache_drain_safe_cpu, NULL);
1066 thread_lock(curthread);
1067 sched_unbind(curthread);
1068 thread_unlock(curthread);
 * Reclaim cached buckets from a zone. All buckets are reclaimed if the caller
 * requested a drain, otherwise the per-domain caches are trimmed to their
 * estimated working set size.
 */
1077 bucket_cache_reclaim(uma_zone_t zone, bool drain)
1079 uma_zone_domain_t zdom;
1080 uma_bucket_t bucket;
1081 long target, tofree;
1084 for (i = 0; i < vm_ndomains; i++) {
1085 zdom = &zone->uz_domain[i];
1088 * If we were asked to drain the zone, we are done only once
1089 * this bucket cache is empty. Otherwise, we reclaim items in
1090 * excess of the zone's estimated working set size. If the
1091 * difference nitems - imin is larger than the WSS estimate,
1092 * then the estimate will grow at the end of this interval and
1093 * we ignore the historical average.
1095 target = drain ? 0 : lmax(zdom->uzd_wss, zdom->uzd_nitems -
1097 while (zdom->uzd_nitems > target) {
1098 bucket = TAILQ_LAST(&zdom->uzd_buckets, uma_bucketlist);
1101 tofree = bucket->ub_cnt;
1102 TAILQ_REMOVE(&zdom->uzd_buckets, bucket, ub_link);
1103 zdom->uzd_nitems -= tofree;
1106 * Shift the bounds of the current WSS interval to avoid
1107 * perturbing the estimate.
1109 zdom->uzd_imax -= lmin(zdom->uzd_imax, tofree);
1110 zdom->uzd_imin -= lmin(zdom->uzd_imin, tofree);
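			/*
			 * E.g. freeing a 25-item bucket from a domain whose
			 * watermarks were imin = 40 and imax = 90 shifts them
			 * to 15 and 65, so the imax - imin difference feeding
			 * the next WSS sample is unchanged by the trim.
			 */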
1113 bucket_drain(zone, bucket);
1114 bucket_free(zone, bucket, NULL);
1120 * Shrink the zone bucket size to ensure that the per-CPU caches
1121 * don't grow too large.
1123 if (zone->uz_bucket_size > zone->uz_bucket_size_min)
1124 zone->uz_bucket_size--;
1128 keg_free_slab(uma_keg_t keg, uma_slab_t slab, int start)
1134 CTR4(KTR_UMA, "keg_free_slab keg %s(%p) slab %p, returning %d bytes",
1135 keg->uk_name, keg, slab, PAGE_SIZE * keg->uk_ppera);
1137 mem = slab_data(slab, keg);
1138 flags = slab->us_flags;
1140 if (keg->uk_fini != NULL) {
1141 for (i--; i > -1; i--)
1144 * trash_fini implies that dtor was trash_dtor. trash_fini
1145 * would check that memory hasn't been modified since free,
1146 * which executed trash_dtor.
1147 * That's why we need to run uma_dbg_kskip() check here,
1148 * albeit we don't make skip check for other init/fini
1151 if (!uma_dbg_kskip(keg, slab_item(slab, keg, i)) ||
1152 keg->uk_fini != trash_fini)
1154 keg->uk_fini(slab_item(slab, keg, i), keg->uk_size);
1156 if (keg->uk_flags & UMA_ZONE_OFFPAGE)
1157 zone_free_item(keg->uk_slabzone, slab, NULL, SKIP_NONE);
1158 keg->uk_freef(mem, PAGE_SIZE * keg->uk_ppera, flags);
1159 uma_total_dec(PAGE_SIZE * keg->uk_ppera);
1163 * Frees pages from a keg back to the system. This is done on demand from
1164 * the pageout daemon.
1169 keg_drain(uma_keg_t keg)
1171 struct slabhead freeslabs = { 0 };
1173 uma_slab_t slab, tmp;
	 * We don't want to take pages from statically allocated kegs at this
	 * time.
	 */
1180 if (keg->uk_flags & UMA_ZONE_NOFREE || keg->uk_freef == NULL)
1183 for (i = 0; i < vm_ndomains; i++) {
		dom = &keg->uk_domain[i];
		CTR4(KTR_UMA, "keg_drain %s(%p) domain %d free items: %u",
		    keg->uk_name, keg, i, dom->ud_free);
1189 LIST_FOREACH_SAFE(slab, &dom->ud_free_slab, us_link, tmp) {
1190 /* We have nowhere to free these to. */
1191 if (slab->us_flags & UMA_SLAB_BOOT)
1193 if (keg->uk_flags & UMA_ZONE_HASH)
1194 UMA_HASH_REMOVE(&keg->uk_hash, slab);
1196 LIST_REMOVE(slab, us_link);
1197 LIST_INSERT_HEAD(&freeslabs, slab, us_link);
1199 dom->ud_pages -= n * keg->uk_ppera;
1200 dom->ud_free -= n * keg->uk_ipers;
1204 while ((slab = LIST_FIRST(&freeslabs)) != NULL) {
1205 LIST_REMOVE(slab, us_link);
1206 keg_free_slab(keg, slab, keg->uk_ipers);
1211 zone_reclaim(uma_zone_t zone, int waitok, bool drain)
1215 * Set draining to interlock with zone_dtor() so we can release our
1216 * locks as we go. Only dtor() should do a WAITOK call since it
	 * is the only call that knows the structure will still be available
	 * when it wakes up.
	 */
1221 while (zone->uz_flags & UMA_ZFLAG_RECLAIMING) {
1222 if (waitok == M_NOWAIT)
1224 msleep(zone, &zone->uz_lock, PVM, "zonedrain", 1);
1226 zone->uz_flags |= UMA_ZFLAG_RECLAIMING;
1227 bucket_cache_reclaim(zone, drain);
1231 * The DRAINING flag protects us from being freed while
1232 * we're running. Normally the uma_rwlock would protect us but we
1233 * must be able to release and acquire the right lock for each keg.
1235 if ((zone->uz_flags & UMA_ZFLAG_CACHE) == 0)
1236 keg_drain(zone->uz_keg);
1238 zone->uz_flags &= ~UMA_ZFLAG_RECLAIMING;
1245 zone_drain(uma_zone_t zone, void *unused)
1248 zone_reclaim(zone, M_NOWAIT, true);
1252 zone_trim(uma_zone_t zone, void *unused)
1255 zone_reclaim(zone, M_NOWAIT, false);
 * Allocate a new slab for a keg and insert it into the partial slab list.
1260 * The keg should be unlocked on entry. If the allocation succeeds it will
1261 * be locked on return.
1264 * flags Wait flags for the item initialization routine
1265 * aflags Wait flags for the slab allocation
1268 * The slab that was allocated or NULL if there is no memory and the
1269 * caller specified M_NOWAIT.
1272 keg_alloc_slab(uma_keg_t keg, uma_zone_t zone, int domain, int flags,
1283 KASSERT(domain >= 0 && domain < vm_ndomains,
1284 ("keg_alloc_slab: domain %d out of range", domain));
1286 allocf = keg->uk_allocf;
1289 if (keg->uk_flags & UMA_ZONE_OFFPAGE) {
1290 slab = zone_alloc_item(keg->uk_slabzone, NULL, domain, aflags);
1296 * This reproduces the old vm_zone behavior of zero filling pages the
1297 * first time they are added to a zone.
1299 * Malloced items are zeroed in uma_zalloc.
1302 if ((keg->uk_flags & UMA_ZONE_MALLOC) == 0)
1307 if (keg->uk_flags & UMA_ZONE_NODUMP)
1310 /* zone is passed for legacy reasons. */
1311 size = keg->uk_ppera * PAGE_SIZE;
1312 mem = allocf(zone, size, domain, &sflags, aflags);
1314 if (keg->uk_flags & UMA_ZONE_OFFPAGE)
1315 zone_free_item(keg->uk_slabzone, slab, NULL, SKIP_NONE);
1318 uma_total_inc(size);
1320 /* For HASH zones all pages go to the same uma_domain. */
1321 if ((keg->uk_flags & UMA_ZONE_HASH) != 0)
1324 /* Point the slab into the allocated memory */
1325 if (!(keg->uk_flags & UMA_ZONE_OFFPAGE))
1326 slab = (uma_slab_t )(mem + keg->uk_pgoff);
1328 ((uma_hash_slab_t)slab)->uhs_data = mem;
1330 if (keg->uk_flags & UMA_ZONE_VTOSLAB)
1331 for (i = 0; i < keg->uk_ppera; i++)
1332 vsetzoneslab((vm_offset_t)mem + (i * PAGE_SIZE),
1335 slab->us_freecount = keg->uk_ipers;
1336 slab->us_flags = sflags;
1337 slab->us_domain = domain;
1339 BIT_FILL(keg->uk_ipers, &slab->us_free);
1341 BIT_ZERO(keg->uk_ipers, slab_dbg_bits(slab, keg));
1344 if (keg->uk_init != NULL) {
1345 for (i = 0; i < keg->uk_ipers; i++)
1346 if (keg->uk_init(slab_item(slab, keg, i),
1347 keg->uk_size, flags) != 0)
1349 if (i != keg->uk_ipers) {
1350 keg_free_slab(keg, slab, i);
1354 KEG_LOCK(keg, domain);
1356 CTR3(KTR_UMA, "keg_alloc_slab: allocated slab %p for %s(%p)",
1357 slab, keg->uk_name, keg);
1359 if (keg->uk_flags & UMA_ZONE_HASH)
1360 UMA_HASH_INSERT(&keg->uk_hash, slab, mem);
1363 * If we got a slab here it's safe to mark it partially used
1364 * and return. We assume that the caller is going to remove
1365 * at least one item.
1367 dom = &keg->uk_domain[domain];
1368 LIST_INSERT_HEAD(&dom->ud_part_slab, slab, us_link);
1369 dom->ud_pages += keg->uk_ppera;
1370 dom->ud_free += keg->uk_ipers;
1379 * This function is intended to be used early on in place of page_alloc() so
 * that we may use the boot time page cache to satisfy allocations before
 * the VM is fully initialized.
 */
1384 startup_alloc(uma_zone_t zone, vm_size_t bytes, int domain, uint8_t *pflag,
	 * If we are in BOOT_BUCKETS or higher, then switch to the real
	 * allocator. Zones with page sized slabs switch at BOOT_PAGEALLOC.
	 */
1400 case BOOT_PAGEALLOC:
1401 if (keg->uk_ppera > 1)
1405 #ifdef UMA_MD_SMALL_ALLOC
1406 keg->uk_allocf = (keg->uk_ppera > 1) ?
1407 page_alloc : uma_small_alloc;
1409 keg->uk_allocf = page_alloc;
1411 return keg->uk_allocf(zone, bytes, domain, pflag, wait);
1415 * Check our small startup cache to see if it has pages remaining.
1417 pages = howmany(bytes, PAGE_SIZE);
1418 KASSERT(pages > 0, ("%s can't reserve 0 pages", __func__));
1419 if (pages > boot_pages)
1420 panic("UMA zone \"%s\": Increase vm.boot_pages", zone->uz_name);
1422 printf("%s from \"%s\", %d boot pages left\n", __func__, zone->uz_name,
1426 boot_pages -= pages;
1427 bootmem += pages * PAGE_SIZE;
1428 *pflag = UMA_SLAB_BOOT;
1434 * Allocates a number of pages from the system
1437 * bytes The number of bytes requested
1438 * wait Shall we wait?
1441 * A pointer to the alloced memory or possibly
1442 * NULL if M_NOWAIT is set.
1445 page_alloc(uma_zone_t zone, vm_size_t bytes, int domain, uint8_t *pflag,
1448 void *p; /* Returned page */
1450 *pflag = UMA_SLAB_KERNEL;
1451 p = (void *)kmem_malloc_domainset(DOMAINSET_FIXED(domain), bytes, wait);
1457 pcpu_page_alloc(uma_zone_t zone, vm_size_t bytes, int domain, uint8_t *pflag,
1460 struct pglist alloctail;
1461 vm_offset_t addr, zkva;
1463 vm_page_t p, p_next;
1468 MPASS(bytes == (mp_maxid + 1) * PAGE_SIZE);
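	/*
	 * Per-CPU zones back every possible CPU with its own page, allocated
	 * from that CPU's memory domain when it is known, and stitch the
	 * pages into a single contiguous KVA range below.
	 */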
1470 TAILQ_INIT(&alloctail);
1471 flags = VM_ALLOC_SYSTEM | VM_ALLOC_WIRED | VM_ALLOC_NOOBJ |
1472 malloc2vm_flags(wait);
1473 *pflag = UMA_SLAB_KERNEL;
1474 for (cpu = 0; cpu <= mp_maxid; cpu++) {
1475 if (CPU_ABSENT(cpu)) {
1476 p = vm_page_alloc(NULL, 0, flags);
1479 p = vm_page_alloc(NULL, 0, flags);
1481 pc = pcpu_find(cpu);
1482 p = vm_page_alloc_domain(NULL, 0, pc->pc_domain, flags);
1483 if (__predict_false(p == NULL))
1484 p = vm_page_alloc(NULL, 0, flags);
1487 if (__predict_false(p == NULL))
1489 TAILQ_INSERT_TAIL(&alloctail, p, listq);
1491 if ((addr = kva_alloc(bytes)) == 0)
1494 TAILQ_FOREACH(p, &alloctail, listq) {
1495 pmap_qenter(zkva, &p, 1);
1498 return ((void*)addr);
1500 TAILQ_FOREACH_SAFE(p, &alloctail, listq, p_next) {
1501 vm_page_unwire_noq(p);
 * Allocates a number of pages not belonging to a VM object
1511 * bytes The number of bytes requested
1512 * wait Shall we wait?
1515 * A pointer to the alloced memory or possibly
1516 * NULL if M_NOWAIT is set.
1519 noobj_alloc(uma_zone_t zone, vm_size_t bytes, int domain, uint8_t *flags,
1522 TAILQ_HEAD(, vm_page) alloctail;
1524 vm_offset_t retkva, zkva;
1525 vm_page_t p, p_next;
1528 TAILQ_INIT(&alloctail);
1531 npages = howmany(bytes, PAGE_SIZE);
1532 while (npages > 0) {
1533 p = vm_page_alloc_domain(NULL, 0, domain, VM_ALLOC_INTERRUPT |
1534 VM_ALLOC_WIRED | VM_ALLOC_NOOBJ |
1535 ((wait & M_WAITOK) != 0 ? VM_ALLOC_WAITOK :
		 * Since the page does not belong to an object, its
		 * listq is unused.
		 */
1542 TAILQ_INSERT_TAIL(&alloctail, p, listq);
			 * Page allocation failed, free intermediate pages and
			 * bail out.
			 */
1550 TAILQ_FOREACH_SAFE(p, &alloctail, listq, p_next) {
1551 vm_page_unwire_noq(p);
1556 *flags = UMA_SLAB_PRIV;
1557 zkva = keg->uk_kva +
1558 atomic_fetchadd_long(&keg->uk_offset, round_page(bytes));
1560 TAILQ_FOREACH(p, &alloctail, listq) {
1561 pmap_qenter(zkva, &p, 1);
1565 return ((void *)retkva);
1569 * Frees a number of pages to the system
1572 * mem A pointer to the memory to be freed
1573 * size The size of the memory being freed
1574 * flags The original p->us_flags field
1580 page_free(void *mem, vm_size_t size, uint8_t flags)
1583 if ((flags & UMA_SLAB_KERNEL) == 0)
1584 panic("UMA: page_free used with invalid flags %x", flags);
1586 kmem_free((vm_offset_t)mem, size);
1590 * Frees pcpu zone allocations
1593 * mem A pointer to the memory to be freed
1594 * size The size of the memory being freed
1595 * flags The original p->us_flags field
1601 pcpu_page_free(void *mem, vm_size_t size, uint8_t flags)
1603 vm_offset_t sva, curva;
1607 MPASS(size == (mp_maxid+1)*PAGE_SIZE);
1608 sva = (vm_offset_t)mem;
1609 for (curva = sva; curva < sva + size; curva += PAGE_SIZE) {
1610 paddr = pmap_kextract(curva);
1611 m = PHYS_TO_VM_PAGE(paddr);
1612 vm_page_unwire_noq(m);
1615 pmap_qremove(sva, size >> PAGE_SHIFT);
1616 kva_free(sva, size);
1621 * Zero fill initializer
1623 * Arguments/Returns follow uma_init specifications
1626 zero_init(void *mem, int size, int flags)
1634 slab_dbg_bits(uma_slab_t slab, uma_keg_t keg)
1637 return ((void *)((char *)&slab->us_free + BITSET_SIZE(keg->uk_ipers)));
1642 * Actual size of embedded struct slab (!OFFPAGE).
1645 slab_sizeof(int nitems)
1649 s = sizeof(struct uma_slab) + BITSET_SIZE(nitems) * SLAB_BITSETS;
1650 return (roundup(s, UMA_ALIGN_PTR + 1));
1654 * Size of memory for embedded slabs (!OFFPAGE).
1657 slab_space(int nitems)
1659 return (UMA_SLAB_SIZE - slab_sizeof(nitems));
1663 * Compute the number of items that will fit in an embedded (!OFFPAGE) slab
1664 * with a given size and alignment.
1667 slab_ipers(size_t size, int align)
1673 * Compute the ideal number of items that will fit in a page and
1674 * then compute the actual number based on a bitset nitems wide.
1676 rsize = roundup(size, align + 1);
1677 nitems = UMA_SLAB_SIZE / rsize;
1678 return (slab_space(nitems) / rsize);
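/*
 * Rough example for slab_ipers(), assuming 4 KB slabs: 256-byte items with
 * an alignment mask of 15 give rsize = 256 and an ideal count of 16; once
 * slab_space() subtracts the embedded header and a bitset sized for 16
 * items, one item is typically lost and the function returns 15. Exact
 * figures depend on sizeof(struct uma_slab) and SLAB_BITSETS.
 */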
1682 * Finish creating a small uma keg. This calculates ipers, and the keg size.
 *	keg  The keg we should initialize
1691 keg_small_init(uma_keg_t keg)
1699 if (keg->uk_flags & UMA_ZONE_PCPU) {
1700 u_int ncpus = (mp_maxid + 1) ? (mp_maxid + 1) : MAXCPU;
1702 slabsize = UMA_PCPU_ALLOC_SIZE;
1703 keg->uk_ppera = ncpus;
1705 slabsize = UMA_SLAB_SIZE;
1710 * Calculate the size of each allocation (rsize) according to
1711 * alignment. If the requested size is smaller than we have
1712 * allocation bits for we round it up.
1714 rsize = keg->uk_size;
1715 if (rsize < slabsize / SLAB_MAX_SETSIZE)
1716 rsize = slabsize / SLAB_MAX_SETSIZE;
1717 if (rsize & keg->uk_align)
1718 rsize = roundup(rsize, keg->uk_align + 1);
1719 keg->uk_rsize = rsize;
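	/*
	 * E.g. a 24-byte item with an alignment mask of 15 (16-byte
	 * alignment) is padded to rsize = 32 above; rsize, not uk_size, is
	 * what determines how many items fit in a slab.
	 */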
1721 KASSERT((keg->uk_flags & UMA_ZONE_PCPU) == 0 ||
1722 keg->uk_rsize < UMA_PCPU_ALLOC_SIZE,
1723 ("%s: size %u too large", __func__, keg->uk_rsize));
1726 * Use a pessimistic bit count for shsize. It may be possible to
1727 * squeeze one more item in for very particular sizes if we were
1728 * to loop and reduce the bitsize if there is waste.
1730 if (keg->uk_flags & UMA_ZONE_OFFPAGE)
1733 shsize = slab_sizeof(slabsize / rsize);
1735 if (rsize <= slabsize - shsize)
1736 keg->uk_ipers = (slabsize - shsize) / rsize;
1738 /* Handle special case when we have 1 item per slab, so
1739 * alignment requirement can be relaxed. */
1740 KASSERT(keg->uk_size <= slabsize - shsize,
1741 ("%s: size %u greater than slab", __func__, keg->uk_size));
1744 KASSERT(keg->uk_ipers > 0 && keg->uk_ipers <= SLAB_MAX_SETSIZE,
1745 ("%s: keg->uk_ipers %u", __func__, keg->uk_ipers));
1747 memused = keg->uk_ipers * rsize + shsize;
1748 wastedspace = slabsize - memused;
1751 * We can't do OFFPAGE if we're internal or if we've been
1752 * asked to not go to the VM for buckets. If we do this we
1753 * may end up going to the VM for slabs which we do not
1754 * want to do if we're UMA_ZFLAG_CACHEONLY as a result
1755 * of UMA_ZONE_VM, which clearly forbids it.
1757 if ((keg->uk_flags & UMA_ZFLAG_INTERNAL) ||
1758 (keg->uk_flags & UMA_ZFLAG_CACHEONLY))
1762 * See if using an OFFPAGE slab will limit our waste. Only do
1763 * this if it permits more items per-slab.
1765 * XXX We could try growing slabsize to limit max waste as well.
1766 * Historically this was not done because the VM could not
1767 * efficiently handle contiguous allocations.
1769 if ((wastedspace >= slabsize / UMA_MAX_WASTE) &&
1770 (keg->uk_ipers < (slabsize / keg->uk_rsize))) {
1771 keg->uk_ipers = slabsize / keg->uk_rsize;
1772 KASSERT(keg->uk_ipers > 0 && keg->uk_ipers <= SLAB_MAX_SETSIZE,
1773 ("%s: keg->uk_ipers %u", __func__, keg->uk_ipers));
1774 CTR6(KTR_UMA, "UMA decided we need offpage slab headers for "
1775 "keg: %s(%p), calculated wastedspace = %d, "
1776 "maximum wasted space allowed = %d, "
1777 "calculated ipers = %d, "
1778 "new wasted space = %d\n", keg->uk_name, keg, wastedspace,
1779 slabsize / UMA_MAX_WASTE, keg->uk_ipers,
1780 slabsize - keg->uk_ipers * keg->uk_rsize);
1782 * If we had access to memory to embed a slab header we
1783 * also have a page structure to use vtoslab() instead of
1784 * hash to find slabs. If the zone was explicitly created
1785 * OFFPAGE we can't necessarily touch the memory.
1787 if ((keg->uk_flags & UMA_ZONE_OFFPAGE) == 0)
1788 keg->uk_flags |= UMA_ZONE_OFFPAGE | UMA_ZONE_VTOSLAB;
1791 if ((keg->uk_flags & UMA_ZONE_OFFPAGE) &&
1792 (keg->uk_flags & UMA_ZONE_VTOSLAB) == 0)
1793 keg->uk_flags |= UMA_ZONE_HASH;
 * Finish creating a large (> UMA_SLAB_SIZE) uma keg. Just give in and do
 * OFFPAGE for now. When I can allow for more dynamic slab sizes this will be
 * fixed.
1802 * keg The keg we should initialize
1808 keg_large_init(uma_keg_t keg)
1811 KASSERT(keg != NULL, ("Keg is null in keg_large_init"));
1812 KASSERT((keg->uk_flags & UMA_ZONE_PCPU) == 0,
1813 ("%s: Cannot large-init a UMA_ZONE_PCPU keg", __func__));
1815 keg->uk_ppera = howmany(keg->uk_size, PAGE_SIZE);
1817 keg->uk_rsize = keg->uk_size;
1819 /* Check whether we have enough space to not do OFFPAGE. */
1820 if ((keg->uk_flags & UMA_ZONE_OFFPAGE) == 0 &&
1821 PAGE_SIZE * keg->uk_ppera - keg->uk_rsize <
1822 slab_sizeof(SLAB_MIN_SETSIZE)) {
1824 * We can't do OFFPAGE if we're internal, in which case
		 * we need an extra page per allocation to contain the
		 * slab header.
		 */
1828 if ((keg->uk_flags & UMA_ZFLAG_INTERNAL) == 0)
1829 keg->uk_flags |= UMA_ZONE_OFFPAGE | UMA_ZONE_VTOSLAB;
1834 if ((keg->uk_flags & UMA_ZONE_OFFPAGE) &&
1835 (keg->uk_flags & UMA_ZONE_VTOSLAB) == 0)
1836 keg->uk_flags |= UMA_ZONE_HASH;
1840 keg_cachespread_init(uma_keg_t keg)
1847 KASSERT((keg->uk_flags & UMA_ZONE_PCPU) == 0,
1848 ("%s: Cannot cachespread-init a UMA_ZONE_PCPU keg", __func__));
1850 alignsize = keg->uk_align + 1;
1851 rsize = keg->uk_size;
1853 * We want one item to start on every align boundary in a page. To
1854 * do this we will span pages. We will also extend the item by the
1855 * size of align if it is an even multiple of align. Otherwise, it
1856 * would fall on the same boundary every time.
1858 if (rsize & keg->uk_align)
1859 rsize = (rsize & ~keg->uk_align) + alignsize;
1860 if ((rsize & alignsize) == 0)
1862 trailer = rsize - keg->uk_size;
1863 pages = (rsize * (PAGE_SIZE / alignsize)) / PAGE_SIZE;
1864 pages = MIN(pages, (128 * 1024) / PAGE_SIZE);
1865 keg->uk_rsize = rsize;
1866 keg->uk_ppera = pages;
1867 keg->uk_ipers = ((pages * PAGE_SIZE) + trailer) / rsize;
1868 keg->uk_flags |= UMA_ZONE_OFFPAGE | UMA_ZONE_VTOSLAB;
1869 KASSERT(keg->uk_ipers <= SLAB_MAX_SETSIZE,
1870 ("%s: keg->uk_ipers too high(%d) increase max_ipers", __func__,
 * Keg header ctor. This initializes all fields, locks, etc., and inserts
1876 * the keg onto the global keg list.
1878 * Arguments/Returns follow uma_ctor specifications
1879 * udata Actually uma_kctor_args
1882 keg_ctor(void *mem, int size, void *udata, int flags)
1884 struct uma_kctor_args *arg = udata;
1885 uma_keg_t keg = mem;
1890 keg->uk_size = arg->size;
1891 keg->uk_init = arg->uminit;
1892 keg->uk_fini = arg->fini;
1893 keg->uk_align = arg->align;
1894 keg->uk_reserve = 0;
1895 keg->uk_flags = arg->flags;
1896 keg->uk_slabzone = NULL;
1899 * We use a global round-robin policy by default. Zones with
1900 * UMA_ZONE_NUMA set will use first-touch instead, in which case the
1901 * iterator is never run.
1903 keg->uk_dr.dr_policy = DOMAINSET_RR();
1904 keg->uk_dr.dr_iter = 0;
1907 * The master zone is passed to us at keg-creation time.
1910 keg->uk_name = zone->uz_name;
1912 if (arg->flags & UMA_ZONE_VM)
1913 keg->uk_flags |= UMA_ZFLAG_CACHEONLY;
1915 if (arg->flags & UMA_ZONE_ZINIT)
1916 keg->uk_init = zero_init;
1918 if (arg->flags & UMA_ZONE_MALLOC)
1919 keg->uk_flags |= UMA_ZONE_VTOSLAB;
	if (arg->flags & UMA_ZONE_PCPU)
#ifdef SMP
		keg->uk_flags |= UMA_ZONE_OFFPAGE;
#else
		keg->uk_flags &= ~UMA_ZONE_PCPU;
#endif
1928 if (keg->uk_flags & UMA_ZONE_CACHESPREAD) {
1929 keg_cachespread_init(keg);
1931 if (keg->uk_size > slab_space(SLAB_MIN_SETSIZE))
1932 keg_large_init(keg);
1934 keg_small_init(keg);
1938 * Sets all kegs with memory that comes from the page array to a
1939 * first-touch domain policy.
1941 #ifdef UMA_FIRSTTOUCH
1942 if ((keg->uk_flags & UMA_ZONE_HASH) == 0)
1943 keg->uk_flags |= UMA_ZONE_NUMA;
1946 if (keg->uk_flags & UMA_ZONE_OFFPAGE)
1947 keg->uk_slabzone = slabzone;
1950 * If we haven't booted yet we need allocations to go through the
1951 * startup cache until the vm is ready.
1953 if (booted < BOOT_PAGEALLOC)
1954 keg->uk_allocf = startup_alloc;
1955 #ifdef UMA_MD_SMALL_ALLOC
1956 else if (keg->uk_ppera == 1)
1957 keg->uk_allocf = uma_small_alloc;
#endif
	else if (keg->uk_flags & UMA_ZONE_PCPU)
		keg->uk_allocf = pcpu_page_alloc;
	else
		keg->uk_allocf = page_alloc;
1963 #ifdef UMA_MD_SMALL_ALLOC
1964 if (keg->uk_ppera == 1)
1965 keg->uk_freef = uma_small_free;
	else
#endif
	if (keg->uk_flags & UMA_ZONE_PCPU)
		keg->uk_freef = pcpu_page_free;
	else
		keg->uk_freef = page_free;
1974 * Initialize keg's locks.
1976 for (i = 0; i < vm_ndomains; i++)
1977 KEG_LOCK_INIT(keg, i, (arg->flags & UMA_ZONE_MTXCLASS));
1980 * If we're putting the slab header in the actual page we need to
	 * figure out where in each page it goes. See slab_sizeof().
	 */
1984 if (!(keg->uk_flags & UMA_ZONE_OFFPAGE)) {
1987 shsize = slab_sizeof(keg->uk_ipers);
1988 keg->uk_pgoff = (PAGE_SIZE * keg->uk_ppera) - shsize;
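		/*
		 * For example, a one-page slab with a 40-byte header ends up
		 * with uk_pgoff = 4096 - 40 = 4056, so the header sits at the
		 * end of the page and items start at offset 0 (illustrative
		 * sizes only).
		 */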
1990 * The only way the following is possible is if with our
1991 * UMA_ALIGN_PTR adjustments we are now bigger than
1992 * UMA_SLAB_SIZE. I haven't checked whether this is
		 * mathematically possible for all cases, so we make
		 * sure here.
		 */
1996 KASSERT(keg->uk_pgoff + shsize <= PAGE_SIZE * keg->uk_ppera,
1997 ("zone %s ipers %d rsize %d size %d slab won't fit",
1998 zone->uz_name, keg->uk_ipers, keg->uk_rsize, keg->uk_size));
2001 if (keg->uk_flags & UMA_ZONE_HASH)
2002 hash_alloc(&keg->uk_hash, 0);
2004 CTR3(KTR_UMA, "keg_ctor %p zone %s(%p)\n", keg, zone->uz_name, zone);
2006 LIST_INSERT_HEAD(&keg->uk_zones, zone, uz_link);
2008 rw_wlock(&uma_rwlock);
2009 LIST_INSERT_HEAD(&uma_kegs, keg, uk_link);
2010 rw_wunlock(&uma_rwlock);
2015 zone_alloc_counters(uma_zone_t zone, void *unused)
2018 zone->uz_allocs = counter_u64_alloc(M_WAITOK);
2019 zone->uz_frees = counter_u64_alloc(M_WAITOK);
2020 zone->uz_fails = counter_u64_alloc(M_WAITOK);
2024 zone_alloc_sysctl(uma_zone_t zone, void *unused)
2026 uma_zone_domain_t zdom;
2029 struct sysctl_oid *oid, *domainoid;
2030 int domains, i, cnt;
2031 static const char *nokeg = "cache zone";
2035 * Make a sysctl safe copy of the zone name by removing
	 * any special characters and handling dups by appending
	 * a numeric suffix.
	 */
2039 if (zone->uz_namecnt != 0) {
2040 /* Count the number of decimal digits and '_' separator. */
2041 for (i = 1, cnt = zone->uz_namecnt; cnt != 0; i++)
2043 zone->uz_ctlname = malloc(strlen(zone->uz_name) + i + 1,
2045 sprintf(zone->uz_ctlname, "%s_%d", zone->uz_name,
2048 zone->uz_ctlname = strdup(zone->uz_name, M_UMA);
2049 for (c = zone->uz_ctlname; *c != '\0'; c++)
2050 if (strchr("./\\ -", *c) != NULL)
2054 * Basic parameters at the root.
2056 zone->uz_oid = SYSCTL_ADD_NODE(NULL, SYSCTL_STATIC_CHILDREN(_vm_uma),
2057 OID_AUTO, zone->uz_ctlname, CTLFLAG_RD, NULL, "");
2059 SYSCTL_ADD_U32(NULL, SYSCTL_CHILDREN(oid), OID_AUTO,
2060 "size", CTLFLAG_RD, &zone->uz_size, 0, "Allocation size");
2061 SYSCTL_ADD_PROC(NULL, SYSCTL_CHILDREN(oid), OID_AUTO,
2062 "flags", CTLFLAG_RD | CTLTYPE_STRING | CTLFLAG_MPSAFE,
2063 zone, 0, sysctl_handle_uma_zone_flags, "A",
2064 "Allocator configuration flags");
2065 SYSCTL_ADD_U16(NULL, SYSCTL_CHILDREN(oid), OID_AUTO,
2066 "bucket_size", CTLFLAG_RD, &zone->uz_bucket_size, 0,
2067 "Desired per-cpu cache size");
2068 SYSCTL_ADD_U16(NULL, SYSCTL_CHILDREN(oid), OID_AUTO,
2069 "bucket_size_max", CTLFLAG_RD, &zone->uz_bucket_size_max, 0,
2070 "Maximum allowed per-cpu cache size");
2075 if ((zone->uz_flags & UMA_ZONE_HASH) == 0)
2076 domains = vm_ndomains;
2079 oid = SYSCTL_ADD_NODE(NULL, SYSCTL_CHILDREN(zone->uz_oid), OID_AUTO,
2080 "keg", CTLFLAG_RD, NULL, "");
2082 if ((zone->uz_flags & UMA_ZFLAG_CACHE) == 0) {
2083 SYSCTL_ADD_CONST_STRING(NULL, SYSCTL_CHILDREN(oid), OID_AUTO,
2084 "name", CTLFLAG_RD, keg->uk_name, "Keg name");
2085 SYSCTL_ADD_U32(NULL, SYSCTL_CHILDREN(oid), OID_AUTO,
2086 "rsize", CTLFLAG_RD, &keg->uk_rsize, 0,
2087 "Real object size with alignment");
2088 SYSCTL_ADD_U16(NULL, SYSCTL_CHILDREN(oid), OID_AUTO,
2089 "ppera", CTLFLAG_RD, &keg->uk_ppera, 0,
2090 "pages per-slab allocation");
2091 SYSCTL_ADD_U16(NULL, SYSCTL_CHILDREN(oid), OID_AUTO,
2092 "ipers", CTLFLAG_RD, &keg->uk_ipers, 0,
2093 "items available per-slab");
2094 SYSCTL_ADD_U32(NULL, SYSCTL_CHILDREN(oid), OID_AUTO,
2095 "align", CTLFLAG_RD, &keg->uk_align, 0,
2096 "item alignment mask");
2097 SYSCTL_ADD_PROC(NULL, SYSCTL_CHILDREN(oid), OID_AUTO,
2098 "efficiency", CTLFLAG_RD | CTLTYPE_INT | CTLFLAG_MPSAFE,
2099 keg, 0, sysctl_handle_uma_slab_efficiency, "I",
2100 "Slab utilization (100 - internal fragmentation %)");
2101 domainoid = SYSCTL_ADD_NODE(NULL, SYSCTL_CHILDREN(oid),
2102 OID_AUTO, "domain", CTLFLAG_RD, NULL, "");
2103 for (i = 0; i < domains; i++) {
2104 dom = &keg->uk_domain[i];
2105 oid = SYSCTL_ADD_NODE(NULL, SYSCTL_CHILDREN(domainoid),
2106 OID_AUTO, VM_DOMAIN(i)->vmd_name, CTLFLAG_RD,
2108 SYSCTL_ADD_U32(NULL, SYSCTL_CHILDREN(oid), OID_AUTO,
2109 "pages", CTLFLAG_RD, &dom->ud_pages, 0,
2110 "Total pages currently allocated from VM");
2111 SYSCTL_ADD_U32(NULL, SYSCTL_CHILDREN(oid), OID_AUTO,
2112 "free", CTLFLAG_RD, &dom->ud_free, 0,
2113 "items free in the slab layer");
2116 SYSCTL_ADD_CONST_STRING(NULL, SYSCTL_CHILDREN(oid), OID_AUTO,
2117 "name", CTLFLAG_RD, nokeg, "Keg name");
2120 * Information about zone limits.
2122 oid = SYSCTL_ADD_NODE(NULL, SYSCTL_CHILDREN(zone->uz_oid), OID_AUTO,
2123 "limit", CTLFLAG_RD, NULL, "");
2124 SYSCTL_ADD_PROC(NULL, SYSCTL_CHILDREN(oid), OID_AUTO,
2125 "items", CTLFLAG_RD | CTLTYPE_U64 | CTLFLAG_MPSAFE,
2126 zone, 0, sysctl_handle_uma_zone_items, "QU",
2127 "current number of allocated items if limit is set");
2128 SYSCTL_ADD_U64(NULL, SYSCTL_CHILDREN(oid), OID_AUTO,
2129 "max_items", CTLFLAG_RD, &zone->uz_max_items, 0,
2130 "Maximum number of cached items");
2131 SYSCTL_ADD_U32(NULL, SYSCTL_CHILDREN(oid), OID_AUTO,
2132 "sleepers", CTLFLAG_RD, &zone->uz_sleepers, 0,
2133 "Number of threads sleeping at limit");
2134 SYSCTL_ADD_U64(NULL, SYSCTL_CHILDREN(oid), OID_AUTO,
2135 "sleeps", CTLFLAG_RD, &zone->uz_sleeps, 0,
2136 "Total zone limit sleeps");
2137 SYSCTL_ADD_U64(NULL, SYSCTL_CHILDREN(oid), OID_AUTO,
2138 "bucket_max", CTLFLAG_RD, &zone->uz_bkt_max, 0,
2139 "Maximum number of items in the bucket cache");
2140 SYSCTL_ADD_U64(NULL, SYSCTL_CHILDREN(oid), OID_AUTO,
2141 "bucket_cnt", CTLFLAG_RD, &zone->uz_bkt_count, 0,
2142 "Number of items in the bucket cache");
2145 * Per-domain zone information.
2147 domainoid = SYSCTL_ADD_NODE(NULL, SYSCTL_CHILDREN(zone->uz_oid),
2148 OID_AUTO, "domain", CTLFLAG_RD, NULL, "");
2149 if ((zone->uz_flags & UMA_ZONE_NUMA) == 0)
2151 for (i = 0; i < domains; i++) {
2152 zdom = &zone->uz_domain[i];
2153 oid = SYSCTL_ADD_NODE(NULL, SYSCTL_CHILDREN(domainoid),
2154 OID_AUTO, VM_DOMAIN(i)->vmd_name, CTLFLAG_RD, NULL, "");
2155 SYSCTL_ADD_LONG(NULL, SYSCTL_CHILDREN(oid), OID_AUTO,
2156 "nitems", CTLFLAG_RD, &zdom->uzd_nitems,
2157 "number of items in this domain");
2158 SYSCTL_ADD_LONG(NULL, SYSCTL_CHILDREN(oid), OID_AUTO,
2159 "imax", CTLFLAG_RD, &zdom->uzd_imax,
2160 "maximum item count in this period");
2161 SYSCTL_ADD_LONG(NULL, SYSCTL_CHILDREN(oid), OID_AUTO,
2162 "imin", CTLFLAG_RD, &zdom->uzd_imin,
2163 "minimum item count in this period");
2164 SYSCTL_ADD_LONG(NULL, SYSCTL_CHILDREN(oid), OID_AUTO,
2165 "wss", CTLFLAG_RD, &zdom->uzd_wss,
2166 "Working set size");
2170 * General statistics.
2172 oid = SYSCTL_ADD_NODE(NULL, SYSCTL_CHILDREN(zone->uz_oid), OID_AUTO,
2173 "stats", CTLFLAG_RD, NULL, "");
2174 SYSCTL_ADD_PROC(NULL, SYSCTL_CHILDREN(oid), OID_AUTO,
2175 "current", CTLFLAG_RD | CTLTYPE_INT | CTLFLAG_MPSAFE,
2176 zone, 1, sysctl_handle_uma_zone_cur, "I",
2177 "Current number of allocated items");
2178 SYSCTL_ADD_PROC(NULL, SYSCTL_CHILDREN(oid), OID_AUTO,
2179 "allocs", CTLFLAG_RD | CTLTYPE_U64 | CTLFLAG_MPSAFE,
2180 zone, 0, sysctl_handle_uma_zone_allocs, "QU",
2181 "Total allocation calls");
2182 SYSCTL_ADD_PROC(NULL, SYSCTL_CHILDREN(oid), OID_AUTO,
2183 "frees", CTLFLAG_RD | CTLTYPE_U64 | CTLFLAG_MPSAFE,
2184 zone, 0, sysctl_handle_uma_zone_frees, "QU",
2185 "Total free calls");
2186 SYSCTL_ADD_COUNTER_U64(NULL, SYSCTL_CHILDREN(oid), OID_AUTO,
2187 "fails", CTLFLAG_RD, &zone->uz_fails,
2188 "Number of allocation failures");
2189 SYSCTL_ADD_U64(NULL, SYSCTL_CHILDREN(oid), OID_AUTO,
2190 "xdomain", CTLFLAG_RD, &zone->uz_xdomain, 0,
2191 "Free calls from the wrong domain");
2194 struct uma_zone_count {
2200 zone_count(uma_zone_t zone, void *arg)
2202 struct uma_zone_count *cnt;
2206 * Some zones are rapidly created with identical names and
2207 * destroyed out of order. This can lead to gaps in the count.
2208 * Use one greater than the maximum observed for this name.
2210 if (strcmp(zone->uz_name, cnt->name) == 0)
2211 cnt->count = MAX(cnt->count,
2212 zone->uz_namecnt + 1);
2216 zone_update_caches(uma_zone_t zone)
2220 for (i = 0; i <= mp_maxid; i++) {
2221 cache_set_uz_size(&zone->uz_cpu[i], zone->uz_size);
2222 cache_set_uz_flags(&zone->uz_cpu[i], zone->uz_flags);
2227 * Zone header ctor. This initializes all fields, locks, etc.
2229 * Arguments/Returns follow uma_ctor specifications
2230 * udata Actually uma_zctor_args
2233 zone_ctor(void *mem, int size, void *udata, int flags)
2235 struct uma_zone_count cnt;
2236 struct uma_zctor_args *arg = udata;
2237 uma_zone_t zone = mem;
2243 zone->uz_name = arg->name;
2244 zone->uz_ctor = arg->ctor;
2245 zone->uz_dtor = arg->dtor;
2246 zone->uz_init = NULL;
2247 zone->uz_fini = NULL;
2248 zone->uz_sleeps = 0;
2249 zone->uz_xdomain = 0;
2250 zone->uz_bucket_size = 0;
2251 zone->uz_bucket_size_min = 0;
2252 zone->uz_bucket_size_max = BUCKET_MAX;
2254 zone->uz_warning = NULL;
2255 /* The domain structures follow the cpu structures. */
2256 zone->uz_domain = (struct uma_zone_domain *)&zone->uz_cpu[mp_ncpus];
2257 zone->uz_bkt_max = ULONG_MAX;
2258 timevalclear(&zone->uz_ratecheck);
2260 /* Count the number of duplicate names. */
2261 cnt.name = arg->name;
2263 zone_foreach(zone_count, &cnt);
2264 zone->uz_namecnt = cnt.count;
2265 ZONE_LOCK_INIT(zone, (arg->flags & UMA_ZONE_MTXCLASS));
2267 for (i = 0; i < vm_ndomains; i++)
2268 TAILQ_INIT(&zone->uz_domain[i].uzd_buckets);
2271 if (arg->uminit == trash_init && arg->fini == trash_fini)
2272 zone->uz_flags |= UMA_ZFLAG_TRASH | UMA_ZFLAG_CTORDTOR;
2276 * This is a pure cache zone, no kegs.
2279 KASSERT((arg->flags & UMA_ZFLAG_CACHE) != 0,
2280 ("zone_ctor: Import specified for non-cache zone."));
2281 if (arg->flags & UMA_ZONE_VM)
2282 arg->flags |= UMA_ZFLAG_CACHEONLY;
2283 zone->uz_flags = arg->flags;
2284 zone->uz_size = arg->size;
2285 zone->uz_import = arg->import;
2286 zone->uz_release = arg->release;
2287 zone->uz_arg = arg->arg;
2288 rw_wlock(&uma_rwlock);
2289 LIST_INSERT_HEAD(&uma_cachezones, zone, uz_link);
2290 rw_wunlock(&uma_rwlock);
2295 * Use the regular zone/keg/slab allocator.
2297 zone->uz_import = zone_import;
2298 zone->uz_release = zone_release;
2299 zone->uz_arg = zone;
2302 if (arg->flags & UMA_ZONE_SECONDARY) {
2303 KASSERT((zone->uz_flags & UMA_ZONE_SECONDARY) == 0,
2304 ("Secondary zone requested UMA_ZFLAG_INTERNAL"));
2305 KASSERT(arg->keg != NULL, ("Secondary zone on zero'd keg"));
2306 zone->uz_init = arg->uminit;
2307 zone->uz_fini = arg->fini;
2308 zone->uz_flags |= UMA_ZONE_SECONDARY;
2309 rw_wlock(&uma_rwlock);
2311 LIST_FOREACH(z, &keg->uk_zones, uz_link) {
2312 if (LIST_NEXT(z, uz_link) == NULL) {
2313 LIST_INSERT_AFTER(z, zone, uz_link);
2318 rw_wunlock(&uma_rwlock);
2319 } else if (keg == NULL) {
2320 if ((keg = uma_kcreate(zone, arg->size, arg->uminit, arg->fini,
2321 arg->align, arg->flags)) == NULL)
2324 struct uma_kctor_args karg;
2327 /* We should only be here from uma_startup() */
2328 karg.size = arg->size;
2329 karg.uminit = arg->uminit;
2330 karg.fini = arg->fini;
2331 karg.align = arg->align;
2332 karg.flags = arg->flags;
2334 error = keg_ctor(arg->keg, sizeof(struct uma_keg), &karg,
2340 /* Inherit properties from the keg. */
2342 zone->uz_size = keg->uk_size;
2343 zone->uz_flags |= (keg->uk_flags &
2344 (UMA_ZONE_INHERIT | UMA_ZFLAG_INHERIT));
2347 if (__predict_true(booted == BOOT_RUNNING)) {
2348 zone_alloc_counters(zone, NULL);
2349 zone_alloc_sysctl(zone, NULL);
2351 zone->uz_allocs = EARLY_COUNTER;
2352 zone->uz_frees = EARLY_COUNTER;
2353 zone->uz_fails = EARLY_COUNTER;
2356 KASSERT((arg->flags & (UMA_ZONE_MAXBUCKET | UMA_ZONE_NOBUCKET)) !=
2357 (UMA_ZONE_MAXBUCKET | UMA_ZONE_NOBUCKET),
2358 ("Invalid zone flag combination"));
2359 if (arg->flags & UMA_ZFLAG_INTERNAL)
2360 zone->uz_bucket_size_max = zone->uz_bucket_size = 0;
2361 if ((arg->flags & UMA_ZONE_MAXBUCKET) != 0)
2362 zone->uz_bucket_size = BUCKET_MAX;
2363 else if ((arg->flags & UMA_ZONE_MINBUCKET) != 0)
2364 zone->uz_bucket_size_max = zone->uz_bucket_size = BUCKET_MIN;
2365 else if ((arg->flags & UMA_ZONE_NOBUCKET) != 0)
2366 zone->uz_bucket_size = 0;
2368 zone->uz_bucket_size = bucket_select(zone->uz_size);
2369 zone->uz_bucket_size_min = zone->uz_bucket_size;
2370 if (zone->uz_dtor != NULL || zone->uz_ctor != NULL)
2371 zone->uz_flags |= UMA_ZFLAG_CTORDTOR;
2372 zone_update_caches(zone);
2378 * Keg header dtor. This frees all data, destroys locks, frees the hash
2379 * table and removes the keg from the global list.
2381 * Arguments/Returns follow uma_dtor specifications
2385 keg_dtor(void *arg, int size, void *udata)
2388 uint32_t free, pages;
2391 keg = (uma_keg_t)arg;
2393 for (i = 0; i < vm_ndomains; i++) {
2394 free += keg->uk_domain[i].ud_free;
2395 pages += keg->uk_domain[i].ud_pages;
2396 KEG_LOCK_FINI(keg, i);
2399 printf("Freed UMA keg (%s) was not empty (%u items). "
2400 "Lost %u pages of memory.\n",
2401 keg->uk_name ? keg->uk_name : "",
2404 hash_free(&keg->uk_hash);
2410 * Arguments/Returns follow uma_dtor specifications
2414 zone_dtor(void *arg, int size, void *udata)
2419 zone = (uma_zone_t)arg;
2421 sysctl_remove_oid(zone->uz_oid, 1, 1);
2423 if (!(zone->uz_flags & UMA_ZFLAG_INTERNAL))
2426 rw_wlock(&uma_rwlock);
2427 LIST_REMOVE(zone, uz_link);
2428 rw_wunlock(&uma_rwlock);
2430 * XXX there are some races here: the zone can be
2431 * drained, but with the zone lock released it may be
2432 * refilled again before we remove it... we don't care
2433 * for now.
2435 zone_reclaim(zone, M_WAITOK, true);
2437 * We only destroy kegs from non-secondary/non-cache zones.
2439 if ((zone->uz_flags & (UMA_ZONE_SECONDARY | UMA_ZFLAG_CACHE)) == 0) {
2441 rw_wlock(&uma_rwlock);
2442 LIST_REMOVE(keg, uk_link);
2443 rw_wunlock(&uma_rwlock);
2444 zone_free_item(kegs, keg, NULL, SKIP_NONE);
2446 counter_u64_free(zone->uz_allocs);
2447 counter_u64_free(zone->uz_frees);
2448 counter_u64_free(zone->uz_fails);
2449 free(zone->uz_ctlname, M_UMA);
2450 ZONE_LOCK_FINI(zone);
2454 * Traverses every zone in the system and calls a callback
2457 * zfunc A pointer to a function which accepts a zone
2464 zone_foreach(void (*zfunc)(uma_zone_t, void *arg), void *arg)
2470 * Before BOOT_RUNNING we are guaranteed to be single
2471 * threaded, so locking isn't needed. Startup functions
2472 * are allowed to use M_WAITOK.
2474 if (__predict_true(booted == BOOT_RUNNING))
2475 rw_rlock(&uma_rwlock);
2476 LIST_FOREACH(keg, &uma_kegs, uk_link) {
2477 LIST_FOREACH(zone, &keg->uk_zones, uz_link)
2480 LIST_FOREACH(zone, &uma_cachezones, uz_link)
2482 if (__predict_true(booted == BOOT_RUNNING))
2483 rw_runlock(&uma_rwlock);
2487 * Count how many pages we need to bootstrap. VM supplies
2488 * its need for early zones in the argument; we add our own zones,
2489 * which consist of the UMA Slabs, UMA Hash and 9 Bucket zones. The
2490 * zone of zones and zone of kegs are accounted separately.
2492 #define UMA_BOOT_ZONES 11
2493 /* Zone of zones and zone of kegs have arbitrary alignment. */
2494 #define UMA_BOOT_ALIGN 32
2495 static int zsize, ksize;
2497 uma_startup_count(int vm_zones)
2502 ksize = sizeof(struct uma_keg) +
2503 (sizeof(struct uma_domain) * vm_ndomains);
2504 zsize = sizeof(struct uma_zone) +
2505 (sizeof(struct uma_cache) * (mp_maxid + 1)) +
2506 (sizeof(struct uma_zone_domain) * vm_ndomains);
2509 * Memory for the zone of kegs and its keg,
2510 * and for zone of zones.
2512 pages = howmany(roundup(zsize, CACHE_LINE_SIZE) * 2 +
2513 roundup(ksize, CACHE_LINE_SIZE), PAGE_SIZE);
2515 #ifdef UMA_MD_SMALL_ALLOC
2516 zones = UMA_BOOT_ZONES;
2518 zones = UMA_BOOT_ZONES + vm_zones;
2521 size = slab_sizeof(SLAB_MAX_SETSIZE);
2522 space = slab_space(SLAB_MAX_SETSIZE);
2524 /* Memory for the rest of startup zones, UMA and VM, ... */
2525 if (zsize > space) {
2526 /* See keg_large_init(). */
2529 ppera = howmany(roundup2(zsize, UMA_BOOT_ALIGN), PAGE_SIZE);
2530 if (PAGE_SIZE * ppera - roundup2(zsize, UMA_BOOT_ALIGN) < size)
2532 pages += (zones + vm_zones) * ppera;
2533 } else if (roundup2(zsize, UMA_BOOT_ALIGN) > space)
2534 /* See keg_small_init() special case for uk_ppera = 1. */
2537 pages += howmany(zones,
2538 space / roundup2(zsize, UMA_BOOT_ALIGN));
2540 /* ... and their kegs. Note that zone of zones allocates a keg! */
2541 pages += howmany(zones + 1,
2542 space / roundup2(ksize, UMA_BOOT_ALIGN));
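/*
 * A worked example with purely illustrative numbers: with zsize = 1472,
 * ksize = 336, CACHE_LINE_SIZE = 64 and PAGE_SIZE = 4096, the fixed cost
 * computed first is howmany(1472 * 2 + 384, 4096) = 1 page for the zone
 * of zones, the zone of kegs and its keg; the howmany() terms that follow
 * it add the slab pages needed to hold one zone structure per startup
 * zone and one keg structure per keg.
 */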
2548 uma_startup(void *mem, int npages)
2550 struct uma_zctor_args args;
2551 uma_keg_t masterkeg;
2555 printf("Entering %s with %d boot pages configured\n", __func__, npages);
2558 rw_init(&uma_rwlock, "UMA lock");
2560 /* Use bootpages memory for the zone of zones and zone of kegs. */
2562 zones = (uma_zone_t)m;
2563 m += roundup(zsize, CACHE_LINE_SIZE);
2564 kegs = (uma_zone_t)m;
2565 m += roundup(zsize, CACHE_LINE_SIZE);
2566 masterkeg = (uma_keg_t)m;
2567 m += roundup(ksize, CACHE_LINE_SIZE);
2568 m = roundup(m, PAGE_SIZE);
2569 npages -= (m - (uintptr_t)mem) / PAGE_SIZE;
2572 /* "manually" create the initial zone */
2573 memset(&args, 0, sizeof(args));
2574 args.name = "UMA Kegs";
2576 args.ctor = keg_ctor;
2577 args.dtor = keg_dtor;
2578 args.uminit = zero_init;
2580 args.keg = masterkeg;
2581 args.align = UMA_BOOT_ALIGN - 1;
2582 args.flags = UMA_ZFLAG_INTERNAL;
2583 zone_ctor(kegs, zsize, &args, M_WAITOK);
2586 boot_pages = npages;
2588 args.name = "UMA Zones";
2590 args.ctor = zone_ctor;
2591 args.dtor = zone_dtor;
2592 args.uminit = zero_init;
2595 args.align = UMA_BOOT_ALIGN - 1;
2596 args.flags = UMA_ZFLAG_INTERNAL;
2597 zone_ctor(zones, zsize, &args, M_WAITOK);
2599 /* Now make a zone for slab headers */
2600 slabzone = uma_zcreate("UMA Slabs", sizeof(struct uma_hash_slab),
2601 NULL, NULL, NULL, NULL, UMA_ALIGN_PTR, UMA_ZFLAG_INTERNAL);
2603 hashzone = uma_zcreate("UMA Hash",
2604 sizeof(struct slabhead *) * UMA_HASH_SIZE_INIT,
2605 NULL, NULL, NULL, NULL, UMA_ALIGN_PTR, UMA_ZFLAG_INTERNAL);
2607 booted = BOOT_STRAPPED;
2615 printf("Entering %s with %d boot pages left\n", __func__, boot_pages);
2617 booted = BOOT_PAGEALLOC;
2625 printf("Entering %s with %d boot pages left\n", __func__, boot_pages);
2627 sx_init(&uma_reclaim_lock, "umareclaim");
2629 booted = BOOT_BUCKETS;
2634 * Initialize our callout handle
2642 TUNABLE_INT_FETCH("vm.debug.divisor", &dbg_divisor);
2643 uma_dbg_cnt = counter_u64_alloc(M_WAITOK);
2644 uma_skip_cnt = counter_u64_alloc(M_WAITOK);
2646 zone_foreach(zone_alloc_counters, NULL);
2647 zone_foreach(zone_alloc_sysctl, NULL);
2648 callout_init(&uma_callout, 1);
2649 callout_reset(&uma_callout, UMA_TIMEOUT * hz, uma_timeout, NULL);
2650 booted = BOOT_RUNNING;
2654 uma_kcreate(uma_zone_t zone, size_t size, uma_init uminit, uma_fini fini,
2655 int align, uint32_t flags)
2657 struct uma_kctor_args args;
2660 args.uminit = uminit;
2662 args.align = (align == UMA_ALIGN_CACHE) ? uma_align_cache : align;
2665 return (zone_alloc_item(kegs, &args, UMA_ANYDOMAIN, M_WAITOK));
2668 /* Public functions */
2671 uma_set_align(int align)
2674 if (align != UMA_ALIGN_CACHE)
2675 uma_align_cache = align;
2680 uma_zcreate(const char *name, size_t size, uma_ctor ctor, uma_dtor dtor,
2681 uma_init uminit, uma_fini fini, int align, uint32_t flags)
2684 struct uma_zctor_args args;
2688 KASSERT(powerof2(align + 1), ("invalid zone alignment %d for \"%s\"",
2691 /* This stuff is essential for the zone ctor */
2692 memset(&args, 0, sizeof(args));
2697 args.uminit = uminit;
2701 * Inject procedures which check for memory use after free if we are
2702 * allowed to scramble the memory while it is not allocated. This
2703 * requires that: UMA is actually able to access the memory, no init
2704 * or fini procedures, no dependency on the initial value of the
2705 * memory, and no (legitimate) use of the memory after free. Note,
2706 * the ctor and dtor do not need to be empty.
2708 * XXX UMA_ZONE_OFFPAGE.
2710 if ((!(flags & (UMA_ZONE_ZINIT | UMA_ZONE_NOFREE))) &&
2711 uminit == NULL && fini == NULL) {
2712 args.uminit = trash_init;
2713 args.fini = trash_fini;
2720 if (booted < BOOT_BUCKETS) {
2723 sx_slock(&uma_reclaim_lock);
2726 res = zone_alloc_item(zones, &args, UMA_ANYDOMAIN, M_WAITOK);
2728 sx_sunlock(&uma_reclaim_lock);
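/*
 * A minimal usage sketch for uma_zcreate() above; "foo", struct foo and
 * foo_zone are hypothetical names used only for illustration:
 *
 *	static uma_zone_t foo_zone;
 *	struct foo { uint64_t f_cookie; };
 *
 *	foo_zone = uma_zcreate("foo", sizeof(struct foo), NULL, NULL,
 *	    NULL, NULL, UMA_ALIGN_PTR, 0);
 *	struct foo *fp = uma_zalloc(foo_zone, M_WAITOK | M_ZERO);
 *	...
 *	uma_zfree(foo_zone, fp);
 *	uma_zdestroy(foo_zone);
 */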
2734 uma_zsecond_create(char *name, uma_ctor ctor, uma_dtor dtor,
2735 uma_init zinit, uma_fini zfini, uma_zone_t master)
2737 struct uma_zctor_args args;
2742 keg = master->uz_keg;
2743 memset(&args, 0, sizeof(args));
2745 args.size = keg->uk_size;
2748 args.uminit = zinit;
2750 args.align = keg->uk_align;
2751 args.flags = keg->uk_flags | UMA_ZONE_SECONDARY;
2754 if (booted < BOOT_BUCKETS) {
2757 sx_slock(&uma_reclaim_lock);
2760 /* XXX Attaches only one keg of potentially many. */
2761 res = zone_alloc_item(zones, &args, UMA_ANYDOMAIN, M_WAITOK);
2763 sx_sunlock(&uma_reclaim_lock);
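/*
 * A sketch of secondary zone creation; foo_zone, foo_ref_ctor and
 * foo_ref_dtor are hypothetical. The secondary zone shares the master's
 * keg (and thus its item size and slabs) but layers its own ctor/dtor
 * and init/fini on top:
 *
 *	ref_zone = uma_zsecond_create("foo refs", foo_ref_ctor,
 *	    foo_ref_dtor, NULL, NULL, foo_zone);
 */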
2769 uma_zcache_create(char *name, int size, uma_ctor ctor, uma_dtor dtor,
2770 uma_init zinit, uma_fini zfini, uma_import zimport,
2771 uma_release zrelease, void *arg, int flags)
2773 struct uma_zctor_args args;
2775 memset(&args, 0, sizeof(args));
2780 args.uminit = zinit;
2782 args.import = zimport;
2783 args.release = zrelease;
2786 args.flags = flags | UMA_ZFLAG_CACHE;
2788 return (zone_alloc_item(zones, &args, UMA_ANYDOMAIN, M_WAITOK));
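/*
 * A sketch of a pure cache zone; foo_import, foo_release and foo_arg are
 * hypothetical. The callbacks have the same shape as zone_import() and
 * zone_release() below: the import function fills an array of item
 * pointers from the backing store and the release function returns them
 * to it.
 *
 *	cache_zone = uma_zcache_create("foo cache", sizeof(struct foo),
 *	    NULL, NULL, NULL, NULL, foo_import, foo_release, foo_arg, 0);
 */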
2793 uma_zdestroy(uma_zone_t zone)
2796 sx_slock(&uma_reclaim_lock);
2797 zone_free_item(zones, zone, NULL, SKIP_NONE);
2798 sx_sunlock(&uma_reclaim_lock);
2802 uma_zwait(uma_zone_t zone)
2806 item = uma_zalloc_arg(zone, NULL, M_WAITOK);
2807 uma_zfree(zone, item);
2811 uma_zalloc_pcpu_arg(uma_zone_t zone, void *udata, int flags)
2817 MPASS(zone->uz_flags & UMA_ZONE_PCPU);
2819 item = uma_zalloc_arg(zone, udata, flags & ~M_ZERO);
2820 if (item != NULL && (flags & M_ZERO)) {
2822 for (i = 0; i <= mp_maxid; i++)
2823 bzero(zpcpu_get_cpu(item, i), zone->uz_size);
2825 bzero(item, zone->uz_size);
2832 * A stub while both regular and pcpu cases are identical.
2835 uma_zfree_pcpu_arg(uma_zone_t zone, void *item, void *udata)
2839 MPASS(zone->uz_flags & UMA_ZONE_PCPU);
2841 uma_zfree_arg(zone, item, udata);
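/*
 * A sketch of per-CPU zone usage; pcpu64_zone is hypothetical. Each
 * allocation returns one copy of the item per CPU and zpcpu_get()
 * selects the current CPU's copy:
 *
 *	pcpu64_zone = uma_zcreate("example pcpu64", sizeof(uint64_t),
 *	    NULL, NULL, NULL, NULL, UMA_ALIGN_CACHE, UMA_ZONE_PCPU);
 *	uint64_t *p = uma_zalloc_pcpu(pcpu64_zone, M_WAITOK | M_ZERO);
 *	(*zpcpu_get(p))++;		increment this CPU's copy
 *	uma_zfree_pcpu(pcpu64_zone, p);
 */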
2845 #define UMA_ALWAYS_CTORDTOR 1
2847 #define UMA_ALWAYS_CTORDTOR 0
2851 item_ctor(uma_zone_t zone, int size, void *udata, int flags, void *item)
2856 skipdbg = uma_dbg_zskip(zone, item);
2857 if (!skipdbg && (zone->uz_flags & UMA_ZFLAG_TRASH) != 0 &&
2858 zone->uz_ctor != trash_ctor)
2859 trash_ctor(item, size, udata, flags);
2861 if (__predict_false(zone->uz_ctor != NULL) &&
2862 zone->uz_ctor(item, size, udata, flags) != 0) {
2863 counter_u64_add(zone->uz_fails, 1);
2864 zone_free_item(zone, item, udata, SKIP_DTOR | SKIP_CNT);
2869 uma_dbg_alloc(zone, NULL, item);
2878 item_dtor(uma_zone_t zone, void *item, int size, void *udata,
2879 enum zfreeskip skip)
2884 skipdbg = uma_dbg_zskip(zone, item);
2885 if (skip == SKIP_NONE && !skipdbg) {
2886 if ((zone->uz_flags & UMA_ZONE_MALLOC) != 0)
2887 uma_dbg_free(zone, udata, item);
2889 uma_dbg_free(zone, NULL, item);
2892 if (__predict_true(skip < SKIP_DTOR)) {
2893 if (zone->uz_dtor != NULL)
2894 zone->uz_dtor(item, size, udata);
2896 if (!skipdbg && (zone->uz_flags & UMA_ZFLAG_TRASH) != 0 &&
2897 zone->uz_dtor != trash_dtor)
2898 trash_dtor(item, size, udata);
2905 uma_zalloc_arg(uma_zone_t zone, void *udata, int flags)
2907 uma_cache_bucket_t bucket;
2910 int domain, size, uz_flags;
2912 /* Enable entropy collection for RANDOM_ENABLE_UMA kernel option */
2913 random_harvest_fast_uma(&zone, sizeof(zone), RANDOM_UMA);
2915 /* This is the fast path allocation */
2916 CTR4(KTR_UMA, "uma_zalloc_arg thread %x zone %s(%p) flags %d",
2917 curthread, zone->uz_name, zone, flags);
2920 if (flags & M_WAITOK) {
2921 WITNESS_WARN(WARN_GIANTOK | WARN_SLEEPOK, NULL,
2922 "uma_zalloc_arg: zone \"%s\"", zone->uz_name);
2927 KASSERT((flags & M_EXEC) == 0, ("uma_zalloc_arg: called with M_EXEC"));
2928 KASSERT(curthread->td_critnest == 0 || SCHEDULER_STOPPED(),
2929 ("uma_zalloc_arg: called with spinlock or critical section held"));
2930 if (zone->uz_flags & UMA_ZONE_PCPU)
2931 KASSERT((flags & M_ZERO) == 0, ("allocating from a pcpu zone "
2932 "with M_ZERO passed"));
2935 #ifdef DEBUG_MEMGUARD
2936 if (memguard_cmp_zone(zone)) {
2937 item = memguard_alloc(zone->uz_size, flags);
2939 if (zone->uz_init != NULL &&
2940 zone->uz_init(item, zone->uz_size, flags) != 0)
2942 if (zone->uz_ctor != NULL &&
2943 zone->uz_ctor(item, zone->uz_size, udata,
2945 counter_u64_add(zone->uz_fails, 1);
2946 zone->uz_fini(item, zone->uz_size);
2951 /* This is unfortunate but should not be fatal. */
2955 * If possible, allocate from the per-CPU cache. There are two
2956 * requirements for safe access to the per-CPU cache: (1) the thread
2957 * accessing the cache must not be preempted or yield during access,
2958 * and (2) the thread must not migrate CPUs without switching which
2959 * cache it accesses. We rely on a critical section to prevent
2960 * preemption and migration. We release the critical section in
2961 * order to acquire the zone mutex if we are unable to allocate from
2962 * the current cache; when we re-acquire the critical section, we
2963 * must detect and handle migration if it has occurred.
2967 cache = &zone->uz_cpu[curcpu];
2968 bucket = &cache->uc_allocbucket;
2969 size = cache_uz_size(cache);
2970 uz_flags = cache_uz_flags(cache);
2971 if (__predict_true(bucket->ucb_cnt != 0)) {
2972 item = cache_bucket_pop(cache, bucket);
2974 if (__predict_false((uz_flags & UMA_ZFLAG_CTORDTOR) != 0 ||
2975 UMA_ALWAYS_CTORDTOR))
2976 return (item_ctor(zone, size, udata, flags, item));
2981 } while (cache_alloc(zone, cache, udata, flags));
2985 * We cannot get a bucket, so try to return a single item.
2987 if (uz_flags & UMA_ZONE_NUMA)
2988 domain = PCPU_GET(domain);
2990 domain = UMA_ANYDOMAIN;
2991 return (zone_alloc_item(zone, udata, domain, flags));
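/*
 * The shape of the fast path described above, reduced to a sketch: the
 * critical section pins the thread to its current CPU's cache for the
 * duration of the bucket operation.
 *
 *	critical_enter();
 *	cache = &zone->uz_cpu[curcpu];
 *	... pop from cache->uc_allocbucket, or fall back to cache_alloc() ...
 *	critical_exit();
 */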
2995 * Replenish an alloc bucket and possibly restore an old one. Called in
2996 * a critical section. Returns in a critical section.
2998 * A false return value indicates an allocation failure.
2999 * A true return value indicates success and the caller should retry.
3001 static __noinline bool
3002 cache_alloc(uma_zone_t zone, uma_cache_t cache, void *udata, int flags)
3004 uma_zone_domain_t zdom;
3005 uma_bucket_t bucket;
3009 CRITICAL_ASSERT(curthread);
3012 * If we have run out of items in our alloc bucket, see
3013 * if we can switch with the free bucket.
3015 if (cache->uc_freebucket.ucb_cnt != 0) {
3016 cache_bucket_swap(&cache->uc_freebucket, &cache->uc_allocbucket);
3021 * Discard any empty allocation bucket while we hold no locks.
3023 bucket = cache_bucket_unload_alloc(cache);
3026 bucket_free(zone, bucket, udata);
3028 /* Short-circuit for zones without buckets and low memory. */
3029 if (zone->uz_bucket_size == 0 || bucketdisable) {
3035 * The attempt to retrieve the item from the per-CPU cache has failed, so
3036 * we must go back to the zone. This requires the zone lock, so we
3037 * must drop the critical section, then re-acquire it when we go back
3038 * to the cache. Since the critical section is released, we may be
3039 * preempted or migrate. As such, make sure not to maintain any
3040 * thread-local state specific to the cache from prior to releasing
3041 * the critical section.
3044 if (ZONE_TRYLOCK(zone) == 0) {
3045 /* Record contention to size the buckets. */
3050 /* See if we lost the race to fill the cache. */
3052 cache = &zone->uz_cpu[curcpu];
3053 if (cache->uc_allocbucket.ucb_bucket != NULL) {
3059 * Check the zone's cache of buckets.
3061 if (zone->uz_flags & UMA_ZONE_NUMA) {
3062 domain = PCPU_GET(domain);
3063 zdom = &zone->uz_domain[domain];
3065 domain = UMA_ANYDOMAIN;
3066 zdom = &zone->uz_domain[0];
3069 if ((bucket = zone_fetch_bucket(zone, zdom)) != NULL) {
3071 KASSERT(bucket->ub_cnt != 0,
3072 ("uma_zalloc_arg: Returning an empty bucket."));
3073 cache_bucket_load_alloc(cache, bucket);
3076 /* We are no longer associated with this CPU. */
3080 * We bump the uz count when the cache size is insufficient to
3081 * handle the working set.
3083 if (lockfail && zone->uz_bucket_size < zone->uz_bucket_size_max)
3084 zone->uz_bucket_size++;
3088 * Fill a bucket and attempt to use it as the alloc bucket.
3090 bucket = zone_alloc_bucket(zone, udata, domain, flags);
3091 CTR3(KTR_UMA, "uma_zalloc: zone %s(%p) bucket zone returned %p",
3092 zone->uz_name, zone, bucket);
3093 if (bucket == NULL) {
3099 * See if we lost the race or were migrated. Cache the
3100 * initialized bucket to make this less likely or claim
3101 * the memory directly.
3105 cache = &zone->uz_cpu[curcpu];
3106 if (cache->uc_allocbucket.ucb_bucket == NULL &&
3107 ((zone->uz_flags & UMA_ZONE_NUMA) == 0 ||
3108 domain == PCPU_GET(domain))) {
3109 cache_bucket_load_alloc(cache, bucket);
3110 zdom->uzd_imax += bucket->ub_cnt;
3111 } else if (zone->uz_bkt_count >= zone->uz_bkt_max) {
3114 bucket_drain(zone, bucket);
3115 bucket_free(zone, bucket, udata);
3119 zone_put_bucket(zone, zdom, bucket, false);
3125 uma_zalloc_domain(uma_zone_t zone, void *udata, int domain, int flags)
3128 /* Enable entropy collection for RANDOM_ENABLE_UMA kernel option */
3129 random_harvest_fast_uma(&zone, sizeof(zone), RANDOM_UMA);
3131 /* This is the fast path allocation */
3133 "uma_zalloc_domain thread %x zone %s(%p) domain %d flags %d",
3134 curthread, zone->uz_name, zone, domain, flags);
3136 if (flags & M_WAITOK) {
3137 WITNESS_WARN(WARN_GIANTOK | WARN_SLEEPOK, NULL,
3138 "uma_zalloc_domain: zone \"%s\"", zone->uz_name);
3140 KASSERT(curthread->td_critnest == 0 || SCHEDULER_STOPPED(),
3141 ("uma_zalloc_domain: called with spinlock or critical section held"));
3143 return (zone_alloc_item(zone, udata, domain, flags));
3147 * Find a slab with some space. Prefer slabs that are partially used over those
3148 * that are totally full. This helps to reduce fragmentation.
3150 * If 'rr' is 1, search all domains starting from 'domain'. Otherwise check only 'domain'.
3154 keg_first_slab(uma_keg_t keg, int domain, bool rr)
3160 KASSERT(domain >= 0 && domain < vm_ndomains,
3161 ("keg_first_slab: domain %d out of range", domain));
3162 KEG_LOCK_ASSERT(keg, domain);
3167 dom = &keg->uk_domain[domain];
3168 if (!LIST_EMPTY(&dom->ud_part_slab))
3169 return (LIST_FIRST(&dom->ud_part_slab));
3170 if (!LIST_EMPTY(&dom->ud_free_slab)) {
3171 slab = LIST_FIRST(&dom->ud_free_slab);
3172 LIST_REMOVE(slab, us_link);
3173 LIST_INSERT_HEAD(&dom->ud_part_slab, slab, us_link);
3177 domain = (domain + 1) % vm_ndomains;
3178 } while (domain != start);
3184 * Fetch an existing slab from a free or partial list. Returns with the
3185 * keg domain lock held if a slab was found or unlocked if not.
3188 keg_fetch_free_slab(uma_keg_t keg, int domain, bool rr, int flags)
3193 /* HASH has a single free list. */
3194 if ((keg->uk_flags & UMA_ZONE_HASH) != 0)
3197 KEG_LOCK(keg, domain);
3198 reserve = (flags & M_USE_RESERVE) != 0 ? 0 : keg->uk_reserve;
3199 if (keg->uk_domain[domain].ud_free <= reserve ||
3200 (slab = keg_first_slab(keg, domain, rr)) == NULL) {
3201 KEG_UNLOCK(keg, domain);
3208 keg_fetch_slab(uma_keg_t keg, uma_zone_t zone, int rdomain, const int flags)
3210 struct vm_domainset_iter di;
3217 * Use the keg's policy if upper layers haven't already specified a
3218 * domain (as happens with first-touch zones).
3220 * To avoid races we run the iterator with the keg lock held, but that
3221 * means that we cannot allow the vm_domainset layer to sleep. Thus,
3222 * clear M_WAITOK and handle low memory conditions locally.
3224 rr = rdomain == UMA_ANYDOMAIN;
3226 aflags = (flags & ~M_WAITOK) | M_NOWAIT;
3227 vm_domainset_iter_policy_ref_init(&di, &keg->uk_dr, &domain,
3235 slab = keg_fetch_free_slab(keg, domain, rr, flags);
3240 * M_NOVM means don't ask at all!
3245 slab = keg_alloc_slab(keg, zone, domain, flags, aflags);
3248 if (!rr && (flags & M_WAITOK) == 0)
3250 if (rr && vm_domainset_iter_policy(&di, &domain) != 0) {
3251 if ((flags & M_WAITOK) != 0) {
3252 vm_wait_doms(&keg->uk_dr.dr_policy->ds_mask);
3260 * We might not have been able to get a slab, but another CPU
3261 * could have while we were unlocked. Check again before we fail.
3264 if ((slab = keg_fetch_free_slab(keg, domain, rr, flags)) != NULL)
3271 slab_alloc_item(uma_keg_t keg, uma_slab_t slab)
3277 KEG_LOCK_ASSERT(keg, slab->us_domain);
3279 dom = &keg->uk_domain[slab->us_domain];
3280 freei = BIT_FFS(keg->uk_ipers, &slab->us_free) - 1;
3281 BIT_CLR(keg->uk_ipers, freei, &slab->us_free);
3282 item = slab_item(slab, keg, freei);
3283 slab->us_freecount--;
3286 /* Move this slab to the full list */
3287 if (slab->us_freecount == 0) {
3288 LIST_REMOVE(slab, us_link);
3289 LIST_INSERT_HEAD(&dom->ud_full_slab, slab, us_link);
3296 zone_import(void *arg, void **bucket, int max, int domain, int flags)
3310 /* Try to keep the buckets totally full */
3311 for (i = 0; i < max; ) {
3312 if ((slab = keg_fetch_slab(keg, zone, domain, flags)) == NULL)
3315 stripe = howmany(max, vm_ndomains);
3317 dom = &keg->uk_domain[slab->us_domain];
3318 while (slab->us_freecount && i < max) {
3319 bucket[i++] = slab_alloc_item(keg, slab);
3320 if (dom->ud_free <= keg->uk_reserve)
3324 * If the zone is striped we pick a new slab for every
3325 * N allocations. Eliminating this conditional will
3326 * instead pick a new domain for each bucket rather
3327 * than stripe within each bucket. The current option
3328 * produces more fragmentation and requires more cpu
3329 * time but yields better distribution.
3331 if ((zone->uz_flags & UMA_ZONE_NUMA) == 0 &&
3332 vm_ndomains > 1 && --stripe == 0)
3336 KEG_UNLOCK(keg, slab->us_domain);
3337 /* Don't block if we allocated any successfully. */
3346 zone_alloc_limit_hard(uma_zone_t zone, int count, int flags)
3348 uint64_t old, new, total, max;
3351 * The hard case. We're going to sleep because there were existing
3352 * sleepers or because we ran out of items. This routine enforces
3353 * fairness by keeping FIFO order.
3355 * First release our ill-gotten gains and make some noise.
3358 zone_free_limit(zone, count);
3359 zone_log_warning(zone);
3360 zone_maxaction(zone);
3361 if (flags & M_NOWAIT)
3365 * We need to allocate an item or set ourselves as a sleeper
3366 * while the sleepq lock is held to avoid wakeup races. This
3367 * is essentially a home-rolled semaphore.
3369 sleepq_lock(&zone->uz_max_items);
3370 old = zone->uz_items;
3372 MPASS(UZ_ITEMS_SLEEPERS(old) < UZ_ITEMS_SLEEPERS_MAX);
3373 /* Cache the max since we will evaluate twice. */
3374 max = zone->uz_max_items;
3375 if (UZ_ITEMS_SLEEPERS(old) != 0 ||
3376 UZ_ITEMS_COUNT(old) >= max)
3377 new = old + UZ_ITEMS_SLEEPER;
3379 new = old + MIN(count, max - old);
3380 } while (atomic_fcmpset_64(&zone->uz_items, &old, new) == 0);
3382 /* We may have successfully allocated under the sleepq lock. */
3383 if (UZ_ITEMS_SLEEPERS(new) == 0) {
3384 sleepq_release(&zone->uz_max_items);
3389 * This is in a different cacheline from uz_items so that we
3390 * don't constantly invalidate the fastpath cacheline when we
3391 * adjust item counts. This could be limited to toggling on transitions.
3394 atomic_add_32(&zone->uz_sleepers, 1);
3395 atomic_add_64(&zone->uz_sleeps, 1);
3398 * We have added ourselves as a sleeper. The sleepq lock
3399 * protects us from wakeup races. Sleep now and then retry.
3401 sleepq_add(&zone->uz_max_items, NULL, "zonelimit", 0, 0);
3402 sleepq_wait(&zone->uz_max_items, PVM);
3405 * After wakeup, remove ourselves as a sleeper and try
3406 * again. We no longer have the sleepq lock for protection.
3408 * Subtract ourselves as a sleeper while attempting to add our count.
3411 atomic_subtract_32(&zone->uz_sleepers, 1);
3412 old = atomic_fetchadd_64(&zone->uz_items,
3413 -(UZ_ITEMS_SLEEPER - count));
3414 /* We're no longer a sleeper. */
3415 old -= UZ_ITEMS_SLEEPER;
3418 * If we're still at the limit, restart. Notably do not
3419 * block on other sleepers. Cache the max value to protect
3420 * against changes via sysctl.
3422 total = UZ_ITEMS_COUNT(old);
3423 max = zone->uz_max_items;
3426 /* Truncate if necessary, otherwise wake other sleepers. */
3427 if (total + count > max) {
3428 zone_free_limit(zone, total + count - max);
3429 count = max - total;
3430 } else if (total + count < max && UZ_ITEMS_SLEEPERS(old) != 0)
3431 wakeup_one(&zone->uz_max_items);
3438 * Allocate 'count' items from our max_items limit. Returns the number
3439 * available. If M_NOWAIT is not specified it will sleep until at least
3440 * one item can be allocated.
3443 zone_alloc_limit(uma_zone_t zone, int count, int flags)
3448 max = zone->uz_max_items;
3452 * We expect normal allocations to succeed with a simple fetchadd.
3455 old = atomic_fetchadd_64(&zone->uz_items, count);
3456 if (__predict_true(old + count <= max))
3460 * If we had some items and no sleepers, just return the
3461 * truncated value. We have to release the excess space
3462 * though because that may wake sleepers who weren't woken
3463 * because we were temporarily over the limit.
3466 zone_free_limit(zone, (old + count) - max);
3469 return (zone_alloc_limit_hard(zone, count, flags));
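/*
 * For illustration: with uz_max_items = 100, uz_items = 97 and no
 * sleepers, a request for count = 8 fetch-adds uz_items to 105. Since
 * that exceeds the limit, 5 items are immediately handed back through
 * zone_free_limit() and the caller is granted the remaining 3; only
 * when no items at all are available, or sleepers already exist, does
 * the slow path above run.
 */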
3473 * Free a number of items back to the limit.
3476 zone_free_limit(uma_zone_t zone, int count)
3483 * In the common case we either have no sleepers or
3484 * are still over the limit and can just return.
3486 old = atomic_fetchadd_64(&zone->uz_items, -count);
3487 if (__predict_true(UZ_ITEMS_SLEEPERS(old) == 0 ||
3488 UZ_ITEMS_COUNT(old) - count >= zone->uz_max_items))
3492 * Moderate the rate of wakeups. Sleepers will continue
3493 * to generate wakeups if necessary.
3495 wakeup_one(&zone->uz_max_items);
3499 zone_alloc_bucket(uma_zone_t zone, void *udata, int domain, int flags)
3501 uma_bucket_t bucket;
3504 CTR1(KTR_UMA, "zone_alloc_bucket: domain %d", domain);
3506 /* Avoid allocs targeting empty domains. */
3507 if (domain != UMA_ANYDOMAIN && VM_DOMAIN_EMPTY(domain))
3508 domain = UMA_ANYDOMAIN;
3510 if (zone->uz_max_items > 0)
3511 maxbucket = zone_alloc_limit(zone, zone->uz_bucket_size,
3514 maxbucket = zone->uz_bucket_size;
3518 /* Don't wait for buckets, preserve caller's NOVM setting. */
3519 bucket = bucket_alloc(zone, udata, M_NOWAIT | (flags & M_NOVM));
3520 if (bucket == NULL) {
3525 bucket->ub_cnt = zone->uz_import(zone->uz_arg, bucket->ub_bucket,
3526 MIN(maxbucket, bucket->ub_entries), domain, flags);
3529 * Initialize the memory if necessary.
3531 if (bucket->ub_cnt != 0 && zone->uz_init != NULL) {
3534 for (i = 0; i < bucket->ub_cnt; i++)
3535 if (zone->uz_init(bucket->ub_bucket[i], zone->uz_size,
3539 * If we couldn't initialize the whole bucket, put the
3540 * rest back onto the freelist.
3542 if (i != bucket->ub_cnt) {
3543 zone->uz_release(zone->uz_arg, &bucket->ub_bucket[i],
3544 bucket->ub_cnt - i);
3546 bzero(&bucket->ub_bucket[i],
3547 sizeof(void *) * (bucket->ub_cnt - i));
3553 cnt = bucket->ub_cnt;
3554 if (bucket->ub_cnt == 0) {
3555 bucket_free(zone, bucket, udata);
3556 counter_u64_add(zone->uz_fails, 1);
3560 if (zone->uz_max_items > 0 && cnt < maxbucket)
3561 zone_free_limit(zone, maxbucket - cnt);
3567 * Allocates a single item from a zone.
3570 * zone The zone to alloc for.
3571 * udata The data to be passed to the constructor.
3572 * domain The domain to allocate from or UMA_ANYDOMAIN.
3573 * flags M_WAITOK, M_NOWAIT, M_ZERO.
3576 * NULL if there is no memory and M_NOWAIT is set
3577 * An item if successful
3581 zone_alloc_item(uma_zone_t zone, void *udata, int domain, int flags)
3585 if (zone->uz_max_items > 0 && zone_alloc_limit(zone, 1, flags) == 0)
3588 /* Avoid allocs targeting empty domains. */
3589 if (domain != UMA_ANYDOMAIN && VM_DOMAIN_EMPTY(domain))
3590 domain = UMA_ANYDOMAIN;
3592 if (zone->uz_import(zone->uz_arg, &item, 1, domain, flags) != 1)
3596 * We have to call both the zone's init (not the keg's init)
3597 * and the zone's ctor. This is because the item is going from
3598 * a keg slab directly to the user, and the user is expecting it
3599 * to be both zone-init'd as well as zone-ctor'd.
3601 if (zone->uz_init != NULL) {
3602 if (zone->uz_init(item, zone->uz_size, flags) != 0) {
3603 zone_free_item(zone, item, udata, SKIP_FINI | SKIP_CNT);
3607 item = item_ctor(zone, zone->uz_size, udata, flags, item);
3611 counter_u64_add(zone->uz_allocs, 1);
3612 CTR3(KTR_UMA, "zone_alloc_item item %p from %s(%p)", item,
3613 zone->uz_name, zone);
3618 counter_u64_add(zone->uz_fails, 1);
3620 if (zone->uz_max_items > 0)
3621 zone_free_limit(zone, 1);
3622 CTR2(KTR_UMA, "zone_alloc_item failed from %s(%p)",
3623 zone->uz_name, zone);
3630 uma_zfree_arg(uma_zone_t zone, void *item, void *udata)
3633 uma_cache_bucket_t bucket;
3634 int domain, itemdomain, uz_flags;
3636 /* Enable entropy collection for RANDOM_ENABLE_UMA kernel option */
3637 random_harvest_fast_uma(&zone, sizeof(zone), RANDOM_UMA);
3639 CTR2(KTR_UMA, "uma_zfree_arg thread %x zone %s", curthread,
3642 KASSERT(curthread->td_critnest == 0 || SCHEDULER_STOPPED(),
3643 ("uma_zfree_arg: called with spinlock or critical section held"));
3645 /* uma_zfree(..., NULL) does nothing, to match free(9). */
3648 #ifdef DEBUG_MEMGUARD
3649 if (is_memguard_addr(item)) {
3650 if (zone->uz_dtor != NULL)
3651 zone->uz_dtor(item, zone->uz_size, udata);
3652 if (zone->uz_fini != NULL)
3653 zone->uz_fini(item, zone->uz_size);
3654 memguard_free(item);
3660 * We are accessing the per-cpu cache without a critical section to
3661 * fetch size and flags. This is acceptable; if we are preempted we
3662 * will simply read another CPU's line.
3664 cache = &zone->uz_cpu[curcpu];
3665 uz_flags = cache_uz_flags(cache);
3666 if (__predict_false((uz_flags & UMA_ZFLAG_CTORDTOR) != 0 ||
3667 UMA_ALWAYS_CTORDTOR))
3668 item_dtor(zone, item, cache_uz_size(cache), udata, SKIP_NONE);
3671 * The race here is acceptable. If we miss it we'll just have to wait
3672 * a little longer for the limits to be reset.
3674 if (__predict_false(uz_flags & UMA_ZFLAG_LIMIT)) {
3675 if (zone->uz_sleepers > 0)
3680 * If possible, free to the per-CPU cache. There are two
3681 * requirements for safe access to the per-CPU cache: (1) the thread
3682 * accessing the cache must not be preempted or yield during access,
3683 * and (2) the thread must not migrate CPUs without switching which
3684 * cache it accesses. We rely on a critical section to prevent
3685 * preemption and migration. We release the critical section in
3686 * order to acquire the zone mutex if we are unable to free to the
3687 * current cache; when we re-acquire the critical section, we must
3688 * detect and handle migration if it has occurred.
3690 domain = itemdomain = 0;
3693 cache = &zone->uz_cpu[curcpu];
3694 bucket = &cache->uc_allocbucket;
3696 if ((uz_flags & UMA_ZONE_NUMA) != 0) {
3697 itemdomain = _vm_phys_domain(pmap_kextract((vm_offset_t)item));
3698 domain = PCPU_GET(domain);
3700 if ((uz_flags & UMA_ZONE_NUMA) != 0 && domain != itemdomain) {
3701 bucket = &cache->uc_crossbucket;
3706 * Try to free into the allocbucket first to give LIFO ordering
3707 * for cache-hot data structures. Spill over into the freebucket
3708 * if necessary. Alloc will swap them if one runs dry.
3710 if (__predict_false(bucket->ucb_cnt >= bucket->ucb_entries))
3711 bucket = &cache->uc_freebucket;
3712 if (__predict_true(bucket->ucb_cnt < bucket->ucb_entries)) {
3713 cache_bucket_push(cache, bucket, item);
3717 } while (cache_free(zone, cache, udata, item, itemdomain));
3721 * If nothing else caught this, we'll just do an internal free.
3724 zone_free_item(zone, item, udata, SKIP_DTOR);
3728 zone_free_bucket(uma_zone_t zone, uma_bucket_t bucket, void *udata,
3729 int domain, int itemdomain)
3731 uma_zone_domain_t zdom;
3735 * Buckets coming from the wrong domain will be entirely for the
3736 * only other domain on two-domain systems. In this case we can
3737 * simply cache them. Otherwise we need to sort them back to
3738 * their correct domains by freeing the contents to the slab layer.
3740 if (domain != itemdomain && vm_ndomains > 2) {
3742 "uma_zfree: zone %s(%p) draining cross bucket %p",
3743 zone->uz_name, zone, bucket);
3744 bucket_drain(zone, bucket);
3745 bucket_free(zone, bucket, udata);
3750 * Attempt to save the bucket in the zone's domain bucket cache.
3752 * We bump the uz count when the cache size is insufficient to
3753 * handle the working set.
3755 if (ZONE_TRYLOCK(zone) == 0) {
3756 /* Record contention to size the buckets. */
3758 if (zone->uz_bucket_size < zone->uz_bucket_size_max)
3759 zone->uz_bucket_size++;
3763 "uma_zfree: zone %s(%p) putting bucket %p on free list",
3764 zone->uz_name, zone, bucket);
3765 /* ub_cnt is pointing to the last free item */
3766 KASSERT(bucket->ub_cnt == bucket->ub_entries,
3767 ("uma_zfree: Attempting to insert partial bucket onto the full list.\n"));
3768 if (zone->uz_bkt_count >= zone->uz_bkt_max) {
3770 bucket_drain(zone, bucket);
3771 bucket_free(zone, bucket, udata);
3773 zdom = &zone->uz_domain[itemdomain];
3774 zone_put_bucket(zone, zdom, bucket, true);
3780 * Populate a free or cross bucket for the current CPU cache. Free any
3781 * existing full bucket either to the zone cache or back to the slab layer.
3783 * Enters and returns in a critical section. A false return indicates that
3784 * we cannot satisfy this free in the cache layer. A true return indicates
3785 * that the caller should retry.
3787 static __noinline bool
3788 cache_free(uma_zone_t zone, uma_cache_t cache, void *udata, void *item,
3791 uma_bucket_t bucket;
3794 CRITICAL_ASSERT(curthread);
3796 if (zone->uz_bucket_size == 0 || bucketdisable)
3799 cache = &zone->uz_cpu[curcpu];
3802 * NUMA domains need to free to the correct zdom. When XDOMAIN
3803 * is enabled this is the zdom of the item and the bucket may be
3804 * the cross bucket if they do not match.
3806 if ((zone->uz_flags & UMA_ZONE_NUMA) != 0)
3808 domain = PCPU_GET(domain);
3810 itemdomain = domain = PCPU_GET(domain);
3813 itemdomain = domain = 0;
3815 if (domain != itemdomain) {
3816 bucket = cache_bucket_unload_cross(cache);
3818 atomic_add_64(&zone->uz_xdomain, bucket->ub_cnt);
3821 bucket = cache_bucket_unload_free(cache);
3824 /* We are no longer associated with this CPU. */
3828 zone_free_bucket(zone, bucket, udata, domain, itemdomain);
3830 bucket = bucket_alloc(zone, udata, M_NOWAIT);
3831 CTR3(KTR_UMA, "uma_zfree: zone %s(%p) allocated bucket %p",
3832 zone->uz_name, zone, bucket);
3836 cache = &zone->uz_cpu[curcpu];
3839 * Check to see if we should be populating the cross bucket. If it
3840 * is already populated, we will fall through and attempt to populate the free bucket.
3843 if ((zone->uz_flags & UMA_ZONE_NUMA) != 0) {
3844 domain = PCPU_GET(domain);
3845 if (domain != itemdomain &&
3846 cache->uc_crossbucket.ucb_bucket == NULL) {
3847 cache_bucket_load_cross(cache, bucket);
3853 * We may have lost the race to fill the bucket or switched CPUs.
3855 if (cache->uc_freebucket.ucb_bucket != NULL) {
3857 bucket_free(zone, bucket, udata);
3860 cache_bucket_load_free(cache, bucket);
3866 uma_zfree_domain(uma_zone_t zone, void *item, void *udata)
3869 /* Enable entropy collection for RANDOM_ENABLE_UMA kernel option */
3870 random_harvest_fast_uma(&zone, sizeof(zone), RANDOM_UMA);
3872 CTR2(KTR_UMA, "uma_zfree_domain thread %x zone %s", curthread,
3875 KASSERT(curthread->td_critnest == 0 || SCHEDULER_STOPPED(),
3876 ("uma_zfree_domain: called with spinlock or critical section held"));
3878 /* uma_zfree(..., NULL) does nothing, to match free(9). */
3881 zone_free_item(zone, item, udata, SKIP_NONE);
3885 slab_free_item(uma_zone_t zone, uma_slab_t slab, void *item)
3892 KEG_LOCK_ASSERT(keg, slab->us_domain);
3894 /* Do we need to remove from any lists? */
3895 dom = &keg->uk_domain[slab->us_domain];
3896 if (slab->us_freecount + 1 == keg->uk_ipers) {
3897 LIST_REMOVE(slab, us_link);
3898 LIST_INSERT_HEAD(&dom->ud_free_slab, slab, us_link);
3899 } else if (slab->us_freecount == 0) {
3900 LIST_REMOVE(slab, us_link);
3901 LIST_INSERT_HEAD(&dom->ud_part_slab, slab, us_link);
3904 /* Slab management. */
3905 freei = slab_item_index(slab, keg, item);
3906 BIT_SET(keg->uk_ipers, freei, &slab->us_free);
3907 slab->us_freecount++;
3909 /* Keg statistics. */
3914 zone_release(void *arg, void **bucket, int cnt)
3927 if (__predict_false((zone->uz_flags & UMA_ZONE_HASH) != 0))
3928 lock = KEG_LOCK(keg, 0);
3929 for (i = 0; i < cnt; i++) {
3931 if (__predict_true((zone->uz_flags & UMA_ZONE_VTOSLAB) != 0)) {
3932 slab = vtoslab((vm_offset_t)item);
3934 mem = (uint8_t *)((uintptr_t)item & (~UMA_SLAB_MASK));
3935 if ((zone->uz_flags & UMA_ZONE_HASH) != 0)
3936 slab = hash_sfind(&keg->uk_hash, mem);
3938 slab = (uma_slab_t)(mem + keg->uk_pgoff);
3940 if (lock != KEG_LOCKPTR(keg, slab->us_domain)) {
3943 lock = KEG_LOCK(keg, slab->us_domain);
3945 slab_free_item(zone, slab, item);
3952 * Frees a single item to any zone.
3955 * zone The zone to free to
3956 * item The item we're freeing
3957 * udata User supplied data for the dtor
3958 * skip Skip dtors and finis
3961 zone_free_item(uma_zone_t zone, void *item, void *udata, enum zfreeskip skip)
3964 item_dtor(zone, item, zone->uz_size, udata, skip);
3966 if (skip < SKIP_FINI && zone->uz_fini)
3967 zone->uz_fini(item, zone->uz_size);
3969 zone->uz_release(zone->uz_arg, &item, 1);
3971 if (skip & SKIP_CNT)
3974 counter_u64_add(zone->uz_frees, 1);
3976 if (zone->uz_max_items > 0)
3977 zone_free_limit(zone, 1);
3982 uma_zone_set_max(uma_zone_t zone, int nitems)
3984 struct uma_bucket_zone *ubz;
3988 * XXX This can misbehave if the zone has any allocations with
3989 * no limit and a limit is imposed. There is currently no
3990 * way to clear a limit.
3993 ubz = bucket_zone_max(zone, nitems);
3994 count = ubz != NULL ? ubz->ubz_entries : 0;
3995 zone->uz_bucket_size_max = zone->uz_bucket_size = count;
3996 if (zone->uz_bucket_size_min > zone->uz_bucket_size_max)
3997 zone->uz_bucket_size_min = zone->uz_bucket_size_max;
3998 zone->uz_max_items = nitems;
3999 zone->uz_flags |= UMA_ZFLAG_LIMIT;
4000 zone_update_caches(zone);
4001 /* We may need to wake waiters. */
4002 wakeup(&zone->uz_max_items);
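/*
 * A usage sketch; foo_zone is hypothetical. The limit is expressed in
 * items and also shrinks the per-CPU bucket size so that small limits
 * remain effective:
 *
 *	uma_zone_set_max(foo_zone, 4096);
 *	max = uma_zone_get_max(foo_zone);
 */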
4010 uma_zone_set_maxcache(uma_zone_t zone, int nitems)
4012 struct uma_bucket_zone *ubz;
4016 ubz = bucket_zone_max(zone, nitems);
4020 if ((zone->uz_flags & UMA_ZONE_NUMA) != 0)
4021 /* Count the cross-domain bucket. */
4024 nitems -= ubz->ubz_entries * bpcpu * mp_ncpus;
4025 zone->uz_bucket_size_max = ubz->ubz_entries;
4027 zone->uz_bucket_size_max = zone->uz_bucket_size = 0;
4029 if (zone->uz_bucket_size_min > zone->uz_bucket_size_max)
4030 zone->uz_bucket_size_min = zone->uz_bucket_size_max;
4031 zone->uz_bkt_max = nitems;
4037 uma_zone_get_max(uma_zone_t zone)
4041 nitems = atomic_load_64(&zone->uz_max_items);
4048 uma_zone_set_warning(uma_zone_t zone, const char *warning)
4051 ZONE_ASSERT_COLD(zone);
4052 zone->uz_warning = warning;
4057 uma_zone_set_maxaction(uma_zone_t zone, uma_maxaction_t maxaction)
4060 ZONE_ASSERT_COLD(zone);
4061 TASK_INIT(&zone->uz_maxaction, 0, (task_fn_t *)maxaction, zone);
4066 uma_zone_get_cur(uma_zone_t zone)
4071 nitems = counter_u64_fetch(zone->uz_allocs) -
4072 counter_u64_fetch(zone->uz_frees);
4074 nitems += atomic_load_64(&zone->uz_cpu[i].uc_allocs) -
4075 atomic_load_64(&zone->uz_cpu[i].uc_frees);
4077 return (nitems < 0 ? 0 : nitems);
4081 uma_zone_get_allocs(uma_zone_t zone)
4086 nitems = counter_u64_fetch(zone->uz_allocs);
4088 nitems += atomic_load_64(&zone->uz_cpu[i].uc_allocs);
4094 uma_zone_get_frees(uma_zone_t zone)
4099 nitems = counter_u64_fetch(zone->uz_frees);
4101 nitems += atomic_load_64(&zone->uz_cpu[i].uc_frees);
4108 uma_zone_set_init(uma_zone_t zone, uma_init uminit)
4113 KEG_ASSERT_COLD(keg);
4114 keg->uk_init = uminit;
4119 uma_zone_set_fini(uma_zone_t zone, uma_fini fini)
4124 KEG_ASSERT_COLD(keg);
4125 keg->uk_fini = fini;
4130 uma_zone_set_zinit(uma_zone_t zone, uma_init zinit)
4133 ZONE_ASSERT_COLD(zone);
4134 zone->uz_init = zinit;
4139 uma_zone_set_zfini(uma_zone_t zone, uma_fini zfini)
4142 ZONE_ASSERT_COLD(zone);
4143 zone->uz_fini = zfini;
4148 uma_zone_set_freef(uma_zone_t zone, uma_free freef)
4153 KEG_ASSERT_COLD(keg);
4154 keg->uk_freef = freef;
4159 uma_zone_set_allocf(uma_zone_t zone, uma_alloc allocf)
4164 KEG_ASSERT_COLD(keg);
4165 keg->uk_allocf = allocf;
4170 uma_zone_reserve(uma_zone_t zone, int items)
4175 KEG_ASSERT_COLD(keg);
4176 keg->uk_reserve = items;
4181 uma_zone_reserve_kva(uma_zone_t zone, int count)
4188 KEG_ASSERT_COLD(keg);
4189 ZONE_ASSERT_COLD(zone);
4191 pages = count / keg->uk_ipers;
4192 if (pages * keg->uk_ipers < count)
4194 pages *= keg->uk_ppera;
4196 #ifdef UMA_MD_SMALL_ALLOC
4197 if (keg->uk_ppera > 1) {
4201 kva = kva_alloc((vm_size_t)pages * PAGE_SIZE);
4208 MPASS(keg->uk_kva == 0);
4211 zone->uz_max_items = pages * keg->uk_ipers;
4212 #ifdef UMA_MD_SMALL_ALLOC
4213 keg->uk_allocf = (keg->uk_ppera > 1) ? noobj_alloc : uma_small_alloc;
4215 keg->uk_allocf = noobj_alloc;
4217 keg->uk_flags |= UMA_ZFLAG_LIMIT | UMA_ZONE_NOFREE;
4218 zone->uz_flags |= UMA_ZFLAG_LIMIT | UMA_ZONE_NOFREE;
4219 zone_update_caches(zone);
4227 uma_prealloc(uma_zone_t zone, int items)
4229 struct vm_domainset_iter di;
4233 int aflags, domain, slabs;
4236 slabs = items / keg->uk_ipers;
4237 if (slabs * keg->uk_ipers < items)
4239 while (slabs-- > 0) {
4241 vm_domainset_iter_policy_ref_init(&di, &keg->uk_dr, &domain,
4244 slab = keg_alloc_slab(keg, zone, domain, M_WAITOK,
4247 dom = &keg->uk_domain[slab->us_domain];
4248 LIST_REMOVE(slab, us_link);
4249 LIST_INSERT_HEAD(&dom->ud_free_slab, slab,
4251 KEG_UNLOCK(keg, slab->us_domain);
4254 if (vm_domainset_iter_policy(&di, &domain) != 0)
4255 vm_wait_doms(&keg->uk_dr.dr_policy->ds_mask);
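/*
 * A usage sketch; foo_zone is hypothetical. Both calls are intended for
 * freshly created zones: uma_zone_reserve() holds items back for
 * M_USE_RESERVE allocations and uma_prealloc() populates slabs up front
 * so early allocations do not have to call into the VM system.
 *
 *	uma_zone_reserve(foo_zone, 32);
 *	uma_prealloc(foo_zone, 1024);
 */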
4262 uma_reclaim(int req)
4265 CTR0(KTR_UMA, "UMA: vm asked us to release pages!");
4266 sx_xlock(&uma_reclaim_lock);
4270 case UMA_RECLAIM_TRIM:
4271 zone_foreach(zone_trim, NULL);
4273 case UMA_RECLAIM_DRAIN:
4274 case UMA_RECLAIM_DRAIN_CPU:
4275 zone_foreach(zone_drain, NULL);
4276 if (req == UMA_RECLAIM_DRAIN_CPU) {
4277 pcpu_cache_drain_safe(NULL);
4278 zone_foreach(zone_drain, NULL);
4282 panic("unhandled reclamation request %d", req);
4286 * Some slabs may have been freed while this zone was visited early in
4287 * the pass above, so visit it again to free pages that became empty once
4288 * the other zones were drained. We have to do the same for buckets.
4290 zone_drain(slabzone, NULL);
4291 bucket_zone_drain();
4292 sx_xunlock(&uma_reclaim_lock);
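/*
 * Typical invocations, matching the requests handled above:
 *
 *	uma_reclaim(UMA_RECLAIM_TRIM);		trim excess free items
 *	uma_reclaim(UMA_RECLAIM_DRAIN);		drain all zone bucket caches
 *	uma_reclaim(UMA_RECLAIM_DRAIN_CPU);	additionally flush per-CPU caches
 */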
4295 static volatile int uma_reclaim_needed;
4298 uma_reclaim_wakeup(void)
4301 if (atomic_fetchadd_int(&uma_reclaim_needed, 1) == 0)
4302 wakeup(uma_reclaim);
4306 uma_reclaim_worker(void *arg __unused)
4310 sx_xlock(&uma_reclaim_lock);
4311 while (atomic_load_int(&uma_reclaim_needed) == 0)
4312 sx_sleep(uma_reclaim, &uma_reclaim_lock, PVM, "umarcl",
4314 sx_xunlock(&uma_reclaim_lock);
4315 EVENTHANDLER_INVOKE(vm_lowmem, VM_LOW_KMEM);
4316 uma_reclaim(UMA_RECLAIM_DRAIN_CPU);
4317 atomic_store_int(&uma_reclaim_needed, 0);
4318 /* Don't fire more than once per second. */
4319 pause("umarclslp", hz);
4325 uma_zone_reclaim(uma_zone_t zone, int req)
4329 case UMA_RECLAIM_TRIM:
4330 zone_trim(zone, NULL);
4332 case UMA_RECLAIM_DRAIN:
4333 zone_drain(zone, NULL);
4335 case UMA_RECLAIM_DRAIN_CPU:
4336 pcpu_cache_drain_safe(zone);
4337 zone_drain(zone, NULL);
4340 panic("unhandled reclamation request %d", req);
4346 uma_zone_exhausted(uma_zone_t zone)
4349 return (atomic_load_32(&zone->uz_sleepers) > 0);
4356 return (uma_kmem_limit);
4360 uma_set_limit(unsigned long limit)
4363 uma_kmem_limit = limit;
4370 return (atomic_load_long(&uma_kmem_total));
4377 return (uma_kmem_limit - uma_size());
4382 * Generate statistics across both the zone and its per-CPU caches. Return
4383 * desired statistics if the pointer is non-NULL for that statistic.
4385 * Note: does not update the zone statistics, as it can't safely clear the
4386 * per-CPU cache statistic.
4390 uma_zone_sumstat(uma_zone_t z, long *cachefreep, uint64_t *allocsp,
4391 uint64_t *freesp, uint64_t *sleepsp, uint64_t *xdomainp)
4394 uint64_t allocs, frees, sleeps, xdomain;
4397 allocs = frees = sleeps = xdomain = 0;
4400 cache = &z->uz_cpu[cpu];
4401 cachefree += cache->uc_allocbucket.ucb_cnt;
4402 cachefree += cache->uc_freebucket.ucb_cnt;
4403 xdomain += cache->uc_crossbucket.ucb_cnt;
4404 cachefree += cache->uc_crossbucket.ucb_cnt;
4405 allocs += cache->uc_allocs;
4406 frees += cache->uc_frees;
4408 allocs += counter_u64_fetch(z->uz_allocs);
4409 frees += counter_u64_fetch(z->uz_frees);
4410 sleeps += z->uz_sleeps;
4411 xdomain += z->uz_xdomain;
4412 if (cachefreep != NULL)
4413 *cachefreep = cachefree;
4414 if (allocsp != NULL)
4418 if (sleepsp != NULL)
4420 if (xdomainp != NULL)
4421 *xdomainp = xdomain;
4426 sysctl_vm_zone_count(SYSCTL_HANDLER_ARGS)
4433 rw_rlock(&uma_rwlock);
4434 LIST_FOREACH(kz, &uma_kegs, uk_link) {
4435 LIST_FOREACH(z, &kz->uk_zones, uz_link)
4438 LIST_FOREACH(z, &uma_cachezones, uz_link)
4441 rw_runlock(&uma_rwlock);
4442 return (sysctl_handle_int(oidp, &count, 0, req));
4446 uma_vm_zone_stats(struct uma_type_header *uth, uma_zone_t z, struct sbuf *sbuf,
4447 struct uma_percpu_stat *ups, bool internal)
4449 uma_zone_domain_t zdom;
4454 for (i = 0; i < vm_ndomains; i++) {
4455 zdom = &z->uz_domain[i];
4456 uth->uth_zone_free += zdom->uzd_nitems;
4458 uth->uth_allocs = counter_u64_fetch(z->uz_allocs);
4459 uth->uth_frees = counter_u64_fetch(z->uz_frees);
4460 uth->uth_fails = counter_u64_fetch(z->uz_fails);
4461 uth->uth_sleeps = z->uz_sleeps;
4462 uth->uth_xdomain = z->uz_xdomain;
4465 * While it is not normally safe to access the cache bucket pointers
4466 * while not on the CPU that owns the cache, we only allow the pointers
4467 * to be exchanged without the zone lock held, not invalidated, so
4468 * accept the possible race associated with bucket exchange during
4469 * monitoring. Use atomic_load_ptr() to ensure that the bucket pointers
4470 * are loaded only once.
4472 for (i = 0; i < mp_maxid + 1; i++) {
4473 bzero(&ups[i], sizeof(*ups));
4474 if (internal || CPU_ABSENT(i))
4476 cache = &z->uz_cpu[i];
4477 ups[i].ups_cache_free += cache->uc_allocbucket.ucb_cnt;
4478 ups[i].ups_cache_free += cache->uc_freebucket.ucb_cnt;
4479 ups[i].ups_cache_free += cache->uc_crossbucket.ucb_cnt;
4480 ups[i].ups_allocs = cache->uc_allocs;
4481 ups[i].ups_frees = cache->uc_frees;
4486 sysctl_vm_zone_stats(SYSCTL_HANDLER_ARGS)
4488 struct uma_stream_header ush;
4489 struct uma_type_header uth;
4490 struct uma_percpu_stat *ups;
4495 uint32_t kfree, pages;
4496 int count, error, i;
4498 error = sysctl_wire_old_buffer(req, 0);
4501 sbuf_new_for_sysctl(&sbuf, NULL, 128, req);
4502 sbuf_clear_flags(&sbuf, SBUF_INCLUDENUL);
4503 ups = malloc((mp_maxid + 1) * sizeof(*ups), M_TEMP, M_WAITOK);
4506 rw_rlock(&uma_rwlock);
4507 LIST_FOREACH(kz, &uma_kegs, uk_link) {
4508 LIST_FOREACH(z, &kz->uk_zones, uz_link)
4512 LIST_FOREACH(z, &uma_cachezones, uz_link)
4516 * Insert stream header.
4518 bzero(&ush, sizeof(ush));
4519 ush.ush_version = UMA_STREAM_VERSION;
4520 ush.ush_maxcpus = (mp_maxid + 1);
4521 ush.ush_count = count;
4522 (void)sbuf_bcat(&sbuf, &ush, sizeof(ush));
4524 LIST_FOREACH(kz, &uma_kegs, uk_link) {
4526 for (i = 0; i < vm_ndomains; i++) {
4527 kfree += kz->uk_domain[i].ud_free;
4528 pages += kz->uk_domain[i].ud_pages;
4530 LIST_FOREACH(z, &kz->uk_zones, uz_link) {
4531 bzero(&uth, sizeof(uth));
4533 strlcpy(uth.uth_name, z->uz_name, UTH_MAX_NAME);
4534 uth.uth_align = kz->uk_align;
4535 uth.uth_size = kz->uk_size;
4536 uth.uth_rsize = kz->uk_rsize;
4537 if (z->uz_max_items > 0) {
4538 items = UZ_ITEMS_COUNT(z->uz_items);
4539 uth.uth_pages = (items / kz->uk_ipers) *
4542 uth.uth_pages = pages;
4543 uth.uth_maxpages = (z->uz_max_items / kz->uk_ipers) *
4545 uth.uth_limit = z->uz_max_items;
4546 uth.uth_keg_free = kfree;
4549 * A zone is secondary if it is not the first entry
4550 * on the keg's zone list.
4552 if ((z->uz_flags & UMA_ZONE_SECONDARY) &&
4553 (LIST_FIRST(&kz->uk_zones) != z))
4554 uth.uth_zone_flags = UTH_ZONE_SECONDARY;
4555 uma_vm_zone_stats(&uth, z, &sbuf, ups,
4556 kz->uk_flags & UMA_ZFLAG_INTERNAL);
4558 (void)sbuf_bcat(&sbuf, &uth, sizeof(uth));
4559 for (i = 0; i < mp_maxid + 1; i++)
4560 (void)sbuf_bcat(&sbuf, &ups[i], sizeof(ups[i]));
4563 LIST_FOREACH(z, &uma_cachezones, uz_link) {
4564 bzero(&uth, sizeof(uth));
4566 strlcpy(uth.uth_name, z->uz_name, UTH_MAX_NAME);
4567 uth.uth_size = z->uz_size;
4568 uma_vm_zone_stats(&uth, z, &sbuf, ups, false);
4570 (void)sbuf_bcat(&sbuf, &uth, sizeof(uth));
4571 for (i = 0; i < mp_maxid + 1; i++)
4572 (void)sbuf_bcat(&sbuf, &ups[i], sizeof(ups[i]));
4575 rw_runlock(&uma_rwlock);
4576 error = sbuf_finish(&sbuf);
4583 sysctl_handle_uma_zone_max(SYSCTL_HANDLER_ARGS)
4585 uma_zone_t zone = *(uma_zone_t *)arg1;
4588 max = uma_zone_get_max(zone);
4589 error = sysctl_handle_int(oidp, &max, 0, req);
4590 if (error || !req->newptr)
4593 uma_zone_set_max(zone, max);
4599 sysctl_handle_uma_zone_cur(SYSCTL_HANDLER_ARGS)
4605 * Some callers want to add sysctls for global zones that
4606 * may not yet exist, so they pass a pointer to a pointer.
4609 zone = *(uma_zone_t *)arg1;
4612 cur = uma_zone_get_cur(zone);
4613 return (sysctl_handle_int(oidp, &cur, 0, req));
4617 sysctl_handle_uma_zone_allocs(SYSCTL_HANDLER_ARGS)
4619 uma_zone_t zone = arg1;
4622 cur = uma_zone_get_allocs(zone);
4623 return (sysctl_handle_64(oidp, &cur, 0, req));
4627 sysctl_handle_uma_zone_frees(SYSCTL_HANDLER_ARGS)
4629 uma_zone_t zone = arg1;
4632 cur = uma_zone_get_frees(zone);
4633 return (sysctl_handle_64(oidp, &cur, 0, req));
4637 sysctl_handle_uma_zone_flags(SYSCTL_HANDLER_ARGS)
4640 uma_zone_t zone = arg1;
4643 sbuf_new_for_sysctl(&sbuf, NULL, 0, req);
4644 if (zone->uz_flags != 0)
4645 sbuf_printf(&sbuf, "0x%b", zone->uz_flags, PRINT_UMA_ZFLAGS);
4647 sbuf_printf(&sbuf, "0");
4648 error = sbuf_finish(&sbuf);
4655 sysctl_handle_uma_slab_efficiency(SYSCTL_HANDLER_ARGS)
4657 uma_keg_t keg = arg1;
4658 int avail, effpct, total;
4660 total = keg->uk_ppera * PAGE_SIZE;
4661 if ((keg->uk_flags & UMA_ZONE_OFFPAGE) != 0)
4662 total += slab_sizeof(SLAB_MAX_SETSIZE);
4664 * We consider the client's requested size and alignment here, not the
4665 * computed real size, uk_rsize, because we also adjust the real
4666 * size for internal implementation reasons (max bitset size).
4668 avail = keg->uk_ipers * roundup2(keg->uk_size, keg->uk_align + 1);
4669 if ((keg->uk_flags & UMA_ZONE_PCPU) != 0)
4670 avail *= mp_maxid + 1;
4671 effpct = 100 * avail / total;
4672 return (sysctl_handle_int(oidp, &effpct, 0, req));
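/*
 * A purely illustrative calculation: with PAGE_SIZE = 4096, uk_ppera = 1,
 * an embedded slab header, uk_size = 200 and uk_align = 15, items are
 * charged roundup2(200, 16) = 208 bytes each; with uk_ipers = 19 this
 * gives avail = 3952 and effpct = 3952 * 100 / 4096 = 96.
 */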
4676 sysctl_handle_uma_zone_items(SYSCTL_HANDLER_ARGS)
4678 uma_zone_t zone = arg1;
4681 cur = UZ_ITEMS_COUNT(atomic_load_64(&zone->uz_items));
4682 return (sysctl_handle_64(oidp, &cur, 0, req));
4687 uma_dbg_getslab(uma_zone_t zone, void *item)
4694 * It is safe to return the slab here even though the
4695 * zone is unlocked because the item's allocation state
4696 * essentially holds a reference.
4698 mem = (uint8_t *)((uintptr_t)item & (~UMA_SLAB_MASK));
4699 if ((zone->uz_flags & UMA_ZFLAG_CACHE) != 0)
4701 if (zone->uz_flags & UMA_ZONE_VTOSLAB)
4702 return (vtoslab((vm_offset_t)mem));
4704 if ((keg->uk_flags & UMA_ZONE_HASH) == 0)
4705 return ((uma_slab_t)(mem + keg->uk_pgoff));
4707 slab = hash_sfind(&keg->uk_hash, mem);
4714 uma_dbg_zskip(uma_zone_t zone, void *mem)
4717 if ((zone->uz_flags & UMA_ZFLAG_CACHE) != 0)
4720 return (uma_dbg_kskip(zone->uz_keg, mem));
4724 uma_dbg_kskip(uma_keg_t keg, void *mem)
4728 if (dbg_divisor == 0)
4731 if (dbg_divisor == 1)
4734 idx = (uintptr_t)mem >> PAGE_SHIFT;
4735 if (keg->uk_ipers > 1) {
4736 idx *= keg->uk_ipers;
4737 idx += ((uintptr_t)mem & PAGE_MASK) / keg->uk_rsize;
4740 if ((idx / dbg_divisor) * dbg_divisor != idx) {
4741 counter_u64_add(uma_skip_cnt, 1);
4744 counter_u64_add(uma_dbg_cnt, 1);
4750 * Set up the slab's freei data such that uma_dbg_free can function.
4754 uma_dbg_alloc(uma_zone_t zone, uma_slab_t slab, void *item)
4760 slab = uma_dbg_getslab(zone, item);
4762 panic("uma: item %p did not belong to zone %s\n",
4763 item, zone->uz_name);
4766 freei = slab_item_index(slab, keg, item);
4768 if (BIT_ISSET(keg->uk_ipers, freei, slab_dbg_bits(slab, keg)))
4769 panic("Duplicate alloc of %p from zone %p(%s) slab %p(%d)\n",
4770 item, zone, zone->uz_name, slab, freei);
4771 BIT_SET_ATOMIC(keg->uk_ipers, freei, slab_dbg_bits(slab, keg));
4775 * Verifies freed addresses. Checks for alignment, valid slab membership
4776 * and duplicate frees.
4780 uma_dbg_free(uma_zone_t zone, uma_slab_t slab, void *item)
4786 slab = uma_dbg_getslab(zone, item);
4788 panic("uma: Freed item %p did not belong to zone %s\n",
4789 item, zone->uz_name);
4792 freei = slab_item_index(slab, keg, item);
4794 if (freei >= keg->uk_ipers)
4795 panic("Invalid free of %p from zone %p(%s) slab %p(%d)\n",
4796 item, zone, zone->uz_name, slab, freei);
4798 if (slab_item(slab, keg, freei) != item)
4799 panic("Unaligned free of %p from zone %p(%s) slab %p(%d)\n",
4800 item, zone, zone->uz_name, slab, freei);
4802 if (!BIT_ISSET(keg->uk_ipers, freei, slab_dbg_bits(slab, keg)))
4803 panic("Duplicate free of %p from zone %p(%s) slab %p(%d)\n",
4804 item, zone, zone->uz_name, slab, freei);
4806 BIT_CLR_ATOMIC(keg->uk_ipers, freei, slab_dbg_bits(slab, keg));
4808 #endif /* INVARIANTS */
4812 get_uma_stats(uma_keg_t kz, uma_zone_t z, uint64_t *allocs, uint64_t *used,
4813 uint64_t *sleeps, long *cachefree, uint64_t *xdomain)
4818 if (kz->uk_flags & UMA_ZFLAG_INTERNAL) {
4819 *allocs = counter_u64_fetch(z->uz_allocs);
4820 frees = counter_u64_fetch(z->uz_frees);
4821 *sleeps = z->uz_sleeps;
4825 uma_zone_sumstat(z, cachefree, allocs, &frees, sleeps,
4827 for (i = 0; i < vm_ndomains; i++) {
4828 *cachefree += z->uz_domain[i].uzd_nitems;
4829 if (!((z->uz_flags & UMA_ZONE_SECONDARY) &&
4830 (LIST_FIRST(&kz->uk_zones) != z)))
4831 *cachefree += kz->uk_domain[i].ud_free;
4833 *used = *allocs - frees;
4834 return (((int64_t)*used + *cachefree) * kz->uk_size);
4837 DB_SHOW_COMMAND(uma, db_show_uma)
4839 const char *fmt_hdr, *fmt_entry;
4842 uint64_t allocs, used, sleeps, xdomain;
4844 /* variables for sorting */
4846 uma_zone_t cur_zone, last_zone;
4847 int64_t cur_size, last_size, size;
4850 /* /i option produces machine-parseable CSV output */
4851 if (modif[0] == 'i') {
4852 fmt_hdr = "%s,%s,%s,%s,%s,%s,%s,%s,%s\n";
4853 fmt_entry = "\"%s\",%ju,%jd,%ld,%ju,%ju,%u,%jd,%ju\n";
4855 fmt_hdr = "%18s %6s %7s %7s %11s %7s %7s %10s %8s\n";
4856 fmt_entry = "%18s %6ju %7jd %7ld %11ju %7ju %7u %10jd %8ju\n";
4859 db_printf(fmt_hdr, "Zone", "Size", "Used", "Free", "Requests",
4860 "Sleeps", "Bucket", "Total Mem", "XFree");
4862 /* Sort the zones with largest size first. */
4864 last_size = INT64_MAX;
4869 LIST_FOREACH(kz, &uma_kegs, uk_link) {
4870 LIST_FOREACH(z, &kz->uk_zones, uz_link) {
4872 * In the case of size ties, print out zones
4873 * in the order they are encountered. That is,
4874 * when we encounter the most recently output
4875 * zone, we have already printed all preceding
4876 * ties, and we must print all following ties.
4878 if (z == last_zone) {
4882 size = get_uma_stats(kz, z, &allocs, &used,
4883 &sleeps, &cachefree, &xdomain);
4884 if (size > cur_size && size < last_size + ties)
4892 if (cur_zone == NULL)
4895 size = get_uma_stats(cur_keg, cur_zone, &allocs, &used,
4896 &sleeps, &cachefree, &xdomain);
4897 db_printf(fmt_entry, cur_zone->uz_name,
4898 (uintmax_t)cur_keg->uk_size, (intmax_t)used, cachefree,
4899 (uintmax_t)allocs, (uintmax_t)sleeps,
4900 (unsigned)cur_zone->uz_bucket_size, (intmax_t)size,
4905 last_zone = cur_zone;
4906 last_size = cur_size;
4910 DB_SHOW_COMMAND(umacache, db_show_umacache)
4913 uint64_t allocs, frees;
4917 db_printf("%18s %8s %8s %8s %12s %8s\n", "Zone", "Size", "Used", "Free",
4918 "Requests", "Bucket");
4919 LIST_FOREACH(z, &uma_cachezones, uz_link) {
4920 uma_zone_sumstat(z, &cachefree, &allocs, &frees, NULL, NULL);
4921 for (i = 0; i < vm_ndomains; i++)
4922 cachefree += z->uz_domain[i].uzd_nitems;
4923 db_printf("%18s %8ju %8jd %8ld %12ju %8u\n",
4924 z->uz_name, (uintmax_t)z->uz_size,
4925 (intmax_t)(allocs - frees), cachefree,
4926 (uintmax_t)allocs, z->uz_bucket_size);