From b35b4a9a9c1e9d045ea3e1686ae5506118aa2cb8 Mon Sep 17 00:00:00 2001 From: mav Date: Mon, 16 Apr 2018 03:38:37 +0000 Subject: [PATCH] MFC r329759: 9018 Replace kmem_cache_reap_now() with kmem_cache_reap_soon() illumos/illumos-gate@36a64e62848b51ac5a9a5216e894ec723cfef14e To prevent kmem_cache reaping from blocking other system resources, turn kmem_cache_reap_now() (which blocks) into kmem_cache_reap_soon(). Callers to kmem_cache_reap_soon() should use kmem_cache_reap_active(), which exploits #9017's new taskq_empty(). Reviewed by: Bryan Cantrill Reviewed by: Dan McDonald Reviewed by: Matthew Ahrens Reviewed by: Yuri Pankov Author: Tim Kordas FreeBSD does not use taskqueue for kmem caches reaping, so this change is less dramatic then it is on Illumos, just limiting reaping to 1 time per second. It may possibly be improved later, if needed. --- .../lib/libzpool/common/sys/zfs_context.h | 3 +- .../opensolaris/kern/opensolaris_kmem.c | 25 +++++++++- sys/cddl/compat/opensolaris/sys/kmem.h | 3 +- .../opensolaris/uts/common/fs/zfs/arc.c | 47 ++++++++++++++----- 4 files changed, 63 insertions(+), 15 deletions(-) diff --git a/cddl/contrib/opensolaris/lib/libzpool/common/sys/zfs_context.h b/cddl/contrib/opensolaris/lib/libzpool/common/sys/zfs_context.h index 1f09e35bc6e..5d75a8aa5cf 100644 --- a/cddl/contrib/opensolaris/lib/libzpool/common/sys/zfs_context.h +++ b/cddl/contrib/opensolaris/lib/libzpool/common/sys/zfs_context.h @@ -364,7 +364,8 @@ extern void cv_broadcast(kcondvar_t *cv); #define kmem_cache_alloc(_c, _f) umem_cache_alloc(_c, _f) #define kmem_cache_free(_c, _b) umem_cache_free(_c, _b) #define kmem_debugging() 0 -#define kmem_cache_reap_now(_c) /* nothing */ +#define kmem_cache_reap_active() (B_FALSE) +#define kmem_cache_reap_soon(_c) /* nothing */ #define kmem_cache_set_move(_c, _cb) /* nothing */ #define POINTER_INVALIDATE(_pp) /* nothing */ #define POINTER_IS_VALID(_p) 0 diff --git a/sys/cddl/compat/opensolaris/kern/opensolaris_kmem.c b/sys/cddl/compat/opensolaris/kern/opensolaris_kmem.c index 46eae5559c6..e057aa64db2 100644 --- a/sys/cddl/compat/opensolaris/kern/opensolaris_kmem.c +++ b/sys/cddl/compat/opensolaris/kern/opensolaris_kmem.c @@ -212,9 +212,30 @@ kmem_cache_free(kmem_cache_t *cache, void *buf) #endif } +/* + * Allow our caller to determine if there are running reaps. + * + * This call is very conservative and may return B_TRUE even when + * reaping activity isn't active. If it returns B_FALSE, then reaping + * activity is definitely inactive. + */ +boolean_t +kmem_cache_reap_active(void) +{ + + return (B_FALSE); +} + +/* + * Reap (almost) everything soon. + * + * Note: this does not wait for the reap-tasks to complete. Caller + * should use kmem_cache_reap_active() (above) and/or moderation to + * avoid scheduling too many reap-tasks. + */ #ifdef _KERNEL void -kmem_cache_reap_now(kmem_cache_t *cache) +kmem_cache_reap_soon(kmem_cache_t *cache) { #ifndef KMEM_DEBUG zone_drain(cache->kc_zone); @@ -228,7 +249,7 @@ kmem_reap(void) } #else void -kmem_cache_reap_now(kmem_cache_t *cache __unused) +kmem_cache_reap_soon(kmem_cache_t *cache __unused) { } diff --git a/sys/cddl/compat/opensolaris/sys/kmem.h b/sys/cddl/compat/opensolaris/sys/kmem.h index 75837d29e20..5ec961b197a 100644 --- a/sys/cddl/compat/opensolaris/sys/kmem.h +++ b/sys/cddl/compat/opensolaris/sys/kmem.h @@ -73,7 +73,8 @@ kmem_cache_t *kmem_cache_create(char *name, size_t bufsize, size_t align, void kmem_cache_destroy(kmem_cache_t *cache); void *kmem_cache_alloc(kmem_cache_t *cache, int flags); void kmem_cache_free(kmem_cache_t *cache, void *buf); -void kmem_cache_reap_now(kmem_cache_t *cache); +boolean_t kmem_cache_reap_active(void); +void kmem_cache_reap_soon(kmem_cache_t *); void kmem_reap(void); int kmem_debugging(void); void *calloc(size_t n, size_t s); diff --git a/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/arc.c b/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/arc.c index 29b866b655f..0c5311895ae 100644 --- a/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/arc.c +++ b/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/arc.c @@ -20,7 +20,7 @@ */ /* * Copyright (c) 2005, 2010, Oracle and/or its affiliates. All rights reserved. - * Copyright (c) 2012, Joyent, Inc. All rights reserved. + * Copyright (c) 2018, Joyent, Inc. * Copyright (c) 2011, 2017 by Delphix. All rights reserved. * Copyright (c) 2014 by Saso Kiselkov. All rights reserved. * Copyright 2017 Nexenta Systems, Inc. All rights reserved. @@ -309,6 +309,9 @@ int zfs_arc_evict_batch_limit = 10; /* number of seconds before growing cache again */ static int arc_grow_retry = 60; +/* number of milliseconds before attempting a kmem-cache-reap */ +static int arc_kmem_cache_reap_retry_ms = 1000; + /* shift of arc_c for calculating overflow limit in arc_get_data_impl */ int zfs_arc_overflow_shift = 8; @@ -4414,21 +4417,31 @@ arc_kmem_reap_now(void) #endif #endif + /* + * If a kmem reap is already active, don't schedule more. We must + * check for this because kmem_cache_reap_soon() won't actually + * block on the cache being reaped (this is to prevent callers from + * becoming implicitly blocked by a system-wide kmem reap -- which, + * on a system with many, many full magazines, can take minutes). + */ + if (kmem_cache_reap_active()) + return; + for (i = 0; i < SPA_MAXBLOCKSIZE >> SPA_MINBLOCKSHIFT; i++) { if (zio_buf_cache[i] != prev_cache) { prev_cache = zio_buf_cache[i]; - kmem_cache_reap_now(zio_buf_cache[i]); + kmem_cache_reap_soon(zio_buf_cache[i]); } if (zio_data_buf_cache[i] != prev_data_cache) { prev_data_cache = zio_data_buf_cache[i]; - kmem_cache_reap_now(zio_data_buf_cache[i]); + kmem_cache_reap_soon(zio_data_buf_cache[i]); } } - kmem_cache_reap_now(abd_chunk_cache); - kmem_cache_reap_now(buf_cache); - kmem_cache_reap_now(hdr_full_cache); - kmem_cache_reap_now(hdr_l2only_cache); - kmem_cache_reap_now(range_seg_cache); + kmem_cache_reap_soon(abd_chunk_cache); + kmem_cache_reap_soon(buf_cache); + kmem_cache_reap_soon(hdr_full_cache); + kmem_cache_reap_soon(hdr_l2only_cache); + kmem_cache_reap_soon(range_seg_cache); #ifdef illumos if (zio_arena != NULL) { @@ -4463,6 +4476,7 @@ static void arc_reclaim_thread(void *unused __unused) { hrtime_t growtime = 0; + hrtime_t kmem_reap_time = 0; callb_cpr_t cpr; CALLB_CPR_INIT(&cpr, &arc_reclaim_lock, callb_generic_cpr, FTAG); @@ -4496,7 +4510,7 @@ arc_reclaim_thread(void *unused __unused) int64_t free_memory = arc_available_memory(); if (free_memory < 0) { - + hrtime_t curtime = gethrtime(); arc_no_grow = B_TRUE; arc_warm = B_TRUE; @@ -4504,9 +4518,20 @@ arc_reclaim_thread(void *unused __unused) * Wait at least zfs_grow_retry (default 60) seconds * before considering growing. */ - growtime = gethrtime() + SEC2NSEC(arc_grow_retry); + growtime = curtime + SEC2NSEC(arc_grow_retry); - arc_kmem_reap_now(); + /* + * Wait at least arc_kmem_cache_reap_retry_ms + * between arc_kmem_reap_now() calls. Without + * this check it is possible to end up in a + * situation where we spend lots of time + * reaping caches, while we're near arc_c_min. + */ + if (curtime >= kmem_reap_time) { + arc_kmem_reap_now(); + kmem_reap_time = gethrtime() + + MSEC2NSEC(arc_kmem_cache_reap_retry_ms); + } /* * If we are still low on memory, shrink the ARC -- 2.45.0