summaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
authormav <mav@FreeBSD.org>2018-04-16 03:38:37 +0000
committermav <mav@FreeBSD.org>2018-04-16 03:38:37 +0000
commitb35b4a9a9c1e9d045ea3e1686ae5506118aa2cb8 (patch)
tree0b716b60b8f17bc296a65f11049ac9dabb56c6e3
parentdd705d8676b7508bc8c2f5e9e5c5319a698b4a3e (diff)
downloadFreeBSD-src-b35b4a9a9c1e9d045ea3e1686ae5506118aa2cb8.zip
FreeBSD-src-b35b4a9a9c1e9d045ea3e1686ae5506118aa2cb8.tar.gz
MFC r329759:
9018 Replace kmem_cache_reap_now() with kmem_cache_reap_soon() illumos/illumos-gate@36a64e62848b51ac5a9a5216e894ec723cfef14e To prevent kmem_cache reaping from blocking other system resources, turn kmem_cache_reap_now() (which blocks) into kmem_cache_reap_soon(). Callers to kmem_cache_reap_soon() should use kmem_cache_reap_active(), which exploits #9017's new taskq_empty(). Reviewed by: Bryan Cantrill <bryan@joyent.com> Reviewed by: Dan McDonald <danmcd@joyent.com> Reviewed by: Matthew Ahrens <mahrens@delphix.com> Reviewed by: Yuri Pankov <yuripv@yuripv.net> Author: Tim Kordas <tim.kordas@joyent.com> FreeBSD does not use taskqueue for kmem cache reaping, so this change is less dramatic than it is on Illumos, just limiting reaping to 1 time per second. It may possibly be improved later, if needed.
-rw-r--r--cddl/contrib/opensolaris/lib/libzpool/common/sys/zfs_context.h3
-rw-r--r--sys/cddl/compat/opensolaris/kern/opensolaris_kmem.c25
-rw-r--r--sys/cddl/compat/opensolaris/sys/kmem.h3
-rw-r--r--sys/cddl/contrib/opensolaris/uts/common/fs/zfs/arc.c47
4 files changed, 63 insertions, 15 deletions
diff --git a/cddl/contrib/opensolaris/lib/libzpool/common/sys/zfs_context.h b/cddl/contrib/opensolaris/lib/libzpool/common/sys/zfs_context.h
index 1f09e35..5d75a8a 100644
--- a/cddl/contrib/opensolaris/lib/libzpool/common/sys/zfs_context.h
+++ b/cddl/contrib/opensolaris/lib/libzpool/common/sys/zfs_context.h
@@ -364,7 +364,8 @@ extern void cv_broadcast(kcondvar_t *cv);
#define kmem_cache_alloc(_c, _f) umem_cache_alloc(_c, _f)
#define kmem_cache_free(_c, _b) umem_cache_free(_c, _b)
#define kmem_debugging() 0
-#define kmem_cache_reap_now(_c) /* nothing */
+#define kmem_cache_reap_active() (B_FALSE)
+#define kmem_cache_reap_soon(_c) /* nothing */
#define kmem_cache_set_move(_c, _cb) /* nothing */
#define POINTER_INVALIDATE(_pp) /* nothing */
#define POINTER_IS_VALID(_p) 0
diff --git a/sys/cddl/compat/opensolaris/kern/opensolaris_kmem.c b/sys/cddl/compat/opensolaris/kern/opensolaris_kmem.c
index 46eae55..e057aa6 100644
--- a/sys/cddl/compat/opensolaris/kern/opensolaris_kmem.c
+++ b/sys/cddl/compat/opensolaris/kern/opensolaris_kmem.c
@@ -212,9 +212,30 @@ kmem_cache_free(kmem_cache_t *cache, void *buf)
#endif
}
+/*
+ * Allow our caller to determine if there are running reaps.
+ *
+ * This call is very conservative and may return B_TRUE even when
+ * reaping activity isn't active. If it returns B_FALSE, then reaping
+ * activity is definitely inactive.
+ */
+boolean_t
+kmem_cache_reap_active(void)
+{
+
+ return (B_FALSE);
+}
+
+/*
+ * Reap (almost) everything soon.
+ *
+ * Note: this does not wait for the reap-tasks to complete. Caller
+ * should use kmem_cache_reap_active() (above) and/or moderation to
+ * avoid scheduling too many reap-tasks.
+ */
#ifdef _KERNEL
void
-kmem_cache_reap_now(kmem_cache_t *cache)
+kmem_cache_reap_soon(kmem_cache_t *cache)
{
#ifndef KMEM_DEBUG
zone_drain(cache->kc_zone);
@@ -228,7 +249,7 @@ kmem_reap(void)
}
#else
void
-kmem_cache_reap_now(kmem_cache_t *cache __unused)
+kmem_cache_reap_soon(kmem_cache_t *cache __unused)
{
}
diff --git a/sys/cddl/compat/opensolaris/sys/kmem.h b/sys/cddl/compat/opensolaris/sys/kmem.h
index 75837d2..5ec961b 100644
--- a/sys/cddl/compat/opensolaris/sys/kmem.h
+++ b/sys/cddl/compat/opensolaris/sys/kmem.h
@@ -73,7 +73,8 @@ kmem_cache_t *kmem_cache_create(char *name, size_t bufsize, size_t align,
void kmem_cache_destroy(kmem_cache_t *cache);
void *kmem_cache_alloc(kmem_cache_t *cache, int flags);
void kmem_cache_free(kmem_cache_t *cache, void *buf);
-void kmem_cache_reap_now(kmem_cache_t *cache);
+boolean_t kmem_cache_reap_active(void);
+void kmem_cache_reap_soon(kmem_cache_t *);
void kmem_reap(void);
int kmem_debugging(void);
void *calloc(size_t n, size_t s);
diff --git a/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/arc.c b/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/arc.c
index 29b866b..0c53118 100644
--- a/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/arc.c
+++ b/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/arc.c
@@ -20,7 +20,7 @@
*/
/*
* Copyright (c) 2005, 2010, Oracle and/or its affiliates. All rights reserved.
- * Copyright (c) 2012, Joyent, Inc. All rights reserved.
+ * Copyright (c) 2018, Joyent, Inc.
* Copyright (c) 2011, 2017 by Delphix. All rights reserved.
* Copyright (c) 2014 by Saso Kiselkov. All rights reserved.
* Copyright 2017 Nexenta Systems, Inc. All rights reserved.
@@ -309,6 +309,9 @@ int zfs_arc_evict_batch_limit = 10;
/* number of seconds before growing cache again */
static int arc_grow_retry = 60;
+/* number of milliseconds before attempting a kmem-cache-reap */
+static int arc_kmem_cache_reap_retry_ms = 1000;
+
/* shift of arc_c for calculating overflow limit in arc_get_data_impl */
int zfs_arc_overflow_shift = 8;
@@ -4414,21 +4417,31 @@ arc_kmem_reap_now(void)
#endif
#endif
+ /*
+ * If a kmem reap is already active, don't schedule more. We must
+ * check for this because kmem_cache_reap_soon() won't actually
+ * block on the cache being reaped (this is to prevent callers from
+ * becoming implicitly blocked by a system-wide kmem reap -- which,
+ * on a system with many, many full magazines, can take minutes).
+ */
+ if (kmem_cache_reap_active())
+ return;
+
for (i = 0; i < SPA_MAXBLOCKSIZE >> SPA_MINBLOCKSHIFT; i++) {
if (zio_buf_cache[i] != prev_cache) {
prev_cache = zio_buf_cache[i];
- kmem_cache_reap_now(zio_buf_cache[i]);
+ kmem_cache_reap_soon(zio_buf_cache[i]);
}
if (zio_data_buf_cache[i] != prev_data_cache) {
prev_data_cache = zio_data_buf_cache[i];
- kmem_cache_reap_now(zio_data_buf_cache[i]);
+ kmem_cache_reap_soon(zio_data_buf_cache[i]);
}
}
- kmem_cache_reap_now(abd_chunk_cache);
- kmem_cache_reap_now(buf_cache);
- kmem_cache_reap_now(hdr_full_cache);
- kmem_cache_reap_now(hdr_l2only_cache);
- kmem_cache_reap_now(range_seg_cache);
+ kmem_cache_reap_soon(abd_chunk_cache);
+ kmem_cache_reap_soon(buf_cache);
+ kmem_cache_reap_soon(hdr_full_cache);
+ kmem_cache_reap_soon(hdr_l2only_cache);
+ kmem_cache_reap_soon(range_seg_cache);
#ifdef illumos
if (zio_arena != NULL) {
@@ -4463,6 +4476,7 @@ static void
arc_reclaim_thread(void *unused __unused)
{
hrtime_t growtime = 0;
+ hrtime_t kmem_reap_time = 0;
callb_cpr_t cpr;
CALLB_CPR_INIT(&cpr, &arc_reclaim_lock, callb_generic_cpr, FTAG);
@@ -4496,7 +4510,7 @@ arc_reclaim_thread(void *unused __unused)
int64_t free_memory = arc_available_memory();
if (free_memory < 0) {
-
+ hrtime_t curtime = gethrtime();
arc_no_grow = B_TRUE;
arc_warm = B_TRUE;
@@ -4504,9 +4518,20 @@ arc_reclaim_thread(void *unused __unused)
* Wait at least zfs_grow_retry (default 60) seconds
* before considering growing.
*/
- growtime = gethrtime() + SEC2NSEC(arc_grow_retry);
+ growtime = curtime + SEC2NSEC(arc_grow_retry);
- arc_kmem_reap_now();
+ /*
+ * Wait at least arc_kmem_cache_reap_retry_ms
+ * between arc_kmem_reap_now() calls. Without
+ * this check it is possible to end up in a
+ * situation where we spend lots of time
+ * reaping caches, while we're near arc_c_min.
+ */
+ if (curtime >= kmem_reap_time) {
+ arc_kmem_reap_now();
+ kmem_reap_time = gethrtime() +
+ MSEC2NSEC(arc_kmem_cache_reap_retry_ms);
+ }
/*
* If we are still low on memory, shrink the ARC
OpenPOWER on IntegriCloud