summary | refs | log | tree | commit | diff | stats
diff options
context:
space:
mode:
-rw-r--r-- sys/cddl/contrib/opensolaris/uts/common/fs/zfs/trim_map.c 91
1 files changed, 42 insertions, 49 deletions
diff --git a/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/trim_map.c b/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/trim_map.c
index fdb8260..8f06559 100644
--- a/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/trim_map.c
+++ b/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/trim_map.c
@@ -40,17 +40,20 @@
#define TRIM_ZIO_END(vd, offset, size) (offset + \
P2ROUNDUP(size, 1ULL << vd->vdev_top->vdev_ashift))
-#define TRIM_MAP_SINC(tm, size) \
- atomic_add_64(&(tm)->tm_bytes, (size))
+/* Maximum size of a single ATA TRIM segment, in bytes (512 << 16 = 32 MiB). */
+#define TRIM_MAP_SIZE_FACTOR (512 << 16)
-#define TRIM_MAP_SDEC(tm, size) \
- atomic_add_64(&(tm)->tm_bytes, -(size))
+#define TRIM_MAP_SEGS(size) (1 + (size) / TRIM_MAP_SIZE_FACTOR)
-#define TRIM_MAP_QINC(tm) \
- atomic_inc_64(&(tm)->tm_pending); \
+#define TRIM_MAP_ADD(tm, ts) do { \
+ list_insert_tail(&(tm)->tm_head, (ts)); \
+ (tm)->tm_pending += TRIM_MAP_SEGS((ts)->ts_end - (ts)->ts_start); \
+} while (0)
-#define TRIM_MAP_QDEC(tm) \
- atomic_dec_64(&(tm)->tm_pending);
+#define TRIM_MAP_REM(tm, ts) do { \
+ list_remove(&(tm)->tm_head, (ts)); \
+ (tm)->tm_pending -= TRIM_MAP_SEGS((ts)->ts_end - (ts)->ts_start); \
+} while (0)
typedef struct trim_map {
list_t tm_head; /* List of segments sorted by txg. */
@@ -60,7 +63,6 @@ typedef struct trim_map {
list_t tm_pending_writes; /* Writes blocked on in-flight frees. */
kmutex_t tm_lock;
uint64_t tm_pending; /* Count of pending TRIMs. */
- uint64_t tm_bytes; /* Total size in bytes of queued TRIMs. */
} trim_map_t;
typedef struct trim_seg {
@@ -74,13 +76,10 @@ typedef struct trim_seg {
extern boolean_t zfs_trim_enabled;
-static u_int trim_txg_delay = 32;
-static u_int trim_timeout = 30;
-static u_int trim_max_interval = 1;
-/* Limit outstanding TRIMs to 2G (max size for a single TRIM request) */
-static uint64_t trim_vdev_max_bytes = 2147483648;
-/* Limit outstanding TRIMs to 64 (max ranges for a single TRIM request) */
-static u_int trim_vdev_max_pending = 64;
+static u_int trim_txg_delay = 32; /* Keep deleted data up to 32 TXG */
+static u_int trim_timeout = 30; /* Keep deleted data up to 30s */
+static u_int trim_max_interval = 1; /* 1s delays between TRIMs */
+static u_int trim_vdev_max_pending = 10000; /* Keep up to 10K segments */
SYSCTL_DECL(_vfs_zfs);
SYSCTL_NODE(_vfs_zfs, OID_AUTO, trim, CTLFLAG_RD, 0, "ZFS TRIM");
@@ -94,9 +93,6 @@ SYSCTL_UINT(_vfs_zfs_trim, OID_AUTO, max_interval, CTLFLAG_RWTUN,
"Maximum interval between TRIM queue processing (seconds)");
SYSCTL_DECL(_vfs_zfs_vdev);
-SYSCTL_QUAD(_vfs_zfs_vdev, OID_AUTO, trim_max_bytes, CTLFLAG_RWTUN,
- &trim_vdev_max_bytes, 0,
- "Maximum pending TRIM bytes for a vdev");
SYSCTL_UINT(_vfs_zfs_vdev, OID_AUTO, trim_max_pending, CTLFLAG_RWTUN,
&trim_vdev_max_pending, 0,
"Maximum pending TRIM segments for a vdev");
@@ -189,10 +185,8 @@ trim_map_destroy(vdev_t *vd)
mutex_enter(&tm->tm_lock);
while ((ts = list_head(&tm->tm_head)) != NULL) {
avl_remove(&tm->tm_queued_frees, ts);
- list_remove(&tm->tm_head, ts);
+ TRIM_MAP_REM(tm, ts);
kmem_free(ts, sizeof (*ts));
- TRIM_MAP_SDEC(tm, ts->ts_end - ts->ts_start);
- TRIM_MAP_QDEC(tm);
}
mutex_exit(&tm->tm_lock);
@@ -237,40 +231,34 @@ trim_map_segment_add(trim_map_t *tm, uint64_t start, uint64_t end, uint64_t txg)
merge_after = (ts_after != NULL && ts_after->ts_start == end);
if (merge_before && merge_after) {
- TRIM_MAP_SINC(tm, ts_after->ts_start - ts_before->ts_end);
- TRIM_MAP_QDEC(tm);
avl_remove(&tm->tm_queued_frees, ts_before);
- list_remove(&tm->tm_head, ts_before);
+ TRIM_MAP_REM(tm, ts_before);
+ TRIM_MAP_REM(tm, ts_after);
ts_after->ts_start = ts_before->ts_start;
ts_after->ts_txg = txg;
ts_after->ts_time = time;
- list_remove(&tm->tm_head, ts_after);
- list_insert_tail(&tm->tm_head, ts_after);
+ TRIM_MAP_ADD(tm, ts_after);
kmem_free(ts_before, sizeof (*ts_before));
} else if (merge_before) {
- TRIM_MAP_SINC(tm, end - ts_before->ts_end);
+ TRIM_MAP_REM(tm, ts_before);
ts_before->ts_end = end;
ts_before->ts_txg = txg;
ts_before->ts_time = time;
- list_remove(&tm->tm_head, ts_before);
- list_insert_tail(&tm->tm_head, ts_before);
+ TRIM_MAP_ADD(tm, ts_before);
} else if (merge_after) {
- TRIM_MAP_SINC(tm, ts_after->ts_start - start);
+ TRIM_MAP_REM(tm, ts_after);
ts_after->ts_start = start;
ts_after->ts_txg = txg;
ts_after->ts_time = time;
- list_remove(&tm->tm_head, ts_after);
- list_insert_tail(&tm->tm_head, ts_after);
+ TRIM_MAP_ADD(tm, ts_after);
} else {
- TRIM_MAP_SINC(tm, end - start);
- TRIM_MAP_QINC(tm);
ts = kmem_alloc(sizeof (*ts), KM_SLEEP);
ts->ts_start = start;
ts->ts_end = end;
ts->ts_txg = txg;
ts->ts_time = time;
avl_insert(&tm->tm_queued_frees, ts, where);
- list_insert_tail(&tm->tm_head, ts);
+ TRIM_MAP_ADD(tm, ts);
}
}
@@ -286,7 +274,7 @@ trim_map_segment_remove(trim_map_t *tm, trim_seg_t *ts, uint64_t start,
left_over = (ts->ts_start < start);
right_over = (ts->ts_end > end);
- TRIM_MAP_SDEC(tm, end - start);
+ TRIM_MAP_REM(tm, ts);
if (left_over && right_over) {
nts = kmem_alloc(sizeof (*nts), KM_SLEEP);
nts->ts_start = end;
@@ -295,16 +283,16 @@ trim_map_segment_remove(trim_map_t *tm, trim_seg_t *ts, uint64_t start,
nts->ts_time = ts->ts_time;
ts->ts_end = start;
avl_insert_here(&tm->tm_queued_frees, nts, ts, AVL_AFTER);
- list_insert_after(&tm->tm_head, ts, nts);
- TRIM_MAP_QINC(tm);
+ TRIM_MAP_ADD(tm, ts);
+ TRIM_MAP_ADD(tm, nts);
} else if (left_over) {
ts->ts_end = start;
+ TRIM_MAP_ADD(tm, ts);
} else if (right_over) {
ts->ts_start = end;
+ TRIM_MAP_ADD(tm, ts);
} else {
avl_remove(&tm->tm_queued_frees, ts);
- list_remove(&tm->tm_head, ts);
- TRIM_MAP_QDEC(tm);
kmem_free(ts, sizeof (*ts));
}
}
@@ -423,7 +411,8 @@ trim_map_write_done(zio_t *zio)
* the first element's time is not greater than time argument
*/
static trim_seg_t *
-trim_map_first(trim_map_t *tm, uint64_t txg, uint64_t txgsafe, hrtime_t time)
+trim_map_first(trim_map_t *tm, uint64_t txg, uint64_t txgsafe, hrtime_t time,
+ boolean_t force)
{
trim_seg_t *ts;
@@ -432,9 +421,7 @@ trim_map_first(trim_map_t *tm, uint64_t txg, uint64_t txgsafe, hrtime_t time)
ts = list_head(&tm->tm_head);
if (ts != NULL && ts->ts_txg <= txgsafe &&
- (ts->ts_txg <= txg || ts->ts_time <= time ||
- tm->tm_bytes > trim_vdev_max_bytes ||
- tm->tm_pending > trim_vdev_max_pending))
+ (ts->ts_txg <= txg || ts->ts_time <= time || force))
return (ts);
return (NULL);
}
@@ -445,6 +432,7 @@ trim_map_vdev_commit(spa_t *spa, zio_t *zio, vdev_t *vd)
trim_map_t *tm = vd->vdev_trimmap;
trim_seg_t *ts;
uint64_t size, offset, txgtarget, txgsafe;
+ int64_t hard, soft;
hrtime_t timelimit;
ASSERT(vd->vdev_ops->vdev_op_leaf);
@@ -465,16 +453,19 @@ trim_map_vdev_commit(spa_t *spa, zio_t *zio, vdev_t *vd)
}
mutex_enter(&tm->tm_lock);
+ hard = 0;
+ if (tm->tm_pending > trim_vdev_max_pending)
+ hard = (tm->tm_pending - trim_vdev_max_pending) / 4;
+ soft = P2ROUNDUP(hard + tm->tm_pending / trim_timeout + 1, 64);
/* Loop until we have sent all outstanding free's */
- while ((ts = trim_map_first(tm, txgtarget, txgsafe, timelimit))
+ while (soft > 0 &&
+ (ts = trim_map_first(tm, txgtarget, txgsafe, timelimit, hard > 0))
!= NULL) {
- list_remove(&tm->tm_head, ts);
+ TRIM_MAP_REM(tm, ts);
avl_remove(&tm->tm_queued_frees, ts);
avl_add(&tm->tm_inflight_frees, ts);
size = ts->ts_end - ts->ts_start;
offset = ts->ts_start;
- TRIM_MAP_SDEC(tm, size);
- TRIM_MAP_QDEC(tm);
/*
* We drop the lock while we call zio_nowait as the IO
* scheduler can result in a different IO being run e.g.
@@ -484,6 +475,8 @@ trim_map_vdev_commit(spa_t *spa, zio_t *zio, vdev_t *vd)
zio_nowait(zio_trim(zio, spa, vd, offset, size));
+ soft -= TRIM_MAP_SEGS(size);
+ hard -= TRIM_MAP_SEGS(size);
mutex_enter(&tm->tm_lock);
}
mutex_exit(&tm->tm_lock);
OpenPOWER on IntegriCloud