diff options
-rw-r--r-- | sys/cddl/contrib/opensolaris/uts/common/fs/zfs/trim_map.c | 91 |
1 files changed, 42 insertions, 49 deletions
diff --git a/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/trim_map.c b/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/trim_map.c index fdb8260..8f06559 100644 --- a/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/trim_map.c +++ b/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/trim_map.c @@ -40,17 +40,20 @@ #define TRIM_ZIO_END(vd, offset, size) (offset + \ P2ROUNDUP(size, 1ULL << vd->vdev_top->vdev_ashift)) -#define TRIM_MAP_SINC(tm, size) \ - atomic_add_64(&(tm)->tm_bytes, (size)) +/* Maximal segment size for ATA TRIM. */ +#define TRIM_MAP_SIZE_FACTOR (512 << 16) -#define TRIM_MAP_SDEC(tm, size) \ - atomic_add_64(&(tm)->tm_bytes, -(size)) +#define TRIM_MAP_SEGS(size) (1 + (size) / TRIM_MAP_SIZE_FACTOR) -#define TRIM_MAP_QINC(tm) \ - atomic_inc_64(&(tm)->tm_pending); \ +#define TRIM_MAP_ADD(tm, ts) do { \ + list_insert_tail(&(tm)->tm_head, (ts)); \ + (tm)->tm_pending += TRIM_MAP_SEGS((ts)->ts_end - (ts)->ts_start); \ +} while (0) -#define TRIM_MAP_QDEC(tm) \ - atomic_dec_64(&(tm)->tm_pending); +#define TRIM_MAP_REM(tm, ts) do { \ + list_remove(&(tm)->tm_head, (ts)); \ + (tm)->tm_pending -= TRIM_MAP_SEGS((ts)->ts_end - (ts)->ts_start); \ +} while (0) typedef struct trim_map { list_t tm_head; /* List of segments sorted by txg. */ @@ -60,7 +63,6 @@ typedef struct trim_map { list_t tm_pending_writes; /* Writes blocked on in-flight frees. */ kmutex_t tm_lock; uint64_t tm_pending; /* Count of pending TRIMs. */ - uint64_t tm_bytes; /* Total size in bytes of queued TRIMs. */ } trim_map_t; typedef struct trim_seg { @@ -74,13 +76,10 @@ typedef struct trim_seg { extern boolean_t zfs_trim_enabled; -static u_int trim_txg_delay = 32; -static u_int trim_timeout = 30; -static u_int trim_max_interval = 1; -/* Limit outstanding TRIMs to 2G (max size for a single TRIM request) */ -static uint64_t trim_vdev_max_bytes = 2147483648; -/* Limit outstanding TRIMs to 64 (max ranges for a single TRIM request) */ -static u_int trim_vdev_max_pending = 64; +static u_int trim_txg_delay = 32; /* Keep deleted data up to 32 TXG */ +static u_int trim_timeout = 30; /* Keep deleted data up to 30s */ +static u_int trim_max_interval = 1; /* 1s delays between TRIMs */ +static u_int trim_vdev_max_pending = 10000; /* Keep up to 10K segments */ SYSCTL_DECL(_vfs_zfs); SYSCTL_NODE(_vfs_zfs, OID_AUTO, trim, CTLFLAG_RD, 0, "ZFS TRIM"); @@ -94,9 +93,6 @@ SYSCTL_UINT(_vfs_zfs_trim, OID_AUTO, max_interval, CTLFLAG_RWTUN, "Maximum interval between TRIM queue processing (seconds)"); SYSCTL_DECL(_vfs_zfs_vdev); -SYSCTL_QUAD(_vfs_zfs_vdev, OID_AUTO, trim_max_bytes, CTLFLAG_RWTUN, - &trim_vdev_max_bytes, 0, - "Maximum pending TRIM bytes for a vdev"); SYSCTL_UINT(_vfs_zfs_vdev, OID_AUTO, trim_max_pending, CTLFLAG_RWTUN, &trim_vdev_max_pending, 0, "Maximum pending TRIM segments for a vdev"); @@ -189,10 +185,8 @@ trim_map_destroy(vdev_t *vd) mutex_enter(&tm->tm_lock); while ((ts = list_head(&tm->tm_head)) != NULL) { avl_remove(&tm->tm_queued_frees, ts); - list_remove(&tm->tm_head, ts); + TRIM_MAP_REM(tm, ts); kmem_free(ts, sizeof (*ts)); - TRIM_MAP_SDEC(tm, ts->ts_end - ts->ts_start); - TRIM_MAP_QDEC(tm); } mutex_exit(&tm->tm_lock); @@ -237,40 +231,34 @@ trim_map_segment_add(trim_map_t *tm, uint64_t start, uint64_t end, uint64_t txg) merge_after = (ts_after != NULL && ts_after->ts_start == end); if (merge_before && merge_after) { - TRIM_MAP_SINC(tm, ts_after->ts_start - ts_before->ts_end); - TRIM_MAP_QDEC(tm); avl_remove(&tm->tm_queued_frees, ts_before); - list_remove(&tm->tm_head, ts_before); + TRIM_MAP_REM(tm, ts_before); + TRIM_MAP_REM(tm, ts_after); ts_after->ts_start = ts_before->ts_start; ts_after->ts_txg = txg; ts_after->ts_time = time; - list_remove(&tm->tm_head, ts_after); - list_insert_tail(&tm->tm_head, ts_after); + TRIM_MAP_ADD(tm, ts_after); kmem_free(ts_before, sizeof (*ts_before)); } else if (merge_before) { - TRIM_MAP_SINC(tm, end - ts_before->ts_end); + TRIM_MAP_REM(tm, ts_before); ts_before->ts_end = end; ts_before->ts_txg = txg; ts_before->ts_time = time; - list_remove(&tm->tm_head, ts_before); - list_insert_tail(&tm->tm_head, ts_before); + TRIM_MAP_ADD(tm, ts_before); } else if (merge_after) { - TRIM_MAP_SINC(tm, ts_after->ts_start - start); + TRIM_MAP_REM(tm, ts_after); ts_after->ts_start = start; ts_after->ts_txg = txg; ts_after->ts_time = time; - list_remove(&tm->tm_head, ts_after); - list_insert_tail(&tm->tm_head, ts_after); + TRIM_MAP_ADD(tm, ts_after); } else { - TRIM_MAP_SINC(tm, end - start); - TRIM_MAP_QINC(tm); ts = kmem_alloc(sizeof (*ts), KM_SLEEP); ts->ts_start = start; ts->ts_end = end; ts->ts_txg = txg; ts->ts_time = time; avl_insert(&tm->tm_queued_frees, ts, where); - list_insert_tail(&tm->tm_head, ts); + TRIM_MAP_ADD(tm, ts); } } @@ -286,7 +274,7 @@ trim_map_segment_remove(trim_map_t *tm, trim_seg_t *ts, uint64_t start, left_over = (ts->ts_start < start); right_over = (ts->ts_end > end); - TRIM_MAP_SDEC(tm, end - start); + TRIM_MAP_REM(tm, ts); if (left_over && right_over) { nts = kmem_alloc(sizeof (*nts), KM_SLEEP); nts->ts_start = end; @@ -295,16 +283,16 @@ trim_map_segment_remove(trim_map_t *tm, trim_seg_t *ts, uint64_t start, nts->ts_time = ts->ts_time; ts->ts_end = start; avl_insert_here(&tm->tm_queued_frees, nts, ts, AVL_AFTER); - list_insert_after(&tm->tm_head, ts, nts); - TRIM_MAP_QINC(tm); + TRIM_MAP_ADD(tm, ts); + TRIM_MAP_ADD(tm, nts); } else if (left_over) { ts->ts_end = start; + TRIM_MAP_ADD(tm, ts); } else if (right_over) { ts->ts_start = end; + TRIM_MAP_ADD(tm, ts); } else { avl_remove(&tm->tm_queued_frees, ts); - list_remove(&tm->tm_head, ts); - TRIM_MAP_QDEC(tm); kmem_free(ts, sizeof (*ts)); } } @@ -423,7 +411,8 @@ trim_map_write_done(zio_t *zio) * the first element's time is not greater than time argument */ static trim_seg_t * -trim_map_first(trim_map_t *tm, uint64_t txg, uint64_t txgsafe, hrtime_t time) +trim_map_first(trim_map_t *tm, uint64_t txg, uint64_t txgsafe, hrtime_t time, + boolean_t force) { trim_seg_t *ts; @@ -432,9 +421,7 @@ trim_map_first(trim_map_t *tm, uint64_t txg, uint64_t txgsafe, hrtime_t time) ts = list_head(&tm->tm_head); if (ts != NULL && ts->ts_txg <= txgsafe && - (ts->ts_txg <= txg || ts->ts_time <= time || - tm->tm_bytes > trim_vdev_max_bytes || - tm->tm_pending > trim_vdev_max_pending)) + (ts->ts_txg <= txg || ts->ts_time <= time || force)) return (ts); return (NULL); } @@ -445,6 +432,7 @@ trim_map_vdev_commit(spa_t *spa, zio_t *zio, vdev_t *vd) trim_map_t *tm = vd->vdev_trimmap; trim_seg_t *ts; uint64_t size, offset, txgtarget, txgsafe; + int64_t hard, soft; hrtime_t timelimit; ASSERT(vd->vdev_ops->vdev_op_leaf); @@ -465,16 +453,19 @@ trim_map_vdev_commit(spa_t *spa, zio_t *zio, vdev_t *vd) } mutex_enter(&tm->tm_lock); + hard = 0; + if (tm->tm_pending > trim_vdev_max_pending) + hard = (tm->tm_pending - trim_vdev_max_pending) / 4; + soft = P2ROUNDUP(hard + tm->tm_pending / trim_timeout + 1, 64); /* Loop until we have sent all outstanding free's */ - while ((ts = trim_map_first(tm, txgtarget, txgsafe, timelimit)) + while (soft > 0 && + (ts = trim_map_first(tm, txgtarget, txgsafe, timelimit, hard > 0)) != NULL) { - list_remove(&tm->tm_head, ts); + TRIM_MAP_REM(tm, ts); avl_remove(&tm->tm_queued_frees, ts); avl_add(&tm->tm_inflight_frees, ts); size = ts->ts_end - ts->ts_start; offset = ts->ts_start; - TRIM_MAP_SDEC(tm, size); - TRIM_MAP_QDEC(tm); /* * We drop the lock while we call zio_nowait as the IO * scheduler can result in a different IO being run e.g. @@ -484,6 +475,8 @@ trim_map_vdev_commit(spa_t *spa, zio_t *zio, vdev_t *vd) zio_nowait(zio_trim(zio, spa, vd, offset, size)); + soft -= TRIM_MAP_SEGS(size); + hard -= TRIM_MAP_SEGS(size); mutex_enter(&tm->tm_lock); } mutex_exit(&tm->tm_lock); |