summaryrefslogtreecommitdiffstats
path: root/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/vdev_geom.c
diff options
context:
space:
mode:
authorpjd <pjd@FreeBSD.org>2012-09-23 19:40:58 +0000
committerpjd <pjd@FreeBSD.org>2012-09-23 19:40:58 +0000
commit618888b019e4005dec19c1688531e25d6ff63c76 (patch)
tree85c57e3b0f3bd89091304524a42b74d8be588cd6 /sys/cddl/contrib/opensolaris/uts/common/fs/zfs/vdev_geom.c
parent3d431ee1496a9253b224fdc00c445d0ab59e1eb3 (diff)
downloadFreeBSD-src-618888b019e4005dec19c1688531e25d6ff63c76.zip
FreeBSD-src-618888b019e4005dec19c1688531e25d6ff63c76.tar.gz
Add TRIM support.
The code builds a map of regions that were freed. On every write the code consults the map and, if necessary, removes ranges that were freed earlier but are now being overwritten. Freed blocks are not TRIMmed immediately. There is a tunable that defines how many txgs we should wait before TRIMming freed blocks (64 by default). There is a low-priority thread that TRIMs ranges when the time comes. During TRIM we keep in-flight ranges on a list to detect colliding writes - we have to delay writes that collide with in-flight TRIMs in case requests get reordered and the write reaches the disk before the TRIM. We don't have to do the same for in-flight writes, as colliding writes just remove ranges to TRIM. Sponsored by: multiplay.co.uk This work includes some important fixes and some improvements obtained from the zfsonlinux project, including TRIMming entire vdevs on pool create/add/attach and on pool import for spare and cache vdevs. Obtained from: zfsonlinux Submitted by: Etienne Dechamps <etienne.dechamps@ovh.net>
Diffstat (limited to 'sys/cddl/contrib/opensolaris/uts/common/fs/zfs/vdev_geom.c')
-rw-r--r--sys/cddl/contrib/opensolaris/uts/common/fs/zfs/vdev_geom.c56
1 files changed, 41 insertions, 15 deletions
diff --git a/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/vdev_geom.c b/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/vdev_geom.c
index b2aada7..d69d9cc 100644
--- a/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/vdev_geom.c
+++ b/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/vdev_geom.c
@@ -49,14 +49,17 @@ struct g_class zfs_vdev_class = {
DECLARE_GEOM_CLASS(zfs_vdev_class, zfs_vdev);
-/*
- * Don't send BIO_FLUSH.
- */
+SYSCTL_DECL(_vfs_zfs_vdev);
+/* Don't send BIO_FLUSH. */
static int vdev_geom_bio_flush_disable = 0;
TUNABLE_INT("vfs.zfs.vdev.bio_flush_disable", &vdev_geom_bio_flush_disable);
-SYSCTL_DECL(_vfs_zfs_vdev);
SYSCTL_INT(_vfs_zfs_vdev, OID_AUTO, bio_flush_disable, CTLFLAG_RW,
&vdev_geom_bio_flush_disable, 0, "Disable BIO_FLUSH");
+/* Don't send BIO_DELETE. */
+static int vdev_geom_bio_delete_disable = 0;
+TUNABLE_INT("vfs.zfs.vdev.bio_delete_disable", &vdev_geom_bio_delete_disable);
+SYSCTL_INT(_vfs_zfs_vdev, OID_AUTO, bio_delete_disable, CTLFLAG_RW,
+ &vdev_geom_bio_delete_disable, 0, "Disable BIO_DELETE");
static void
vdev_geom_orphan(struct g_consumer *cp)
@@ -499,8 +502,8 @@ vdev_geom_open(vdev_t *vd, uint64_t *psize, uint64_t *max_psize,
*ashift = highbit(MAX(pp->sectorsize, SPA_MINBLOCKSIZE)) - 1;
/*
- * Clear the nowritecache bit, so that on a vdev_reopen() we will
- * try again.
+ * Clear the nowritecache settings, so that on a vdev_reopen()
+ * we will try again.
*/
vd->vdev_nowritecache = B_FALSE;
@@ -546,6 +549,15 @@ vdev_geom_io_intr(struct bio *bp)
*/
vd->vdev_nowritecache = B_TRUE;
}
+ if (bp->bio_cmd == BIO_DELETE && bp->bio_error == ENOTSUP) {
+ /*
+ * If we get ENOTSUP, we know that no future
+ * attempts will ever succeed. In this case we
+ * set a persistent bit so that we don't bother
+ * with the ioctl in the future.
+ */
+ vd->vdev_notrim = B_TRUE;
+ }
if (zio->io_error == EIO && !vd->vdev_remove_wanted) {
/*
* If provider's error is set we assume it is being
@@ -588,17 +600,21 @@ vdev_geom_io_start(zio_t *zio)
}
switch (zio->io_cmd) {
-
case DKIOCFLUSHWRITECACHE:
-
if (zfs_nocacheflush || vdev_geom_bio_flush_disable)
break;
-
if (vd->vdev_nowritecache) {
zio->io_error = ENOTSUP;
break;
}
-
+ goto sendreq;
+ case DKIOCTRIM:
+ if (vdev_geom_bio_delete_disable)
+ break;
+ if (vd->vdev_notrim) {
+ zio->io_error = ENOTSUP;
+ break;
+ }
goto sendreq;
default:
zio->io_error = ENOTSUP;
@@ -623,11 +639,21 @@ sendreq:
bp->bio_length = zio->io_size;
break;
case ZIO_TYPE_IOCTL:
- bp->bio_cmd = BIO_FLUSH;
- bp->bio_flags |= BIO_ORDERED;
- bp->bio_data = NULL;
- bp->bio_offset = cp->provider->mediasize;
- bp->bio_length = 0;
+ switch (zio->io_cmd) {
+ case DKIOCFLUSHWRITECACHE:
+ bp->bio_cmd = BIO_FLUSH;
+ bp->bio_flags |= BIO_ORDERED;
+ bp->bio_data = NULL;
+ bp->bio_offset = cp->provider->mediasize;
+ bp->bio_length = 0;
+ break;
+ case DKIOCTRIM:
+ bp->bio_cmd = BIO_DELETE;
+ bp->bio_data = NULL;
+ bp->bio_offset = zio->io_offset;
+ bp->bio_length = zio->io_size;
+ break;
+ }
break;
}
bp->bio_done = vdev_geom_io_intr;
OpenPOWER on IntegriCloud