diff options
author | pjd <pjd@FreeBSD.org> | 2012-09-23 19:40:58 +0000 |
---|---|---|
committer | pjd <pjd@FreeBSD.org> | 2012-09-23 19:40:58 +0000 |
commit | 618888b019e4005dec19c1688531e25d6ff63c76 (patch) | |
tree | 85c57e3b0f3bd89091304524a42b74d8be588cd6 /sys/cddl/contrib/opensolaris/uts/common/fs/zfs/vdev_geom.c | |
parent | 3d431ee1496a9253b224fdc00c445d0ab59e1eb3 (diff) | |
download | FreeBSD-src-618888b019e4005dec19c1688531e25d6ff63c76.zip FreeBSD-src-618888b019e4005dec19c1688531e25d6ff63c76.tar.gz |
Add TRIM support.
The code builds a map of regions that were freed. On every write the
code consults the map and eventually removes ranges that were freed
before, but are now overwritten.
Freed blocks are not TRIMed immediately. There is a tunable that defines
how many txg we should wait with TRIMming freed blocks (64 by default).
There is a low priority thread that TRIMs ranges when the time comes.
During TRIM we keep in-flight ranges on a list to detect colliding
writes - we have to delay writes that collide with in-flight TRIMs in
case something will be reordered and write will reached the disk before
the TRIM. We don't have to do the same for in-flight writes, as
colliding writes just remove ranges to TRIM.
Sponsored by: multiplay.co.uk
This work includes some important fixes and some improvements obtained
from the zfsonlinux project, including TRIMming entire vdevs on pool
create/add/attach and on pool import for spare and cache vdevs.
Obtained from: zfsonlinux
Submitted by: Etienne Dechamps <etienne.dechamps@ovh.net>
Diffstat (limited to 'sys/cddl/contrib/opensolaris/uts/common/fs/zfs/vdev_geom.c')
-rw-r--r-- | sys/cddl/contrib/opensolaris/uts/common/fs/zfs/vdev_geom.c | 56 |
1 files changed, 41 insertions, 15 deletions
diff --git a/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/vdev_geom.c b/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/vdev_geom.c index b2aada7..d69d9cc 100644 --- a/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/vdev_geom.c +++ b/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/vdev_geom.c @@ -49,14 +49,17 @@ struct g_class zfs_vdev_class = { DECLARE_GEOM_CLASS(zfs_vdev_class, zfs_vdev); -/* - * Don't send BIO_FLUSH. - */ +SYSCTL_DECL(_vfs_zfs_vdev); +/* Don't send BIO_FLUSH. */ static int vdev_geom_bio_flush_disable = 0; TUNABLE_INT("vfs.zfs.vdev.bio_flush_disable", &vdev_geom_bio_flush_disable); -SYSCTL_DECL(_vfs_zfs_vdev); SYSCTL_INT(_vfs_zfs_vdev, OID_AUTO, bio_flush_disable, CTLFLAG_RW, &vdev_geom_bio_flush_disable, 0, "Disable BIO_FLUSH"); +/* Don't send BIO_DELETE. */ +static int vdev_geom_bio_delete_disable = 0; +TUNABLE_INT("vfs.zfs.vdev.bio_delete_disable", &vdev_geom_bio_delete_disable); +SYSCTL_INT(_vfs_zfs_vdev, OID_AUTO, bio_delete_disable, CTLFLAG_RW, + &vdev_geom_bio_delete_disable, 0, "Disable BIO_DELETE"); static void vdev_geom_orphan(struct g_consumer *cp) @@ -499,8 +502,8 @@ vdev_geom_open(vdev_t *vd, uint64_t *psize, uint64_t *max_psize, *ashift = highbit(MAX(pp->sectorsize, SPA_MINBLOCKSIZE)) - 1; /* - * Clear the nowritecache bit, so that on a vdev_reopen() we will - * try again. + * Clear the nowritecache settings, so that on a vdev_reopen() + * we will try again. */ vd->vdev_nowritecache = B_FALSE; @@ -546,6 +549,15 @@ vdev_geom_io_intr(struct bio *bp) */ vd->vdev_nowritecache = B_TRUE; } + if (bp->bio_cmd == BIO_DELETE && bp->bio_error == ENOTSUP) { + /* + * If we get ENOTSUP, we know that no future + * attempts will ever succeed. In this case we + * set a persistent bit so that we don't bother + * with the ioctl in the future. + */ + vd->vdev_notrim = B_TRUE; + } if (zio->io_error == EIO && !vd->vdev_remove_wanted) { /* * If provider's error is set we assume it is being @@ -588,17 +600,21 @@ vdev_geom_io_start(zio_t *zio) } switch (zio->io_cmd) { - case DKIOCFLUSHWRITECACHE: - if (zfs_nocacheflush || vdev_geom_bio_flush_disable) break; - if (vd->vdev_nowritecache) { zio->io_error = ENOTSUP; break; } - + goto sendreq; + case DKIOCTRIM: + if (vdev_geom_bio_delete_disable) + break; + if (vd->vdev_notrim) { + zio->io_error = ENOTSUP; + break; + } goto sendreq; default: zio->io_error = ENOTSUP; @@ -623,11 +639,21 @@ sendreq: bp->bio_length = zio->io_size; break; case ZIO_TYPE_IOCTL: - bp->bio_cmd = BIO_FLUSH; - bp->bio_flags |= BIO_ORDERED; - bp->bio_data = NULL; - bp->bio_offset = cp->provider->mediasize; - bp->bio_length = 0; + switch (zio->io_cmd) { + case DKIOCFLUSHWRITECACHE: + bp->bio_cmd = BIO_FLUSH; + bp->bio_flags |= BIO_ORDERED; + bp->bio_data = NULL; + bp->bio_offset = cp->provider->mediasize; + bp->bio_length = 0; + break; + case DKIOCTRIM: + bp->bio_cmd = BIO_DELETE; + bp->bio_data = NULL; + bp->bio_offset = zio->io_offset; + bp->bio_length = zio->io_size; + break; + } break; } bp->bio_done = vdev_geom_io_intr; |