summaryrefslogtreecommitdiffstats
path: root/sys/geom
diff options
context:
space:
mode:
authorsmh <smh@FreeBSD.org>2013-10-23 09:54:58 +0000
committersmh <smh@FreeBSD.org>2013-10-23 09:54:58 +0000
commit5c7a6f5d929fa744b3a959b187d32c752a5ea7d5 (patch)
treea33b2d074ed120b5235664c972b7d26e8b93d3c2 /sys/geom
parent212cc4c51f1283090add4cfee918f109c27544e7 (diff)
downloadFreeBSD-src-5c7a6f5d929fa744b3a959b187d32c752a5ea7d5.zip
FreeBSD-src-5c7a6f5d929fa744b3a959b187d32c752a5ea7d5.tar.gz
Improve ZFS N-way mirror read performance by using load and locality
information. The existing algorithm selects a preferred leaf vdev based on offset of the zio request modulo the number of members in the mirror. It assumes the devices are of equal performance and that spreading the requests randomly over both drives will be sufficient to saturate them. In practice this results in the leaf vdevs being underutilized. The new algorithm takes into account the following additional factors: * Load of the vdevs (number of outstanding I/O requests) * The locality of last queued I/O vs the new I/O request. Within the locality calculation additional knowledge about the underlying vdev is considered, such as whether the device backing the vdev is a rotating media device. This results in performance increases across the board as well as significant increases for predominantly streaming loads and for configurations which don't have evenly performing devices. The following are results from a setup with 3 Way Mirror with 2 x HD's and 1 x SSD from a basic test running multiple parallel dd's. With pre-fetch disabled (vfs.zfs.prefetch_disable=1): == Stripe Balanced (default) == Read 15360MB using bs: 1048576, readers: 3, took 161 seconds @ 95 MB/s == Load Balanced (zfslinux) == Read 15360MB using bs: 1048576, readers: 3, took 297 seconds @ 51 MB/s == Load Balanced (locality freebsd) == Read 15360MB using bs: 1048576, readers: 3, took 54 seconds @ 284 MB/s With pre-fetch enabled (vfs.zfs.prefetch_disable=0): == Stripe Balanced (default) == Read 15360MB using bs: 1048576, readers: 3, took 91 seconds @ 168 MB/s == Load Balanced (zfslinux) == Read 15360MB using bs: 1048576, readers: 3, took 108 seconds @ 142 MB/s == Load Balanced (locality freebsd) == Read 15360MB using bs: 1048576, readers: 3, took 48 seconds @ 320 MB/s In addition to the performance changes the code was also restructured, with the help of Justin Gibbs, to provide a more logical flow which also ensures vdev loads are only calculated from the set of valid candidates. 
The following additional sysctls were added to allow the administrator to tune the behaviour of the load algorithm: * vfs.zfs.vdev.mirror.rotating_inc * vfs.zfs.vdev.mirror.rotating_seek_inc * vfs.zfs.vdev.mirror.rotating_seek_offset * vfs.zfs.vdev.mirror.non_rotating_inc * vfs.zfs.vdev.mirror.non_rotating_seek_inc These changes were based on work started by the zfsonlinux developers: https://github.com/zfsonlinux/zfs/pull/1487 Reviewed by: gibbs, mav, will MFC after: 2 weeks Sponsored by: Multiplay
Diffstat (limited to 'sys/geom')
-rw-r--r--sys/geom/geom.h1
-rw-r--r--sys/geom/geom_disk.c19
-rw-r--r--sys/geom/geom_disk.h4
-rw-r--r--sys/geom/geom_subr.c7
4 files changed, 22 insertions, 9 deletions
diff --git a/sys/geom/geom.h b/sys/geom/geom.h
index 1c1fdb03..f313d02 100644
--- a/sys/geom/geom.h
+++ b/sys/geom/geom.h
@@ -274,6 +274,7 @@ int g_handleattr(struct bio *bp, const char *attribute, const void *val,
int len);
int g_handleattr_int(struct bio *bp, const char *attribute, int val);
int g_handleattr_off_t(struct bio *bp, const char *attribute, off_t val);
+int g_handleattr_uint16_t(struct bio *bp, const char *attribute, uint16_t val);
int g_handleattr_str(struct bio *bp, const char *attribute, const char *str);
struct g_consumer * g_new_consumer(struct g_geom *gp);
struct g_geom * g_new_geomf(struct g_class *mp, const char *fmt, ...)
diff --git a/sys/geom/geom_disk.c b/sys/geom/geom_disk.c
index 02a2769..d7fa43a 100644
--- a/sys/geom/geom_disk.c
+++ b/sys/geom/geom_disk.c
@@ -387,22 +387,25 @@ g_disk_start(struct bio *bp)
break;
else if (g_handleattr_str(bp, "GEOM::ident", dp->d_ident))
break;
- else if (g_handleattr(bp, "GEOM::hba_vendor",
- &dp->d_hba_vendor, 2))
+ else if (g_handleattr_uint16_t(bp, "GEOM::hba_vendor",
+ dp->d_hba_vendor))
break;
- else if (g_handleattr(bp, "GEOM::hba_device",
- &dp->d_hba_device, 2))
+ else if (g_handleattr_uint16_t(bp, "GEOM::hba_device",
+ dp->d_hba_device))
break;
- else if (g_handleattr(bp, "GEOM::hba_subvendor",
- &dp->d_hba_subvendor, 2))
+ else if (g_handleattr_uint16_t(bp, "GEOM::hba_subvendor",
+ dp->d_hba_subvendor))
break;
- else if (g_handleattr(bp, "GEOM::hba_subdevice",
- &dp->d_hba_subdevice, 2))
+ else if (g_handleattr_uint16_t(bp, "GEOM::hba_subdevice",
+ dp->d_hba_subdevice))
break;
else if (!strcmp(bp->bio_attribute, "GEOM::kerneldump"))
g_disk_kerneldump(bp, dp);
else if (!strcmp(bp->bio_attribute, "GEOM::setstate"))
g_disk_setstate(bp, sc);
+ else if (g_handleattr_uint16_t(bp, "GEOM::rotation_rate",
+ dp->d_rotation_rate))
+ break;
else
error = ENOIOCTL;
break;
diff --git a/sys/geom/geom_disk.h b/sys/geom/geom_disk.h
index 53feb1f..3c983ee 100644
--- a/sys/geom/geom_disk.h
+++ b/sys/geom/geom_disk.h
@@ -97,6 +97,7 @@ struct disk {
uint16_t d_hba_device;
uint16_t d_hba_subvendor;
uint16_t d_hba_subdevice;
+ uint16_t d_rotation_rate;
/* Fields private to the driver */
void *d_drv1;
@@ -121,7 +122,8 @@ int disk_resize(struct disk *dp, int flag);
#define DISK_VERSION_01 0x5856105a
#define DISK_VERSION_02 0x5856105b
#define DISK_VERSION_03 0x5856105c
-#define DISK_VERSION DISK_VERSION_03
+#define DISK_VERSION_04 0x5856105d
+#define DISK_VERSION DISK_VERSION_04
#endif /* _KERNEL */
#endif /* _GEOM_GEOM_DISK_H_ */
diff --git a/sys/geom/geom_subr.c b/sys/geom/geom_subr.c
index ecbf0b8..dd8c4ab 100644
--- a/sys/geom/geom_subr.c
+++ b/sys/geom/geom_subr.c
@@ -951,6 +951,13 @@ g_handleattr_int(struct bio *bp, const char *attribute, int val)
}
int
+g_handleattr_uint16_t(struct bio *bp, const char *attribute, uint16_t val)
+{
+
+ return (g_handleattr(bp, attribute, &val, sizeof val));
+}
+
+int
g_handleattr_off_t(struct bio *bp, const char *attribute, off_t val)
{
OpenPOWER on IntegriCloud