summaryrefslogtreecommitdiffstats
path: root/sys/dev/md
diff options
context:
space:
mode:
author: phk <phk@FreeBSD.org> 2004-03-10 20:41:09 +0000
committer: phk <phk@FreeBSD.org> 2004-03-10 20:41:09 +0000
commit: 0f56e66e2f53df9e66c87c4c703a093c7926dc1c (patch)
tree: 59ebe2a7ed44e7865042f27309af6372e0e6c948 /sys/dev/md
parent: 1ea153590eb705528aa8cdf33a034b97c6cd57ba (diff)
downloadFreeBSD-src-0f56e66e2f53df9e66c87c4c703a093c7926dc1c.zip
FreeBSD-src-0f56e66e2f53df9e66c87c4c703a093c7926dc1c.tar.gz
Fix a long-standing deadlock issue with vnode backed md(4) devices:
On vnode-backed md(4) devices over a certain, currently undetermined size relative to the buffer cache, our "lemming-syncer" can provoke a buffer starvation which puts the md thread to sleep on wdrain. This generally tends to grind the entire system to a stop, because the event that is supposed to wake up the thread will not happen until a fair bit of the piled-up I/O requests in the system finish, and since a lot of those are on an md(4) vnode-backed device which is currently waiting on wdrain until a fair amount of the piled up ... you get the picture. The cure is to issue all VOP_WRITE calls on the vnode backing the device with IO_SYNC. In addition to more closely emulating a real disk device with a non-lying write cache, this makes the writes exempt from rate limiting (which is there to avoid starving the buffer cache) and consequently prevents the deadlock. Unfortunately, performance takes a hit. Add an "async" option to give people who know what they are doing the old behaviour.
Diffstat (limited to 'sys/dev/md')
-rw-r--r-- sys/dev/md/md.c 8
1 files changed, 5 insertions, 3 deletions
diff --git a/sys/dev/md/md.c b/sys/dev/md/md.c
index 0d3d918..6e345da 100644
--- a/sys/dev/md/md.c
+++ b/sys/dev/md/md.c
@@ -503,13 +503,15 @@ mdstart_vnode(struct md_s *sc, struct bio *bp)
if (bp->bio_cmd == BIO_READ) {
vn_lock(sc->vnode, LK_EXCLUSIVE | LK_RETRY, curthread);
error = VOP_READ(sc->vnode, &auio, IO_DIRECT, sc->cred);
+ VOP_UNLOCK(sc->vnode, 0, curthread);
} else {
(void) vn_start_write(sc->vnode, &mp, V_WAIT);
vn_lock(sc->vnode, LK_EXCLUSIVE | LK_RETRY, curthread);
- error = VOP_WRITE(sc->vnode, &auio, 0, sc->cred);
+ error = VOP_WRITE(sc->vnode, &auio,
+ sc->flags & MD_ASYNC ? 0 : IO_SYNC, sc->cred);
+ VOP_UNLOCK(sc->vnode, 0, curthread);
vn_finished_write(mp);
}
- VOP_UNLOCK(sc->vnode, 0, curthread);
bp->bio_resid = auio.uio_resid;
return (error);
}
@@ -938,7 +940,7 @@ mdcreate_vnode(struct md_ioctl *mdio, struct thread *td)
if (mdio->md_fwheads != 0)
sc->fwheads = mdio->md_fwheads;
sc->type = MD_VNODE;
- sc->flags = mdio->md_options & MD_FORCE;
+ sc->flags = mdio->md_options & (MD_FORCE | MD_ASYNC);
if (!(flags & FWRITE))
sc->flags |= MD_READONLY;
sc->secsize = DEV_BSIZE;
OpenPOWER on IntegriCloud