summaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
authorkib <kib@FreeBSD.org>2012-12-28 23:08:30 +0000
committerkib <kib@FreeBSD.org>2012-12-28 23:08:30 +0000
commit4247222fcc643b60a3b7a9f859bc22994ca75f4b (patch)
tree17461ae8de8771b3b7726b88cdeb957aaf3e054e
parentbd8e345ca3212f607bc453c1c860e364601c1522 (diff)
downloadFreeBSD-src-4247222fcc643b60a3b7a9f859bc22994ca75f4b.zip
FreeBSD-src-4247222fcc643b60a3b7a9f859bc22994ca75f4b.tar.gz
Make it possible to atomically resume writes on the mount and account
the write start, by adding a variation of vfs_write_resume(9) which accepts flags. Use the new function to prevent a deadlock between parallel suspension and snapshotting of a UFS mount. The ffs_snapshot() code performed vfs_write_resume() followed by vn_start_write() while owning the snaplock. If a suspension intervened between the resume and vn_start_write(), a deadlock occurred after the suspending thread tried to lock the snaplock, most typically during the write in ffs_copyonwrite(). Reported and tested by: Andreas Longwitz <longwitz@incore.de> Reviewed by: mckusick MFC after: 2 weeks X-MFC-note: make the vfs_write_resume(9) function a macro after the MFC, in HEAD
-rw-r--r--sys/kern/vfs_vnops.c79
-rw-r--r--sys/sys/vnode.h3
-rw-r--r--sys/ufs/ffs/ffs_snapshot.c3
3 files changed, 56 insertions, 29 deletions
diff --git a/sys/kern/vfs_vnops.c b/sys/kern/vfs_vnops.c
index 3f65b05..cf49ecb 100644
--- a/sys/kern/vfs_vnops.c
+++ b/sys/kern/vfs_vnops.c
@@ -1434,6 +1434,40 @@ vn_closefile(fp, td)
* proceed. If a suspend request is in progress, we wait until the
* suspension is over, and then proceed.
*/
+static int
+vn_start_write_locked(struct mount *mp, int flags)
+{
+ int error;
+
+ mtx_assert(MNT_MTX(mp), MA_OWNED);
+ error = 0;
+
+ /*
+ * Check on status of suspension.
+ */
+ if ((curthread->td_pflags & TDP_IGNSUSP) == 0 ||
+ mp->mnt_susp_owner != curthread) {
+ while ((mp->mnt_kern_flag & MNTK_SUSPEND) != 0) {
+ if (flags & V_NOWAIT) {
+ error = EWOULDBLOCK;
+ goto unlock;
+ }
+ error = msleep(&mp->mnt_flag, MNT_MTX(mp),
+ (PUSER - 1) | (flags & PCATCH), "suspfs", 0);
+ if (error)
+ goto unlock;
+ }
+ }
+ if (flags & V_XSLEEP)
+ goto unlock;
+ mp->mnt_writeopcount++;
+unlock:
+ if (error != 0 || (flags & V_XSLEEP) != 0)
+ MNT_REL(mp);
+ MNT_IUNLOCK(mp);
+ return (error);
+}
+
int
vn_start_write(vp, mpp, flags)
struct vnode *vp;
@@ -1470,30 +1504,7 @@ vn_start_write(vp, mpp, flags)
if (vp == NULL)
MNT_REF(mp);
- /*
- * Check on status of suspension.
- */
- if ((curthread->td_pflags & TDP_IGNSUSP) == 0 ||
- mp->mnt_susp_owner != curthread) {
- while ((mp->mnt_kern_flag & MNTK_SUSPEND) != 0) {
- if (flags & V_NOWAIT) {
- error = EWOULDBLOCK;
- goto unlock;
- }
- error = msleep(&mp->mnt_flag, MNT_MTX(mp),
- (PUSER - 1) | (flags & PCATCH), "suspfs", 0);
- if (error)
- goto unlock;
- }
- }
- if (flags & V_XSLEEP)
- goto unlock;
- mp->mnt_writeopcount++;
-unlock:
- if (error != 0 || (flags & V_XSLEEP) != 0)
- MNT_REL(mp);
- MNT_IUNLOCK(mp);
- return (error);
+ return (vn_start_write_locked(mp, flags));
}
/*
@@ -1639,8 +1650,7 @@ vfs_write_suspend(mp)
* Request a filesystem to resume write operations.
*/
void
-vfs_write_resume(mp)
- struct mount *mp;
+vfs_write_resume_flags(struct mount *mp, int flags)
{
MNT_ILOCK(mp);
@@ -1652,10 +1662,25 @@ vfs_write_resume(mp)
wakeup(&mp->mnt_writeopcount);
wakeup(&mp->mnt_flag);
curthread->td_pflags &= ~TDP_IGNSUSP;
+ if ((flags & VR_START_WRITE) != 0) {
+ MNT_REF(mp);
+ mp->mnt_writeopcount++;
+ }
MNT_IUNLOCK(mp);
VFS_SUSP_CLEAN(mp);
- } else
+ } else if ((flags & VR_START_WRITE) != 0) {
+ MNT_REF(mp);
+ vn_start_write_locked(mp, 0);
+ } else {
MNT_IUNLOCK(mp);
+ }
+}
+
+void
+vfs_write_resume(struct mount *mp)
+{
+
+ vfs_write_resume_flags(mp, 0);
}
/*
diff --git a/sys/sys/vnode.h b/sys/sys/vnode.h
index 42f9e5f..4371b40 100644
--- a/sys/sys/vnode.h
+++ b/sys/sys/vnode.h
@@ -392,6 +392,8 @@ extern int vttoif_tab[];
#define V_NOWAIT 0x0002 /* vn_start_write: don't sleep for suspend */
#define V_XSLEEP 0x0004 /* vn_start_write: just return after sleep */
+#define VR_START_WRITE 0x0001 /* vfs_write_resume: start write atomically */
+
#define VREF(vp) vref(vp)
#ifdef DIAGNOSTIC
@@ -701,6 +703,7 @@ int vn_io_fault_uiomove(char *data, int xfersize, struct uio *uio);
int vfs_cache_lookup(struct vop_lookup_args *ap);
void vfs_timestamp(struct timespec *);
void vfs_write_resume(struct mount *mp);
+void vfs_write_resume_flags(struct mount *mp, int flags);
int vfs_write_suspend(struct mount *mp);
int vop_stdbmap(struct vop_bmap_args *);
int vop_stdfsync(struct vop_fsync_args *);
diff --git a/sys/ufs/ffs/ffs_snapshot.c b/sys/ufs/ffs/ffs_snapshot.c
index e528509..25ad79c 100644
--- a/sys/ufs/ffs/ffs_snapshot.c
+++ b/sys/ufs/ffs/ffs_snapshot.c
@@ -687,8 +687,7 @@ out1:
/*
* Resume operation on filesystem.
*/
- vfs_write_resume(vp->v_mount);
- vn_start_write(NULL, &wrtmp, V_WAIT);
+ vfs_write_resume_flags(vp->v_mount, VR_START_WRITE);
if (collectsnapstats && starttime.tv_sec > 0) {
nanotime(&endtime);
timespecsub(&endtime, &starttime);
OpenPOWER on IntegriCloud