summaryrefslogtreecommitdiffstats
path: root/sys/kern/vfs_bio.c
diff options
context:
space:
mode:
authormckusick <mckusick@FreeBSD.org>2012-03-01 18:45:25 +0000
committermckusick <mckusick@FreeBSD.org>2012-03-01 18:45:25 +0000
commitb23f922edf4716e695316239d309412148f0303e (patch)
treeebfc9a06ad19eb63f16a4673856b37491f30a5ac /sys/kern/vfs_bio.c
parent806e8c58722e448287fcca80db3f3b8585bad093 (diff)
downloadFreeBSD-src-b23f922edf4716e695316239d309412148f0303e.zip
FreeBSD-src-b23f922edf4716e695316239d309412148f0303e.tar.gz
This change avoids a kernel deadlock on "snaplk" when using
snapshots on UFS filesystems running with journaled soft updates. This is the first of several bugs that need to be fixed before removing the restriction added in -r230250 to prevent the use of snapshots on filesystems running with journaled soft updates. The deadlock occurs when holding the snapshot lock (snaplk) and then trying to flush an inode via ffs_update(). We become blocked by another process trying to flush a different inode contained in the same inode block that we need. It holds locked the inode block for which we are waiting. When it tries to write the inode block, it gets blocked waiting for our snaplk when it calls ffs_copyonwrite() to see if the inode block needs to be copied in our snapshot. The most obvious place that this deadlock arises is in the ffs_copyonwrite() routine when it updates critical metadata in a snapshot and tries to write it out before proceeding. The fix here is to write the data and indirect block pointer for the snapshot, but to skip the call to ffs_update() to write the snapshot inode. To ensure that we will never have to update a pointer in the inode itself, the ffs_snapshot() routine that creates the snapshot has to ensure that all the direct blocks are allocated as part of the creation of the snapshot. A less obvious place that this deadlock occurs is when we hold the snaplk because we are deleting a snapshot. In the course of doing the deletion, we need to allocate various soft update dependency structures and allocate some journal space. If we hit a resource limit while doing this, we decrease the resources in use by flushing out an existing dirty file to get it to give up the soft dependency resources that it holds. The flush can cause an ffs_update() to be done on the inode for the file that we have selected to flush, resulting in the same deadlock as described above when the inode that we have chosen to flush resides in the same inode block as the snapshot inode that we hold.
The fix is to defer cleaning up any time that the inode on which we are operating is a snapshot. Help and review by: Jeff Roberson Tested by: Peter Holm MFC (to 9 only) after: 2 weeks
Diffstat (limited to 'sys/kern/vfs_bio.c')
-rw-r--r--sys/kern/vfs_bio.c36
1 files changed, 15 insertions, 21 deletions
diff --git a/sys/kern/vfs_bio.c b/sys/kern/vfs_bio.c
index c0a80f3..4eac76d 100644
--- a/sys/kern/vfs_bio.c
+++ b/sys/kern/vfs_bio.c
@@ -782,21 +782,6 @@ bremfreel(struct buf *bp)
}
}
-
-/*
- * Get a buffer with the specified data. Look in the cache first. We
- * must clear BIO_ERROR and B_INVAL prior to initiating I/O. If B_CACHE
- * is set, the buffer is valid and we do not have to do anything ( see
- * getblk() ). This is really just a special case of breadn().
- */
-int
-bread(struct vnode * vp, daddr_t blkno, int size, struct ucred * cred,
- struct buf **bpp)
-{
-
- return (breadn(vp, blkno, size, 0, 0, 0, cred, bpp));
-}
-
/*
* Attempt to initiate asynchronous I/O on read-ahead blocks. We must
* clear BIO_ERROR and B_INVAL prior to initiating I/O . If B_CACHE is set,
@@ -834,19 +819,28 @@ breada(struct vnode * vp, daddr_t * rablkno, int * rabsize,
}
/*
- * Operates like bread, but also starts asynchronous I/O on
- * read-ahead blocks.
+ * Entry point for bread() and breadn() via #defines in sys/buf.h.
+ *
+ * Get a buffer with the specified data. Look in the cache first. We
+ * must clear BIO_ERROR and B_INVAL prior to initiating I/O. If B_CACHE
+ * is set, the buffer is valid and we do not have to do anything, see
+ * getblk(). Also starts asynchronous I/O on read-ahead blocks.
*/
int
-breadn(struct vnode * vp, daddr_t blkno, int size,
- daddr_t * rablkno, int *rabsize,
- int cnt, struct ucred * cred, struct buf **bpp)
+breadn_flags(struct vnode * vp, daddr_t blkno, int size,
+ daddr_t * rablkno, int *rabsize, int cnt,
+ struct ucred * cred, int flags, struct buf **bpp)
{
struct buf *bp;
int rv = 0, readwait = 0;
CTR3(KTR_BUF, "breadn(%p, %jd, %d)", vp, blkno, size);
- *bpp = bp = getblk(vp, blkno, size, 0, 0, 0);
+ /*
+ * Can only return NULL if GB_LOCK_NOWAIT flag is specified.
+ */
+ *bpp = bp = getblk(vp, blkno, size, 0, 0, flags);
+ if (bp == NULL)
+ return (EBUSY);
/* if not found in cache, do some I/O */
if ((bp->b_flags & B_CACHE) == 0) {
OpenPOWER on IntegriCloud