summaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
authorkib <kib@FreeBSD.org>2010-06-08 17:54:28 +0000
committerkib <kib@FreeBSD.org>2010-06-08 17:54:28 +0000
commit317abde3725dd5587befd19326dbb86efdaed6fd (patch)
treece721d22f60a1202a7cf7daeb70ebb7c20101794
parent9a18b596b8a1d9c7a937bd5ed83be888524ab341 (diff)
downloadFreeBSD-src-317abde3725dd5587befd19326dbb86efdaed6fd.zip
FreeBSD-src-317abde3725dd5587befd19326dbb86efdaed6fd.tar.gz
Reorganize the code in bdwrite() which handles move of dirtiness
from the buffer pages to buffer. Combine the code to set buffer dirty range (previously in vfs_setdirty()) and to clean the pages (vfs_clean_pages()) into new function vfs_clean_pages_dirty_buf(). Now the vm object lock is acquired only once. Drain the VPO_BUSY bit of the buffer pages before setting valid and clean bits in vfs_clean_pages_dirty_buf() with new helper vfs_drain_busy_pages(). pmap_clear_modify() asserts that page is not busy. In vfs_busy_pages(), move the wait for draining of VPO_BUSY before the dirtyness handling, to follow the structure of vfs_clean_pages_dirty_buf(). Reported and tested by: pho Suggested and reviewed by: alc MFC after: 2 weeks
-rw-r--r--sys/kern/vfs_bio.c135
1 files changed, 65 insertions, 70 deletions
diff --git a/sys/kern/vfs_bio.c b/sys/kern/vfs_bio.c
index bbced87..5429b14 100644
--- a/sys/kern/vfs_bio.c
+++ b/sys/kern/vfs_bio.c
@@ -102,8 +102,8 @@ static void vm_hold_load_pages(struct buf *bp, vm_offset_t from,
static void vfs_page_set_valid(struct buf *bp, vm_ooffset_t off, vm_page_t m);
static void vfs_page_set_validclean(struct buf *bp, vm_ooffset_t off,
vm_page_t m);
-static void vfs_clean_pages(struct buf *bp);
-static void vfs_setdirty(struct buf *bp);
+static void vfs_drain_busy_pages(struct buf *bp);
+static void vfs_clean_pages_dirty_buf(struct buf *bp);
static void vfs_setdirty_locked_object(struct buf *bp);
static void vfs_vmio_release(struct buf *bp);
static int vfs_bio_clcheck(struct vnode *vp, int size,
@@ -1025,18 +1025,17 @@ bdwrite(struct buf *bp)
}
/*
- * Set the *dirty* buffer range based upon the VM system dirty pages.
- */
- vfs_setdirty(bp);
-
- /*
- * We need to do this here to satisfy the vnode_pager and the
- * pageout daemon, so that it thinks that the pages have been
- * "cleaned". Note that since the pages are in a delayed write
- * buffer -- the VFS layer "will" see that the pages get written
- * out on the next sync, or perhaps the cluster will be completed.
+ * Set the *dirty* buffer range based upon the VM system dirty
+ * pages.
+ *
+ * Mark the buffer pages as clean. We need to do this here to
+ * satisfy the vnode_pager and the pageout daemon, so that it
+ * thinks that the pages have been "cleaned". Note that since
+ * the pages are in a delayed write buffer -- the VFS layer
+ * "will" see that the pages get written out on the next sync,
+ * or perhaps the cluster will be completed.
*/
- vfs_clean_pages(bp);
+ vfs_clean_pages_dirty_buf(bp);
bqrelse(bp);
/*
@@ -2398,31 +2397,44 @@ notinmem:
}
/*
- * vfs_setdirty:
- *
- * Sets the dirty range for a buffer based on the status of the dirty
- * bits in the pages comprising the buffer.
+ * Set the dirty range for a buffer based on the status of the dirty
+ * bits in the pages comprising the buffer. The range is limited
+ * to the size of the buffer.
*
- * The range is limited to the size of the buffer.
+ * Tell the VM system that the pages associated with this buffer
+ * are clean. This is used for delayed writes where the data is
+ * going to go to disk eventually without additional VM intevention.
*
- * This routine is primarily used by NFS, but is generalized for the
- * B_VMIO case.
+ * Note that while we only really need to clean through to b_bcount, we
+ * just go ahead and clean through to b_bufsize.
*/
static void
-vfs_setdirty(struct buf *bp)
+vfs_clean_pages_dirty_buf(struct buf *bp)
{
+ vm_ooffset_t foff, noff, eoff;
+ vm_page_t m;
+ int i;
- /*
- * Degenerate case - empty buffer
- */
- if (bp->b_bufsize == 0)
+ if ((bp->b_flags & B_VMIO) == 0 || bp->b_bufsize == 0)
return;
- if ((bp->b_flags & B_VMIO) == 0)
- return;
+ foff = bp->b_offset;
+ KASSERT(bp->b_offset != NOOFFSET,
+ ("vfs_clean_pages_dirty_buf: no buffer offset"));
VM_OBJECT_LOCK(bp->b_bufobj->bo_object);
+ vfs_drain_busy_pages(bp);
vfs_setdirty_locked_object(bp);
+ for (i = 0; i < bp->b_npages; i++) {
+ noff = (foff + PAGE_SIZE) & ~(off_t)PAGE_MASK;
+ eoff = noff;
+ if (eoff > bp->b_offset + bp->b_bufsize)
+ eoff = bp->b_offset + bp->b_bufsize;
+ m = bp->b_pages[i];
+ vfs_page_set_validclean(bp, foff, m);
+ /* vm_page_clear_dirty(m, foff & PAGE_MASK, eoff - foff); */
+ foff = noff;
+ }
VM_OBJECT_UNLOCK(bp->b_bufobj->bo_object);
}
@@ -3533,6 +3545,31 @@ vfs_page_set_validclean(struct buf *bp, vm_ooffset_t off, vm_page_t m)
}
/*
+ * Ensure that all buffer pages are not busied by VPO_BUSY flag. If
+ * any page is busy, drain the flag.
+ */
+static void
+vfs_drain_busy_pages(struct buf *bp)
+{
+ vm_page_t m;
+ int i, last_busied;
+
+ VM_OBJECT_LOCK_ASSERT(bp->b_bufobj->bo_object, MA_OWNED);
+ last_busied = 0;
+ for (i = 0; i < bp->b_npages; i++) {
+ m = bp->b_pages[i];
+ if ((m->oflags & VPO_BUSY) != 0) {
+ for (; last_busied < i; last_busied++)
+ vm_page_busy(bp->b_pages[last_busied]);
+ while ((m->oflags & VPO_BUSY) != 0)
+ vm_page_sleep(m, "vbpage");
+ }
+ }
+ for (i = 0; i < last_busied; i++)
+ vm_page_wakeup(bp->b_pages[i]);
+}
+
+/*
* This routine is called before a device strategy routine.
* It is used to tell the VM system that paging I/O is in
* progress, and treat the pages associated with the buffer
@@ -3560,15 +3597,9 @@ vfs_busy_pages(struct buf *bp, int clear_modify)
KASSERT(bp->b_offset != NOOFFSET,
("vfs_busy_pages: no buffer offset"));
VM_OBJECT_LOCK(obj);
+ vfs_drain_busy_pages(bp);
if (bp->b_bufsize != 0)
vfs_setdirty_locked_object(bp);
-retry:
- for (i = 0; i < bp->b_npages; i++) {
- m = bp->b_pages[i];
-
- if (vm_page_sleep_if_busy(m, FALSE, "vbpage"))
- goto retry;
- }
bogus = 0;
for (i = 0; i < bp->b_npages; i++) {
m = bp->b_pages[i];
@@ -3609,42 +3640,6 @@ retry:
}
/*
- * Tell the VM system that the pages associated with this buffer
- * are clean. This is used for delayed writes where the data is
- * going to go to disk eventually without additional VM intevention.
- *
- * Note that while we only really need to clean through to b_bcount, we
- * just go ahead and clean through to b_bufsize.
- */
-static void
-vfs_clean_pages(struct buf *bp)
-{
- int i;
- vm_ooffset_t foff, noff, eoff;
- vm_page_t m;
-
- if (!(bp->b_flags & B_VMIO))
- return;
-
- foff = bp->b_offset;
- KASSERT(bp->b_offset != NOOFFSET,
- ("vfs_clean_pages: no buffer offset"));
- VM_OBJECT_LOCK(bp->b_bufobj->bo_object);
- for (i = 0; i < bp->b_npages; i++) {
- m = bp->b_pages[i];
- noff = (foff + PAGE_SIZE) & ~(off_t)PAGE_MASK;
- eoff = noff;
-
- if (eoff > bp->b_offset + bp->b_bufsize)
- eoff = bp->b_offset + bp->b_bufsize;
- vfs_page_set_validclean(bp, foff, m);
- /* vm_page_clear_dirty(m, foff & PAGE_MASK, eoff - foff); */
- foff = noff;
- }
- VM_OBJECT_UNLOCK(bp->b_bufobj->bo_object);
-}
-
-/*
* vfs_bio_set_valid:
*
* Set the range within the buffer to valid. The range is
OpenPOWER on IntegriCloud