Diffstat (limited to 'sys/kern/vfs_bio.c')
-rw-r--r--  sys/kern/vfs_bio.c  105
1 file changed, 101 insertions, 4 deletions
diff --git a/sys/kern/vfs_bio.c b/sys/kern/vfs_bio.c
index c1b53d8..a980330 100644
--- a/sys/kern/vfs_bio.c
+++ b/sys/kern/vfs_bio.c
@@ -281,6 +281,8 @@ waitrunningbufspace(void)
* Called when a buffer is extended. This function clears the B_CACHE
* bit if the newly extended portion of the buffer does not contain
* valid data.
+ *
+ * must be called with vm_mtx held
*/
static __inline__
void
@@ -426,11 +428,13 @@ bufinit(void)
* from buf_daemon.
*/
+ mtx_lock(&vm_mtx);
bogus_offset = kmem_alloc_pageable(kernel_map, PAGE_SIZE);
bogus_page = vm_page_alloc(kernel_object,
((bogus_offset - VM_MIN_KERNEL_ADDRESS) >> PAGE_SHIFT),
VM_ALLOC_NORMAL);
cnt.v_wire_count++;
+ mtx_unlock(&vm_mtx);
}
@@ -441,17 +445,27 @@ bufinit(void)
* buffer_map.
*
* Since this call frees up buffer space, we call bufspacewakeup().
+ *
+ * Can be called with or without the vm_mtx.
*/
static void
bfreekva(struct buf * bp)
{
+
if (bp->b_kvasize) {
+ int hadvmlock;
+
++buffreekvacnt;
bufspace -= bp->b_kvasize;
+ hadvmlock = mtx_owned(&vm_mtx);
+ if (!hadvmlock)
+ mtx_lock(&vm_mtx);
vm_map_delete(buffer_map,
(vm_offset_t) bp->b_kvabase,
(vm_offset_t) bp->b_kvabase + bp->b_kvasize
);
+ if (!hadvmlock)
+ mtx_unlock(&vm_mtx);
bp->b_kvasize = 0;
bufspacewakeup();
}
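
[Editor's note: the bfreekva() hunk above introduces the conditional-locking idiom this patch uses for functions that may be entered with or without vm_mtx. A minimal, self-contained sketch of that shape follows; it relies only on the mutex(9) calls already visible in the diff (mtx_owned/mtx_lock/mtx_unlock), and do_vm_work() is a hypothetical placeholder for the locked work (vm_map_delete() in the real function).]

#include <sys/param.h>
#include <sys/lock.h>
#include <sys/mutex.h>

extern struct mtx vm_mtx;		/* the global VM mutex used by this patch */

static void
do_vm_work(void)			/* hypothetical stand-in for vm_map_delete() */
{
}

static void
works_with_or_without_vm_mtx(void)
{
	int hadvmlock;

	hadvmlock = mtx_owned(&vm_mtx);	/* did the caller already take it? */
	if (!hadvmlock)
		mtx_lock(&vm_mtx);

	do_vm_work();			/* vm_mtx is held here either way */

	if (!hadvmlock)
		mtx_unlock(&vm_mtx);	/* leave the caller's lock state unchanged */
}
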
@@ -807,6 +821,7 @@ bdwrite(struct buf * bp)
VOP_BMAP(bp->b_vp, bp->b_lblkno, NULL, &bp->b_blkno, NULL, NULL);
}
+ mtx_lock(&vm_mtx);
/*
* Set the *dirty* buffer range based upon the VM system dirty pages.
*/
@@ -820,6 +835,7 @@ bdwrite(struct buf * bp)
* out on the next sync, or perhaps the cluster will be completed.
*/
vfs_clean_pages(bp);
+ mtx_unlock(&vm_mtx);
bqrelse(bp);
/*
@@ -973,12 +989,15 @@ buf_dirty_count_severe(void)
* Release a busy buffer and, if requested, free its resources. The
* buffer will be stashed in the appropriate bufqueue[] allowing it
* to be accessed later as a cache entity or reused for other purposes.
+ *
+ * vm_mtx must not be held.
*/
void
brelse(struct buf * bp)
{
int s;
+ mtx_assert(&vm_mtx, MA_NOTOWNED);
KASSERT(!(bp->b_flags & (B_CLUSTER|B_PAGING)), ("brelse: inappropriate B_PAGING or B_CLUSTER bp %p", bp));
s = splbio();
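
[Editor's note: brelse() above documents its locking contract with mtx_assert(9) rather than with comments alone. Below is a small sketch of the two assertion forms used throughout this patch; under INVARIANTS a violated assertion panics, otherwise the checks compile away. Both functions are hypothetical illustrations, not kernel interfaces.]

#include <sys/param.h>
#include <sys/lock.h>
#include <sys/mutex.h>

extern struct mtx vm_mtx;

static void
requires_vm_mtx(void)
{
	mtx_assert(&vm_mtx, MA_OWNED);		/* caller must hold vm_mtx */
	/* ... safe to touch vm_page/vm_object state ... */
}

static void
forbids_vm_mtx(void)
{
	mtx_assert(&vm_mtx, MA_NOTOWNED);	/* caller must not hold vm_mtx */
	/* ... free to sleep or to take vm_mtx itself ... */
}
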
@@ -1088,6 +1107,7 @@ brelse(struct buf * bp)
resid = bp->b_bufsize;
foff = bp->b_offset;
+ mtx_lock(&vm_mtx);
for (i = 0; i < bp->b_npages; i++) {
int had_bogus = 0;
@@ -1099,10 +1119,12 @@ brelse(struct buf * bp)
* now.
*/
if (m == bogus_page) {
+ mtx_unlock(&vm_mtx);
VOP_GETVOBJECT(vp, &obj);
poff = OFF_TO_IDX(bp->b_offset);
had_bogus = 1;
+ mtx_lock(&vm_mtx);
for (j = i; j < bp->b_npages; j++) {
vm_page_t mtmp;
mtmp = bp->b_pages[j];
@@ -1136,11 +1158,15 @@ brelse(struct buf * bp)
if (bp->b_flags & (B_INVAL | B_RELBUF))
vfs_vmio_release(bp);
+ mtx_unlock(&vm_mtx);
} else if (bp->b_flags & B_VMIO) {
- if (bp->b_flags & (B_INVAL | B_RELBUF))
+ if (bp->b_flags & (B_INVAL | B_RELBUF)) {
+ mtx_lock(&vm_mtx);
vfs_vmio_release(bp);
+ mtx_unlock(&vm_mtx);
+ }
}
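
[Editor's note: the two brelse() hunks above show the other recurring move in this patch: vm_mtx is held across the page walk but dropped around VOP_GETVOBJECT(), and taken only around vfs_vmio_release(). A sketch of that choreography, with hypothetical helpers standing in for the vnode and page operations:]

#include <sys/param.h>
#include <sys/lock.h>
#include <sys/mutex.h>

extern struct mtx vm_mtx;

static void vnode_op(void) { }				/* hypothetical: must run without vm_mtx */
static int  page_needs_fixup(int i) { return (i == 0); }	/* hypothetical check */
static void fixup_page(int i) { (void)i; }		/* hypothetical: needs vm_mtx */

static void
walk_buffer_pages(int npages)
{
	int i;

	mtx_lock(&vm_mtx);
	for (i = 0; i < npages; i++) {
		if (page_needs_fixup(i)) {
			mtx_unlock(&vm_mtx);	/* drop around the vnode call */
			vnode_op();
			mtx_lock(&vm_mtx);	/* re-taken; page state must be re-derived */
		}
		fixup_page(i);
	}
	mtx_unlock(&vm_mtx);
}
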
@@ -1302,6 +1328,9 @@ bqrelse(struct buf * bp)
splx(s);
}
+/*
+ * Must be called with vm_mtx held.
+ */
static void
vfs_vmio_release(bp)
struct buf *bp;
@@ -1310,6 +1339,7 @@ vfs_vmio_release(bp)
vm_page_t m;
s = splvm();
+ mtx_assert(&vm_mtx, MA_OWNED);
for (i = 0; i < bp->b_npages; i++) {
m = bp->b_pages[i];
bp->b_pages[i] = NULL;
@@ -1343,6 +1373,9 @@ vfs_vmio_release(bp)
}
splx(s);
pmap_qremove(trunc_page((vm_offset_t) bp->b_data), bp->b_npages);
+
+ /* could drop vm_mtx here */
+
if (bp->b_bufsize) {
bufspacewakeup();
bp->b_bufsize = 0;
@@ -1614,7 +1647,9 @@ restart:
if (qindex == QUEUE_CLEAN) {
if (bp->b_flags & B_VMIO) {
bp->b_flags &= ~B_ASYNC;
+ mtx_lock(&vm_mtx);
vfs_vmio_release(bp);
+ mtx_unlock(&vm_mtx);
}
if (bp->b_vp)
brelvp(bp);
@@ -1735,6 +1770,8 @@ restart:
if (maxsize != bp->b_kvasize) {
vm_offset_t addr = 0;
+ /* we'll hold the lock over some vm ops */
+ mtx_lock(&vm_mtx);
bfreekva(bp);
if (vm_map_findspace(buffer_map,
@@ -1743,6 +1780,7 @@ restart:
* Uh oh. Buffer map is too fragmented. We
* must defragment the map.
*/
+ mtx_unlock(&vm_mtx);
++bufdefragcnt;
defrag = 1;
bp->b_flags |= B_INVAL;
@@ -1759,6 +1797,7 @@ restart:
bufspace += bp->b_kvasize;
++bufreusecnt;
}
+ mtx_unlock(&vm_mtx);
}
bp->b_data = bp->b_kvabase;
}
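
[Editor's note: in the getnewbuf() hunks above, vm_mtx is held across the KVA reservation (bfreekva() plus the vm_map calls), released early on the fragmentation branch, and released at the end of the success path. A sketch of that two-exit locking shape; reserve_kva() is a hypothetical placeholder for the map operations:]

#include <sys/param.h>
#include <sys/lock.h>
#include <sys/mutex.h>

extern struct mtx vm_mtx;

static int
reserve_kva(int size)			/* hypothetical stand-in for the vm_map calls */
{
	return (size > 0);		/* pretend any positive-sized request succeeds */
}

static int
grow_buffer_kva(int maxsize)
{
	mtx_lock(&vm_mtx);
	if (!reserve_kva(maxsize)) {
		mtx_unlock(&vm_mtx);	/* failure: unlock before the defrag/retry path */
		return (0);
	}
	/* ... record the new KVA base and size ... */
	mtx_unlock(&vm_mtx);		/* success: unlock after the bookkeeping */
	return (1);
}
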
@@ -1936,18 +1975,24 @@ inmem(struct vnode * vp, daddr_t blkno)
size = vp->v_mount->mnt_stat.f_iosize;
off = (vm_ooffset_t)blkno * (vm_ooffset_t)vp->v_mount->mnt_stat.f_iosize;
+ mtx_lock(&vm_mtx);
for (toff = 0; toff < vp->v_mount->mnt_stat.f_iosize; toff += tinc) {
m = vm_page_lookup(obj, OFF_TO_IDX(off + toff));
if (!m)
- return 0;
+ goto notinmem;
tinc = size;
if (tinc > PAGE_SIZE - ((toff + off) & PAGE_MASK))
tinc = PAGE_SIZE - ((toff + off) & PAGE_MASK);
if (vm_page_is_valid(m,
(vm_offset_t) ((toff + off) & PAGE_MASK), tinc) == 0)
- return 0;
+ goto notinmem;
}
+ mtx_unlock(&vm_mtx);
return 1;
+
+notinmem:
+ mtx_unlock(&vm_mtx);
+ return (0);
}
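
[Editor's note: inmem() above replaces its bare "return 0" statements with "goto notinmem" so that every exit path passes through a single mtx_unlock(). A compact sketch of that single-exit shape, with a hypothetical page_resident() check:]

#include <sys/param.h>
#include <sys/lock.h>
#include <sys/mutex.h>

extern struct mtx vm_mtx;

static int
page_resident(int i)			/* hypothetical per-page check */
{
	return (i >= 0);
}

static int
all_pages_resident(int npages)
{
	int i;

	mtx_lock(&vm_mtx);
	for (i = 0; i < npages; i++) {
		if (!page_resident(i))
			goto notinmem;	/* bail out through the common unlock */
	}
	mtx_unlock(&vm_mtx);
	return (1);

notinmem:
	mtx_unlock(&vm_mtx);
	return (0);
}
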
/*
@@ -1960,11 +2005,14 @@ inmem(struct vnode * vp, daddr_t blkno)
*
* This routine is primarily used by NFS, but is generalized for the
* B_VMIO case.
+ *
+ * Can be called with or without vm_mtx
*/
static void
vfs_setdirty(struct buf *bp)
{
int i;
+ int hadvmlock;
vm_object_t object;
/*
@@ -1983,6 +2031,10 @@ vfs_setdirty(struct buf *bp)
if ((bp->b_flags & B_VMIO) == 0)
return;
+ hadvmlock = mtx_owned(&vm_mtx);
+ if (!hadvmlock)
+ mtx_lock(&vm_mtx);
+
object = bp->b_pages[0]->object;
if ((object->flags & OBJ_WRITEABLE) && !(object->flags & OBJ_MIGHTBEDIRTY))
@@ -2040,6 +2092,8 @@ vfs_setdirty(struct buf *bp)
bp->b_dirtyend = eoffset;
}
}
+ if (!hadvmlock)
+ mtx_unlock(&vm_mtx);
}
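
[Editor's note: vfs_setdirty() above adopts the same "with or without vm_mtx" contract as bfreekva(). From the caller's side the point is that both of the following hypothetical call sites are legal; the callee checks mtx_owned() and only locks when the caller has not:]

#include <sys/param.h>
#include <sys/lock.h>
#include <sys/mutex.h>

extern struct mtx vm_mtx;

static void
conditional_callee(void)		/* hypothetical vfs_setdirty()-style function */
{
	int hadvmlock;

	hadvmlock = mtx_owned(&vm_mtx);
	if (!hadvmlock)
		mtx_lock(&vm_mtx);
	/* ... work that needs vm_mtx ... */
	if (!hadvmlock)
		mtx_unlock(&vm_mtx);
}

static void
caller_without_lock(void)
{
	conditional_callee();		/* callee takes and drops vm_mtx itself */
}

static void
caller_with_lock(void)
{
	mtx_lock(&vm_mtx);
	conditional_callee();		/* callee sees mtx_owned() and leaves it alone */
	mtx_unlock(&vm_mtx);
}
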
/*
@@ -2441,6 +2495,7 @@ allocbuf(struct buf *bp, int size)
* DEV_BSIZE aligned existing buffer size. Figure out
* if we have to remove any pages.
*/
+ mtx_lock(&vm_mtx);
if (desiredpages < bp->b_npages) {
for (i = desiredpages; i < bp->b_npages; i++) {
/*
@@ -2461,6 +2516,7 @@ allocbuf(struct buf *bp, int size)
(desiredpages << PAGE_SHIFT), (bp->b_npages - desiredpages));
bp->b_npages = desiredpages;
}
+ mtx_unlock(&vm_mtx);
} else if (size > bp->b_bcount) {
/*
* We are growing the buffer, possibly in a
@@ -2481,6 +2537,7 @@ allocbuf(struct buf *bp, int size)
vp = bp->b_vp;
VOP_GETVOBJECT(vp, &obj);
+ mtx_lock(&vm_mtx);
while (bp->b_npages < desiredpages) {
vm_page_t m;
vm_pindex_t pi;
@@ -2589,6 +2646,9 @@ allocbuf(struct buf *bp, int size)
bp->b_pages,
bp->b_npages
);
+
+ mtx_unlock(&vm_mtx);
+
bp->b_data = (caddr_t)((vm_offset_t)bp->b_data |
(vm_offset_t)(bp->b_offset & PAGE_MASK));
}
@@ -2726,6 +2786,7 @@ bufdone(struct buf *bp)
if (error) {
panic("biodone: no object");
}
+ mtx_lock(&vm_mtx);
#if defined(VFS_BIO_DEBUG)
if (obj->paging_in_progress < bp->b_npages) {
printf("biodone: paging in progress(%d) < bp->b_npages(%d)\n",
@@ -2814,6 +2875,7 @@ bufdone(struct buf *bp)
}
if (obj)
vm_object_pip_wakeupn(obj, 0);
+ mtx_unlock(&vm_mtx);
}
/*
@@ -2837,12 +2899,15 @@ bufdone(struct buf *bp)
* This routine is called in lieu of iodone in the case of
* incomplete I/O. This keeps the busy status for pages
* consistent.
+ *
+ * vm_mtx should not be held
*/
void
vfs_unbusy_pages(struct buf * bp)
{
int i;
+ mtx_assert(&vm_mtx, MA_NOTOWNED);
runningbufwakeup(bp);
if (bp->b_flags & B_VMIO) {
struct vnode *vp = bp->b_vp;
@@ -2850,6 +2915,7 @@ vfs_unbusy_pages(struct buf * bp)
VOP_GETVOBJECT(vp, &obj);
+ mtx_lock(&vm_mtx);
for (i = 0; i < bp->b_npages; i++) {
vm_page_t m = bp->b_pages[i];
@@ -2866,6 +2932,7 @@ vfs_unbusy_pages(struct buf * bp)
vm_page_io_finish(m);
}
vm_object_pip_wakeupn(obj, 0);
+ mtx_unlock(&vm_mtx);
}
}
@@ -2876,12 +2943,15 @@ vfs_unbusy_pages(struct buf * bp)
* range is restricted to the buffer's size.
*
* This routine is typically called after a read completes.
+ *
+ * vm_mtx should be held
*/
static void
vfs_page_set_valid(struct buf *bp, vm_ooffset_t off, int pageno, vm_page_t m)
{
vm_ooffset_t soff, eoff;
+ mtx_assert(&vm_mtx, MA_OWNED);
/*
* Start and end offsets in buffer. eoff - soff may not cross a
* page boundary or cross the end of the buffer. The end of the
@@ -2917,12 +2987,15 @@ vfs_page_set_valid(struct buf *bp, vm_ooffset_t off, int pageno, vm_page_t m)
* Since I/O has not been initiated yet, certain buffer flags
* such as BIO_ERROR or B_INVAL may be in an inconsistent state
* and should be ignored.
+ *
+ * vm_mtx should not be held
*/
void
vfs_busy_pages(struct buf * bp, int clear_modify)
{
int i, bogus;
+ mtx_assert(&vm_mtx, MA_NOTOWNED);
if (bp->b_flags & B_VMIO) {
struct vnode *vp = bp->b_vp;
vm_object_t obj;
@@ -2932,6 +3005,7 @@ vfs_busy_pages(struct buf * bp, int clear_modify)
foff = bp->b_offset;
KASSERT(bp->b_offset != NOOFFSET,
("vfs_busy_pages: no buffer offset"));
+ mtx_lock(&vm_mtx);
vfs_setdirty(bp);
retry:
@@ -2979,6 +3053,7 @@ retry:
}
if (bogus)
pmap_qenter(trunc_page((vm_offset_t)bp->b_data), bp->b_pages, bp->b_npages);
+ mtx_unlock(&vm_mtx);
}
}
@@ -2989,12 +3064,15 @@ retry:
*
* Note that while we only really need to clean through to b_bcount, we
* just go ahead and clean through to b_bufsize.
+ *
+ * should be called with vm_mtx held
*/
static void
vfs_clean_pages(struct buf * bp)
{
int i;
+ mtx_assert(&vm_mtx, MA_OWNED);
if (bp->b_flags & B_VMIO) {
vm_ooffset_t foff;
@@ -3021,6 +3099,7 @@ vfs_clean_pages(struct buf * bp)
* Set the range within the buffer to valid and clean. The range is
* relative to the beginning of the buffer, b_offset. Note that b_offset
* itself may be offset from the beginning of the first page.
+ *
*/
void
@@ -3061,13 +3140,18 @@ vfs_bio_set_validclean(struct buf *bp, int base, int size)
*
* Note that while we only theoretically need to clear through b_bcount,
* we go ahead and clear through b_bufsize.
+ *
+ * We'll get vm_mtx here for safety if processing a VMIO buffer.
+ * I don't think vm_mtx is needed, but we're twiddling vm_page flags.
*/
void
vfs_bio_clrbuf(struct buf *bp) {
int i, mask = 0;
caddr_t sa, ea;
+
if ((bp->b_flags & (B_VMIO | B_MALLOC)) == B_VMIO) {
+ mtx_lock(&vm_mtx);
bp->b_flags &= ~B_INVAL;
bp->b_ioflags &= ~BIO_ERROR;
if( (bp->b_npages == 1) && (bp->b_bufsize < PAGE_SIZE) &&
@@ -3079,6 +3163,7 @@ vfs_bio_clrbuf(struct buf *bp) {
}
bp->b_pages[0]->valid |= mask;
bp->b_resid = 0;
+ mtx_unlock(&vm_mtx);
return;
}
ea = sa = bp->b_data;
@@ -3106,6 +3191,7 @@ vfs_bio_clrbuf(struct buf *bp) {
vm_page_flag_clear(bp->b_pages[i], PG_ZERO);
}
bp->b_resid = 0;
+ mtx_unlock(&vm_mtx);
} else {
clrbuf(bp);
}
@@ -3115,18 +3201,22 @@ vfs_bio_clrbuf(struct buf *bp) {
* vm_hold_load_pages and vm_hold_unload pages get pages into
* a buffer's address space. The pages are anonymous and are
* not associated with a file object.
+ *
+ * vm_mtx should not be held
*/
-void
+static void
vm_hold_load_pages(struct buf * bp, vm_offset_t from, vm_offset_t to)
{
vm_offset_t pg;
vm_page_t p;
int index;
+ mtx_assert(&vm_mtx, MA_NOTOWNED);
to = round_page(to);
from = round_page(from);
index = (from - trunc_page((vm_offset_t)bp->b_data)) >> PAGE_SHIFT;
+ mtx_lock(&vm_mtx);
for (pg = from; pg < to; pg += PAGE_SIZE, index++) {
tryagain:
@@ -3152,6 +3242,7 @@ tryagain:
vm_page_wakeup(p);
}
bp->b_npages = index;
+ mtx_unlock(&vm_mtx);
}
void
@@ -3160,11 +3251,15 @@ vm_hold_free_pages(struct buf * bp, vm_offset_t from, vm_offset_t to)
vm_offset_t pg;
vm_page_t p;
int index, newnpages;
+ int hadvmlock;
from = round_page(from);
to = round_page(to);
newnpages = index = (from - trunc_page((vm_offset_t)bp->b_data)) >> PAGE_SHIFT;
+ hadvmlock = mtx_owned(&vm_mtx);
+ if (!hadvmlock)
+ mtx_lock(&vm_mtx);
for (pg = from; pg < to; pg += PAGE_SIZE, index++) {
p = bp->b_pages[index];
if (p && (index < bp->b_npages)) {
@@ -3180,6 +3275,8 @@ vm_hold_free_pages(struct buf * bp, vm_offset_t from, vm_offset_t to)
}
}
bp->b_npages = newnpages;
+ if (!hadvmlock)
+ mtx_unlock(&vm_mtx);
}