Diffstat (limited to 'sys/kern/vfs_bio.c')
-rw-r--r-- | sys/kern/vfs_bio.c | 105
1 file changed, 101 insertions, 4 deletions
diff --git a/sys/kern/vfs_bio.c b/sys/kern/vfs_bio.c
index c1b53d8..a980330 100644
--- a/sys/kern/vfs_bio.c
+++ b/sys/kern/vfs_bio.c
@@ -281,6 +281,8 @@ waitrunningbufspace(void)
 *	Called when a buffer is extended.  This function clears the B_CACHE
 *	bit if the newly extended portion of the buffer does not contain
 *	valid data.
+ *
+ *	must be called with vm_mtx held
 */
 static __inline__
 void
@@ -426,11 +428,13 @@ bufinit(void)
	 * from buf_daemon.
	 */

+	mtx_lock(&vm_mtx);
	bogus_offset = kmem_alloc_pageable(kernel_map, PAGE_SIZE);
	bogus_page = vm_page_alloc(kernel_object,
			((bogus_offset - VM_MIN_KERNEL_ADDRESS) >> PAGE_SHIFT),
			VM_ALLOC_NORMAL);
	cnt.v_wire_count++;
+	mtx_unlock(&vm_mtx);
 }
@@ -441,17 +445,27 @@ bufinit(void)
 *	buffer_map.
 *
 *	Since this call frees up buffer space, we call bufspacewakeup().
+ *
+ *	Can be called with or without the vm_mtx.
 */
 static void
 bfreekva(struct buf * bp)
 {
	if (bp->b_kvasize) {
+		int hadvmlock;
+
		++buffreekvacnt;
		bufspace -= bp->b_kvasize;
+		hadvmlock = mtx_owned(&vm_mtx);
+		if (!hadvmlock)
+			mtx_lock(&vm_mtx);
		vm_map_delete(buffer_map,
		    (vm_offset_t) bp->b_kvabase,
		    (vm_offset_t) bp->b_kvabase + bp->b_kvasize
		);
+		if (!hadvmlock)
+			mtx_unlock(&vm_mtx);
		bp->b_kvasize = 0;
		bufspacewakeup();
	}
@@ -807,6 +821,7 @@ bdwrite(struct buf * bp)
		VOP_BMAP(bp->b_vp, bp->b_lblkno, NULL, &bp->b_blkno, NULL, NULL);
	}

+	mtx_lock(&vm_mtx);
	/*
	 * Set the *dirty* buffer range based upon the VM system dirty pages.
	 */
@@ -820,6 +835,7 @@ bdwrite(struct buf * bp)
	 * out on the next sync, or perhaps the cluster will be completed.
	 */
	vfs_clean_pages(bp);
+	mtx_unlock(&vm_mtx);
	bqrelse(bp);

	/*
@@ -973,12 +989,15 @@ buf_dirty_count_severe(void)
 *	Release a busy buffer and, if requested, free its resources.  The
 *	buffer will be stashed in the appropriate bufqueue[] allowing it
 *	to be accessed later as a cache entity or reused for other purposes.
+ *
+ *	vm_mtx must not be held.
 */
 void
 brelse(struct buf * bp)
 {
	int s;

+	mtx_assert(&vm_mtx, MA_NOTOWNED);
	KASSERT(!(bp->b_flags & (B_CLUSTER|B_PAGING)),
	    ("brelse: inappropriate B_PAGING or B_CLUSTER bp %p", bp));
	s = splbio();
@@ -1088,6 +1107,7 @@ brelse(struct buf * bp)
		resid = bp->b_bufsize;
		foff = bp->b_offset;

+		mtx_lock(&vm_mtx);
		for (i = 0; i < bp->b_npages; i++) {
			int had_bogus = 0;
@@ -1099,10 +1119,12 @@ brelse(struct buf * bp)
			 * now.
			 */
			if (m == bogus_page) {
+				mtx_unlock(&vm_mtx);
				VOP_GETVOBJECT(vp, &obj);
				poff = OFF_TO_IDX(bp->b_offset);
				had_bogus = 1;

+				mtx_lock(&vm_mtx);
				for (j = i; j < bp->b_npages; j++) {
					vm_page_t mtmp;
					mtmp = bp->b_pages[j];
@@ -1136,11 +1158,15 @@ brelse(struct buf * bp)

		if (bp->b_flags & (B_INVAL | B_RELBUF))
			vfs_vmio_release(bp);
+		mtx_unlock(&vm_mtx);

	} else if (bp->b_flags & B_VMIO) {

-		if (bp->b_flags & (B_INVAL | B_RELBUF))
+		if (bp->b_flags & (B_INVAL | B_RELBUF)) {
+			mtx_lock(&vm_mtx);
			vfs_vmio_release(bp);
+			mtx_unlock(&vm_mtx);
+		}

	}
@@ -1302,6 +1328,9 @@ bqrelse(struct buf * bp)
	splx(s);
 }

+/*
+ * Must be called with vm_mtx held.
+ */
 static void
 vfs_vmio_release(bp)
	struct buf *bp;
@@ -1310,6 +1339,7 @@ vfs_vmio_release(bp)
	vm_page_t m;

	s = splvm();
+	mtx_assert(&vm_mtx, MA_OWNED);
	for (i = 0; i < bp->b_npages; i++) {
		m = bp->b_pages[i];
		bp->b_pages[i] = NULL;
@@ -1343,6 +1373,9 @@ vfs_vmio_release(bp)
	}
	splx(s);
	pmap_qremove(trunc_page((vm_offset_t) bp->b_data), bp->b_npages);
+
+	/* could drop vm_mtx here */
+
	if (bp->b_bufsize) {
		bufspacewakeup();
		bp->b_bufsize = 0;
@@ -1614,7 +1647,9 @@ restart:
		if (qindex == QUEUE_CLEAN) {
			if (bp->b_flags & B_VMIO) {
				bp->b_flags &= ~B_ASYNC;
+				mtx_lock(&vm_mtx);
				vfs_vmio_release(bp);
+				mtx_unlock(&vm_mtx);
			}
			if (bp->b_vp)
				brelvp(bp);
@@ -1735,6 +1770,8 @@ restart:
		if (maxsize != bp->b_kvasize) {
			vm_offset_t addr = 0;

+			/* we'll hold the lock over some vm ops */
+			mtx_lock(&vm_mtx);
			bfreekva(bp);

			if (vm_map_findspace(buffer_map,
@@ -1743,6 +1780,7 @@ restart:
				 * Uh oh.  Buffer map is too fragmented.  We
				 * must defragment the map.
				 */
+				mtx_unlock(&vm_mtx);
				++bufdefragcnt;
				defrag = 1;
				bp->b_flags |= B_INVAL;
@@ -1759,6 +1797,7 @@ restart:
				bufspace += bp->b_kvasize;
				++bufreusecnt;
			}
+			mtx_unlock(&vm_mtx);
		}
		bp->b_data = bp->b_kvabase;
	}
@@ -1936,18 +1975,24 @@ inmem(struct vnode * vp, daddr_t blkno)
	size = vp->v_mount->mnt_stat.f_iosize;
	off = (vm_ooffset_t)blkno * (vm_ooffset_t)vp->v_mount->mnt_stat.f_iosize;

+	mtx_lock(&vm_mtx);
	for (toff = 0; toff < vp->v_mount->mnt_stat.f_iosize; toff += tinc) {
		m = vm_page_lookup(obj, OFF_TO_IDX(off + toff));
		if (!m)
-			return 0;
+			goto notinmem;
		tinc = size;
		if (tinc > PAGE_SIZE - ((toff + off) & PAGE_MASK))
			tinc = PAGE_SIZE - ((toff + off) & PAGE_MASK);
		if (vm_page_is_valid(m,
		    (vm_offset_t) ((toff + off) & PAGE_MASK), tinc) == 0)
-			return 0;
+			goto notinmem;
	}
+	mtx_unlock(&vm_mtx);
	return 1;
+
+notinmem:
+	mtx_unlock(&vm_mtx);
+	return (0);
 }
@@ -1960,11 +2005,14 @@ inmem(struct vnode * vp, daddr_t blkno)
 *
 *	This routine is primarily used by NFS, but is generalized for the
 *	B_VMIO case.
+ *
+ *	Can be called with or without vm_mtx
 */
 static void
 vfs_setdirty(struct buf *bp)
 {
	int i;
+	int hadvmlock;
	vm_object_t object;

	/*
@@ -1983,6 +2031,10 @@ vfs_setdirty(struct buf *bp)
	if ((bp->b_flags & B_VMIO) == 0)
		return;

+	hadvmlock = mtx_owned(&vm_mtx);
+	if (!hadvmlock)
+		mtx_lock(&vm_mtx);
+
	object = bp->b_pages[0]->object;

	if ((object->flags & OBJ_WRITEABLE) && !(object->flags & OBJ_MIGHTBEDIRTY))
@@ -2040,6 +2092,8 @@ vfs_setdirty(struct buf *bp)
			bp->b_dirtyend = eoffset;
		}
	}
+	if (!hadvmlock)
+		mtx_unlock(&vm_mtx);
 }
@@ -2441,6 +2495,7 @@ allocbuf(struct buf *bp, int size)
			 * DEV_BSIZE aligned existing buffer size.  Figure out
			 * if we have to remove any pages.
			 */
+			mtx_lock(&vm_mtx);
			if (desiredpages < bp->b_npages) {
				for (i = desiredpages; i < bp->b_npages; i++) {
					/*
@@ -2461,6 +2516,7 @@ allocbuf(struct buf *bp, int size)
				    (desiredpages << PAGE_SHIFT),
				    (bp->b_npages - desiredpages));
				bp->b_npages = desiredpages;
			}
+			mtx_unlock(&vm_mtx);
		} else if (size > bp->b_bcount) {
			/*
@@ -2481,6 +2537,7 @@ allocbuf(struct buf *bp, int size)
			vp = bp->b_vp;
			VOP_GETVOBJECT(vp, &obj);

+			mtx_lock(&vm_mtx);
			while (bp->b_npages < desiredpages) {
				vm_page_t m;
				vm_pindex_t pi;
@@ -2589,6 +2646,9 @@ allocbuf(struct buf *bp, int size)
				bp->b_pages,
				bp->b_npages
			);
+
+			mtx_unlock(&vm_mtx);
+
			bp->b_data = (caddr_t)((vm_offset_t)bp->b_data |
			    (vm_offset_t)(bp->b_offset & PAGE_MASK));
		}
@@ -2726,6 +2786,7 @@ bufdone(struct buf *bp)
		if (error) {
			panic("biodone: no object");
		}
+		mtx_lock(&vm_mtx);
 #if defined(VFS_BIO_DEBUG)
		if (obj->paging_in_progress < bp->b_npages) {
			printf("biodone: paging in progress(%d) < bp->b_npages(%d)\n",
@@ -2814,6 +2875,7 @@ bufdone(struct buf *bp)
		}
		if (obj)
			vm_object_pip_wakeupn(obj, 0);
+		mtx_unlock(&vm_mtx);
	}
@@ -2837,12 +2899,15 @@ bufdone(struct buf *bp)
 *	This routine is called in lieu of iodone in the case of
 *	incomplete I/O.  This keeps the busy status for pages
 *	consistent.
+ *
+ *	vm_mtx should not be held
 */
 void
 vfs_unbusy_pages(struct buf * bp)
 {
	int i;

+	mtx_assert(&vm_mtx, MA_NOTOWNED);
	runningbufwakeup(bp);
	if (bp->b_flags & B_VMIO) {
		struct vnode *vp = bp->b_vp;
@@ -2850,6 +2915,7 @@ vfs_unbusy_pages(struct buf * bp)

		VOP_GETVOBJECT(vp, &obj);

+		mtx_lock(&vm_mtx);
		for (i = 0; i < bp->b_npages; i++) {
			vm_page_t m = bp->b_pages[i];
@@ -2866,6 +2932,7 @@ vfs_unbusy_pages(struct buf * bp)
			vm_page_io_finish(m);
		}
		vm_object_pip_wakeupn(obj, 0);
+		mtx_unlock(&vm_mtx);
	}
 }
@@ -2876,12 +2943,15 @@ vfs_unbusy_pages(struct buf * bp)
 *	range is restricted to the buffer's size.
 *
 *	This routine is typically called after a read completes.
+ *
+ *	vm_mtx should be held
 */
 static void
 vfs_page_set_valid(struct buf *bp, vm_ooffset_t off, int pageno, vm_page_t m)
 {
	vm_ooffset_t soff, eoff;

+	mtx_assert(&vm_mtx, MA_OWNED);
	/*
	 * Start and end offsets in buffer.  eoff - soff may not cross a
	 * page boundary or cross the end of the buffer.  The end of the
@@ -2917,12 +2987,15 @@ vfs_page_set_valid(struct buf *bp, vm_ooffset_t off, int pageno, vm_page_t m)
 *	Since I/O has not been initiated yet, certain buffer flags
 *	such as BIO_ERROR or B_INVAL may be in an inconsistent state
 *	and should be ignored.
+ *
+ *	vm_mtx should not be held
 */
 void
 vfs_busy_pages(struct buf * bp, int clear_modify)
 {
	int i, bogus;

+	mtx_assert(&vm_mtx, MA_NOTOWNED);
	if (bp->b_flags & B_VMIO) {
		struct vnode *vp = bp->b_vp;
		vm_object_t obj;
@@ -2932,6 +3005,7 @@ vfs_busy_pages(struct buf * bp, int clear_modify)
		foff = bp->b_offset;
		KASSERT(bp->b_offset != NOOFFSET,
		    ("vfs_busy_pages: no buffer offset"));
+		mtx_lock(&vm_mtx);
		vfs_setdirty(bp);

retry:
@@ -2979,6 +3053,7 @@ retry:
		}
		if (bogus)
			pmap_qenter(trunc_page((vm_offset_t)bp->b_data), bp->b_pages, bp->b_npages);
+		mtx_unlock(&vm_mtx);
	}
 }
@@ -2989,12 +3064,15 @@ retry:
 *
 *	Note that while we only really need to clean through to b_bcount, we
 *	just go ahead and clean through to b_bufsize.
+ *
+ *	should be called with vm_mtx held
 */
 static void
 vfs_clean_pages(struct buf * bp)
 {
	int i;

+	mtx_assert(&vm_mtx, MA_OWNED);
	if (bp->b_flags & B_VMIO) {
		vm_ooffset_t foff;
@@ -3021,6 +3099,7 @@
 *	Set the range within the buffer to valid and clean.  The range is
 *	relative to the beginning of the buffer, b_offset.  Note that b_offset
 *	itself may be offset from the beginning of the first page.
+ *
 */
 void
@@ -3061,13 +3140,18 @@ vfs_bio_set_validclean(struct buf *bp, int base, int size)
 *
 *	Note that while we only theoretically need to clear through b_bcount,
 *	we go ahead and clear through b_bufsize.
+ *
+ *	We'll get vm_mtx here for safety if processing a VMIO buffer.
+ *	I don't think vm_mtx is needed, but we're twiddling vm_page flags.
 */
 void
 vfs_bio_clrbuf(struct buf *bp) {
	int i, mask = 0;
	caddr_t sa, ea;
+
	if ((bp->b_flags & (B_VMIO | B_MALLOC)) == B_VMIO) {
+		mtx_lock(&vm_mtx);
		bp->b_flags &= ~B_INVAL;
		bp->b_ioflags &= ~BIO_ERROR;
		if( (bp->b_npages == 1) && (bp->b_bufsize < PAGE_SIZE) &&
@@ -3079,6 +3163,7 @@ vfs_bio_clrbuf(struct buf *bp) {
			}
			bp->b_pages[0]->valid |= mask;
			bp->b_resid = 0;
+			mtx_unlock(&vm_mtx);
			return;
		}
		ea = sa = bp->b_data;
@@ -3106,6 +3191,7 @@ vfs_bio_clrbuf(struct buf *bp) {
			vm_page_flag_clear(bp->b_pages[i], PG_ZERO);
		}
		bp->b_resid = 0;
+		mtx_unlock(&vm_mtx);
	} else {
		clrbuf(bp);
	}
@@ -3115,18 +3201,22 @@ vfs_bio_clrbuf(struct buf *bp) {
 *	vm_hold_load_pages and vm_hold_free_pages get pages into
 *	a buffer's address space.  The pages are anonymous and are
 *	not associated with a file object.
+ *
+ *	vm_mtx should not be held
 */
-void
+static void
 vm_hold_load_pages(struct buf * bp, vm_offset_t from, vm_offset_t to)
 {
	vm_offset_t pg;
	vm_page_t p;
	int index;

+	mtx_assert(&vm_mtx, MA_NOTOWNED);
	to = round_page(to);
	from = round_page(from);
	index = (from - trunc_page((vm_offset_t)bp->b_data)) >> PAGE_SHIFT;

+	mtx_lock(&vm_mtx);
	for (pg = from; pg < to; pg += PAGE_SIZE, index++) {
tryagain:
@@ -3152,6 +3242,7 @@ tryagain:
		vm_page_wakeup(p);
	}
	bp->b_npages = index;
+	mtx_unlock(&vm_mtx);
 }

 void
@@ -3160,11 +3251,15 @@ vm_hold_free_pages(struct buf * bp, vm_offset_t from, vm_offset_t to)
	vm_offset_t pg;
	vm_page_t p;
	int index, newnpages;
+	int hadvmlock;

	from = round_page(from);
	to = round_page(to);
	newnpages = index = (from - trunc_page((vm_offset_t)bp->b_data)) >> PAGE_SHIFT;

+	hadvmlock = mtx_owned(&vm_mtx);
+	if (!hadvmlock)
+		mtx_lock(&vm_mtx);
	for (pg = from; pg < to; pg += PAGE_SIZE, index++) {
		p = bp->b_pages[index];
		if (p && (index < bp->b_npages)) {
@@ -3180,6 +3275,8 @@ vm_hold_free_pages(struct buf * bp, vm_offset_t from, vm_offset_t to)
		}
	}
	bp->b_npages = newnpages;
+	if (!hadvmlock)
+		mtx_unlock(&vm_mtx);
 }
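
The recurring pattern in this change is conditional acquisition of vm_mtx: functions that may be entered with or without the mutex (bfreekva, vfs_setdirty, vm_hold_free_pages) record ownership first and only lock and unlock when the caller did not already hold it. A minimal kernel-style sketch of the idiom follows; the helper name example_vm_op is hypothetical, while mtx_owned(), mtx_lock(), and mtx_unlock() are the FreeBSD mutex API used throughout the diff.

#include <sys/param.h>
#include <sys/lock.h>
#include <sys/mutex.h>

extern struct mtx vm_mtx;		/* the global VM mutex of this era */

static void
example_vm_op(void)
{
	int hadvmlock;

	hadvmlock = mtx_owned(&vm_mtx);	/* did the caller already lock it? */
	if (!hadvmlock)
		mtx_lock(&vm_mtx);

	/* ... VM work that requires vm_mtx ... */

	if (!hadvmlock)
		mtx_unlock(&vm_mtx);	/* restore the caller's locking state */
}

The idiom keeps callers with either locking convention working during the transition, at the cost of a weaker contract than a hard assertion; recursing on the lock is never attempted.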
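Where a function does have a fixed contract, the diff documents it in the comment ("vm_mtx should not be held") and enforces it at entry with mtx_assert(). A sketch of both directions of the convention, with illustrative function names not taken from the file:

#include <sys/param.h>
#include <sys/lock.h>
#include <sys/mutex.h>

extern struct mtx vm_mtx;

/* Contract: caller must already hold vm_mtx (cf. vfs_page_set_valid). */
static void
needs_vm_mtx(void)
{
	mtx_assert(&vm_mtx, MA_OWNED);
	/* ... touch vm_page state ... */
}

/* Contract: caller must not hold vm_mtx; we take and release it
 * ourselves (cf. vfs_busy_pages and vfs_unbusy_pages). */
static void
takes_vm_mtx(void)
{
	mtx_assert(&vm_mtx, MA_NOTOWNED);
	mtx_lock(&vm_mtx);
	/* ... VM work ... */
	mtx_unlock(&vm_mtx);
}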
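The inmem() hunk shows the matching exit-path discipline: once vm_mtx is held, each early return is rewritten as a goto to a label that drops the mutex, so no return path can leak the lock. Schematically, with npages and page_valid() as placeholders rather than the real inmem() logic:

#include <sys/param.h>
#include <sys/lock.h>
#include <sys/mutex.h>

extern struct mtx vm_mtx;

static int npages;			/* placeholder state */
static int page_valid(int i);		/* placeholder check */

static int
all_pages_valid(void)
{
	int i;

	mtx_lock(&vm_mtx);
	for (i = 0; i < npages; i++) {
		if (!page_valid(i))
			goto notinmem;	/* early exit still unlocks */
	}
	mtx_unlock(&vm_mtx);
	return (1);

notinmem:
	mtx_unlock(&vm_mtx);
	return (0);
}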