author | alfred <alfred@FreeBSD.org> | 2001-05-19 01:28:09 +0000
---|---|---
committer | alfred <alfred@FreeBSD.org> | 2001-05-19 01:28:09 +0000
commit | a3f0842419d98da211706f921fc626e160cd960b (patch) |
tree | e86922a5639c32e1242d4f3088fc487f3be5b236 | /sys/kern/vfs_bio.c
parent | 9eda9187f024233436e6a743f13bd938b1a0f19c (diff) |
Introduce a global lock for the vm subsystem (vm_mtx).

vm_mtx does not recurse and is required for most low-level
vm operations.

Faults cannot be taken without holding Giant.

Memory subsystems can now call the base page allocators safely.

Almost all atomic ops were removed, as they are covered under the
vm mutex.

Alpha and ia64 now need to catch up to i386's trap handlers.

FFS and NFS have been tested; other filesystems will need minor
changes (grabbing the vm lock when twiddling page properties).
Reviewed (partially) by: jake, jhb
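
Most of the churn in the diff below is a single idiom: routines that may be entered with or without vm_mtx (bfreekva(), vfs_setdirty(), vm_hold_free_pages()) record whether the caller already owned the mutex, acquire it only if not, and release it only if this frame acquired it, while routines with a fixed locking contract simply assert it with mtx_assert(). A minimal sketch of that conditional-lock idiom, with a hypothetical page_twiddle() helper standing in for the real functions:

```c
#include <sys/param.h>
#include <sys/systm.h>
#include <sys/lock.h>
#include <sys/mutex.h>
#include <sys/bio.h>
#include <sys/buf.h>

extern struct mtx vm_mtx;	/* the new global vm lock introduced by this change */

/*
 * Hypothetical helper (not part of the commit) showing the
 * lock-if-not-held idiom adopted by bfreekva(), vfs_setdirty()
 * and vm_hold_free_pages() below.
 */
static void
page_twiddle(struct buf *bp)
{
    int hadvmlock;

    hadvmlock = mtx_owned(&vm_mtx);     /* caller may already hold it */
    if (!hadvmlock)
        mtx_lock(&vm_mtx);

    /* ... operate on bp->b_pages[] while vm_mtx is held ... */

    if (!hadvmlock)
        mtx_unlock(&vm_mtx);            /* drop only what we acquired */
}
```

Because vm_mtx does not recurse, the mtx_owned() check is what allows the same routine to be called from both locked and unlocked contexts.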
Diffstat (limited to 'sys/kern/vfs_bio.c')
-rw-r--r-- | sys/kern/vfs_bio.c | 105 |
1 file changed, 101 insertions, 4 deletions
```
diff --git a/sys/kern/vfs_bio.c b/sys/kern/vfs_bio.c
index c1b53d8..a980330 100644
--- a/sys/kern/vfs_bio.c
+++ b/sys/kern/vfs_bio.c
@@ -281,6 +281,8 @@ waitrunningbufspace(void)
  * Called when a buffer is extended. This function clears the B_CACHE
  * bit if the newly extended portion of the buffer does not contain
  * valid data.
+ *
+ * must be called with vm_mtx held
  */
 
 static __inline__ void
@@ -426,11 +428,13 @@ bufinit(void)
      * from buf_daemon.
      */
 
+    mtx_lock(&vm_mtx);
     bogus_offset = kmem_alloc_pageable(kernel_map, PAGE_SIZE);
     bogus_page = vm_page_alloc(kernel_object,
             ((bogus_offset - VM_MIN_KERNEL_ADDRESS) >> PAGE_SHIFT),
             VM_ALLOC_NORMAL);
     cnt.v_wire_count++;
+    mtx_unlock(&vm_mtx);
 }
@@ -441,17 +445,27 @@ bufinit(void)
  * buffer_map.
  *
  * Since this call frees up buffer space, we call bufspacewakeup().
+ *
+ * Can be called with or without the vm_mtx.
  */
 static void
 bfreekva(struct buf * bp)
 {
+
     if (bp->b_kvasize) {
+        int hadvmlock;
+
         ++buffreekvacnt;
         bufspace -= bp->b_kvasize;
+        hadvmlock = mtx_owned(&vm_mtx);
+        if (!hadvmlock)
+            mtx_lock(&vm_mtx);
         vm_map_delete(buffer_map,
             (vm_offset_t) bp->b_kvabase,
             (vm_offset_t) bp->b_kvabase + bp->b_kvasize
         );
+        if (!hadvmlock)
+            mtx_unlock(&vm_mtx);
         bp->b_kvasize = 0;
         bufspacewakeup();
     }
@@ -807,6 +821,7 @@ bdwrite(struct buf * bp)
         VOP_BMAP(bp->b_vp, bp->b_lblkno, NULL, &bp->b_blkno, NULL, NULL);
     }
 
+    mtx_lock(&vm_mtx);
     /*
      * Set the *dirty* buffer range based upon the VM system dirty pages.
      */
@@ -820,6 +835,7 @@ bdwrite(struct buf * bp)
      * out on the next sync, or perhaps the cluster will be completed.
      */
     vfs_clean_pages(bp);
+    mtx_unlock(&vm_mtx);
     bqrelse(bp);
 
     /*
@@ -973,12 +989,15 @@ buf_dirty_count_severe(void)
  * Release a busy buffer and, if requested, free its resources. The
  * buffer will be stashed in the appropriate bufqueue[] allowing it
  * to be accessed later as a cache entity or reused for other purposes.
+ *
+ * vm_mtx must be not be held.
  */
 void
 brelse(struct buf * bp)
 {
     int s;
 
+    mtx_assert(&vm_mtx, MA_NOTOWNED);
     KASSERT(!(bp->b_flags & (B_CLUSTER|B_PAGING)),
         ("brelse: inappropriate B_PAGING or B_CLUSTER bp %p", bp));
 
     s = splbio();
@@ -1088,6 +1107,7 @@ brelse(struct buf * bp)
         resid = bp->b_bufsize;
         foff = bp->b_offset;
 
+        mtx_lock(&vm_mtx);
         for (i = 0; i < bp->b_npages; i++) {
             int had_bogus = 0;
@@ -1099,10 +1119,12 @@ brelse(struct buf * bp)
              * now.
              */
             if (m == bogus_page) {
+                mtx_unlock(&vm_mtx);
                 VOP_GETVOBJECT(vp, &obj);
                 poff = OFF_TO_IDX(bp->b_offset);
                 had_bogus = 1;
 
+                mtx_lock(&vm_mtx);
                 for (j = i; j < bp->b_npages; j++) {
                     vm_page_t mtmp;
                     mtmp = bp->b_pages[j];
@@ -1136,11 +1158,15 @@ brelse(struct buf * bp)
 
         if (bp->b_flags & (B_INVAL | B_RELBUF))
             vfs_vmio_release(bp);
+        mtx_unlock(&vm_mtx);
 
     } else if (bp->b_flags & B_VMIO) {
 
-        if (bp->b_flags & (B_INVAL | B_RELBUF))
+        if (bp->b_flags & (B_INVAL | B_RELBUF)) {
+            mtx_lock(&vm_mtx);
             vfs_vmio_release(bp);
+            mtx_unlock(&vm_mtx);
+        }
 
     }
@@ -1302,6 +1328,9 @@ bqrelse(struct buf * bp)
     splx(s);
 }
 
+/*
+ * Must be called with vm_mtx held.
+ */
 static void
 vfs_vmio_release(bp)
     struct buf *bp;
@@ -1310,6 +1339,7 @@ vfs_vmio_release(bp)
     vm_page_t m;
 
     s = splvm();
+    mtx_assert(&vm_mtx, MA_OWNED);
     for (i = 0; i < bp->b_npages; i++) {
         m = bp->b_pages[i];
         bp->b_pages[i] = NULL;
@@ -1343,6 +1373,9 @@ vfs_vmio_release(bp)
     }
     splx(s);
     pmap_qremove(trunc_page((vm_offset_t) bp->b_data), bp->b_npages);
+
+    /* could drop vm_mtx here */
+
     if (bp->b_bufsize) {
         bufspacewakeup();
         bp->b_bufsize = 0;
     }
@@ -1614,7 +1647,9 @@ restart:
         if (qindex == QUEUE_CLEAN) {
             if (bp->b_flags & B_VMIO) {
                 bp->b_flags &= ~B_ASYNC;
+                mtx_lock(&vm_mtx);
                 vfs_vmio_release(bp);
+                mtx_unlock(&vm_mtx);
             }
             if (bp->b_vp)
                 brelvp(bp);
@@ -1735,6 +1770,8 @@ restart:
         if (maxsize != bp->b_kvasize) {
             vm_offset_t addr = 0;
 
+            /* we'll hold the lock over some vm ops */
+            mtx_lock(&vm_mtx);
             bfreekva(bp);
 
             if (vm_map_findspace(buffer_map,
@@ -1743,6 +1780,7 @@ restart:
                  * Uh oh. Buffer map is to fragmented. We
                  * must defragment the map.
                  */
+                mtx_unlock(&vm_mtx);
                 ++bufdefragcnt;
                 defrag = 1;
                 bp->b_flags |= B_INVAL;
@@ -1759,6 +1797,7 @@ restart:
                 bufspace += bp->b_kvasize;
                 ++bufreusecnt;
             }
+            mtx_unlock(&vm_mtx);
         }
         bp->b_data = bp->b_kvabase;
     }
@@ -1936,18 +1975,24 @@ inmem(struct vnode * vp, daddr_t blkno)
     size = vp->v_mount->mnt_stat.f_iosize;
     off = (vm_ooffset_t)blkno * (vm_ooffset_t)vp->v_mount->mnt_stat.f_iosize;
 
+    mtx_lock(&vm_mtx);
     for (toff = 0; toff < vp->v_mount->mnt_stat.f_iosize; toff += tinc) {
         m = vm_page_lookup(obj, OFF_TO_IDX(off + toff));
         if (!m)
-            return 0;
+            goto notinmem;
         tinc = size;
         if (tinc > PAGE_SIZE - ((toff + off) & PAGE_MASK))
             tinc = PAGE_SIZE - ((toff + off) & PAGE_MASK);
         if (vm_page_is_valid(m,
             (vm_offset_t) ((toff + off) & PAGE_MASK), tinc) == 0)
-            return 0;
+            goto notinmem;
     }
+    mtx_unlock(&vm_mtx);
     return 1;
+
+notinmem:
+    mtx_unlock(&vm_mtx);
+    return (0);
 }
@@ -1960,11 +2005,14 @@ inmem(struct vnode * vp, daddr_t blkno)
  *
  * This routine is primarily used by NFS, but is generalized for the
  * B_VMIO case.
+ *
+ * Can be called with or without vm_mtx
  */
 static void
 vfs_setdirty(struct buf *bp)
 {
     int i;
+    int hadvmlock;
     vm_object_t object;
 
     /*
@@ -1983,6 +2031,10 @@ vfs_setdirty(struct buf *bp)
     if ((bp->b_flags & B_VMIO) == 0)
         return;
 
+    hadvmlock = mtx_owned(&vm_mtx);
+    if (!hadvmlock)
+        mtx_lock(&vm_mtx);
+
     object = bp->b_pages[0]->object;
 
     if ((object->flags & OBJ_WRITEABLE) && !(object->flags & OBJ_MIGHTBEDIRTY))
@@ -2040,6 +2092,8 @@ vfs_setdirty(struct buf *bp)
             bp->b_dirtyend = eoffset;
         }
     }
+    if (!hadvmlock)
+        mtx_unlock(&vm_mtx);
 }
@@ -2441,6 +2495,7 @@ allocbuf(struct buf *bp, int size)
              * DEV_BSIZE aligned existing buffer size. Figure out
              * if we have to remove any pages.
              */
+            mtx_lock(&vm_mtx);
             if (desiredpages < bp->b_npages) {
                 for (i = desiredpages; i < bp->b_npages; i++) {
                     /*
@@ -2461,6 +2516,7 @@ allocbuf(struct buf *bp, int size)
                     (desiredpages << PAGE_SHIFT), (bp->b_npages - desiredpages));
                 bp->b_npages = desiredpages;
             }
+            mtx_unlock(&vm_mtx);
         } else if (size > bp->b_bcount) {
             /*
              * We are growing the buffer, possibly in a
@@ -2481,6 +2537,7 @@ allocbuf(struct buf *bp, int size)
             vp = bp->b_vp;
             VOP_GETVOBJECT(vp, &obj);
 
+            mtx_lock(&vm_mtx);
             while (bp->b_npages < desiredpages) {
                 vm_page_t m;
                 vm_pindex_t pi;
@@ -2589,6 +2646,9 @@ allocbuf(struct buf *bp, int size)
                 bp->b_pages,
                 bp->b_npages
             );
+
+            mtx_unlock(&vm_mtx);
+
             bp->b_data = (caddr_t)((vm_offset_t)bp->b_data |
                 (vm_offset_t)(bp->b_offset & PAGE_MASK));
         }
@@ -2726,6 +2786,7 @@ bufdone(struct buf *bp)
         if (error) {
             panic("biodone: no object");
         }
+        mtx_lock(&vm_mtx);
 #if defined(VFS_BIO_DEBUG)
         if (obj->paging_in_progress < bp->b_npages) {
             printf("biodone: paging in progress(%d) < bp->b_npages(%d)\n",
@@ -2814,6 +2875,7 @@ bufdone(struct buf *bp)
         }
         if (obj)
             vm_object_pip_wakeupn(obj, 0);
+        mtx_unlock(&vm_mtx);
     }
@@ -2837,12 +2899,15 @@ bufdone(struct buf *bp)
  * This routine is called in lieu of iodone in the case of
  * incomplete I/O. This keeps the busy status for pages
  * consistant.
+ *
+ * vm_mtx should not be held
  */
 void
 vfs_unbusy_pages(struct buf * bp)
 {
     int i;
 
+    mtx_assert(&vm_mtx, MA_NOTOWNED);
     runningbufwakeup(bp);
     if (bp->b_flags & B_VMIO) {
         struct vnode *vp = bp->b_vp;
@@ -2850,6 +2915,7 @@ vfs_unbusy_pages(struct buf * bp)
 
         VOP_GETVOBJECT(vp, &obj);
 
+        mtx_lock(&vm_mtx);
         for (i = 0; i < bp->b_npages; i++) {
             vm_page_t m = bp->b_pages[i];
@@ -2866,6 +2932,7 @@ vfs_unbusy_pages(struct buf * bp)
             vm_page_io_finish(m);
         }
         vm_object_pip_wakeupn(obj, 0);
+        mtx_unlock(&vm_mtx);
     }
 }
@@ -2876,12 +2943,15 @@ vfs_unbusy_pages(struct buf * bp)
  * range is restricted to the buffer's size.
  *
  * This routine is typically called after a read completes.
+ *
+ * vm_mtx should be held
  */
 static void
 vfs_page_set_valid(struct buf *bp, vm_ooffset_t off, int pageno, vm_page_t m)
 {
     vm_ooffset_t soff, eoff;
 
+    mtx_assert(&vm_mtx, MA_OWNED);
     /*
      * Start and end offsets in buffer. eoff - soff may not cross a
      * page boundry or cross the end of the buffer. The end of the
@@ -2917,12 +2987,15 @@ vfs_page_set_valid(struct buf *bp, vm_ooffset_t off, int pageno, vm_page_t m)
  * Since I/O has not been initiated yet, certain buffer flags
  * such as BIO_ERROR or B_INVAL may be in an inconsistant state
  * and should be ignored.
+ *
+ * vm_mtx should not be held
  */
 void
 vfs_busy_pages(struct buf * bp, int clear_modify)
 {
     int i, bogus;
 
+    mtx_assert(&vm_mtx, MA_NOTOWNED);
     if (bp->b_flags & B_VMIO) {
         struct vnode *vp = bp->b_vp;
         vm_object_t obj;
@@ -2932,6 +3005,7 @@ vfs_busy_pages(struct buf * bp, int clear_modify)
         foff = bp->b_offset;
         KASSERT(bp->b_offset != NOOFFSET,
             ("vfs_busy_pages: no buffer offset"));
+        mtx_lock(&vm_mtx);
         vfs_setdirty(bp);
 
 retry:
@@ -2979,6 +3053,7 @@ retry:
         }
         if (bogus)
             pmap_qenter(trunc_page((vm_offset_t)bp->b_data), bp->b_pages, bp->b_npages);
+        mtx_unlock(&vm_mtx);
     }
 }
@@ -2989,12 +3064,15 @@ retry:
  *
  * Note that while we only really need to clean through to b_bcount, we
  * just go ahead and clean through to b_bufsize.
+ *
+ * should be called with vm_mtx held
  */
 static void
 vfs_clean_pages(struct buf * bp)
 {
     int i;
 
+    mtx_assert(&vm_mtx, MA_OWNED);
     if (bp->b_flags & B_VMIO) {
         vm_ooffset_t foff;
@@ -3021,6 +3099,7 @@ vfs_clean_pages(struct buf * bp)
  * Set the range within the buffer to valid and clean. The range is
  * relative to the beginning of the buffer, b_offset. Note that b_offset
  * itself may be offset from the beginning of the first page.
+ *
  */
 
 void
@@ -3061,13 +3140,18 @@ vfs_bio_set_validclean(struct buf *bp, int base, int size)
  *
  * Note that while we only theoretically need to clear through b_bcount,
  * we go ahead and clear through b_bufsize.
+ *
+ * We'll get vm_mtx here for safety if processing a VMIO buffer.
+ * I don't think vm_mtx is needed, but we're twiddling vm_page flags.
  */
 void
 vfs_bio_clrbuf(struct buf *bp) {
     int i, mask = 0;
     caddr_t sa, ea;
+
     if ((bp->b_flags & (B_VMIO | B_MALLOC)) == B_VMIO) {
+        mtx_lock(&vm_mtx);
         bp->b_flags &= ~B_INVAL;
         bp->b_ioflags &= ~BIO_ERROR;
         if( (bp->b_npages == 1) && (bp->b_bufsize < PAGE_SIZE) &&
@@ -3079,6 +3163,7 @@ vfs_bio_clrbuf(struct buf *bp) {
             }
             bp->b_pages[0]->valid |= mask;
             bp->b_resid = 0;
+            mtx_unlock(&vm_mtx);
             return;
         }
         ea = sa = bp->b_data;
@@ -3106,6 +3191,7 @@ vfs_bio_clrbuf(struct buf *bp) {
             vm_page_flag_clear(bp->b_pages[i], PG_ZERO);
         }
         bp->b_resid = 0;
+        mtx_unlock(&vm_mtx);
     } else {
         clrbuf(bp);
     }
@@ -3115,18 +3201,22 @@ vfs_bio_clrbuf(struct buf *bp) {
  * vm_hold_load_pages and vm_hold_unload pages get pages into
  * a buffers address space. The pages are anonymous and are
  * not associated with a file object.
+ *
+ * vm_mtx should not be held
  */
-void
+static void
 vm_hold_load_pages(struct buf * bp, vm_offset_t from, vm_offset_t to)
 {
     vm_offset_t pg;
     vm_page_t p;
     int index;
 
+    mtx_assert(&vm_mtx, MA_NOTOWNED);
     to = round_page(to);
     from = round_page(from);
     index = (from - trunc_page((vm_offset_t)bp->b_data)) >> PAGE_SHIFT;
 
+    mtx_lock(&vm_mtx);
     for (pg = from; pg < to; pg += PAGE_SIZE, index++) {
 
 tryagain:
@@ -3152,6 +3242,7 @@ tryagain:
         vm_page_wakeup(p);
     }
     bp->b_npages = index;
+    mtx_unlock(&vm_mtx);
 }
 
 void
@@ -3160,11 +3251,15 @@ vm_hold_free_pages(struct buf * bp, vm_offset_t from, vm_offset_t to)
     vm_offset_t pg;
     vm_page_t p;
     int index, newnpages;
+    int hadvmlock;
 
     from = round_page(from);
     to = round_page(to);
     newnpages = index = (from - trunc_page((vm_offset_t)bp->b_data)) >> PAGE_SHIFT;
 
+    hadvmlock = mtx_owned(&vm_mtx);
+    if (!hadvmlock)
+        mtx_lock(&vm_mtx);
     for (pg = from; pg < to; pg += PAGE_SIZE, index++) {
         p = bp->b_pages[index];
         if (p && (index < bp->b_npages)) {
@@ -3180,6 +3275,8 @@ vm_hold_free_pages(struct buf * bp, vm_offset_t from, vm_offset_t to)
         }
     }
     bp->b_npages = newnpages;
+    if (!hadvmlock)
+        mtx_unlock(&vm_mtx);
 }
```