author     jeff <jeff@FreeBSD.org>	2015-04-07 02:18:52 +0000
committer  jeff <jeff@FreeBSD.org>	2015-04-07 02:18:52 +0000
commit     fa9eb8e1eac476efbef9a03cedb44dd3c3f36521 (patch)
tree       88bc92f4ae2f8311ce1dcbf1ffce1c7c0a09be51 /sys/vm
parent     398812966535579f3b3adbce0e412e79391d70d6 (diff)
- Simplify vm_pageout_scan() by introducing a new vm_pageout_clean()
  function that does the locking and validation associated with cleaning
  a page.  This moves 150 lines of code into its own function.
- Rename vm_pageout_clean() to vm_pageout_cluster() to define what it
  really does: clustering nearby pages for pageout optimization.

Reviewed by:	alc, kib, kmacy
Tested by:	pho (earlier version)
Sponsored by:	EMC / Isilon
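
The practical effect of the refactoring, visible in the last hunk of the diff below, is that vm_pageout_scan() no longer open-codes the vnode locking and page revalidation; it simply calls the new vm_pageout_clean() and maps the returned errno onto its bookkeeping counters. The short, self-contained userland model below illustrates only that dispatch pattern; launder_one() and the simulated error values are illustrative stand-ins, not the kernel code from this commit.

#include <errno.h>
#include <stdio.h>

/*
 * Illustrative stand-in for vm_pageout_clean(): 0 on success, EDEADLK when
 * the vnode lock could not be taken in time, EBUSY when the page was busied
 * or held while unlocked, EIO/ENXIO for other failures.
 */
static int
launder_one(int simulated_result)
{
	return (simulated_result);
}

int
main(void)
{
	int page_shortage = 10, maxlaunder = 5;
	int pageout_lock_miss = 0, vnodes_skipped = 0, addl_page_shortage = 0;
	int results[] = { 0, EDEADLK, EBUSY, EIO };

	for (unsigned i = 0; i < sizeof(results) / sizeof(results[0]); i++) {
		int error = launder_one(results[i]);

		/* Same accounting the patched vm_pageout_scan() performs. */
		if (error == 0) {
			page_shortage--;
			maxlaunder--;
		} else if (error == EDEADLK) {
			pageout_lock_miss++;
			vnodes_skipped++;
		} else if (error == EBUSY)
			addl_page_shortage++;
		/* Other errnos (EIO, ENXIO) leave the counters untouched. */
	}

	printf("shortage=%d maxlaunder=%d lock_miss=%d skipped=%d addl=%d\n",
	    page_shortage, maxlaunder, pageout_lock_miss, vnodes_skipped,
	    addl_page_shortage);
	return (0);
}
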
Diffstat (limited to 'sys/vm')
-rw-r--r--	sys/vm/vm_pageout.c	243
1 file changed, 123 insertions, 120 deletions
diff --git a/sys/vm/vm_pageout.c b/sys/vm/vm_pageout.c
index 2fb7c55..6f50053 100644
--- a/sys/vm/vm_pageout.c
+++ b/sys/vm/vm_pageout.c
@@ -118,7 +118,8 @@ __FBSDID("$FreeBSD$");
/* the kernel process "vm_pageout"*/
static void vm_pageout(void);
static void vm_pageout_init(void);
-static int vm_pageout_clean(vm_page_t);
+static int vm_pageout_clean(vm_page_t m);
+static int vm_pageout_cluster(vm_page_t m);
static void vm_pageout_scan(struct vm_domain *vmd, int pass);
static void vm_pageout_mightbe_oom(struct vm_domain *vmd, int pass);
@@ -347,7 +348,7 @@ vm_pageout_page_lock(vm_page_t m, vm_page_t *next)
* late and we cannot do anything that will mess with the page.
*/
static int
-vm_pageout_clean(vm_page_t m)
+vm_pageout_cluster(vm_page_t m)
{
vm_object_t object;
vm_page_t mc[2*vm_pageout_page_count], pb, ps;
@@ -906,6 +907,115 @@ vm_pageout_map_deactivate_pages(map, desired)
#endif /* !defined(NO_SWAPPING) */
/*
+ * Attempt to acquire all of the necessary locks to launder a page and
+ * then call through the clustering layer to PUTPAGES. Wait a short
+ * time for a vnode lock.
+ *
+ * Requires the page and object lock on entry, releases both before return.
+ * Returns 0 on success and an errno otherwise.
+ */
+static int
+vm_pageout_clean(vm_page_t m)
+{
+ struct vnode *vp;
+ struct mount *mp;
+ vm_object_t object;
+ vm_pindex_t pindex;
+ int error, lockmode;
+
+ vm_page_assert_locked(m);
+ object = m->object;
+ VM_OBJECT_ASSERT_WLOCKED(object);
+ error = 0;
+ vp = NULL;
+ mp = NULL;
+
+ /*
+ * The object is already known NOT to be dead. It
+ * is possible for the vget() to block the whole
+ * pageout daemon, but the new low-memory handling
+ * code should prevent it.
+ *
+ * We can't wait forever for the vnode lock, we might
+ * deadlock due to a vn_read() getting stuck in
+ * vm_wait while holding this vnode. We skip the
+ * vnode if we can't get it in a reasonable amount
+ * of time.
+ */
+ if (object->type == OBJT_VNODE) {
+ vm_page_unlock(m);
+ vp = object->handle;
+ if (vp->v_type == VREG &&
+ vn_start_write(vp, &mp, V_NOWAIT) != 0) {
+ mp = NULL;
+ error = EDEADLK;
+ goto unlock_all;
+ }
+ KASSERT(mp != NULL,
+ ("vp %p with NULL v_mount", vp));
+ vm_object_reference_locked(object);
+ pindex = m->pindex;
+ VM_OBJECT_WUNLOCK(object);
+ lockmode = MNT_SHARED_WRITES(vp->v_mount) ?
+ LK_SHARED : LK_EXCLUSIVE;
+ if (vget(vp, lockmode | LK_TIMELOCK, curthread)) {
+ vp = NULL;
+ error = EDEADLK;
+ goto unlock_mp;
+ }
+ VM_OBJECT_WLOCK(object);
+ vm_page_lock(m);
+ /*
+ * While the object and page were unlocked, the page
+ * may have been:
+ * (1) moved to a different queue,
+ * (2) reallocated to a different object,
+ * (3) reallocated to a different offset, or
+ * (4) cleaned.
+ */
+ if (m->queue != PQ_INACTIVE || m->object != object ||
+ m->pindex != pindex || m->dirty == 0) {
+ vm_page_unlock(m);
+ error = ENXIO;
+ goto unlock_all;
+ }
+
+ /*
+ * The page may have been busied or held while the object
+ * and page locks were released.
+ */
+ if (vm_page_busied(m) || m->hold_count != 0) {
+ vm_page_unlock(m);
+ error = EBUSY;
+ goto unlock_all;
+ }
+ }
+
+ /*
+ * If a page is dirty, then it is either being washed
+ * (but not yet cleaned) or it is still in the
+ * laundry. If it is still in the laundry, then we
+ * start the cleaning operation.
+ */
+ if (vm_pageout_cluster(m) == 0)
+ error = EIO;
+
+unlock_all:
+ VM_OBJECT_WUNLOCK(object);
+
+unlock_mp:
+ vm_page_lock_assert(m, MA_NOTOWNED);
+ if (mp != NULL) {
+ if (vp != NULL)
+ vput(vp);
+ vm_object_deallocate(object);
+ vn_finished_write(mp);
+ }
+
+ return (error);
+}
+
+/*
* vm_pageout_scan does the dirty work for the pageout daemon.
*
* pass 0 - Update active LRU/deactivate pages
@@ -921,7 +1031,6 @@ vm_pageout_scan(struct vm_domain *vmd, int pass)
int act_delta, addl_page_shortage, deficit, maxscan, page_shortage;
int vnodes_skipped = 0;
int maxlaunder;
- int lockmode;
boolean_t queues_locked;
/*
@@ -1155,9 +1264,7 @@ vm_pageout_scan(struct vm_domain *vmd, int pass)
* on the inactive queue, we may have to go all out.
*/
int swap_pageouts_ok;
- struct vnode *vp = NULL;
- struct mount *mp = NULL;
- vm_pindex_t pindex;
+ int error;
if ((object->type != OBJT_SWAP) && (object->type != OBJT_DEFAULT)) {
swap_pageouts_ok = 1;
@@ -1180,124 +1287,20 @@ vm_pageout_scan(struct vm_domain *vmd, int pass)
vm_page_requeue_locked(m);
goto relock_queues;
}
-
- /*
- * The object is already known NOT to be dead. It
- * is possible for the vget() to block the whole
- * pageout daemon, but the new low-memory handling
- * code should prevent it.
- *
- * The previous code skipped locked vnodes and, worse,
- * reordered pages in the queue. This results in
- * completely non-deterministic operation and, on a
- * busy system, can lead to extremely non-optimal
- * pageouts. For example, it can cause clean pages
- * to be freed and dirty pages to be moved to the end
- * of the queue. Since dirty pages are also moved to
- * the end of the queue once-cleaned, this gives
- * way too large a weighting to deferring the freeing
- * of dirty pages.
- *
- * We can't wait forever for the vnode lock, we might
- * deadlock due to a vn_read() getting stuck in
- * vm_wait while holding this vnode. We skip the
- * vnode if we can't get it in a reasonable amount
- * of time.
- */
- if (object->type == OBJT_VNODE) {
- vm_page_unlock(m);
- vp = object->handle;
- if (vp->v_type == VREG &&
- vn_start_write(vp, &mp, V_NOWAIT) != 0) {
- mp = NULL;
- ++pageout_lock_miss;
- if (object->flags & OBJ_MIGHTBEDIRTY)
- vnodes_skipped++;
- goto unlock_and_continue;
- }
- KASSERT(mp != NULL,
- ("vp %p with NULL v_mount", vp));
- vm_object_reference_locked(object);
- pindex = m->pindex;
- VM_OBJECT_WUNLOCK(object);
- lockmode = MNT_SHARED_WRITES(vp->v_mount) ?
- LK_SHARED : LK_EXCLUSIVE;
- if (vget(vp, lockmode | LK_TIMELOCK,
- curthread)) {
- VM_OBJECT_WLOCK(object);
- ++pageout_lock_miss;
- if (object->flags & OBJ_MIGHTBEDIRTY)
- vnodes_skipped++;
- vp = NULL;
- goto unlock_and_continue;
- }
- VM_OBJECT_WLOCK(object);
- vm_page_lock(m);
- /*
- * While the object and page were unlocked,
- * the page may have been
- * (1) moved to a different queue,
- * (2) reallocated to a different object,
- * (3) reallocated to a different offset, or
- * (4) cleaned.
- */
- if (m->queue != PQ_INACTIVE ||
- m->object != object ||
- m->pindex != pindex ||
- m->dirty == 0) {
- vm_page_unlock(m);
- if (object->flags & OBJ_MIGHTBEDIRTY)
- vnodes_skipped++;
- goto unlock_and_continue;
- }
-
- /*
- * The page may have been busied during the
- * blocking in vget(). We don't move the
- * page back onto the end of the queue so that
- * statistics are more correct if we don't.
- */
- if (vm_page_busied(m)) {
- vm_page_unlock(m);
- addl_page_shortage++;
- goto unlock_and_continue;
- }
-
- /*
- * If the page has become held it might
- * be undergoing I/O, so skip it
- */
- if (m->hold_count != 0) {
- vm_page_unlock(m);
- addl_page_shortage++;
- if (object->flags & OBJ_MIGHTBEDIRTY)
- vnodes_skipped++;
- goto unlock_and_continue;
- }
- }
-
+ error = vm_pageout_clean(m);
/*
- * If a page is dirty, then it is either being washed
- * (but not yet cleaned) or it is still in the
- * laundry. If it is still in the laundry, then we
- * start the cleaning operation.
- *
- * decrement page_shortage on success to account for
+ * Decrement page_shortage on success to account for
* the (future) cleaned page. Otherwise we could wind
* up laundering or cleaning too many pages.
*/
- if (vm_pageout_clean(m) != 0) {
- --page_shortage;
- --maxlaunder;
- }
-unlock_and_continue:
- vm_page_lock_assert(m, MA_NOTOWNED);
- VM_OBJECT_WUNLOCK(object);
- if (mp != NULL) {
- if (vp != NULL)
- vput(vp);
- vm_object_deallocate(object);
- vn_finished_write(mp);
+ if (error == 0) {
+ page_shortage--;
+ maxlaunder--;
+ } else if (error == EDEADLK) {
+ pageout_lock_miss++;
+ vnodes_skipped++;
+ } else if (error == EBUSY) {
+ addl_page_shortage++;
}
vm_page_lock_assert(m, MA_NOTOWNED);
goto relock_queues;