| author | truckman <truckman@FreeBSD.org> | 2004-07-01 23:59:19 +0000 |
| --- | --- | --- |
| committer | truckman <truckman@FreeBSD.org> | 2004-07-01 23:59:19 +0000 |
| commit | 9ed03e6eb30a4961f22d094c43eb9702ef6841aa (patch) | |
| tree | bd726f82611b583ab8154f71f4f4358da30d8de8 /sys | |
| parent | 657c0f9155fc383a82ca73a70137a4f6ec201444 (diff) | |
| download | FreeBSD-src-9ed03e6eb30a4961f22d094c43eb9702ef6841aa.zip, FreeBSD-src-9ed03e6eb30a4961f22d094c43eb9702ef6841aa.tar.gz | |
When shutting down the syncer kernel thread, first tell it to run
faster and iterate over its work list a few times in an attempt
to empty the work list before the syncer terminates. This leaves
fewer dirty blocks to be written at the "syncing disks" stage and
keeps the "giving up on N buffers" problem from being triggered
by the presence of a large soft updates work list at system shutdown
time. The downside is that the syncer takes noticeably longer to
terminate.
Tested by: "Arjan van Leeuwen" <avleeuwen AT piwebs DOT com>
Approved by: mckusick
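
The shutdown behavior described in the message can be modeled outside the kernel. The following is a minimal user-space sketch of the idea only; the names and numbers (`MAX_PASSES`, `WORK_ITEMS`, `PER_PASS`, `SPEEDUP`) are illustrative and are not taken from the FreeBSD sources:

```c
#include <stdio.h>

#define MAX_PASSES	3	/* extra full passes allowed at shutdown */
#define WORK_ITEMS	100	/* pretend dirty-buffer work items */
#define PER_PASS	5	/* items handled per pass at normal speed */
#define SPEEDUP		8	/* shutdown runs this many times faster */

int
main(void)
{
	int worklist = WORK_ITEMS;
	int passes = MAX_PASSES;

	/*
	 * At shutdown the "syncer" is told to run SPEEDUP times faster
	 * and to make at most MAX_PASSES extra passes over its work
	 * list, stopping early if the list drains first.
	 */
	while (passes-- > 0 && worklist > 0) {
		int done = PER_PASS * SPEEDUP;

		worklist -= (done > worklist) ? worklist : done;
		printf("pass done, %d items left on the work list\n",
		    worklist);
	}
	printf("syncer exits; %d items left for \"syncing disks\"\n",
	    worklist);
	return (0);
}
```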
Diffstat (limited to 'sys')
-rw-r--r-- | sys/kern/vfs_subr.c | 74 |
1 file changed, 68 insertions, 6 deletions
```diff
diff --git a/sys/kern/vfs_subr.c b/sys/kern/vfs_subr.c
index 96fdaab..961b904 100644
--- a/sys/kern/vfs_subr.c
+++ b/sys/kern/vfs_subr.c
@@ -82,6 +82,7 @@ static void vclean(struct vnode *vp, int flags, struct thread *td);
 static void	vlruvp(struct vnode *vp);
 static int	flushbuflist(struct buf *blist, int flags, struct vnode *vp,
 		    int slpflag, int slptimeo, int *errorp);
+static void	syncer_shutdown(void *arg, int howto);
 static int	vtryrecycle(struct vnode *vp);
 static void	vx_lock(struct vnode *vp);
 static void	vx_unlock(struct vnode *vp);
@@ -199,8 +200,11 @@ static struct synclist *syncer_workitem_pending;
 /*
  * The sync_mtx protects:
  *	vp->v_synclist
+ *	sync_vnode_count
  *	syncer_delayno
+ *	syncer_shutdown_iter
  *	syncer_workitem_pending
+ *	syncer_worklist_len
  *	rushjob
  */
 static struct mtx sync_mtx;
@@ -219,6 +223,21 @@ static int stat_rush_requests; /* number of times I/O speeded up */
 SYSCTL_INT(_debug, OID_AUTO, rush_requests, CTLFLAG_RW, &stat_rush_requests, 0, "");
 
 /*
+ * Tell the syncer to make three passes through the work list before
+ * shutting down (unless it runs out of work and shuts down sooner).
+ *
+ * Run at 8 times normal speed when shutting down the syncer. With
+ * the default settings, the syncer will take approximately 12
+ * seconds to shut down, which is less than the default 60 timeout
+ * in kproc_shutdown().
+ */
+#define SYNCER_SHUTDOWN_ITER_LIMIT	(3*SYNCER_MAXDELAY)
+#define SYNCER_SHUTDOWN_SPEEDUP		7
+static int sync_vnode_count;
+static int syncer_shutdown_iter;
+static int syncer_worklist_len;
+
+/*
  * Number of vnodes we want to exist at any one time. This is mostly used
  * to size hash tables in vnode-related code. It is normally not used in
  * getnewvnode(), as wantfreevnodes is normally nonzero.)
@@ -1430,6 +1449,7 @@ brelvp(bp)
 		vp->v_iflag &= ~VI_ONWORKLST;
 		mtx_lock(&sync_mtx);
 		LIST_REMOVE(vp, v_synclist);
+		syncer_worklist_len--;
 		mtx_unlock(&sync_mtx);
 	}
 	vdropl(vp);
@@ -1452,8 +1472,10 @@ vn_syncer_add_to_worklist(struct vnode *vp, int delay)
 	mtx_lock(&sync_mtx);
 	if (vp->v_iflag & VI_ONWORKLST)
 		LIST_REMOVE(vp, v_synclist);
-	else
+	else {
 		vp->v_iflag |= VI_ONWORKLST;
+		syncer_worklist_len++;
+	}
 
 	if (delay > syncer_maxdelay - 2)
 		delay = syncer_maxdelay - 2;
@@ -1487,19 +1509,30 @@ sched_sync(void)
 
 	mtx_lock(&Giant);
 
-	EVENTHANDLER_REGISTER(shutdown_pre_sync, kproc_shutdown, td->td_proc,
+	EVENTHANDLER_REGISTER(shutdown_pre_sync, syncer_shutdown, td->td_proc,
 	    SHUTDOWN_PRI_LAST);
 
 	for (;;) {
-		kthread_suspend_check(td->td_proc);
-
+		mtx_lock(&sync_mtx);
+		/*
+		 * Make one more full pass through the work list after
+		 * the only vnodes remaining on the work list are the
+		 * syncer vnodes.
+		 */
+		if (syncer_shutdown_iter > SYNCER_MAXDELAY &&
+		    syncer_worklist_len == sync_vnode_count)
+			syncer_shutdown_iter = SYNCER_MAXDELAY;
+		if (syncer_shutdown_iter == 0) {
+			mtx_unlock(&sync_mtx);
+			kthread_suspend_check(td->td_proc);
+			mtx_lock(&sync_mtx);
+		}
 		starttime = time_second;
 
 		/*
 		 * Push files whose dirty time has expired. Be careful
 		 * of interrupt race on slp queue.
 		 */
-		mtx_lock(&sync_mtx);
 		slp = &syncer_workitem_pending[syncer_delayno];
 		syncer_delayno += 1;
 		if (syncer_delayno == syncer_maxdelay)
@@ -1545,6 +1578,8 @@ sched_sync(void)
 			VI_UNLOCK(vp);
 			mtx_lock(&sync_mtx);
 		}
+		if (syncer_shutdown_iter > 0)
+			syncer_shutdown_iter--;
 		mtx_unlock(&sync_mtx);
 
 		/*
@@ -1568,7 +1603,8 @@ sched_sync(void)
 			rushjob -= 1;
 			mtx_unlock(&sync_mtx);
 			continue;
-		}
+		} else if (syncer_shutdown_iter > 0)
+			rushjob = SYNCER_SHUTDOWN_SPEEDUP;
 		mtx_unlock(&sync_mtx);
 		/*
 		 * If it has taken us less than a second to process the
@@ -1607,6 +1643,25 @@ speedup_syncer()
 }
 
 /*
+ * Tell the syncer to speed up its work and run though its work
+ * list several times, then tell it to shut down.
+ */
+static void
+syncer_shutdown(void *arg, int howto)
+{
+	struct thread *td;
+
+	td = FIRST_THREAD_IN_PROC(updateproc);
+	sleepq_remove(td, &lbolt);
+	mtx_lock(&sync_mtx);
+	if (rushjob < SYNCER_SHUTDOWN_SPEEDUP)
+		rushjob = SYNCER_SHUTDOWN_SPEEDUP;
+	syncer_shutdown_iter = SYNCER_SHUTDOWN_ITER_LIMIT;
+	mtx_unlock(&sync_mtx);
+	kproc_shutdown(arg, howto);
+}
+
+/*
  * Associate a p-buffer with a vnode.
  *
  * Also sets B_PAGING flag to indicate that vnode is not fully associated
@@ -1720,6 +1775,7 @@ reassignbuf(bp, newvp)
 		    TAILQ_EMPTY(&newvp->v_dirtyblkhd)) {
 			mtx_lock(&sync_mtx);
 			LIST_REMOVE(newvp, v_synclist);
+			syncer_worklist_len--;
 			mtx_unlock(&sync_mtx);
 			newvp->v_iflag &= ~VI_ONWORKLST;
 		}
@@ -3297,6 +3353,10 @@ vfs_allocate_syncvnode(mp)
 	}
 	VI_LOCK(vp);
 	vn_syncer_add_to_worklist(vp, syncdelay > 0 ? next % syncdelay : 0);
+	/* XXX - vn_syncer_add_to_worklist() also grabs and drops sync_mtx. */
+	mtx_lock(&sync_mtx);
+	sync_vnode_count++;
+	mtx_unlock(&sync_mtx);
 	VI_UNLOCK(vp);
 	mp->mnt_syncer = vp;
 	return (0);
@@ -3390,6 +3450,8 @@ sync_reclaim(ap)
 	if (vp->v_iflag & VI_ONWORKLST) {
 		mtx_lock(&sync_mtx);
 		LIST_REMOVE(vp, v_synclist);
+		syncer_worklist_len--;
+		sync_vnode_count--;
 		mtx_unlock(&sync_mtx);
 		vp->v_iflag &= ~VI_ONWORKLST;
 	}
```
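
As a cross-check of the "approximately 12 seconds" figure in the comment added by this patch: assuming SYNCER_MAXDELAY is 32 (its value in vfs_subr.c of this vintage; an assumption, not shown in the diff), the shutdown budget is 3 * 32 = 96 passes, and setting rushjob to SYNCER_SHUTDOWN_SPEEDUP (7) gives 7 extra passes on top of the normal once-per-second pass, i.e. about 8 passes per second, so draining takes roughly 96 / 8 = 12 seconds, under kproc_shutdown()'s default 60-second timeout. A small stand-alone sketch of that arithmetic (user-space C, not kernel code):

```c
#include <stdio.h>

/* Assumed value of SYNCER_MAXDELAY for this era of vfs_subr.c. */
#define SYNCER_MAXDELAY			32
#define SYNCER_SHUTDOWN_ITER_LIMIT	(3*SYNCER_MAXDELAY)
#define SYNCER_SHUTDOWN_SPEEDUP		7

int
main(void)
{
	/* rushjob = 7 extra passes plus the normal one-per-second pass. */
	int passes_per_second = SYNCER_SHUTDOWN_SPEEDUP + 1;
	int seconds = SYNCER_SHUTDOWN_ITER_LIMIT / passes_per_second;

	printf("worst case: %d passes at %d passes/sec = ~%d seconds\n",
	    SYNCER_SHUTDOWN_ITER_LIMIT, passes_per_second, seconds);
	return (0);
}
```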