Diffstat (limited to 'sys/kern')
-rw-r--r--  sys/kern/kern_malloc.c      6
-rw-r--r--  sys/kern/kern_shutdown.c   14
-rw-r--r--  sys/kern/kern_synch.c       4
-rw-r--r--  sys/kern/vfs_bio.c         78
-rw-r--r--  sys/kern/vfs_cluster.c     11
-rw-r--r--  sys/kern/vfs_export.c     370
-rw-r--r--  sys/kern/vfs_extattr.c     29
-rw-r--r--  sys/kern/vfs_subr.c       370
-rw-r--r--  sys/kern/vfs_syscalls.c    29
-rw-r--r--  sys/kern/vnode_if.src      14
10 files changed, 875 insertions, 50 deletions
diff --git a/sys/kern/kern_malloc.c b/sys/kern/kern_malloc.c
index a51177c0..7d0746f 100644
--- a/sys/kern/kern_malloc.c
+++ b/sys/kern/kern_malloc.c
@@ -31,7 +31,7 @@
* SUCH DAMAGE.
*
* @(#)kern_malloc.c 8.3 (Berkeley) 1/4/94
- * $Id: kern_malloc.c,v 1.43 1998/02/09 06:09:22 eivind Exp $
+ * $Id: kern_malloc.c,v 1.44 1998/02/23 07:41:23 dyson Exp $
*/
#include "opt_vm.h"
@@ -128,7 +128,7 @@ malloc(size, type, flags)
indx = BUCKETINDX(size);
kbp = &bucket[indx];
- s = splhigh();
+ s = splmem();
while (ksp->ks_memuse >= ksp->ks_limit) {
if (flags & M_NOWAIT) {
splx(s);
@@ -268,7 +268,7 @@ free(addr, type)
kup = btokup(addr);
size = 1 << kup->ku_indx;
kbp = &bucket[kup->ku_indx];
- s = splhigh();
+ s = splmem();
#ifdef DIAGNOSTIC
/*
* Check for returns of data that do not point to the
diff --git a/sys/kern/kern_shutdown.c b/sys/kern/kern_shutdown.c
index c83dd75..c6dd9c1 100644
--- a/sys/kern/kern_shutdown.c
+++ b/sys/kern/kern_shutdown.c
@@ -36,7 +36,7 @@
* SUCH DAMAGE.
*
* @(#)kern_shutdown.c 8.3 (Berkeley) 1/21/94
- * $Id: kern_shutdown.c,v 1.27 1997/11/25 07:07:43 julian Exp $
+ * $Id: kern_shutdown.c,v 1.28 1998/02/16 23:57:44 eivind Exp $
*/
#include "opt_ddb.h"
@@ -217,17 +217,27 @@ boot(howto)
sync(&proc0, NULL);
+ /*
+ * With soft updates, some buffers that are
+ * written will be re-marked as dirty until other
+ * buffers are written.
+ */
for (iter = 0; iter < 20; iter++) {
nbusy = 0;
for (bp = &buf[nbuf]; --bp >= buf; ) {
if ((bp->b_flags & (B_BUSY | B_INVAL)) == B_BUSY) {
nbusy++;
+ } else if ((bp->b_flags & (B_DELWRI | B_INVAL))
+ == B_DELWRI) {
+ /* bawrite(bp);*/
+ nbusy++;
}
}
if (nbusy == 0)
break;
printf("%d ", nbusy);
- DELAY(40000 * iter);
+ sync(&proc0, NULL);
+ DELAY(50000 * iter);
}
if (nbusy) {
/*
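The hunk above changes the shutdown flush loop in two ways: delayed-write (B_DELWRI) buffers now count as unfinished work, and every pass issues another sync() because soft updates can re-dirty buffers that have already been written once. Below is a minimal user-space model of that retry-with-backoff pattern; count_unflushed() and flush_all() are illustrative stand-ins for the buffer scan and sync(), not kernel interfaces.

#include <stdio.h>
#include <unistd.h>

static int pending = 5;                 /* pretend five buffers start out dirty */

static int
count_unflushed(void)                   /* stands in for the nbusy scan */
{
        return (pending);
}

static void
flush_all(void)                         /* stands in for sync(&proc0, NULL) */
{
        if (pending > 0)
                pending--;              /* each pass retires one buffer */
}

int
main(void)
{
        int iter, nbusy = 0;

        flush_all();
        for (iter = 0; iter < 20; iter++) {
                nbusy = count_unflushed();
                if (nbusy == 0)
                        break;
                printf("%d ", nbusy);
                fflush(stdout);
                flush_all();            /* re-sync, as the new loop does */
                usleep(50000 * iter);   /* back off a little more each pass */
        }
        printf(nbusy ? "\ngiving up\n" : "\ndone\n");
        return (0);
}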
diff --git a/sys/kern/kern_synch.c b/sys/kern/kern_synch.c
index bb370ac..4fdc5bd 100644
--- a/sys/kern/kern_synch.c
+++ b/sys/kern/kern_synch.c
@@ -36,7 +36,7 @@
* SUCH DAMAGE.
*
* @(#)kern_synch.c 8.9 (Berkeley) 5/19/95
- * $Id: kern_synch.c,v 1.47 1998/02/25 06:04:46 bde Exp $
+ * $Id: kern_synch.c,v 1.48 1998/03/04 10:25:55 dufault Exp $
*/
#include "opt_ktrace.h"
@@ -230,7 +230,6 @@ schedcpu(arg)
register int s;
register unsigned int newcpu;
- wakeup((caddr_t)&lbolt);
for (p = allproc.lh_first; p != 0; p = p->p_list.le_next) {
/*
* Increment time in/out of memory and sleep time
@@ -282,6 +281,7 @@ schedcpu(arg)
splx(s);
}
vmmeter();
+ wakeup((caddr_t)&lbolt);
timeout(schedcpu, (void *)0, hz);
}
diff --git a/sys/kern/vfs_bio.c b/sys/kern/vfs_bio.c
index 4c09e1d..114e035 100644
--- a/sys/kern/vfs_bio.c
+++ b/sys/kern/vfs_bio.c
@@ -11,7 +11,7 @@
* 2. Absolutely no warranty of function or purpose is made by the author
* John S. Dyson.
*
- * $Id: vfs_bio.c,v 1.153 1998/03/04 03:17:30 dyson Exp $
+ * $Id: vfs_bio.c,v 1.154 1998/03/07 21:35:24 dyson Exp $
*/
/*
@@ -37,6 +37,7 @@
#include <sys/vnode.h>
#include <sys/vmmeter.h>
#include <sys/lock.h>
+#include <miscfs/specfs/specdev.h>
#include <vm/vm.h>
#include <vm/vm_param.h>
#include <vm/vm_prot.h>
@@ -53,6 +54,9 @@
static MALLOC_DEFINE(M_BIOBUF, "BIO buffer", "BIO buffer");
+struct bio_ops bioops; /* I/O operation notification */
+
+#if 0 /* replaced by sched_sync */
static void vfs_update __P((void));
static struct proc *updateproc;
static struct kproc_desc up_kp = {
@@ -61,6 +65,7 @@ static struct kproc_desc up_kp = {
&updateproc
};
SYSINIT_KT(update, SI_SUB_KTHREAD_UPDATE, SI_ORDER_FIRST, kproc_start, &up_kp)
+#endif
struct buf *buf; /* buffer header pool */
struct swqueue bswlist;
@@ -179,6 +184,7 @@ bufinit()
bp->b_qindex = QUEUE_EMPTY;
bp->b_vnbufs.le_next = NOLIST;
bp->b_generation = 0;
+ LIST_INIT(&bp->b_dep);
TAILQ_INSERT_TAIL(&bufqueues[QUEUE_EMPTY], bp, b_freelist);
LIST_INSERT_HEAD(&invalhash, bp, b_hash);
}
@@ -362,6 +368,9 @@ int
bwrite(struct buf * bp)
{
int oldflags = bp->b_flags;
+ struct vnode *vp;
+ struct mount *mp;
+
if (bp->b_flags & B_INVAL) {
brelse(bp);
@@ -386,6 +395,23 @@ bwrite(struct buf * bp)
curproc->p_stats->p_ru.ru_oublock++;
VOP_STRATEGY(bp);
+ /*
+ * Collect statistics on synchronous and asynchronous writes.
+ * Writes to block devices are charged to their associated
+ * filesystem (if any).
+ */
+ if ((vp = bp->b_vp) != NULL) {
+ if (vp->v_type == VBLK)
+ mp = vp->v_specmountpoint;
+ else
+ mp = vp->v_mount;
+ if (mp != NULL)
+ if ((oldflags & B_ASYNC) == 0)
+ mp->mnt_stat.f_syncwrites++;
+ else
+ mp->mnt_stat.f_asyncwrites++;
+ }
+
if ((oldflags & B_ASYNC) == 0) {
int rtval = biowait(bp);
@@ -420,6 +446,8 @@ vfs_bio_need_satisfy(void) {
void
bdwrite(struct buf * bp)
{
+ int s;
+ struct vnode *vp;
#if !defined(MAX_PERF)
if ((bp->b_flags & B_BUSY) == 0) {
@@ -438,7 +466,9 @@ bdwrite(struct buf * bp)
bp->b_flags &= ~(B_READ|B_RELBUF);
if ((bp->b_flags & B_DELWRI) == 0) {
bp->b_flags |= B_DONE | B_DELWRI;
+ s = splbio();
reassignbuf(bp, bp->b_vp);
+ splx(s);
++numdirtybuffers;
}
@@ -470,12 +500,45 @@ bdwrite(struct buf * bp)
vfs_clean_pages(bp);
bqrelse(bp);
+ /*
+ * XXX The soft dependency code is not prepared to
+ * have I/O done when a bdwrite is requested. For
+ * now we just let the write be delayed if it is
+ * requested by the soft dependency code.
+ */
+ if ((vp = bp->b_vp) != NULL &&
+ ((vp->v_type == VBLK && vp->v_specmountpoint &&
+ (vp->v_specmountpoint->mnt_flag & MNT_SOFTDEP)) ||
+ (vp->v_mount && (vp->v_mount->mnt_flag & MNT_SOFTDEP))))
+ return;
+
if (numdirtybuffers >= hidirtybuffers)
flushdirtybuffers(0, 0);
return;
}
+
+/*
+ * Same as first half of bdwrite, mark buffer dirty, but do not release it.
+ * Check how this compares with vfs_setdirty(); XXX [JRE]
+ */
+void
+bdirty(bp)
+ struct buf *bp;
+{
+ int s;
+
+ bp->b_flags &= ~(B_READ|B_RELBUF); /* XXX ??? check this */
+ if ((bp->b_flags & B_DELWRI) == 0) {
+ bp->b_flags |= B_DONE | B_DELWRI; /* why done? XXX JRE */
+ s = splbio();
+ reassignbuf(bp, bp->b_vp);
+ splx(s);
+ ++numdirtybuffers;
+ }
+}
+
/*
* Asynchronous write.
* Start output on a buffer, but do not wait for it to complete.
@@ -535,6 +598,8 @@ brelse(struct buf * bp)
if ((bp->b_flags & (B_NOCACHE | B_INVAL | B_ERROR)) ||
(bp->b_bufsize <= 0)) {
bp->b_flags |= B_INVAL;
+ if (LIST_FIRST(&bp->b_dep) != NULL && bioops.io_deallocate)
+ (*bioops.io_deallocate)(bp);
if (bp->b_flags & B_DELWRI)
--numdirtybuffers;
bp->b_flags &= ~(B_DELWRI | B_CACHE);
@@ -1065,6 +1130,9 @@ fillbuf:
crfree(bp->b_wcred);
bp->b_wcred = NOCRED;
}
+ if (LIST_FIRST(&bp->b_dep) != NULL &&
+ bioops.io_deallocate)
+ (*bioops.io_deallocate)(bp);
LIST_REMOVE(bp, b_hash);
LIST_INSERT_HEAD(&invalhash, bp, b_hash);
@@ -1083,6 +1151,8 @@ fillbuf:
bp->b_dirtyoff = bp->b_dirtyend = 0;
bp->b_validoff = bp->b_validend = 0;
bp->b_usecount = 5;
+ /* Here, not kern_physio.c, is where this should be done*/
+ LIST_INIT(&bp->b_dep);
maxsize = (maxsize + PAGE_MASK) & ~PAGE_MASK;
@@ -1799,6 +1869,9 @@ biodone(register struct buf * bp)
splx(s);
return;
}
+ if (LIST_FIRST(&bp->b_dep) != NULL && bioops.io_complete)
+ (*bioops.io_complete)(bp);
+
if (bp->b_flags & B_VMIO) {
int i, resid;
vm_ooffset_t foff;
@@ -1944,6 +2017,7 @@ count_lock_queue()
return (count);
}
+#if 0 /* not with Kirk's code */
static int vfs_update_interval = 30;
static void
@@ -1970,6 +2044,8 @@ sysctl_kern_updateinterval SYSCTL_HANDLER_ARGS
SYSCTL_PROC(_kern, KERN_UPDATEINTERVAL, update, CTLTYPE_INT|CTLFLAG_RW,
&vfs_update_interval, 0, sysctl_kern_updateinterval, "I", "");
+#endif
+
/*
* This routine is called in lieu of iodone in the case of
diff --git a/sys/kern/vfs_cluster.c b/sys/kern/vfs_cluster.c
index 7f477bf..0022ac9 100644
--- a/sys/kern/vfs_cluster.c
+++ b/sys/kern/vfs_cluster.c
@@ -33,7 +33,7 @@
* SUCH DAMAGE.
*
* @(#)vfs_cluster.c 8.7 (Berkeley) 2/13/94
- * $Id: vfs_cluster.c,v 1.55 1998/02/06 12:13:30 eivind Exp $
+ * $Id: vfs_cluster.c,v 1.56 1998/03/07 21:35:28 dyson Exp $
*/
#include "opt_debug_cluster.h"
@@ -399,6 +399,9 @@ cluster_rbuild(vp, filesize, lbn, blkno, size, run, fbp)
break;
}
}
+ /* check for latent dependencies to be handled */
+ if ((LIST_FIRST(&tbp->b_dep)) != NULL && bioops.io_start)
+ (*bioops.io_start)(tbp);
TAILQ_INSERT_TAIL(&bp->b_cluster.cluster_head,
tbp, b_cluster.cluster_entry);
for (j = 0; j < tbp->b_npages; j += 1) {
@@ -684,7 +687,6 @@ cluster_wbuild(vp, size, start_lbn, len)
(tbp->b_flags & (B_VMIO|B_NEEDCOMMIT));
bp->b_iodone = cluster_callback;
pbgetvp(vp, bp);
-
for (i = 0; i < len; ++i, ++start_lbn) {
if (i != 0) {
s = splbio();
@@ -714,7 +716,10 @@ cluster_wbuild(vp, size, start_lbn, len)
tbp->b_flags &= ~B_DONE;
splx(s);
}
-
+ /* check for latent dependencies to be handled */
+ if ((LIST_FIRST(&tbp->b_dep)) != NULL &&
+ bioops.io_start)
+ (*bioops.io_start)(tbp);
if (tbp->b_flags & B_VMIO) {
vm_page_t m;
diff --git a/sys/kern/vfs_export.c b/sys/kern/vfs_export.c
index 5d27cf5..972604d 100644
--- a/sys/kern/vfs_export.c
+++ b/sys/kern/vfs_export.c
@@ -36,7 +36,7 @@
* SUCH DAMAGE.
*
* @(#)vfs_subr.c 8.31 (Berkeley) 5/26/95
- * $Id: vfs_subr.c,v 1.136 1998/03/01 23:07:45 dyson Exp $
+ * $Id: vfs_subr.c,v 1.137 1998/03/07 21:35:35 dyson Exp $
*/
/*
@@ -123,6 +123,19 @@ static struct simplelock spechash_slock;
struct nfs_public nfs_pub; /* publicly exported FS */
static vm_zone_t vnode_zone;
+/*
+ * The workitem queue.
+ */
+#define SYNCER_MAXDELAY 32
+int syncer_maxdelay = SYNCER_MAXDELAY; /* maximum delay time */
+time_t syncdelay = 30;
+int rushjob; /* number of slots to run ASAP */
+
+static int syncer_delayno = 0;
+static long syncer_mask;
+LIST_HEAD(synclist, vnode);
+static struct synclist *syncer_workitem_pending;
+
int desiredvnodes;
SYSCTL_INT(_kern, KERN_MAXVNODES, maxvnodes, CTLFLAG_RW, &desiredvnodes, 0, "");
@@ -147,6 +160,12 @@ vntblinit()
simple_lock_init(&vnode_free_list_slock);
CIRCLEQ_INIT(&mountlist);
vnode_zone = zinit("VNODE", sizeof (struct vnode), 0, 0, 5);
+ /*
+ * Initialize the filesystem syncer.
+ */
+ syncer_workitem_pending = hashinit(syncer_maxdelay, M_VNODE,
+ &syncer_mask);
+ syncer_maxdelay = syncer_mask + 1;
}
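The hashinit() call above returns a table whose size is the largest power of two not greater than the requested element count, together with a mask for cheap "& mask" indexing; that is why syncer_maxdelay is recomputed as syncer_mask + 1. A small user-space sketch of the same sizing rule follows (table_init() is an illustrative stand-in, not the kernel routine).

#include <stdio.h>
#include <stdlib.h>

static void *
table_init(int elements, unsigned long *maskp)
{
        unsigned long size;

        /* Largest power of two that does not exceed the requested count. */
        for (size = 1; size * 2 <= (unsigned long)elements; size *= 2)
                continue;
        *maskp = size - 1;
        return (calloc(size, sizeof(void *)));  /* one list head per slot */
}

int
main(void)
{
        unsigned long mask;
        void *table = table_init(32, &mask);

        /* Requesting 32 entries yields mask 31, so maxdelay stays 32. */
        printf("mask = %lu, maxdelay = %lu\n", mask, mask + 1);
        free(table);
        return (0);
}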
/*
@@ -554,7 +573,7 @@ vinvalbuf(vp, flags, cred, p, slpflag, slptimeo)
int s, error;
vm_object_t object;
- if (flags & V_SAVE) {
+ if ((flags & V_SAVE) && vp->v_dirtyblkhd.lh_first != NULL) {
if ((error = VOP_FSYNC(vp, cred, MNT_WAIT, p)))
return (error);
if (vp->v_dirtyblkhd.lh_first != NULL)
@@ -688,17 +707,154 @@ brelvp(bp)
/*
* Delete from old vnode list, if on one.
*/
+ vp = bp->b_vp;
s = splbio();
if (bp->b_vnbufs.le_next != NOLIST)
bufremvn(bp);
+ if ((vp->v_flag & VONWORKLST) && (LIST_FIRST(&vp->v_dirtyblkhd) == NULL)) {
+ vp->v_flag &= ~VONWORKLST;
+ LIST_REMOVE(vp, v_synclist);
+ }
splx(s);
-
- vp = bp->b_vp;
bp->b_vp = (struct vnode *) 0;
vdrop(vp);
}
/*
+ * The workitem queue.
+ *
+ * It is useful to delay writes of file data and filesystem metadata
+ * for tens of seconds so that quickly created and deleted files need
+ * not waste disk bandwidth being created and removed. To realize this,
+ * we append vnodes to a "workitem" queue. When running with a soft
+ * updates implementation, most pending metadata dependencies should
+ * not wait for more than a few seconds. Thus, metadata written through
+ * mounted block devices is delayed only about half the time that file
+ * data is delayed. Similarly, directory updates are more critical, so
+ * they are delayed only about a third of the time that file data is
+ * delayed. Thus, there are
+ * SYNCER_MAXDELAY queues that are processed round-robin at a rate of
+ * one each second (driven off the filesystem syncer process). The
+ * syncer_delayno variable indicates the next queue that is to be processed.
+ * Items that need to be processed soon are placed in this queue:
+ *
+ * syncer_workitem_pending[syncer_delayno]
+ *
+ * A delay of fifteen seconds is done by placing the request fifteen
+ * entries later in the queue:
+ *
+ * syncer_workitem_pending[(syncer_delayno + 15) & syncer_mask]
+ *
+ */
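The bucket arithmetic described in the comment above is easy to model outside the kernel. The sketch below uses the <sys/queue.h> list macros, as the kernel does, but all of the names (item, wheel_add, wheel_run) are illustrative; it keeps SYNCER_MAXDELAY list heads, files each item "delay" slots ahead of the current position, and drains one bucket per simulated second.

#include <stdio.h>
#include <sys/queue.h>

#define SYNCER_MAXDELAY 32              /* number of one-second buckets */
#define SYNCER_MASK     (SYNCER_MAXDELAY - 1)

struct item {
        const char *name;
        LIST_ENTRY(item) link;
};
LIST_HEAD(bucket, item);

static struct bucket wheel[SYNCER_MAXDELAY];
static int delayno;                     /* next bucket to process */

static void
wheel_add(struct item *ip, int delay)
{
        int slot;

        if (delay > SYNCER_MAXDELAY - 2)
                delay = SYNCER_MAXDELAY - 2;
        slot = (delayno + delay) & SYNCER_MASK;
        LIST_INSERT_HEAD(&wheel[slot], ip, link);
}

static void
wheel_run(void)                         /* one "second" of syncer work */
{
        struct item *ip;

        while ((ip = LIST_FIRST(&wheel[delayno])) != NULL) {
                LIST_REMOVE(ip, link);
                printf("second %d: flush %s\n", delayno, ip->name);
        }
        delayno = (delayno + 1) & SYNCER_MASK;
}

int
main(void)
{
        struct item file = { "file data" };
        struct item dir = { "directory" };
        int i;

        for (i = 0; i < SYNCER_MAXDELAY; i++)
                LIST_INIT(&wheel[i]);
        wheel_add(&file, 30);           /* full syncdelay */
        wheel_add(&dir, 10);            /* roughly syncdelay / 3 */
        for (i = 0; i < SYNCER_MAXDELAY; i++)
                wheel_run();
        return (0);
}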
+
+/*
+ * Add an item to the syncer work queue.
+ */
+void
+vn_syncer_add_to_worklist(vp, delay)
+ struct vnode *vp;
+ int delay;
+{
+ int s, slot;
+
+ s = splbio();
+
+ if (vp->v_flag & VONWORKLST) {
+ LIST_REMOVE(vp, v_synclist);
+ }
+
+ if (delay > syncer_maxdelay - 2)
+ delay = syncer_maxdelay - 2;
+ slot = (syncer_delayno + delay) & syncer_mask;
+
+ LIST_INSERT_HEAD(&syncer_workitem_pending[slot], vp, v_synclist);
+ vp->v_flag |= VONWORKLST;
+ splx(s);
+}
+
+static void sched_sync __P((void));
+static struct proc *updateproc;
+static struct kproc_desc up_kp = {
+ "syncer",
+ sched_sync,
+ &updateproc
+};
+SYSINIT_KT(syncer, SI_SUB_KTHREAD_UPDATE, SI_ORDER_FIRST, kproc_start, &up_kp)
+
+/*
+ * System filesystem synchronizer daemon.
+ */
+void
+sched_sync(void)
+{
+ struct synclist *slp;
+ struct vnode *vp;
+ long starttime;
+ int s;
+ struct proc *p = updateproc;
+
+ for (;;) {
+ starttime = time.tv_sec;
+
+ /*
+ * Push files whose dirty time has expired.
+ */
+ s = splbio();
+ slp = &syncer_workitem_pending[syncer_delayno];
+ syncer_delayno += 1;
+ if (syncer_delayno == syncer_maxdelay)
+ syncer_delayno = 0;
+ splx(s);
+
+ while ((vp = LIST_FIRST(slp)) != NULL) {
+ vn_lock(vp, LK_EXCLUSIVE | LK_RETRY, p);
+ (void) VOP_FSYNC(vp, p->p_ucred, MNT_LAZY, p);
+ VOP_UNLOCK(vp, 0, p);
+ if (LIST_FIRST(slp) == vp) {
+ if (LIST_FIRST(&vp->v_dirtyblkhd) == NULL &&
+ vp->v_type != VBLK)
+ panic("sched_sync: fsync failed");
+ /*
+ * Move ourselves to the back of the sync list.
+ */
+ LIST_REMOVE(vp, v_synclist);
+ vn_syncer_add_to_worklist(vp, syncdelay);
+ }
+ }
+
+ /*
+ * Do soft update processing.
+ */
+ if (bioops.io_sync)
+ (*bioops.io_sync)(NULL);
+
+ /*
+ * The variable rushjob allows the kernel to speed up the
+ * processing of the filesystem syncer process. A rushjob
+ * value of N tells the filesystem syncer to process the next
+ * N seconds worth of work on its queue ASAP. Currently rushjob
+ * is used by the soft update code to speed up the filesystem
+ * syncer process when the incore state is getting so far
+ * ahead of the disk that the kernel memory pool is being
+ * threatened with exhaustion.
+ */
+ if (rushjob > 0) {
+ rushjob -= 1;
+ continue;
+ }
+ /*
+ * If it has taken us less than a second to process the
+ * current work, then wait. Otherwise start right over
+ * again. We can still lose time if any single round
+ * takes more than two seconds, but it does not really
+ * matter as we are just trying to generally pace the
+ * filesystem activity.
+ */
+ if (time.tv_sec == starttime)
+ tsleep(&lbolt, PPAUSE, "syncer", 0);
+ }
+}
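The pacing at the bottom of the loop, including rushjob, can be modeled in isolation. In the sketch below, process_bucket() and request_rush() are illustrative stand-ins; three buckets are worked off back to back before the loop falls back to its one-bucket-per-second rhythm.

#include <stdio.h>
#include <time.h>
#include <unistd.h>

static int rushjob;                     /* buckets to process ASAP */

static void
process_bucket(int bucketno)
{
        printf("processing bucket %d\n", bucketno);
}

static void
request_rush(int n)                     /* e.g. soft updates wants the backlog gone */
{
        rushjob += n;
}

int
main(void)
{
        time_t starttime;
        int bucketno;

        request_rush(3);
        for (bucketno = 0; bucketno < 8; bucketno++) {
                starttime = time(NULL);
                process_bucket(bucketno);
                if (rushjob > 0) {      /* work off the backlog immediately */
                        rushjob--;
                        continue;
                }
                if (time(NULL) == starttime)    /* finished within one second */
                        sleep(1);
        }
        return (0);
}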
+
+/*
* Associate a p-buffer with a vnode.
*/
void
@@ -743,6 +899,8 @@ reassignbuf(bp, newvp)
register struct buf *bp;
register struct vnode *newvp;
{
+ struct buflists *listheadp;
+ int delay;
int s;
if (newvp == NULL) {
@@ -765,18 +923,40 @@ reassignbuf(bp, newvp)
if (bp->b_flags & B_DELWRI) {
struct buf *tbp;
- tbp = newvp->v_dirtyblkhd.lh_first;
+ listheadp = &newvp->v_dirtyblkhd;
+ if ((newvp->v_flag & VONWORKLST) == 0) {
+ switch (newvp->v_type) {
+ case VDIR:
+ delay = syncdelay / 3;
+ break;
+ case VBLK:
+ if (newvp->v_specmountpoint != NULL) {
+ delay = syncdelay / 2;
+ break;
+ }
+ /* fall through */
+ default:
+ delay = syncdelay;
+ }
+ vn_syncer_add_to_worklist(newvp, delay);
+ }
+ tbp = listheadp->lh_first;
if (!tbp || (tbp->b_lblkno > bp->b_lblkno)) {
- bufinsvn(bp, &newvp->v_dirtyblkhd);
+ bufinsvn(bp, listheadp);
} else {
while (tbp->b_vnbufs.le_next &&
- (tbp->b_vnbufs.le_next->b_lblkno < bp->b_lblkno)) {
+ (tbp->b_vnbufs.le_next->b_lblkno < bp->b_lblkno)) {
tbp = tbp->b_vnbufs.le_next;
}
LIST_INSERT_AFTER(tbp, bp, b_vnbufs);
}
} else {
bufinsvn(bp, &newvp->v_cleanblkhd);
+ if ((newvp->v_flag & VONWORKLST) &&
+ LIST_FIRST(&newvp->v_dirtyblkhd) == NULL) {
+ newvp->v_flag &= ~VONWORKLST;
+ LIST_REMOVE(newvp, v_synclist);
+ }
}
bp->b_vp = newvp;
vhold(bp->b_vp);
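The switch above encodes the policy from the workitem-queue comment: directories are queued a third of the base delay out, metadata reached through a mounted block device half, and everything else the full syncdelay. A compact user-space restatement follows; pick_delay() and the trimmed vtype enum are illustrative only, and the default 30-second syncdelay is assumed.

#include <stdio.h>

enum vtype { VREG, VDIR, VBLK };

static int
pick_delay(enum vtype type, int mounted, int syncdelay)
{
        switch (type) {
        case VDIR:
                return (syncdelay / 3);
        case VBLK:
                if (mounted)
                        return (syncdelay / 2);
                /* FALLTHROUGH */
        default:
                return (syncdelay);
        }
}

int
main(void)
{
        printf("file data:         %d s\n", pick_delay(VREG, 0, 30));
        printf("directory:         %d s\n", pick_delay(VDIR, 0, 30));
        printf("mounted block dev: %d s\n", pick_delay(VBLK, 1, 30));
        return (0);
}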
@@ -863,7 +1043,7 @@ loop:
nvp->v_rdev = nvp_rdev;
nvp->v_hashchain = vpp;
nvp->v_specnext = *vpp;
- nvp->v_specflags = 0;
+ nvp->v_specmountpoint = NULL;
simple_unlock(&spechash_slock);
*vpp = nvp;
if (vp != NULLVP) {
@@ -920,7 +1100,6 @@ vget(vp, flags, p)
if (VSHOULDBUSY(vp))
vbusy(vp);
-
if (flags & LK_TYPE_MASK) {
if ((error = vn_lock(vp, flags | LK_INTERLOCK, p)) != 0) {
/*
@@ -1066,7 +1245,7 @@ vdrop(vp)
simple_lock(&vp->v_interlock);
if (vp->v_holdcnt <= 0)
- panic("holdrele: holdcnt");
+ panic("vdrop: holdcnt");
vp->v_holdcnt--;
if (VSHOULDFREE(vp))
vfree(vp);
@@ -1790,7 +1969,7 @@ vfs_mountedon(vp)
struct vnode *vq;
int error = 0;
- if (vp->v_specflags & SI_MOUNTEDON)
+ if (vp->v_specmountpoint != NULL)
return (EBUSY);
if (vp->v_flag & VALIASED) {
simple_lock(&spechash_slock);
@@ -1798,7 +1977,7 @@ vfs_mountedon(vp)
if (vq->v_rdev != vp->v_rdev ||
vq->v_type != vp->v_type)
continue;
- if (vq->v_specflags & SI_MOUNTEDON) {
+ if (vq->v_specmountpoint != NULL) {
error = EBUSY;
break;
}
@@ -2326,3 +2505,170 @@ vn_pollgone(vp)
}
simple_unlock(&vp->v_pollinfo.vpi_lock);
}
+
+
+
+/*
+ * Routine to create and manage a filesystem syncer vnode.
+ */
+#define sync_close ((int (*) __P((struct vop_close_args *)))nullop)
+int sync_fsync __P((struct vop_fsync_args *));
+int sync_inactive __P((struct vop_inactive_args *));
+int sync_reclaim __P((struct vop_reclaim_args *));
+#define sync_lock ((int (*) __P((struct vop_lock_args *)))vop_nolock)
+#define sync_unlock ((int (*) __P((struct vop_unlock_args *)))vop_nounlock)
+int sync_print __P((struct vop_print_args *));
+#define sync_islocked ((int(*) __P((struct vop_islocked_args *)))vop_noislocked)
+
+vop_t **sync_vnodeop_p;
+struct vnodeopv_entry_desc sync_vnodeop_entries[] = {
+ { &vop_default_desc, (vop_t *) vop_eopnotsupp },
+ { &vop_close_desc, (vop_t *) sync_close }, /* close */
+ { &vop_fsync_desc, (vop_t *) sync_fsync }, /* fsync */
+ { &vop_inactive_desc, (vop_t *) sync_inactive }, /* inactive */
+ { &vop_reclaim_desc, (vop_t *) sync_reclaim }, /* reclaim */
+ { &vop_lock_desc, (vop_t *) sync_lock }, /* lock */
+ { &vop_unlock_desc, (vop_t *) sync_unlock }, /* unlock */
+ { &vop_print_desc, (vop_t *) sync_print }, /* print */
+ { &vop_islocked_desc, (vop_t *) sync_islocked }, /* islocked */
+ { NULL, NULL }
+};
+struct vnodeopv_desc sync_vnodeop_opv_desc =
+ { &sync_vnodeop_p, sync_vnodeop_entries };
+
+VNODEOP_SET(sync_vnodeop_opv_desc);
+
+/*
+ * Create a new filesystem syncer vnode for the specified mount point.
+ */
+int
+vfs_allocate_syncvnode(mp)
+ struct mount *mp;
+{
+ struct vnode *vp;
+ static long start, incr, next;
+ int error;
+
+ /* Allocate a new vnode */
+ if ((error = getnewvnode(VT_VFS, mp, sync_vnodeop_p, &vp)) != 0) {
+ mp->mnt_syncer = NULL;
+ return (error);
+ }
+ vp->v_type = VNON;
+ /*
+ * Place the vnode onto the syncer worklist. We attempt to
+ * scatter them about on the list so that they will go off
+ * at evenly distributed times even if all the filesystems
+ * are mounted at once.
+ */
+ next += incr;
+ if (next == 0 || next > syncer_maxdelay) {
+ start /= 2;
+ incr /= 2;
+ if (start == 0) {
+ start = syncer_maxdelay / 2;
+ incr = syncer_maxdelay;
+ }
+ next = start;
+ }
+ vn_syncer_add_to_worklist(vp, syncdelay > 0 ? next % syncdelay : 0);
+ mp->mnt_syncer = vp;
+ return (0);
+}
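The static start/incr/next triple above spreads successive syncer vnodes across the wheel in a halving pattern instead of stacking them in one slot. The sketch below reproduces just that arithmetic in user space; pick_slot() is a stand-in and the printed values are only a model of the sequence.

#include <stdio.h>

#define SYNCER_MAXDELAY 32

static long start, incr, next;

static long
pick_slot(long syncdelay)
{
        next += incr;
        if (next == 0 || next > SYNCER_MAXDELAY) {
                start /= 2;
                incr /= 2;
                if (start == 0) {
                        start = SYNCER_MAXDELAY / 2;    /* 16 */
                        incr = SYNCER_MAXDELAY;         /* 32 */
                }
                next = start;
        }
        return (syncdelay > 0 ? next % syncdelay : 0);
}

int
main(void)
{
        int i;

        /* The first mounts land at 16, 8, 24, 4, 12, 20, 28, 2, ... */
        for (i = 0; i < 8; i++)
                printf("mount %d -> slot %ld\n", i, pick_slot(30));
        return (0);
}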
+
+/*
+ * Do a lazy sync of the filesystem.
+ */
+int
+sync_fsync(ap)
+ struct vop_fsync_args /* {
+ struct vnode *a_vp;
+ struct ucred *a_cred;
+ int a_waitfor;
+ struct proc *a_p;
+ } */ *ap;
+{
+ struct vnode *syncvp = ap->a_vp;
+ struct mount *mp = syncvp->v_mount;
+ struct proc *p = ap->a_p;
+ int asyncflag;
+
+ /*
+ * We only need to do something if this is a lazy evaluation.
+ */
+ if (ap->a_waitfor != MNT_LAZY)
+ return (0);
+
+ /*
+ * Move ourselves to the back of the sync list.
+ */
+ vn_syncer_add_to_worklist(syncvp, syncdelay);
+
+ /*
+ * Walk the list of vnodes pushing all that are dirty and
+ * not already on the sync list.
+ */
+ simple_lock(&mountlist_slock);
+ if (vfs_busy(mp, LK_EXCLUSIVE | LK_NOWAIT, &mountlist_slock, p) != 0)
+ return (0);
+ asyncflag = mp->mnt_flag & MNT_ASYNC;
+ mp->mnt_flag &= ~MNT_ASYNC;
+ VFS_SYNC(mp, MNT_LAZY, ap->a_cred, p);
+ if (asyncflag)
+ mp->mnt_flag |= MNT_ASYNC;
+ vfs_unbusy(mp, p);
+ return (0);
+}
+
+/*
+ * The syncer vnode is no longer referenced.
+ */
+int
+sync_inactive(ap)
+ struct vop_inactive_args /* {
+ struct vnode *a_vp;
+ struct proc *a_p;
+ } */ *ap;
+{
+
+ vgone(ap->a_vp);
+ return (0);
+}
+
+/*
+ * The syncer vnode is no longer needed and is being decommissioned.
+ */
+int
+sync_reclaim(ap)
+ struct vop_reclaim_args /* {
+ struct vnode *a_vp;
+ } */ *ap;
+{
+ struct vnode *vp = ap->a_vp;
+
+ vp->v_mount->mnt_syncer = NULL;
+ if (vp->v_flag & VONWORKLST) {
+ LIST_REMOVE(vp, v_synclist);
+ vp->v_flag &= ~VONWORKLST;
+ }
+
+ return (0);
+}
+
+/*
+ * Print out a syncer vnode.
+ */
+int
+sync_print(ap)
+ struct vop_print_args /* {
+ struct vnode *a_vp;
+ } */ *ap;
+{
+ struct vnode *vp = ap->a_vp;
+
+ printf("syncer vnode");
+ if (vp->v_vnlock != NULL)
+ lockmgr_printinfo(vp->v_vnlock);
+ printf("\n");
+ return (0);
+}
diff --git a/sys/kern/vfs_extattr.c b/sys/kern/vfs_extattr.c
index 596de95..4a818dc 100644
--- a/sys/kern/vfs_extattr.c
+++ b/sys/kern/vfs_extattr.c
@@ -36,7 +36,7 @@
* SUCH DAMAGE.
*
* @(#)vfs_syscalls.c 8.13 (Berkeley) 4/15/94
- * $Id: vfs_syscalls.c,v 1.93 1998/02/15 04:17:09 dyson Exp $
+ * $Id: vfs_syscalls.c,v 1.94 1998/03/07 21:35:39 dyson Exp $
*/
/* For 4.3 integer FS ID compatibility */
@@ -283,6 +283,14 @@ update:
mp->mnt_flag = flag;
mp->mnt_kern_flag = flag2;
}
+ if ((mp->mnt_flag & MNT_RDONLY) == 0) {
+ if (mp->mnt_syncer == NULL)
+ error = vfs_allocate_syncvnode(mp);
+ } else {
+ if (mp->mnt_syncer != NULL)
+ vrele(mp->mnt_syncer);
+ mp->mnt_syncer = NULL;
+ }
vfs_unbusy(mp, p);
return (error);
}
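The mount-update path above ties the syncer vnode's existence to writability: a read-write mount gets one, and a downgrade to read-only releases it. A toy user-space model of that invariant follows; the struct and helpers are simplified stand-ins, with malloc()/free() standing in for vnode allocation and vrele().

#include <stdio.h>
#include <stdlib.h>

struct mount {
        int rdonly;
        void *syncer;                   /* stands in for mnt_syncer */
};

static void
update_syncer(struct mount *mp)
{
        if (!mp->rdonly) {
                if (mp->syncer == NULL)
                        mp->syncer = malloc(1); /* vfs_allocate_syncvnode() */
        } else {
                free(mp->syncer);               /* vrele(mp->mnt_syncer) */
                mp->syncer = NULL;
        }
}

int
main(void)
{
        struct mount m = { 0, NULL };

        update_syncer(&m);              /* read-write mount: syncer allocated */
        printf("rw: syncer %s\n", m.syncer ? "present" : "absent");
        m.rdonly = 1;
        update_syncer(&m);              /* downgraded to read-only: released */
        printf("ro: syncer %s\n", m.syncer ? "present" : "absent");
        return (0);
}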
@@ -296,6 +304,8 @@ update:
simple_unlock(&mountlist_slock);
checkdirs(vp);
VOP_UNLOCK(vp, 0, p);
+ if ((mp->mnt_flag & MNT_RDONLY) == 0)
+ error = vfs_allocate_syncvnode(mp);
vfs_unbusy(mp, p);
if (error = VFS_START(mp, 0, p))
vrele(vp);
@@ -431,12 +441,16 @@ dounmount(mp, flags, p)
vfs_msync(mp, MNT_WAIT);
mp->mnt_flag &=~ MNT_ASYNC;
cache_purgevfs(mp); /* remove cache entries for this file sys */
+ if (mp->mnt_syncer != NULL)
+ vrele(mp->mnt_syncer);
if (((mp->mnt_flag & MNT_RDONLY) ||
(error = VFS_SYNC(mp, MNT_WAIT, p->p_ucred, p)) == 0) ||
(flags & MNT_FORCE))
error = VFS_UNMOUNT(mp, flags, p);
simple_lock(&mountlist_slock);
if (error) {
+ if ((mp->mnt_flag & MNT_RDONLY) == 0 && mp->mnt_syncer == NULL)
+ (void) vfs_allocate_syncvnode(mp);
mp->mnt_kern_flag &= ~MNTK_UNMOUNT;
lockmgr(&mp->mnt_lock, LK_RELEASE | LK_INTERLOCK | LK_REENABLE,
&mountlist_slock, p);
@@ -490,9 +504,9 @@ sync(p, uap)
asyncflag = mp->mnt_flag & MNT_ASYNC;
mp->mnt_flag &= ~MNT_ASYNC;
vfs_msync(mp, MNT_NOWAIT);
- VFS_SYNC(mp, MNT_NOWAIT, p != NULL ? p->p_ucred : NOCRED, p);
- if (asyncflag)
- mp->mnt_flag |= MNT_ASYNC;
+ VFS_SYNC(mp, MNT_NOWAIT,
+ ((p != NULL) ? p->p_ucred : NOCRED), p);
+ mp->mnt_flag |= asyncflag;
}
simple_lock(&mountlist_slock);
nmp = mp->mnt_list.cqe_next;
@@ -665,10 +679,11 @@ getfsstat(p, uap)
if (sfsp && count < maxcount) {
sp = &mp->mnt_stat;
/*
- * If MNT_NOWAIT is specified, do not refresh the
- * fsstat cache. MNT_WAIT overrides MNT_NOWAIT.
+ * If MNT_NOWAIT or MNT_LAZY is specified, do not
+ * refresh the fsstat cache unless MNT_WAIT is also
+ * set; MNT_WAIT still forces a refresh.
*/
- if (((SCARG(uap, flags) & MNT_NOWAIT) == 0 ||
+ if (((SCARG(uap, flags) & (MNT_LAZY|MNT_NOWAIT)) == 0 ||
(SCARG(uap, flags) & MNT_WAIT)) &&
(error = VFS_STATFS(mp, sp, p))) {
simple_lock(&mountlist_slock);
diff --git a/sys/kern/vfs_subr.c b/sys/kern/vfs_subr.c
index 5d27cf5..972604d 100644
--- a/sys/kern/vfs_subr.c
+++ b/sys/kern/vfs_subr.c
@@ -36,7 +36,7 @@
* SUCH DAMAGE.
*
* @(#)vfs_subr.c 8.31 (Berkeley) 5/26/95
- * $Id: vfs_subr.c,v 1.136 1998/03/01 23:07:45 dyson Exp $
+ * $Id: vfs_subr.c,v 1.137 1998/03/07 21:35:35 dyson Exp $
*/
/*
@@ -123,6 +123,19 @@ static struct simplelock spechash_slock;
struct nfs_public nfs_pub; /* publicly exported FS */
static vm_zone_t vnode_zone;
+/*
+ * The workitem queue.
+ */
+#define SYNCER_MAXDELAY 32
+int syncer_maxdelay = SYNCER_MAXDELAY; /* maximum delay time */
+time_t syncdelay = 30;
+int rushjob; /* number of slots to run ASAP */
+
+static int syncer_delayno = 0;
+static long syncer_mask;
+LIST_HEAD(synclist, vnode);
+static struct synclist *syncer_workitem_pending;
+
int desiredvnodes;
SYSCTL_INT(_kern, KERN_MAXVNODES, maxvnodes, CTLFLAG_RW, &desiredvnodes, 0, "");
@@ -147,6 +160,12 @@ vntblinit()
simple_lock_init(&vnode_free_list_slock);
CIRCLEQ_INIT(&mountlist);
vnode_zone = zinit("VNODE", sizeof (struct vnode), 0, 0, 5);
+ /*
+ * Initialize the filesystem syncer.
+ */
+ syncer_workitem_pending = hashinit(syncer_maxdelay, M_VNODE,
+ &syncer_mask);
+ syncer_maxdelay = syncer_mask + 1;
}
/*
@@ -554,7 +573,7 @@ vinvalbuf(vp, flags, cred, p, slpflag, slptimeo)
int s, error;
vm_object_t object;
- if (flags & V_SAVE) {
+ if ((flags & V_SAVE) && vp->v_dirtyblkhd.lh_first != NULL) {
if ((error = VOP_FSYNC(vp, cred, MNT_WAIT, p)))
return (error);
if (vp->v_dirtyblkhd.lh_first != NULL)
@@ -688,17 +707,154 @@ brelvp(bp)
/*
* Delete from old vnode list, if on one.
*/
+ vp = bp->b_vp;
s = splbio();
if (bp->b_vnbufs.le_next != NOLIST)
bufremvn(bp);
+ if ((vp->v_flag & VONWORKLST) && (LIST_FIRST(&vp->v_dirtyblkhd) == NULL)) {
+ vp->v_flag &= ~VONWORKLST;
+ LIST_REMOVE(vp, v_synclist);
+ }
splx(s);
-
- vp = bp->b_vp;
bp->b_vp = (struct vnode *) 0;
vdrop(vp);
}
/*
+ * The workitem queue.
+ *
+ * It is useful to delay writes of file data and filesystem metadata
+ * for tens of seconds so that quickly created and deleted files need
+ * not waste disk bandwidth being created and removed. To realize this,
+ * we append vnodes to a "workitem" queue. When running with a soft
+ * updates implementation, most pending metadata dependencies should
+ * not wait for more than a few seconds. Thus, metadata written through
+ * mounted block devices is delayed only about half the time that file
+ * data is delayed. Similarly, directory updates are more critical, so
+ * they are delayed only about a third of the time that file data is
+ * delayed. Thus, there are
+ * SYNCER_MAXDELAY queues that are processed round-robin at a rate of
+ * one each second (driven off the filesystem syncer process). The
+ * syncer_delayno variable indicates the next queue that is to be processed.
+ * Items that need to be processed soon are placed in this queue:
+ *
+ * syncer_workitem_pending[syncer_delayno]
+ *
+ * A delay of fifteen seconds is done by placing the request fifteen
+ * entries later in the queue:
+ *
+ * syncer_workitem_pending[(syncer_delayno + 15) & syncer_mask]
+ *
+ */
+
+/*
+ * Add an item to the syncer work queue.
+ */
+void
+vn_syncer_add_to_worklist(vp, delay)
+ struct vnode *vp;
+ int delay;
+{
+ int s, slot;
+
+ s = splbio();
+
+ if (vp->v_flag & VONWORKLST) {
+ LIST_REMOVE(vp, v_synclist);
+ }
+
+ if (delay > syncer_maxdelay - 2)
+ delay = syncer_maxdelay - 2;
+ slot = (syncer_delayno + delay) & syncer_mask;
+
+ LIST_INSERT_HEAD(&syncer_workitem_pending[slot], vp, v_synclist);
+ vp->v_flag |= VONWORKLST;
+ splx(s);
+}
+
+static void sched_sync __P((void));
+static struct proc *updateproc;
+static struct kproc_desc up_kp = {
+ "syncer",
+ sched_sync,
+ &updateproc
+};
+SYSINIT_KT(syncer, SI_SUB_KTHREAD_UPDATE, SI_ORDER_FIRST, kproc_start, &up_kp)
+
+/*
+ * System filesystem synchronizer daemon.
+ */
+void
+sched_sync(void)
+{
+ struct synclist *slp;
+ struct vnode *vp;
+ long starttime;
+ int s;
+ struct proc *p = updateproc;
+
+ for (;;) {
+ starttime = time.tv_sec;
+
+ /*
+ * Push files whose dirty time has expired.
+ */
+ s = splbio();
+ slp = &syncer_workitem_pending[syncer_delayno];
+ syncer_delayno += 1;
+ if (syncer_delayno == syncer_maxdelay)
+ syncer_delayno = 0;
+ splx(s);
+
+ while ((vp = LIST_FIRST(slp)) != NULL) {
+ vn_lock(vp, LK_EXCLUSIVE | LK_RETRY, p);
+ (void) VOP_FSYNC(vp, p->p_ucred, MNT_LAZY, p);
+ VOP_UNLOCK(vp, 0, p);
+ if (LIST_FIRST(slp) == vp) {
+ if (LIST_FIRST(&vp->v_dirtyblkhd) == NULL &&
+ vp->v_type != VBLK)
+ panic("sched_sync: fsync failed");
+ /*
+ * Move ourselves to the back of the sync list.
+ */
+ LIST_REMOVE(vp, v_synclist);
+ vn_syncer_add_to_worklist(vp, syncdelay);
+ }
+ }
+
+ /*
+ * Do soft update processing.
+ */
+ if (bioops.io_sync)
+ (*bioops.io_sync)(NULL);
+
+ /*
+ * The variable rushjob allows the kernel to speed up the
+ * processing of the filesystem syncer process. A rushjob
+ * value of N tells the filesystem syncer to process the next
+ * N seconds worth of work on its queue ASAP. Currently rushjob
+ * is used by the soft update code to speed up the filesystem
+ * syncer process when the incore state is getting so far
+ * ahead of the disk that the kernel memory pool is being
+ * threatened with exhaustion.
+ */
+ if (rushjob > 0) {
+ rushjob -= 1;
+ continue;
+ }
+ /*
+ * If it has taken us less than a second to process the
+ * current work, then wait. Otherwise start right over
+ * again. We can still lose time if any single round
+ * takes more than two seconds, but it does not really
+ * matter as we are just trying to generally pace the
+ * filesystem activity.
+ */
+ if (time.tv_sec == starttime)
+ tsleep(&lbolt, PPAUSE, "syncer", 0);
+ }
+}
+
+/*
* Associate a p-buffer with a vnode.
*/
void
@@ -743,6 +899,8 @@ reassignbuf(bp, newvp)
register struct buf *bp;
register struct vnode *newvp;
{
+ struct buflists *listheadp;
+ int delay;
int s;
if (newvp == NULL) {
@@ -765,18 +923,40 @@ reassignbuf(bp, newvp)
if (bp->b_flags & B_DELWRI) {
struct buf *tbp;
- tbp = newvp->v_dirtyblkhd.lh_first;
+ listheadp = &newvp->v_dirtyblkhd;
+ if ((newvp->v_flag & VONWORKLST) == 0) {
+ switch (newvp->v_type) {
+ case VDIR:
+ delay = syncdelay / 3;
+ break;
+ case VBLK:
+ if (newvp->v_specmountpoint != NULL) {
+ delay = syncdelay / 2;
+ break;
+ }
+ /* fall through */
+ default:
+ delay = syncdelay;
+ }
+ vn_syncer_add_to_worklist(newvp, delay);
+ }
+ tbp = listheadp->lh_first;
if (!tbp || (tbp->b_lblkno > bp->b_lblkno)) {
- bufinsvn(bp, &newvp->v_dirtyblkhd);
+ bufinsvn(bp, listheadp);
} else {
while (tbp->b_vnbufs.le_next &&
- (tbp->b_vnbufs.le_next->b_lblkno < bp->b_lblkno)) {
+ (tbp->b_vnbufs.le_next->b_lblkno < bp->b_lblkno)) {
tbp = tbp->b_vnbufs.le_next;
}
LIST_INSERT_AFTER(tbp, bp, b_vnbufs);
}
} else {
bufinsvn(bp, &newvp->v_cleanblkhd);
+ if ((newvp->v_flag & VONWORKLST) &&
+ LIST_FIRST(&newvp->v_dirtyblkhd) == NULL) {
+ newvp->v_flag &= ~VONWORKLST;
+ LIST_REMOVE(newvp, v_synclist);
+ }
}
bp->b_vp = newvp;
vhold(bp->b_vp);
@@ -863,7 +1043,7 @@ loop:
nvp->v_rdev = nvp_rdev;
nvp->v_hashchain = vpp;
nvp->v_specnext = *vpp;
- nvp->v_specflags = 0;
+ nvp->v_specmountpoint = NULL;
simple_unlock(&spechash_slock);
*vpp = nvp;
if (vp != NULLVP) {
@@ -920,7 +1100,6 @@ vget(vp, flags, p)
if (VSHOULDBUSY(vp))
vbusy(vp);
-
if (flags & LK_TYPE_MASK) {
if ((error = vn_lock(vp, flags | LK_INTERLOCK, p)) != 0) {
/*
@@ -1066,7 +1245,7 @@ vdrop(vp)
simple_lock(&vp->v_interlock);
if (vp->v_holdcnt <= 0)
- panic("holdrele: holdcnt");
+ panic("vdrop: holdcnt");
vp->v_holdcnt--;
if (VSHOULDFREE(vp))
vfree(vp);
@@ -1790,7 +1969,7 @@ vfs_mountedon(vp)
struct vnode *vq;
int error = 0;
- if (vp->v_specflags & SI_MOUNTEDON)
+ if (vp->v_specmountpoint != NULL)
return (EBUSY);
if (vp->v_flag & VALIASED) {
simple_lock(&spechash_slock);
@@ -1798,7 +1977,7 @@ vfs_mountedon(vp)
if (vq->v_rdev != vp->v_rdev ||
vq->v_type != vp->v_type)
continue;
- if (vq->v_specflags & SI_MOUNTEDON) {
+ if (vq->v_specmountpoint != NULL) {
error = EBUSY;
break;
}
@@ -2326,3 +2505,170 @@ vn_pollgone(vp)
}
simple_unlock(&vp->v_pollinfo.vpi_lock);
}
+
+
+
+/*
+ * Routine to create and manage a filesystem syncer vnode.
+ */
+#define sync_close ((int (*) __P((struct vop_close_args *)))nullop)
+int sync_fsync __P((struct vop_fsync_args *));
+int sync_inactive __P((struct vop_inactive_args *));
+int sync_reclaim __P((struct vop_reclaim_args *));
+#define sync_lock ((int (*) __P((struct vop_lock_args *)))vop_nolock)
+#define sync_unlock ((int (*) __P((struct vop_unlock_args *)))vop_nounlock)
+int sync_print __P((struct vop_print_args *));
+#define sync_islocked ((int(*) __P((struct vop_islocked_args *)))vop_noislocked)
+
+vop_t **sync_vnodeop_p;
+struct vnodeopv_entry_desc sync_vnodeop_entries[] = {
+ { &vop_default_desc, (vop_t *) vop_eopnotsupp },
+ { &vop_close_desc, (vop_t *) sync_close }, /* close */
+ { &vop_fsync_desc, (vop_t *) sync_fsync }, /* fsync */
+ { &vop_inactive_desc, (vop_t *) sync_inactive }, /* inactive */
+ { &vop_reclaim_desc, (vop_t *) sync_reclaim }, /* reclaim */
+ { &vop_lock_desc, (vop_t *) sync_lock }, /* lock */
+ { &vop_unlock_desc, (vop_t *) sync_unlock }, /* unlock */
+ { &vop_print_desc, (vop_t *) sync_print }, /* print */
+ { &vop_islocked_desc, (vop_t *) sync_islocked }, /* islocked */
+ { NULL, NULL }
+};
+struct vnodeopv_desc sync_vnodeop_opv_desc =
+ { &sync_vnodeop_p, sync_vnodeop_entries };
+
+VNODEOP_SET(sync_vnodeop_opv_desc);
+
+/*
+ * Create a new filesystem syncer vnode for the specified mount point.
+ */
+int
+vfs_allocate_syncvnode(mp)
+ struct mount *mp;
+{
+ struct vnode *vp;
+ static long start, incr, next;
+ int error;
+
+ /* Allocate a new vnode */
+ if ((error = getnewvnode(VT_VFS, mp, sync_vnodeop_p, &vp)) != 0) {
+ mp->mnt_syncer = NULL;
+ return (error);
+ }
+ vp->v_type = VNON;
+ /*
+ * Place the vnode onto the syncer worklist. We attempt to
+ * scatter them about on the list so that they will go off
+ * at evenly distributed times even if all the filesystems
+ * are mounted at once.
+ */
+ next += incr;
+ if (next == 0 || next > syncer_maxdelay) {
+ start /= 2;
+ incr /= 2;
+ if (start == 0) {
+ start = syncer_maxdelay / 2;
+ incr = syncer_maxdelay;
+ }
+ next = start;
+ }
+ vn_syncer_add_to_worklist(vp, syncdelay > 0 ? next % syncdelay : 0);
+ mp->mnt_syncer = vp;
+ return (0);
+}
+
+/*
+ * Do a lazy sync of the filesystem.
+ */
+int
+sync_fsync(ap)
+ struct vop_fsync_args /* {
+ struct vnode *a_vp;
+ struct ucred *a_cred;
+ int a_waitfor;
+ struct proc *a_p;
+ } */ *ap;
+{
+ struct vnode *syncvp = ap->a_vp;
+ struct mount *mp = syncvp->v_mount;
+ struct proc *p = ap->a_p;
+ int asyncflag;
+
+ /*
+ * We only need to do something if this is a lazy evaluation.
+ */
+ if (ap->a_waitfor != MNT_LAZY)
+ return (0);
+
+ /*
+ * Move ourselves to the back of the sync list.
+ */
+ vn_syncer_add_to_worklist(syncvp, syncdelay);
+
+ /*
+ * Walk the list of vnodes pushing all that are dirty and
+ * not already on the sync list.
+ */
+ simple_lock(&mountlist_slock);
+ if (vfs_busy(mp, LK_EXCLUSIVE | LK_NOWAIT, &mountlist_slock, p) != 0)
+ return (0);
+ asyncflag = mp->mnt_flag & MNT_ASYNC;
+ mp->mnt_flag &= ~MNT_ASYNC;
+ VFS_SYNC(mp, MNT_LAZY, ap->a_cred, p);
+ if (asyncflag)
+ mp->mnt_flag |= MNT_ASYNC;
+ vfs_unbusy(mp, p);
+ return (0);
+}
+
+/*
+ * The syncer vnode is no longer referenced.
+ */
+int
+sync_inactive(ap)
+ struct vop_inactive_args /* {
+ struct vnode *a_vp;
+ struct proc *a_p;
+ } */ *ap;
+{
+
+ vgone(ap->a_vp);
+ return (0);
+}
+
+/*
+ * The syncer vnode is no longer needed and is being decommissioned.
+ */
+int
+sync_reclaim(ap)
+ struct vop_reclaim_args /* {
+ struct vnode *a_vp;
+ } */ *ap;
+{
+ struct vnode *vp = ap->a_vp;
+
+ vp->v_mount->mnt_syncer = NULL;
+ if (vp->v_flag & VONWORKLST) {
+ LIST_REMOVE(vp, v_synclist);
+ vp->v_flag &= ~VONWORKLST;
+ }
+
+ return (0);
+}
+
+/*
+ * Print out a syncer vnode.
+ */
+int
+sync_print(ap)
+ struct vop_print_args /* {
+ struct vnode *a_vp;
+ } */ *ap;
+{
+ struct vnode *vp = ap->a_vp;
+
+ printf("syncer vnode");
+ if (vp->v_vnlock != NULL)
+ lockmgr_printinfo(vp->v_vnlock);
+ printf("\n");
+ return (0);
+}
diff --git a/sys/kern/vfs_syscalls.c b/sys/kern/vfs_syscalls.c
index 596de95..4a818dc 100644
--- a/sys/kern/vfs_syscalls.c
+++ b/sys/kern/vfs_syscalls.c
@@ -36,7 +36,7 @@
* SUCH DAMAGE.
*
* @(#)vfs_syscalls.c 8.13 (Berkeley) 4/15/94
- * $Id: vfs_syscalls.c,v 1.93 1998/02/15 04:17:09 dyson Exp $
+ * $Id: vfs_syscalls.c,v 1.94 1998/03/07 21:35:39 dyson Exp $
*/
/* For 4.3 integer FS ID compatibility */
@@ -283,6 +283,14 @@ update:
mp->mnt_flag = flag;
mp->mnt_kern_flag = flag2;
}
+ if ((mp->mnt_flag & MNT_RDONLY) == 0) {
+ if (mp->mnt_syncer == NULL)
+ error = vfs_allocate_syncvnode(mp);
+ } else {
+ if (mp->mnt_syncer != NULL)
+ vrele(mp->mnt_syncer);
+ mp->mnt_syncer = NULL;
+ }
vfs_unbusy(mp, p);
return (error);
}
@@ -296,6 +304,8 @@ update:
simple_unlock(&mountlist_slock);
checkdirs(vp);
VOP_UNLOCK(vp, 0, p);
+ if ((mp->mnt_flag & MNT_RDONLY) == 0)
+ error = vfs_allocate_syncvnode(mp);
vfs_unbusy(mp, p);
if (error = VFS_START(mp, 0, p))
vrele(vp);
@@ -431,12 +441,16 @@ dounmount(mp, flags, p)
vfs_msync(mp, MNT_WAIT);
mp->mnt_flag &=~ MNT_ASYNC;
cache_purgevfs(mp); /* remove cache entries for this file sys */
+ if (mp->mnt_syncer != NULL)
+ vrele(mp->mnt_syncer);
if (((mp->mnt_flag & MNT_RDONLY) ||
(error = VFS_SYNC(mp, MNT_WAIT, p->p_ucred, p)) == 0) ||
(flags & MNT_FORCE))
error = VFS_UNMOUNT(mp, flags, p);
simple_lock(&mountlist_slock);
if (error) {
+ if ((mp->mnt_flag & MNT_RDONLY) == 0 && mp->mnt_syncer == NULL)
+ (void) vfs_allocate_syncvnode(mp);
mp->mnt_kern_flag &= ~MNTK_UNMOUNT;
lockmgr(&mp->mnt_lock, LK_RELEASE | LK_INTERLOCK | LK_REENABLE,
&mountlist_slock, p);
@@ -490,9 +504,9 @@ sync(p, uap)
asyncflag = mp->mnt_flag & MNT_ASYNC;
mp->mnt_flag &= ~MNT_ASYNC;
vfs_msync(mp, MNT_NOWAIT);
- VFS_SYNC(mp, MNT_NOWAIT, p != NULL ? p->p_ucred : NOCRED, p);
- if (asyncflag)
- mp->mnt_flag |= MNT_ASYNC;
+ VFS_SYNC(mp, MNT_NOWAIT,
+ ((p != NULL) ? p->p_ucred : NOCRED), p);
+ mp->mnt_flag |= asyncflag;
}
simple_lock(&mountlist_slock);
nmp = mp->mnt_list.cqe_next;
@@ -665,10 +679,11 @@ getfsstat(p, uap)
if (sfsp && count < maxcount) {
sp = &mp->mnt_stat;
/*
- * If MNT_NOWAIT is specified, do not refresh the
- * fsstat cache. MNT_WAIT overrides MNT_NOWAIT.
+ * If MNT_NOWAIT or MNT_LAZY is specified, do not
+ * refresh the fsstat cache unless MNT_WAIT is also
+ * set; MNT_WAIT still forces a refresh.
*/
- if (((SCARG(uap, flags) & MNT_NOWAIT) == 0 ||
+ if (((SCARG(uap, flags) & (MNT_LAZY|MNT_NOWAIT)) == 0 ||
(SCARG(uap, flags) & MNT_WAIT)) &&
(error = VFS_STATFS(mp, sp, p))) {
simple_lock(&mountlist_slock);
diff --git a/sys/kern/vnode_if.src b/sys/kern/vnode_if.src
index bedf274..922e060 100644
--- a/sys/kern/vnode_if.src
+++ b/sys/kern/vnode_if.src
@@ -31,7 +31,7 @@
# SUCH DAMAGE.
#
# @(#)vnode_if.src 8.12 (Berkeley) 5/14/95
-# $Id: vnode_if.src,v 1.14 1997/10/16 10:48:00 phk Exp $
+# $Id: vnode_if.src,v 1.15 1997/10/16 20:32:23 phk Exp $
#
#
@@ -429,6 +429,18 @@ vop_advlock {
};
#
+#% balloc vp L L L
+#
+vop_balloc {
+ IN struct vnode *vp;
+ IN off_t startoffset;
+ IN int size;
+ IN struct ucred *cred;
+ IN int flags;
+ OUT struct buf **bpp;
+};
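Once vnode_if.sh regenerates vnode_if.h, the declaration above becomes a VOP_BALLOC() call with the arguments in the order listed. The fragment below is a hedged, kernel-context sketch of how a filesystem write path would be expected to use it; write_block() itself is hypothetical, with UFS's ffs_balloc() as the anticipated backing implementation.

#include <sys/param.h>
#include <sys/systm.h>
#include <sys/vnode.h>
#include <sys/buf.h>

static int
write_block(struct vnode *vp, off_t offset, int size, struct ucred *cred)
{
        struct buf *bp;
        int error;

        /* Ask the filesystem to allocate backing store and return a buffer. */
        error = VOP_BALLOC(vp, offset, size, cred, 0, &bp);
        if (error)
                return (error);
        /* ... copy the caller's data into bp->b_data ... */
        return (bwrite(bp));
}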
+
+#
#% reallocblks vp L L L
#
vop_reallocblks {