| author | rgrimes <rgrimes@FreeBSD.org> | 1994-05-24 10:09:53 +0000 |
|---|---|---|
| committer | rgrimes <rgrimes@FreeBSD.org> | 1994-05-24 10:09:53 +0000 |
| commit | 8fb65ce818b3e3c6f165b583b910af24000768a5 (patch) | |
| tree | ba751e4f2166aefec707c9d7401c7ff432506642 /sys/ufs | |
| parent | a6ce65d368e623088a4c1a29865889f431b15420 (diff) | |
BSD 4.4 Lite Kernel Sources
Diffstat (limited to 'sys/ufs')
46 files changed, 17206 insertions, 0 deletions
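
The bulk of this import is ffs_alloc.c, whose comments describe the cylinder-group search order shared by every allocator in the file: try the preferred group, quadratically rehash across the other groups, then fall back to a brute-force sweep. As a reading aid before the listing, the stand-alone C sketch below mirrors that search order from ffs_hashalloc(); the helper try_alloc_in_cg() and the parameter ncg are hypothetical stand-ins and are not part of the committed sources.

```c
/*
 * Reading aid only: a stand-alone sketch of the cylinder-group search order
 * documented in ffs_hashalloc() in ffs_alloc.c below.  try_alloc_in_cg()
 * and ncg are hypothetical stand-ins, not part of the committed sources.
 */
typedef long (*cg_allocator)(int cg);

static long
hashalloc_sketch(int cg, int ncg, cg_allocator try_alloc_in_cg)
{
	long result;
	int i, icg = cg;

	/* 1: try the preferred cylinder group. */
	if ((result = try_alloc_in_cg(cg)) != 0)
		return (result);
	/* 2: quadratic rehash: probe cg+1, cg+3, cg+7, ... modulo ncg. */
	for (i = 1; i < ncg; i *= 2) {
		cg += i;
		if (cg >= ncg)
			cg -= ncg;
		if ((result = try_alloc_in_cg(cg)) != 0)
			return (result);
	}
	/* 3: brute force over the remaining groups, starting at icg + 2. */
	cg = (icg + 2) % ncg;
	for (i = 2; i < ncg; i++) {
		if ((result = try_alloc_in_cg(cg)) != 0)
			return (result);
		if (++cg == ncg)
			cg = 0;
	}
	return (0);	/* nothing free anywhere; the caller reports ENOSPC */
}
```

In the committed code the allocator argument is ffs_alloccg, ffs_nodealloccg, or ffs_clusteralloc, and a zero return is what sends ffs_alloc() and ffs_valloc() to their "file system full" / "out of inodes" paths.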
diff --git a/sys/ufs/ffs/ffs_alloc.c b/sys/ufs/ffs/ffs_alloc.c new file mode 100644 index 0000000..cdd2e4b --- /dev/null +++ b/sys/ufs/ffs/ffs_alloc.c @@ -0,0 +1,1474 @@ +/* + * Copyright (c) 1982, 1986, 1989, 1993 + * The Regents of the University of California. All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * 3. All advertising materials mentioning features or use of this software + * must display the following acknowledgement: + * This product includes software developed by the University of + * California, Berkeley and its contributors. + * 4. Neither the name of the University nor the names of its contributors + * may be used to endorse or promote products derived from this software + * without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND + * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE + * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL + * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS + * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) + * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT + * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY + * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF + * SUCH DAMAGE. + * + * @(#)ffs_alloc.c 8.8 (Berkeley) 2/21/94 + */ + +#include <sys/param.h> +#include <sys/systm.h> +#include <sys/buf.h> +#include <sys/proc.h> +#include <sys/vnode.h> +#include <sys/mount.h> +#include <sys/kernel.h> +#include <sys/syslog.h> + +#include <vm/vm.h> + +#include <ufs/ufs/quota.h> +#include <ufs/ufs/inode.h> + +#include <ufs/ffs/fs.h> +#include <ufs/ffs/ffs_extern.h> + +extern u_long nextgennumber; + +static daddr_t ffs_alloccg __P((struct inode *, int, daddr_t, int)); +static daddr_t ffs_alloccgblk __P((struct fs *, struct cg *, daddr_t)); +static daddr_t ffs_clusteralloc __P((struct inode *, int, daddr_t, int)); +static ino_t ffs_dirpref __P((struct fs *)); +static daddr_t ffs_fragextend __P((struct inode *, int, long, int, int)); +static void ffs_fserr __P((struct fs *, u_int, char *)); +static u_long ffs_hashalloc + __P((struct inode *, int, long, int, u_long (*)())); +static ino_t ffs_nodealloccg __P((struct inode *, int, daddr_t, int)); +static daddr_t ffs_mapsearch __P((struct fs *, struct cg *, daddr_t, int)); + +/* + * Allocate a block in the file system. + * + * The size of the requested block is given, which must be some + * multiple of fs_fsize and <= fs_bsize. + * A preference may be optionally specified. If a preference is given + * the following hierarchy is used to allocate a block: + * 1) allocate the requested block. + * 2) allocate a rotationally optimal block in the same cylinder. + * 3) allocate a block in the same cylinder group. 
+ * 4) quadradically rehash into other cylinder groups, until an + * available block is located. + * If no block preference is given the following heirarchy is used + * to allocate a block: + * 1) allocate a block in the cylinder group that contains the + * inode for the file. + * 2) quadradically rehash into other cylinder groups, until an + * available block is located. + */ +ffs_alloc(ip, lbn, bpref, size, cred, bnp) + register struct inode *ip; + daddr_t lbn, bpref; + int size; + struct ucred *cred; + daddr_t *bnp; +{ + register struct fs *fs; + daddr_t bno; + int cg, error; + + *bnp = 0; + fs = ip->i_fs; +#ifdef DIAGNOSTIC + if ((u_int)size > fs->fs_bsize || fragoff(fs, size) != 0) { + printf("dev = 0x%x, bsize = %d, size = %d, fs = %s\n", + ip->i_dev, fs->fs_bsize, size, fs->fs_fsmnt); + panic("ffs_alloc: bad size"); + } + if (cred == NOCRED) + panic("ffs_alloc: missing credential\n"); +#endif /* DIAGNOSTIC */ + if (size == fs->fs_bsize && fs->fs_cstotal.cs_nbfree == 0) + goto nospace; + if (cred->cr_uid != 0 && freespace(fs, fs->fs_minfree) <= 0) + goto nospace; +#ifdef QUOTA + if (error = chkdq(ip, (long)btodb(size), cred, 0)) + return (error); +#endif + if (bpref >= fs->fs_size) + bpref = 0; + if (bpref == 0) + cg = ino_to_cg(fs, ip->i_number); + else + cg = dtog(fs, bpref); + bno = (daddr_t)ffs_hashalloc(ip, cg, (long)bpref, size, + (u_long (*)())ffs_alloccg); + if (bno > 0) { + ip->i_blocks += btodb(size); + ip->i_flag |= IN_CHANGE | IN_UPDATE; + *bnp = bno; + return (0); + } +#ifdef QUOTA + /* + * Restore user's disk quota because allocation failed. + */ + (void) chkdq(ip, (long)-btodb(size), cred, FORCE); +#endif +nospace: + ffs_fserr(fs, cred->cr_uid, "file system full"); + uprintf("\n%s: write failed, file system is full\n", fs->fs_fsmnt); + return (ENOSPC); +} + +/* + * Reallocate a fragment to a bigger size + * + * The number and size of the old block is given, and a preference + * and new size is also specified. The allocator attempts to extend + * the original block. Failing that, the regular block allocator is + * invoked to get an appropriate block. + */ +ffs_realloccg(ip, lbprev, bpref, osize, nsize, cred, bpp) + register struct inode *ip; + daddr_t lbprev; + daddr_t bpref; + int osize, nsize; + struct ucred *cred; + struct buf **bpp; +{ + register struct fs *fs; + struct buf *bp; + int cg, request, error; + daddr_t bprev, bno; + + *bpp = 0; + fs = ip->i_fs; +#ifdef DIAGNOSTIC + if ((u_int)osize > fs->fs_bsize || fragoff(fs, osize) != 0 || + (u_int)nsize > fs->fs_bsize || fragoff(fs, nsize) != 0) { + printf( + "dev = 0x%x, bsize = %d, osize = %d, nsize = %d, fs = %s\n", + ip->i_dev, fs->fs_bsize, osize, nsize, fs->fs_fsmnt); + panic("ffs_realloccg: bad size"); + } + if (cred == NOCRED) + panic("ffs_realloccg: missing credential\n"); +#endif /* DIAGNOSTIC */ + if (cred->cr_uid != 0 && freespace(fs, fs->fs_minfree) <= 0) + goto nospace; + if ((bprev = ip->i_db[lbprev]) == 0) { + printf("dev = 0x%x, bsize = %d, bprev = %d, fs = %s\n", + ip->i_dev, fs->fs_bsize, bprev, fs->fs_fsmnt); + panic("ffs_realloccg: bad bprev"); + } + /* + * Allocate the extra space in the buffer. + */ + if (error = bread(ITOV(ip), lbprev, osize, NOCRED, &bp)) { + brelse(bp); + return (error); + } +#ifdef QUOTA + if (error = chkdq(ip, (long)btodb(nsize - osize), cred, 0)) { + brelse(bp); + return (error); + } +#endif + /* + * Check for extension in the existing location. 
+ */ + cg = dtog(fs, bprev); + if (bno = ffs_fragextend(ip, cg, (long)bprev, osize, nsize)) { + if (bp->b_blkno != fsbtodb(fs, bno)) + panic("bad blockno"); + ip->i_blocks += btodb(nsize - osize); + ip->i_flag |= IN_CHANGE | IN_UPDATE; + allocbuf(bp, nsize); + bp->b_flags |= B_DONE; + bzero((char *)bp->b_data + osize, (u_int)nsize - osize); + *bpp = bp; + return (0); + } + /* + * Allocate a new disk location. + */ + if (bpref >= fs->fs_size) + bpref = 0; + switch ((int)fs->fs_optim) { + case FS_OPTSPACE: + /* + * Allocate an exact sized fragment. Although this makes + * best use of space, we will waste time relocating it if + * the file continues to grow. If the fragmentation is + * less than half of the minimum free reserve, we choose + * to begin optimizing for time. + */ + request = nsize; + if (fs->fs_minfree < 5 || + fs->fs_cstotal.cs_nffree > + fs->fs_dsize * fs->fs_minfree / (2 * 100)) + break; + log(LOG_NOTICE, "%s: optimization changed from SPACE to TIME\n", + fs->fs_fsmnt); + fs->fs_optim = FS_OPTTIME; + break; + case FS_OPTTIME: + /* + * At this point we have discovered a file that is trying to + * grow a small fragment to a larger fragment. To save time, + * we allocate a full sized block, then free the unused portion. + * If the file continues to grow, the `ffs_fragextend' call + * above will be able to grow it in place without further + * copying. If aberrant programs cause disk fragmentation to + * grow within 2% of the free reserve, we choose to begin + * optimizing for space. + */ + request = fs->fs_bsize; + if (fs->fs_cstotal.cs_nffree < + fs->fs_dsize * (fs->fs_minfree - 2) / 100) + break; + log(LOG_NOTICE, "%s: optimization changed from TIME to SPACE\n", + fs->fs_fsmnt); + fs->fs_optim = FS_OPTSPACE; + break; + default: + printf("dev = 0x%x, optim = %d, fs = %s\n", + ip->i_dev, fs->fs_optim, fs->fs_fsmnt); + panic("ffs_realloccg: bad optim"); + /* NOTREACHED */ + } + bno = (daddr_t)ffs_hashalloc(ip, cg, (long)bpref, request, + (u_long (*)())ffs_alloccg); + if (bno > 0) { + bp->b_blkno = fsbtodb(fs, bno); + (void) vnode_pager_uncache(ITOV(ip)); + ffs_blkfree(ip, bprev, (long)osize); + if (nsize < request) + ffs_blkfree(ip, bno + numfrags(fs, nsize), + (long)(request - nsize)); + ip->i_blocks += btodb(nsize - osize); + ip->i_flag |= IN_CHANGE | IN_UPDATE; + allocbuf(bp, nsize); + bp->b_flags |= B_DONE; + bzero((char *)bp->b_data + osize, (u_int)nsize - osize); + *bpp = bp; + return (0); + } +#ifdef QUOTA + /* + * Restore user's disk quota because allocation failed. + */ + (void) chkdq(ip, (long)-btodb(nsize - osize), cred, FORCE); +#endif + brelse(bp); +nospace: + /* + * no space available + */ + ffs_fserr(fs, cred->cr_uid, "file system full"); + uprintf("\n%s: write failed, file system is full\n", fs->fs_fsmnt); + return (ENOSPC); +} + +/* + * Reallocate a sequence of blocks into a contiguous sequence of blocks. + * + * The vnode and an array of buffer pointers for a range of sequential + * logical blocks to be made contiguous is given. The allocator attempts + * to find a range of sequential blocks starting as close as possible to + * an fs_rotdelay offset from the end of the allocation for the logical + * block immediately preceeding the current range. If successful, the + * physical block numbers in the buffer pointers and in the inode are + * changed to reflect the new allocation. If unsuccessful, the allocation + * is left unchanged. The success in doing the reallocation is returned. + * Note that the error return is not reflected back to the user. 
Rather + * the previous block allocation will be used. + */ +#include <sys/sysctl.h> +int doasyncfree = 1; +struct ctldebug debug14 = { "doasyncfree", &doasyncfree }; +int +ffs_reallocblks(ap) + struct vop_reallocblks_args /* { + struct vnode *a_vp; + struct cluster_save *a_buflist; + } */ *ap; +{ + struct fs *fs; + struct inode *ip; + struct vnode *vp; + struct buf *sbp, *ebp; + daddr_t *bap, *sbap, *ebap; + struct cluster_save *buflist; + daddr_t start_lbn, end_lbn, soff, eoff, newblk, blkno; + struct indir start_ap[NIADDR + 1], end_ap[NIADDR + 1], *idp; + int i, len, start_lvl, end_lvl, pref, ssize; + + vp = ap->a_vp; + ip = VTOI(vp); + fs = ip->i_fs; + if (fs->fs_contigsumsize <= 0) + return (ENOSPC); + buflist = ap->a_buflist; + len = buflist->bs_nchildren; + start_lbn = buflist->bs_children[0]->b_lblkno; + end_lbn = start_lbn + len - 1; +#ifdef DIAGNOSTIC + for (i = 1; i < len; i++) + if (buflist->bs_children[i]->b_lblkno != start_lbn + i) + panic("ffs_reallocblks: non-cluster"); +#endif + /* + * If the latest allocation is in a new cylinder group, assume that + * the filesystem has decided to move and do not force it back to + * the previous cylinder group. + */ + if (dtog(fs, dbtofsb(fs, buflist->bs_children[0]->b_blkno)) != + dtog(fs, dbtofsb(fs, buflist->bs_children[len - 1]->b_blkno))) + return (ENOSPC); + if (ufs_getlbns(vp, start_lbn, start_ap, &start_lvl) || + ufs_getlbns(vp, end_lbn, end_ap, &end_lvl)) + return (ENOSPC); + /* + * Get the starting offset and block map for the first block. + */ + if (start_lvl == 0) { + sbap = &ip->i_db[0]; + soff = start_lbn; + } else { + idp = &start_ap[start_lvl - 1]; + if (bread(vp, idp->in_lbn, (int)fs->fs_bsize, NOCRED, &sbp)) { + brelse(sbp); + return (ENOSPC); + } + sbap = (daddr_t *)sbp->b_data; + soff = idp->in_off; + } + /* + * Find the preferred location for the cluster. + */ + pref = ffs_blkpref(ip, start_lbn, soff, sbap); + /* + * If the block range spans two block maps, get the second map. + */ + if (end_lvl == 0 || (idp = &end_ap[end_lvl - 1])->in_off + 1 >= len) { + ssize = len; + } else { +#ifdef DIAGNOSTIC + if (start_ap[start_lvl-1].in_lbn == idp->in_lbn) + panic("ffs_reallocblk: start == end"); +#endif + ssize = len - (idp->in_off + 1); + if (bread(vp, idp->in_lbn, (int)fs->fs_bsize, NOCRED, &ebp)) + goto fail; + ebap = (daddr_t *)ebp->b_data; + } + /* + * Search the block map looking for an allocation of the desired size. + */ + if ((newblk = (daddr_t)ffs_hashalloc(ip, dtog(fs, pref), (long)pref, + len, (u_long (*)())ffs_clusteralloc)) == 0) + goto fail; + /* + * We have found a new contiguous block. + * + * First we have to replace the old block pointers with the new + * block pointers in the inode and indirect blocks associated + * with the file. + */ + blkno = newblk; + for (bap = &sbap[soff], i = 0; i < len; i++, blkno += fs->fs_frag) { + if (i == ssize) + bap = ebap; +#ifdef DIAGNOSTIC + if (buflist->bs_children[i]->b_blkno != fsbtodb(fs, *bap)) + panic("ffs_reallocblks: alloc mismatch"); +#endif + *bap++ = blkno; + } + /* + * Next we must write out the modified inode and indirect blocks. + * For strict correctness, the writes should be synchronous since + * the old block values may have been written to disk. In practise + * they are almost never written, but if we are concerned about + * strict correctness, the `doasyncfree' flag should be set to zero. + * + * The test on `doasyncfree' should be changed to test a flag + * that shows whether the associated buffers and inodes have + * been written. 
The flag should be set when the cluster is + * started and cleared whenever the buffer or inode is flushed. + * We can then check below to see if it is set, and do the + * synchronous write only when it has been cleared. + */ + if (sbap != &ip->i_db[0]) { + if (doasyncfree) + bdwrite(sbp); + else + bwrite(sbp); + } else { + ip->i_flag |= IN_CHANGE | IN_UPDATE; + if (!doasyncfree) + VOP_UPDATE(vp, &time, &time, MNT_WAIT); + } + if (ssize < len) + if (doasyncfree) + bdwrite(ebp); + else + bwrite(ebp); + /* + * Last, free the old blocks and assign the new blocks to the buffers. + */ + for (blkno = newblk, i = 0; i < len; i++, blkno += fs->fs_frag) { + ffs_blkfree(ip, dbtofsb(fs, buflist->bs_children[i]->b_blkno), + fs->fs_bsize); + buflist->bs_children[i]->b_blkno = fsbtodb(fs, blkno); + } + return (0); + +fail: + if (ssize < len) + brelse(ebp); + if (sbap != &ip->i_db[0]) + brelse(sbp); + return (ENOSPC); +} + +/* + * Allocate an inode in the file system. + * + * If allocating a directory, use ffs_dirpref to select the inode. + * If allocating in a directory, the following hierarchy is followed: + * 1) allocate the preferred inode. + * 2) allocate an inode in the same cylinder group. + * 3) quadradically rehash into other cylinder groups, until an + * available inode is located. + * If no inode preference is given the following heirarchy is used + * to allocate an inode: + * 1) allocate an inode in cylinder group 0. + * 2) quadradically rehash into other cylinder groups, until an + * available inode is located. + */ +ffs_valloc(ap) + struct vop_valloc_args /* { + struct vnode *a_pvp; + int a_mode; + struct ucred *a_cred; + struct vnode **a_vpp; + } */ *ap; +{ + register struct vnode *pvp = ap->a_pvp; + register struct inode *pip; + register struct fs *fs; + register struct inode *ip; + mode_t mode = ap->a_mode; + ino_t ino, ipref; + int cg, error; + + *ap->a_vpp = NULL; + pip = VTOI(pvp); + fs = pip->i_fs; + if (fs->fs_cstotal.cs_nifree == 0) + goto noinodes; + + if ((mode & IFMT) == IFDIR) + ipref = ffs_dirpref(fs); + else + ipref = pip->i_number; + if (ipref >= fs->fs_ncg * fs->fs_ipg) + ipref = 0; + cg = ino_to_cg(fs, ipref); + ino = (ino_t)ffs_hashalloc(pip, cg, (long)ipref, mode, ffs_nodealloccg); + if (ino == 0) + goto noinodes; + error = VFS_VGET(pvp->v_mount, ino, ap->a_vpp); + if (error) { + VOP_VFREE(pvp, ino, mode); + return (error); + } + ip = VTOI(*ap->a_vpp); + if (ip->i_mode) { + printf("mode = 0%o, inum = %d, fs = %s\n", + ip->i_mode, ip->i_number, fs->fs_fsmnt); + panic("ffs_valloc: dup alloc"); + } + if (ip->i_blocks) { /* XXX */ + printf("free inode %s/%d had %d blocks\n", + fs->fs_fsmnt, ino, ip->i_blocks); + ip->i_blocks = 0; + } + ip->i_flags = 0; + /* + * Set up a new generation number for this inode. + */ + if (++nextgennumber < (u_long)time.tv_sec) + nextgennumber = time.tv_sec; + ip->i_gen = nextgennumber; + return (0); +noinodes: + ffs_fserr(fs, ap->a_cred->cr_uid, "out of inodes"); + uprintf("\n%s: create/symlink failed, no inodes free\n", fs->fs_fsmnt); + return (ENOSPC); +} + +/* + * Find a cylinder to place a directory. + * + * The policy implemented by this algorithm is to select from + * among those cylinder groups with above the average number of + * free inodes, the one with the smallest number of directories. 
+ */ +static ino_t +ffs_dirpref(fs) + register struct fs *fs; +{ + int cg, minndir, mincg, avgifree; + + avgifree = fs->fs_cstotal.cs_nifree / fs->fs_ncg; + minndir = fs->fs_ipg; + mincg = 0; + for (cg = 0; cg < fs->fs_ncg; cg++) + if (fs->fs_cs(fs, cg).cs_ndir < minndir && + fs->fs_cs(fs, cg).cs_nifree >= avgifree) { + mincg = cg; + minndir = fs->fs_cs(fs, cg).cs_ndir; + } + return ((ino_t)(fs->fs_ipg * mincg)); +} + +/* + * Select the desired position for the next block in a file. The file is + * logically divided into sections. The first section is composed of the + * direct blocks. Each additional section contains fs_maxbpg blocks. + * + * If no blocks have been allocated in the first section, the policy is to + * request a block in the same cylinder group as the inode that describes + * the file. If no blocks have been allocated in any other section, the + * policy is to place the section in a cylinder group with a greater than + * average number of free blocks. An appropriate cylinder group is found + * by using a rotor that sweeps the cylinder groups. When a new group of + * blocks is needed, the sweep begins in the cylinder group following the + * cylinder group from which the previous allocation was made. The sweep + * continues until a cylinder group with greater than the average number + * of free blocks is found. If the allocation is for the first block in an + * indirect block, the information on the previous allocation is unavailable; + * here a best guess is made based upon the logical block number being + * allocated. + * + * If a section is already partially allocated, the policy is to + * contiguously allocate fs_maxcontig blocks. The end of one of these + * contiguous blocks and the beginning of the next is physically separated + * so that the disk head will be in transit between them for at least + * fs_rotdelay milliseconds. This is to allow time for the processor to + * schedule another I/O transfer. + */ +daddr_t +ffs_blkpref(ip, lbn, indx, bap) + struct inode *ip; + daddr_t lbn; + int indx; + daddr_t *bap; +{ + register struct fs *fs; + register int cg; + int avgbfree, startcg; + daddr_t nextblk; + + fs = ip->i_fs; + if (indx % fs->fs_maxbpg == 0 || bap[indx - 1] == 0) { + if (lbn < NDADDR) { + cg = ino_to_cg(fs, ip->i_number); + return (fs->fs_fpg * cg + fs->fs_frag); + } + /* + * Find a cylinder with greater than average number of + * unused data blocks. + */ + if (indx == 0 || bap[indx - 1] == 0) + startcg = + ino_to_cg(fs, ip->i_number) + lbn / fs->fs_maxbpg; + else + startcg = dtog(fs, bap[indx - 1]) + 1; + startcg %= fs->fs_ncg; + avgbfree = fs->fs_cstotal.cs_nbfree / fs->fs_ncg; + for (cg = startcg; cg < fs->fs_ncg; cg++) + if (fs->fs_cs(fs, cg).cs_nbfree >= avgbfree) { + fs->fs_cgrotor = cg; + return (fs->fs_fpg * cg + fs->fs_frag); + } + for (cg = 0; cg <= startcg; cg++) + if (fs->fs_cs(fs, cg).cs_nbfree >= avgbfree) { + fs->fs_cgrotor = cg; + return (fs->fs_fpg * cg + fs->fs_frag); + } + return (NULL); + } + /* + * One or more previous blocks have been laid out. If less + * than fs_maxcontig previous blocks are contiguous, the + * next block is requested contiguously, otherwise it is + * requested rotationally delayed by fs_rotdelay milliseconds. 
+ */ + nextblk = bap[indx - 1] + fs->fs_frag; + if (indx < fs->fs_maxcontig || bap[indx - fs->fs_maxcontig] + + blkstofrags(fs, fs->fs_maxcontig) != nextblk) + return (nextblk); + if (fs->fs_rotdelay != 0) + /* + * Here we convert ms of delay to frags as: + * (frags) = (ms) * (rev/sec) * (sect/rev) / + * ((sect/frag) * (ms/sec)) + * then round up to the next block. + */ + nextblk += roundup(fs->fs_rotdelay * fs->fs_rps * fs->fs_nsect / + (NSPF(fs) * 1000), fs->fs_frag); + return (nextblk); +} + +/* + * Implement the cylinder overflow algorithm. + * + * The policy implemented by this algorithm is: + * 1) allocate the block in its requested cylinder group. + * 2) quadradically rehash on the cylinder group number. + * 3) brute force search for a free block. + */ +/*VARARGS5*/ +static u_long +ffs_hashalloc(ip, cg, pref, size, allocator) + struct inode *ip; + int cg; + long pref; + int size; /* size for data blocks, mode for inodes */ + u_long (*allocator)(); +{ + register struct fs *fs; + long result; + int i, icg = cg; + + fs = ip->i_fs; + /* + * 1: preferred cylinder group + */ + result = (*allocator)(ip, cg, pref, size); + if (result) + return (result); + /* + * 2: quadratic rehash + */ + for (i = 1; i < fs->fs_ncg; i *= 2) { + cg += i; + if (cg >= fs->fs_ncg) + cg -= fs->fs_ncg; + result = (*allocator)(ip, cg, 0, size); + if (result) + return (result); + } + /* + * 3: brute force search + * Note that we start at i == 2, since 0 was checked initially, + * and 1 is always checked in the quadratic rehash. + */ + cg = (icg + 2) % fs->fs_ncg; + for (i = 2; i < fs->fs_ncg; i++) { + result = (*allocator)(ip, cg, 0, size); + if (result) + return (result); + cg++; + if (cg == fs->fs_ncg) + cg = 0; + } + return (NULL); +} + +/* + * Determine whether a fragment can be extended. + * + * Check to see if the necessary fragments are available, and + * if they are, allocate them. + */ +static daddr_t +ffs_fragextend(ip, cg, bprev, osize, nsize) + struct inode *ip; + int cg; + long bprev; + int osize, nsize; +{ + register struct fs *fs; + register struct cg *cgp; + struct buf *bp; + long bno; + int frags, bbase; + int i, error; + + fs = ip->i_fs; + if (fs->fs_cs(fs, cg).cs_nffree < numfrags(fs, nsize - osize)) + return (NULL); + frags = numfrags(fs, nsize); + bbase = fragnum(fs, bprev); + if (bbase > fragnum(fs, (bprev + frags - 1))) { + /* cannot extend across a block boundary */ + return (NULL); + } + error = bread(ip->i_devvp, fsbtodb(fs, cgtod(fs, cg)), + (int)fs->fs_cgsize, NOCRED, &bp); + if (error) { + brelse(bp); + return (NULL); + } + cgp = (struct cg *)bp->b_data; + if (!cg_chkmagic(cgp)) { + brelse(bp); + return (NULL); + } + cgp->cg_time = time.tv_sec; + bno = dtogd(fs, bprev); + for (i = numfrags(fs, osize); i < frags; i++) + if (isclr(cg_blksfree(cgp), bno + i)) { + brelse(bp); + return (NULL); + } + /* + * the current fragment can be extended + * deduct the count on fragment being extended into + * increase the count on the remaining fragment (if any) + * allocate the extended piece + */ + for (i = frags; i < fs->fs_frag - bbase; i++) + if (isclr(cg_blksfree(cgp), bno + i)) + break; + cgp->cg_frsum[i - numfrags(fs, osize)]--; + if (i != frags) + cgp->cg_frsum[i - frags]++; + for (i = numfrags(fs, osize); i < frags; i++) { + clrbit(cg_blksfree(cgp), bno + i); + cgp->cg_cs.cs_nffree--; + fs->fs_cstotal.cs_nffree--; + fs->fs_cs(fs, cg).cs_nffree--; + } + fs->fs_fmod = 1; + bdwrite(bp); + return (bprev); +} + +/* + * Determine whether a block can be allocated. 
+ * + * Check to see if a block of the appropriate size is available, + * and if it is, allocate it. + */ +static daddr_t +ffs_alloccg(ip, cg, bpref, size) + struct inode *ip; + int cg; + daddr_t bpref; + int size; +{ + register struct fs *fs; + register struct cg *cgp; + struct buf *bp; + register int i; + int error, bno, frags, allocsiz; + + fs = ip->i_fs; + if (fs->fs_cs(fs, cg).cs_nbfree == 0 && size == fs->fs_bsize) + return (NULL); + error = bread(ip->i_devvp, fsbtodb(fs, cgtod(fs, cg)), + (int)fs->fs_cgsize, NOCRED, &bp); + if (error) { + brelse(bp); + return (NULL); + } + cgp = (struct cg *)bp->b_data; + if (!cg_chkmagic(cgp) || + (cgp->cg_cs.cs_nbfree == 0 && size == fs->fs_bsize)) { + brelse(bp); + return (NULL); + } + cgp->cg_time = time.tv_sec; + if (size == fs->fs_bsize) { + bno = ffs_alloccgblk(fs, cgp, bpref); + bdwrite(bp); + return (bno); + } + /* + * check to see if any fragments are already available + * allocsiz is the size which will be allocated, hacking + * it down to a smaller size if necessary + */ + frags = numfrags(fs, size); + for (allocsiz = frags; allocsiz < fs->fs_frag; allocsiz++) + if (cgp->cg_frsum[allocsiz] != 0) + break; + if (allocsiz == fs->fs_frag) { + /* + * no fragments were available, so a block will be + * allocated, and hacked up + */ + if (cgp->cg_cs.cs_nbfree == 0) { + brelse(bp); + return (NULL); + } + bno = ffs_alloccgblk(fs, cgp, bpref); + bpref = dtogd(fs, bno); + for (i = frags; i < fs->fs_frag; i++) + setbit(cg_blksfree(cgp), bpref + i); + i = fs->fs_frag - frags; + cgp->cg_cs.cs_nffree += i; + fs->fs_cstotal.cs_nffree += i; + fs->fs_cs(fs, cg).cs_nffree += i; + fs->fs_fmod = 1; + cgp->cg_frsum[i]++; + bdwrite(bp); + return (bno); + } + bno = ffs_mapsearch(fs, cgp, bpref, allocsiz); + if (bno < 0) { + brelse(bp); + return (NULL); + } + for (i = 0; i < frags; i++) + clrbit(cg_blksfree(cgp), bno + i); + cgp->cg_cs.cs_nffree -= frags; + fs->fs_cstotal.cs_nffree -= frags; + fs->fs_cs(fs, cg).cs_nffree -= frags; + fs->fs_fmod = 1; + cgp->cg_frsum[allocsiz]--; + if (frags != allocsiz) + cgp->cg_frsum[allocsiz - frags]++; + bdwrite(bp); + return (cg * fs->fs_fpg + bno); +} + +/* + * Allocate a block in a cylinder group. + * + * This algorithm implements the following policy: + * 1) allocate the requested block. + * 2) allocate a rotationally optimal block in the same cylinder. + * 3) allocate the next available block on the block rotor for the + * specified cylinder group. + * Note that this routine only allocates fs_bsize blocks; these + * blocks may be fragmented by the routine that allocates them. + */ +static daddr_t +ffs_alloccgblk(fs, cgp, bpref) + register struct fs *fs; + register struct cg *cgp; + daddr_t bpref; +{ + daddr_t bno, blkno; + int cylno, pos, delta; + short *cylbp; + register int i; + + if (bpref == 0 || dtog(fs, bpref) != cgp->cg_cgx) { + bpref = cgp->cg_rotor; + goto norot; + } + bpref = blknum(fs, bpref); + bpref = dtogd(fs, bpref); + /* + * if the requested block is available, use it + */ + if (ffs_isblock(fs, cg_blksfree(cgp), fragstoblks(fs, bpref))) { + bno = bpref; + goto gotit; + } + /* + * check for a block available on the same cylinder + */ + cylno = cbtocylno(fs, bpref); + if (cg_blktot(cgp)[cylno] == 0) + goto norot; + if (fs->fs_cpc == 0) { + /* + * Block layout information is not available. + * Leaving bpref unchanged means we take the + * next available free block following the one + * we just allocated. Hopefully this will at + * least hit a track cache on drives of unknown + * geometry (e.g. SCSI). 
+ */ + goto norot; + } + /* + * check the summary information to see if a block is + * available in the requested cylinder starting at the + * requested rotational position and proceeding around. + */ + cylbp = cg_blks(fs, cgp, cylno); + pos = cbtorpos(fs, bpref); + for (i = pos; i < fs->fs_nrpos; i++) + if (cylbp[i] > 0) + break; + if (i == fs->fs_nrpos) + for (i = 0; i < pos; i++) + if (cylbp[i] > 0) + break; + if (cylbp[i] > 0) { + /* + * found a rotational position, now find the actual + * block. A panic if none is actually there. + */ + pos = cylno % fs->fs_cpc; + bno = (cylno - pos) * fs->fs_spc / NSPB(fs); + if (fs_postbl(fs, pos)[i] == -1) { + printf("pos = %d, i = %d, fs = %s\n", + pos, i, fs->fs_fsmnt); + panic("ffs_alloccgblk: cyl groups corrupted"); + } + for (i = fs_postbl(fs, pos)[i];; ) { + if (ffs_isblock(fs, cg_blksfree(cgp), bno + i)) { + bno = blkstofrags(fs, (bno + i)); + goto gotit; + } + delta = fs_rotbl(fs)[i]; + if (delta <= 0 || + delta + i > fragstoblks(fs, fs->fs_fpg)) + break; + i += delta; + } + printf("pos = %d, i = %d, fs = %s\n", pos, i, fs->fs_fsmnt); + panic("ffs_alloccgblk: can't find blk in cyl"); + } +norot: + /* + * no blocks in the requested cylinder, so take next + * available one in this cylinder group. + */ + bno = ffs_mapsearch(fs, cgp, bpref, (int)fs->fs_frag); + if (bno < 0) + return (NULL); + cgp->cg_rotor = bno; +gotit: + blkno = fragstoblks(fs, bno); + ffs_clrblock(fs, cg_blksfree(cgp), (long)blkno); + ffs_clusteracct(fs, cgp, blkno, -1); + cgp->cg_cs.cs_nbfree--; + fs->fs_cstotal.cs_nbfree--; + fs->fs_cs(fs, cgp->cg_cgx).cs_nbfree--; + cylno = cbtocylno(fs, bno); + cg_blks(fs, cgp, cylno)[cbtorpos(fs, bno)]--; + cg_blktot(cgp)[cylno]--; + fs->fs_fmod = 1; + return (cgp->cg_cgx * fs->fs_fpg + bno); +} + +/* + * Determine whether a cluster can be allocated. + * + * We do not currently check for optimal rotational layout if there + * are multiple choices in the same cylinder group. Instead we just + * take the first one that we find following bpref. + */ +static daddr_t +ffs_clusteralloc(ip, cg, bpref, len) + struct inode *ip; + int cg; + daddr_t bpref; + int len; +{ + register struct fs *fs; + register struct cg *cgp; + struct buf *bp; + int i, run, bno, bit, map; + u_char *mapp; + + fs = ip->i_fs; + if (fs->fs_cs(fs, cg).cs_nbfree < len) + return (NULL); + if (bread(ip->i_devvp, fsbtodb(fs, cgtod(fs, cg)), (int)fs->fs_cgsize, + NOCRED, &bp)) + goto fail; + cgp = (struct cg *)bp->b_data; + if (!cg_chkmagic(cgp)) + goto fail; + /* + * Check to see if a cluster of the needed size (or bigger) is + * available in this cylinder group. + */ + for (i = len; i <= fs->fs_contigsumsize; i++) + if (cg_clustersum(cgp)[i] > 0) + break; + if (i > fs->fs_contigsumsize) + goto fail; + /* + * Search the cluster map to find a big enough cluster. + * We take the first one that we find, even if it is larger + * than we need as we prefer to get one close to the previous + * block allocation. We do not search before the current + * preference point as we do not want to allocate a block + * that is allocated before the previous one (as we will + * then have to wait for another pass of the elevator + * algorithm before it will be read). We prefer to fail and + * be recalled to try an allocation in the next cylinder group. 
+ */ + if (dtog(fs, bpref) != cg) + bpref = 0; + else + bpref = fragstoblks(fs, dtogd(fs, blknum(fs, bpref))); + mapp = &cg_clustersfree(cgp)[bpref / NBBY]; + map = *mapp++; + bit = 1 << (bpref % NBBY); + for (run = 0, i = bpref; i < cgp->cg_nclusterblks; i++) { + if ((map & bit) == 0) { + run = 0; + } else { + run++; + if (run == len) + break; + } + if ((i & (NBBY - 1)) != (NBBY - 1)) { + bit <<= 1; + } else { + map = *mapp++; + bit = 1; + } + } + if (i == cgp->cg_nclusterblks) + goto fail; + /* + * Allocate the cluster that we have found. + */ + bno = cg * fs->fs_fpg + blkstofrags(fs, i - run + 1); + len = blkstofrags(fs, len); + for (i = 0; i < len; i += fs->fs_frag) + if (ffs_alloccgblk(fs, cgp, bno + i) != bno + i) + panic("ffs_clusteralloc: lost block"); + brelse(bp); + return (bno); + +fail: + brelse(bp); + return (0); +} + +/* + * Determine whether an inode can be allocated. + * + * Check to see if an inode is available, and if it is, + * allocate it using the following policy: + * 1) allocate the requested inode. + * 2) allocate the next available inode after the requested + * inode in the specified cylinder group. + */ +static ino_t +ffs_nodealloccg(ip, cg, ipref, mode) + struct inode *ip; + int cg; + daddr_t ipref; + int mode; +{ + register struct fs *fs; + register struct cg *cgp; + struct buf *bp; + int error, start, len, loc, map, i; + + fs = ip->i_fs; + if (fs->fs_cs(fs, cg).cs_nifree == 0) + return (NULL); + error = bread(ip->i_devvp, fsbtodb(fs, cgtod(fs, cg)), + (int)fs->fs_cgsize, NOCRED, &bp); + if (error) { + brelse(bp); + return (NULL); + } + cgp = (struct cg *)bp->b_data; + if (!cg_chkmagic(cgp) || cgp->cg_cs.cs_nifree == 0) { + brelse(bp); + return (NULL); + } + cgp->cg_time = time.tv_sec; + if (ipref) { + ipref %= fs->fs_ipg; + if (isclr(cg_inosused(cgp), ipref)) + goto gotit; + } + start = cgp->cg_irotor / NBBY; + len = howmany(fs->fs_ipg - cgp->cg_irotor, NBBY); + loc = skpc(0xff, len, &cg_inosused(cgp)[start]); + if (loc == 0) { + len = start + 1; + start = 0; + loc = skpc(0xff, len, &cg_inosused(cgp)[0]); + if (loc == 0) { + printf("cg = %d, irotor = %d, fs = %s\n", + cg, cgp->cg_irotor, fs->fs_fsmnt); + panic("ffs_nodealloccg: map corrupted"); + /* NOTREACHED */ + } + } + i = start + len - loc; + map = cg_inosused(cgp)[i]; + ipref = i * NBBY; + for (i = 1; i < (1 << NBBY); i <<= 1, ipref++) { + if ((map & i) == 0) { + cgp->cg_irotor = ipref; + goto gotit; + } + } + printf("fs = %s\n", fs->fs_fsmnt); + panic("ffs_nodealloccg: block not in map"); + /* NOTREACHED */ +gotit: + setbit(cg_inosused(cgp), ipref); + cgp->cg_cs.cs_nifree--; + fs->fs_cstotal.cs_nifree--; + fs->fs_cs(fs, cg).cs_nifree--; + fs->fs_fmod = 1; + if ((mode & IFMT) == IFDIR) { + cgp->cg_cs.cs_ndir++; + fs->fs_cstotal.cs_ndir++; + fs->fs_cs(fs, cg).cs_ndir++; + } + bdwrite(bp); + return (cg * fs->fs_ipg + ipref); +} + +/* + * Free a block or fragment. + * + * The specified block or fragment is placed back in the + * free map. If a fragment is deallocated, a possible + * block reassembly is checked. 
+ */ +ffs_blkfree(ip, bno, size) + register struct inode *ip; + daddr_t bno; + long size; +{ + register struct fs *fs; + register struct cg *cgp; + struct buf *bp; + daddr_t blkno; + int i, error, cg, blk, frags, bbase; + + fs = ip->i_fs; + if ((u_int)size > fs->fs_bsize || fragoff(fs, size) != 0) { + printf("dev = 0x%x, bsize = %d, size = %d, fs = %s\n", + ip->i_dev, fs->fs_bsize, size, fs->fs_fsmnt); + panic("blkfree: bad size"); + } + cg = dtog(fs, bno); + if ((u_int)bno >= fs->fs_size) { + printf("bad block %d, ino %d\n", bno, ip->i_number); + ffs_fserr(fs, ip->i_uid, "bad block"); + return; + } + error = bread(ip->i_devvp, fsbtodb(fs, cgtod(fs, cg)), + (int)fs->fs_cgsize, NOCRED, &bp); + if (error) { + brelse(bp); + return; + } + cgp = (struct cg *)bp->b_data; + if (!cg_chkmagic(cgp)) { + brelse(bp); + return; + } + cgp->cg_time = time.tv_sec; + bno = dtogd(fs, bno); + if (size == fs->fs_bsize) { + blkno = fragstoblks(fs, bno); + if (ffs_isblock(fs, cg_blksfree(cgp), blkno)) { + printf("dev = 0x%x, block = %d, fs = %s\n", + ip->i_dev, bno, fs->fs_fsmnt); + panic("blkfree: freeing free block"); + } + ffs_setblock(fs, cg_blksfree(cgp), blkno); + ffs_clusteracct(fs, cgp, blkno, 1); + cgp->cg_cs.cs_nbfree++; + fs->fs_cstotal.cs_nbfree++; + fs->fs_cs(fs, cg).cs_nbfree++; + i = cbtocylno(fs, bno); + cg_blks(fs, cgp, i)[cbtorpos(fs, bno)]++; + cg_blktot(cgp)[i]++; + } else { + bbase = bno - fragnum(fs, bno); + /* + * decrement the counts associated with the old frags + */ + blk = blkmap(fs, cg_blksfree(cgp), bbase); + ffs_fragacct(fs, blk, cgp->cg_frsum, -1); + /* + * deallocate the fragment + */ + frags = numfrags(fs, size); + for (i = 0; i < frags; i++) { + if (isset(cg_blksfree(cgp), bno + i)) { + printf("dev = 0x%x, block = %d, fs = %s\n", + ip->i_dev, bno + i, fs->fs_fsmnt); + panic("blkfree: freeing free frag"); + } + setbit(cg_blksfree(cgp), bno + i); + } + cgp->cg_cs.cs_nffree += i; + fs->fs_cstotal.cs_nffree += i; + fs->fs_cs(fs, cg).cs_nffree += i; + /* + * add back in counts associated with the new frags + */ + blk = blkmap(fs, cg_blksfree(cgp), bbase); + ffs_fragacct(fs, blk, cgp->cg_frsum, 1); + /* + * if a complete block has been reassembled, account for it + */ + blkno = fragstoblks(fs, bbase); + if (ffs_isblock(fs, cg_blksfree(cgp), blkno)) { + cgp->cg_cs.cs_nffree -= fs->fs_frag; + fs->fs_cstotal.cs_nffree -= fs->fs_frag; + fs->fs_cs(fs, cg).cs_nffree -= fs->fs_frag; + ffs_clusteracct(fs, cgp, blkno, 1); + cgp->cg_cs.cs_nbfree++; + fs->fs_cstotal.cs_nbfree++; + fs->fs_cs(fs, cg).cs_nbfree++; + i = cbtocylno(fs, bbase); + cg_blks(fs, cgp, i)[cbtorpos(fs, bbase)]++; + cg_blktot(cgp)[i]++; + } + } + fs->fs_fmod = 1; + bdwrite(bp); +} + +/* + * Free an inode. + * + * The specified inode is placed back in the free map. 
+ */ +int +ffs_vfree(ap) + struct vop_vfree_args /* { + struct vnode *a_pvp; + ino_t a_ino; + int a_mode; + } */ *ap; +{ + register struct fs *fs; + register struct cg *cgp; + register struct inode *pip; + ino_t ino = ap->a_ino; + struct buf *bp; + int error, cg; + + pip = VTOI(ap->a_pvp); + fs = pip->i_fs; + if ((u_int)ino >= fs->fs_ipg * fs->fs_ncg) + panic("ifree: range: dev = 0x%x, ino = %d, fs = %s\n", + pip->i_dev, ino, fs->fs_fsmnt); + cg = ino_to_cg(fs, ino); + error = bread(pip->i_devvp, fsbtodb(fs, cgtod(fs, cg)), + (int)fs->fs_cgsize, NOCRED, &bp); + if (error) { + brelse(bp); + return (0); + } + cgp = (struct cg *)bp->b_data; + if (!cg_chkmagic(cgp)) { + brelse(bp); + return (0); + } + cgp->cg_time = time.tv_sec; + ino %= fs->fs_ipg; + if (isclr(cg_inosused(cgp), ino)) { + printf("dev = 0x%x, ino = %d, fs = %s\n", + pip->i_dev, ino, fs->fs_fsmnt); + if (fs->fs_ronly == 0) + panic("ifree: freeing free inode"); + } + clrbit(cg_inosused(cgp), ino); + if (ino < cgp->cg_irotor) + cgp->cg_irotor = ino; + cgp->cg_cs.cs_nifree++; + fs->fs_cstotal.cs_nifree++; + fs->fs_cs(fs, cg).cs_nifree++; + if ((ap->a_mode & IFMT) == IFDIR) { + cgp->cg_cs.cs_ndir--; + fs->fs_cstotal.cs_ndir--; + fs->fs_cs(fs, cg).cs_ndir--; + } + fs->fs_fmod = 1; + bdwrite(bp); + return (0); +} + +/* + * Find a block of the specified size in the specified cylinder group. + * + * It is a panic if a request is made to find a block if none are + * available. + */ +static daddr_t +ffs_mapsearch(fs, cgp, bpref, allocsiz) + register struct fs *fs; + register struct cg *cgp; + daddr_t bpref; + int allocsiz; +{ + daddr_t bno; + int start, len, loc, i; + int blk, field, subfield, pos; + + /* + * find the fragment by searching through the free block + * map for an appropriate bit pattern + */ + if (bpref) + start = dtogd(fs, bpref) / NBBY; + else + start = cgp->cg_frotor / NBBY; + len = howmany(fs->fs_fpg, NBBY) - start; + loc = scanc((u_int)len, (u_char *)&cg_blksfree(cgp)[start], + (u_char *)fragtbl[fs->fs_frag], + (u_char)(1 << (allocsiz - 1 + (fs->fs_frag % NBBY)))); + if (loc == 0) { + len = start + 1; + start = 0; + loc = scanc((u_int)len, (u_char *)&cg_blksfree(cgp)[0], + (u_char *)fragtbl[fs->fs_frag], + (u_char)(1 << (allocsiz - 1 + (fs->fs_frag % NBBY)))); + if (loc == 0) { + printf("start = %d, len = %d, fs = %s\n", + start, len, fs->fs_fsmnt); + panic("ffs_alloccg: map corrupted"); + /* NOTREACHED */ + } + } + bno = (start + len - loc) * NBBY; + cgp->cg_frotor = bno; + /* + * found the byte in the map + * sift through the bits to find the selected frag + */ + for (i = bno + NBBY; bno < i; bno += fs->fs_frag) { + blk = blkmap(fs, cg_blksfree(cgp), bno); + blk <<= 1; + field = around[allocsiz]; + subfield = inside[allocsiz]; + for (pos = 0; pos <= fs->fs_frag - allocsiz; pos++) { + if ((blk & field) == subfield) + return (bno + pos); + field <<= 1; + subfield <<= 1; + } + } + printf("bno = %d, fs = %s\n", bno, fs->fs_fsmnt); + panic("ffs_alloccg: block not in map"); + return (-1); +} + +/* + * Update the cluster map because of an allocation or free. + * + * Cnt == 1 means free; cnt == -1 means allocating. + */ +ffs_clusteracct(fs, cgp, blkno, cnt) + struct fs *fs; + struct cg *cgp; + daddr_t blkno; + int cnt; +{ + long *sump; + u_char *freemapp, *mapp; + int i, start, end, forw, back, map, bit; + + if (fs->fs_contigsumsize <= 0) + return; + freemapp = cg_clustersfree(cgp); + sump = cg_clustersum(cgp); + /* + * Allocate or clear the actual block. 
+ */ + if (cnt > 0) + setbit(freemapp, blkno); + else + clrbit(freemapp, blkno); + /* + * Find the size of the cluster going forward. + */ + start = blkno + 1; + end = start + fs->fs_contigsumsize; + if (end >= cgp->cg_nclusterblks) + end = cgp->cg_nclusterblks; + mapp = &freemapp[start / NBBY]; + map = *mapp++; + bit = 1 << (start % NBBY); + for (i = start; i < end; i++) { + if ((map & bit) == 0) + break; + if ((i & (NBBY - 1)) != (NBBY - 1)) { + bit <<= 1; + } else { + map = *mapp++; + bit = 1; + } + } + forw = i - start; + /* + * Find the size of the cluster going backward. + */ + start = blkno - 1; + end = start - fs->fs_contigsumsize; + if (end < 0) + end = -1; + mapp = &freemapp[start / NBBY]; + map = *mapp--; + bit = 1 << (start % NBBY); + for (i = start; i > end; i--) { + if ((map & bit) == 0) + break; + if ((i & (NBBY - 1)) != 0) { + bit >>= 1; + } else { + map = *mapp--; + bit = 1 << (NBBY - 1); + } + } + back = start - i; + /* + * Account for old cluster and the possibly new forward and + * back clusters. + */ + i = back + forw + 1; + if (i > fs->fs_contigsumsize) + i = fs->fs_contigsumsize; + sump[i] += cnt; + if (back > 0) + sump[back] -= cnt; + if (forw > 0) + sump[forw] -= cnt; +} + +/* + * Fserr prints the name of a file system with an error diagnostic. + * + * The form of the error message is: + * fs: error message + */ +static void +ffs_fserr(fs, uid, cp) + struct fs *fs; + u_int uid; + char *cp; +{ + + log(LOG_ERR, "uid %d on %s: %s\n", uid, fs->fs_fsmnt, cp); +} diff --git a/sys/ufs/ffs/ffs_balloc.c b/sys/ufs/ffs/ffs_balloc.c new file mode 100644 index 0000000..752feec --- /dev/null +++ b/sys/ufs/ffs/ffs_balloc.c @@ -0,0 +1,282 @@ +/* + * Copyright (c) 1982, 1986, 1989, 1993 + * The Regents of the University of California. All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * 3. All advertising materials mentioning features or use of this software + * must display the following acknowledgement: + * This product includes software developed by the University of + * California, Berkeley and its contributors. + * 4. Neither the name of the University nor the names of its contributors + * may be used to endorse or promote products derived from this software + * without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND + * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE + * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL + * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS + * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) + * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT + * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY + * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF + * SUCH DAMAGE. 
+ * + * @(#)ffs_balloc.c 8.4 (Berkeley) 9/23/93 + */ + +#include <sys/param.h> +#include <sys/systm.h> +#include <sys/buf.h> +#include <sys/proc.h> +#include <sys/file.h> +#include <sys/vnode.h> + +#include <vm/vm.h> + +#include <ufs/ufs/quota.h> +#include <ufs/ufs/inode.h> +#include <ufs/ufs/ufs_extern.h> + +#include <ufs/ffs/fs.h> +#include <ufs/ffs/ffs_extern.h> + +/* + * Balloc defines the structure of file system storage + * by allocating the physical blocks on a device given + * the inode and the logical block number in a file. + */ +ffs_balloc(ip, bn, size, cred, bpp, flags) + register struct inode *ip; + register daddr_t bn; + int size; + struct ucred *cred; + struct buf **bpp; + int flags; +{ + register struct fs *fs; + register daddr_t nb; + struct buf *bp, *nbp; + struct vnode *vp = ITOV(ip); + struct indir indirs[NIADDR + 2]; + daddr_t newb, lbn, *bap, pref; + int osize, nsize, num, i, error; + + *bpp = NULL; + if (bn < 0) + return (EFBIG); + fs = ip->i_fs; + lbn = bn; + + /* + * If the next write will extend the file into a new block, + * and the file is currently composed of a fragment + * this fragment has to be extended to be a full block. + */ + nb = lblkno(fs, ip->i_size); + if (nb < NDADDR && nb < bn) { + osize = blksize(fs, ip, nb); + if (osize < fs->fs_bsize && osize > 0) { + error = ffs_realloccg(ip, nb, + ffs_blkpref(ip, nb, (int)nb, &ip->i_db[0]), + osize, (int)fs->fs_bsize, cred, &bp); + if (error) + return (error); + ip->i_size = (nb + 1) * fs->fs_bsize; + vnode_pager_setsize(vp, (u_long)ip->i_size); + ip->i_db[nb] = dbtofsb(fs, bp->b_blkno); + ip->i_flag |= IN_CHANGE | IN_UPDATE; + if (flags & B_SYNC) + bwrite(bp); + else + bawrite(bp); + } + } + /* + * The first NDADDR blocks are direct blocks + */ + if (bn < NDADDR) { + nb = ip->i_db[bn]; + if (nb != 0 && ip->i_size >= (bn + 1) * fs->fs_bsize) { + error = bread(vp, bn, fs->fs_bsize, NOCRED, &bp); + if (error) { + brelse(bp); + return (error); + } + *bpp = bp; + return (0); + } + if (nb != 0) { + /* + * Consider need to reallocate a fragment. + */ + osize = fragroundup(fs, blkoff(fs, ip->i_size)); + nsize = fragroundup(fs, size); + if (nsize <= osize) { + error = bread(vp, bn, osize, NOCRED, &bp); + if (error) { + brelse(bp); + return (error); + } + } else { + error = ffs_realloccg(ip, bn, + ffs_blkpref(ip, bn, (int)bn, &ip->i_db[0]), + osize, nsize, cred, &bp); + if (error) + return (error); + } + } else { + if (ip->i_size < (bn + 1) * fs->fs_bsize) + nsize = fragroundup(fs, size); + else + nsize = fs->fs_bsize; + error = ffs_alloc(ip, bn, + ffs_blkpref(ip, bn, (int)bn, &ip->i_db[0]), + nsize, cred, &newb); + if (error) + return (error); + bp = getblk(vp, bn, nsize, 0, 0); + bp->b_blkno = fsbtodb(fs, newb); + if (flags & B_CLRBUF) + clrbuf(bp); + } + ip->i_db[bn] = dbtofsb(fs, bp->b_blkno); + ip->i_flag |= IN_CHANGE | IN_UPDATE; + *bpp = bp; + return (0); + } + /* + * Determine the number of levels of indirection. + */ + pref = 0; + if (error = ufs_getlbns(vp, bn, indirs, &num)) + return(error); +#ifdef DIAGNOSTIC + if (num < 1) + panic ("ffs_balloc: ufs_bmaparray returned indirect block\n"); +#endif + /* + * Fetch the first indirect block allocating if necessary. 
+ */ + --num; + nb = ip->i_ib[indirs[0].in_off]; + if (nb == 0) { + pref = ffs_blkpref(ip, lbn, 0, (daddr_t *)0); + if (error = ffs_alloc(ip, lbn, pref, (int)fs->fs_bsize, + cred, &newb)) + return (error); + nb = newb; + bp = getblk(vp, indirs[1].in_lbn, fs->fs_bsize, 0, 0); + bp->b_blkno = fsbtodb(fs, newb); + clrbuf(bp); + /* + * Write synchronously so that indirect blocks + * never point at garbage. + */ + if (error = bwrite(bp)) { + ffs_blkfree(ip, nb, fs->fs_bsize); + return (error); + } + ip->i_ib[indirs[0].in_off] = newb; + ip->i_flag |= IN_CHANGE | IN_UPDATE; + } + /* + * Fetch through the indirect blocks, allocating as necessary. + */ + for (i = 1;;) { + error = bread(vp, + indirs[i].in_lbn, (int)fs->fs_bsize, NOCRED, &bp); + if (error) { + brelse(bp); + return (error); + } + bap = (daddr_t *)bp->b_data; + nb = bap[indirs[i].in_off]; + if (i == num) + break; + i += 1; + if (nb != 0) { + brelse(bp); + continue; + } + if (pref == 0) + pref = ffs_blkpref(ip, lbn, 0, (daddr_t *)0); + if (error = + ffs_alloc(ip, lbn, pref, (int)fs->fs_bsize, cred, &newb)) { + brelse(bp); + return (error); + } + nb = newb; + nbp = getblk(vp, indirs[i].in_lbn, fs->fs_bsize, 0, 0); + nbp->b_blkno = fsbtodb(fs, nb); + clrbuf(nbp); + /* + * Write synchronously so that indirect blocks + * never point at garbage. + */ + if (error = bwrite(nbp)) { + ffs_blkfree(ip, nb, fs->fs_bsize); + brelse(bp); + return (error); + } + bap[indirs[i - 1].in_off] = nb; + /* + * If required, write synchronously, otherwise use + * delayed write. + */ + if (flags & B_SYNC) { + bwrite(bp); + } else { + bdwrite(bp); + } + } + /* + * Get the data block, allocating if necessary. + */ + if (nb == 0) { + pref = ffs_blkpref(ip, lbn, indirs[i].in_off, &bap[0]); + if (error = ffs_alloc(ip, + lbn, pref, (int)fs->fs_bsize, cred, &newb)) { + brelse(bp); + return (error); + } + nb = newb; + nbp = getblk(vp, lbn, fs->fs_bsize, 0, 0); + nbp->b_blkno = fsbtodb(fs, nb); + if (flags & B_CLRBUF) + clrbuf(nbp); + bap[indirs[i].in_off] = nb; + /* + * If required, write synchronously, otherwise use + * delayed write. + */ + if (flags & B_SYNC) { + bwrite(bp); + } else { + bdwrite(bp); + } + *bpp = nbp; + return (0); + } + brelse(bp); + if (flags & B_CLRBUF) { + error = bread(vp, lbn, (int)fs->fs_bsize, NOCRED, &nbp); + if (error) { + brelse(nbp); + return (error); + } + } else { + nbp = getblk(vp, lbn, fs->fs_bsize, 0, 0); + nbp->b_blkno = fsbtodb(fs, nb); + } + *bpp = nbp; + return (0); +} diff --git a/sys/ufs/ffs/ffs_extern.h b/sys/ufs/ffs/ffs_extern.h new file mode 100644 index 0000000..ab467a2 --- /dev/null +++ b/sys/ufs/ffs/ffs_extern.h @@ -0,0 +1,101 @@ +/*- + * Copyright (c) 1991, 1993, 1994 + * The Regents of the University of California. All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * 3. All advertising materials mentioning features or use of this software + * must display the following acknowledgement: + * This product includes software developed by the University of + * California, Berkeley and its contributors. + * 4. 
Neither the name of the University nor the names of its contributors + * may be used to endorse or promote products derived from this software + * without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND + * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE + * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL + * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS + * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) + * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT + * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY + * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF + * SUCH DAMAGE. + * + * @(#)ffs_extern.h 8.3 (Berkeley) 4/16/94 + */ + +struct buf; +struct fid; +struct fs; +struct inode; +struct mount; +struct nameidata; +struct proc; +struct statfs; +struct timeval; +struct ucred; +struct uio; +struct vnode; +struct mbuf; + +__BEGIN_DECLS +int ffs_alloc __P((struct inode *, + daddr_t, daddr_t, int, struct ucred *, daddr_t *)); +int ffs_balloc __P((struct inode *, + daddr_t, int, struct ucred *, struct buf **, int)); +int ffs_blkatoff __P((struct vop_blkatoff_args *)); +int ffs_blkfree __P((struct inode *, daddr_t, long)); +daddr_t ffs_blkpref __P((struct inode *, daddr_t, int, daddr_t *)); +int ffs_bmap __P((struct vop_bmap_args *)); +void ffs_clrblock __P((struct fs *, u_char *, daddr_t)); +int ffs_fhtovp __P((struct mount *, struct fid *, struct mbuf *, + struct vnode **, int *, struct ucred **)); +void ffs_fragacct __P((struct fs *, int, long [], int)); +int ffs_fsync __P((struct vop_fsync_args *)); +int ffs_init __P((void)); +int ffs_isblock __P((struct fs *, u_char *, daddr_t)); +int ffs_mount __P((struct mount *, + char *, caddr_t, struct nameidata *, struct proc *)); +int ffs_mountfs __P((struct vnode *, struct mount *, struct proc *)); +int ffs_mountroot __P((void)); +int ffs_read __P((struct vop_read_args *)); +int ffs_reallocblks __P((struct vop_reallocblks_args *)); +int ffs_realloccg __P((struct inode *, + daddr_t, daddr_t, int, int, struct ucred *, struct buf **)); +int ffs_reclaim __P((struct vop_reclaim_args *)); +void ffs_setblock __P((struct fs *, u_char *, daddr_t)); +int ffs_statfs __P((struct mount *, struct statfs *, struct proc *)); +int ffs_sync __P((struct mount *, int, struct ucred *, struct proc *)); +int ffs_truncate __P((struct vop_truncate_args *)); +int ffs_unmount __P((struct mount *, int, struct proc *)); +int ffs_update __P((struct vop_update_args *)); +int ffs_valloc __P((struct vop_valloc_args *)); +int ffs_vfree __P((struct vop_vfree_args *)); +int ffs_vget __P((struct mount *, ino_t, struct vnode **)); +int ffs_vptofh __P((struct vnode *, struct fid *)); +int ffs_write __P((struct vop_write_args *)); + +int bwrite(); /* FFS needs a bwrite routine. 
XXX */ + +#ifdef DIAGNOSTIC +void ffs_checkoverlap __P((struct buf *, struct inode *)); +#endif +__END_DECLS + +extern int (**ffs_vnodeop_p)(); +extern int (**ffs_specop_p)(); +#ifdef FIFO +extern int (**ffs_fifoop_p)(); +#define FFS_FIFOOPS ffs_fifoop_p +#else +#define FFS_FIFOOPS NULL +#endif diff --git a/sys/ufs/ffs/ffs_inode.c b/sys/ufs/ffs/ffs_inode.c new file mode 100644 index 0000000..b45aee5 --- /dev/null +++ b/sys/ufs/ffs/ffs_inode.c @@ -0,0 +1,488 @@ +/* + * Copyright (c) 1982, 1986, 1989, 1993 + * The Regents of the University of California. All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * 3. All advertising materials mentioning features or use of this software + * must display the following acknowledgement: + * This product includes software developed by the University of + * California, Berkeley and its contributors. + * 4. Neither the name of the University nor the names of its contributors + * may be used to endorse or promote products derived from this software + * without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND + * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE + * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL + * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS + * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) + * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT + * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY + * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF + * SUCH DAMAGE. + * + * @(#)ffs_inode.c 8.5 (Berkeley) 12/30/93 + */ + +#include <sys/param.h> +#include <sys/systm.h> +#include <sys/mount.h> +#include <sys/proc.h> +#include <sys/file.h> +#include <sys/buf.h> +#include <sys/vnode.h> +#include <sys/kernel.h> +#include <sys/malloc.h> +#include <sys/trace.h> +#include <sys/resourcevar.h> + +#include <vm/vm.h> + +#include <ufs/ufs/quota.h> +#include <ufs/ufs/inode.h> +#include <ufs/ufs/ufsmount.h> +#include <ufs/ufs/ufs_extern.h> + +#include <ufs/ffs/fs.h> +#include <ufs/ffs/ffs_extern.h> + +static int ffs_indirtrunc __P((struct inode *, daddr_t, daddr_t, daddr_t, int, + long *)); + +int +ffs_init() +{ + return (ufs_init()); +} + +/* + * Update the access, modified, and inode change times as specified by the + * IACCESS, IUPDATE, and ICHANGE flags respectively. The IMODIFIED flag is + * used to specify that the inode needs to be updated but that the times have + * already been set. The access and modified times are taken from the second + * and third parameters; the inode change time is always taken from the current + * time. If waitfor is set, then wait for the disk write of the inode to + * complete. 
+ */ +int +ffs_update(ap) + struct vop_update_args /* { + struct vnode *a_vp; + struct timeval *a_access; + struct timeval *a_modify; + int a_waitfor; + } */ *ap; +{ + register struct fs *fs; + struct buf *bp; + struct inode *ip; + int error; + + ip = VTOI(ap->a_vp); + if (ap->a_vp->v_mount->mnt_flag & MNT_RDONLY) { + ip->i_flag &= + ~(IN_ACCESS | IN_CHANGE | IN_MODIFIED | IN_UPDATE); + return (0); + } + if ((ip->i_flag & + (IN_ACCESS | IN_CHANGE | IN_MODIFIED | IN_UPDATE)) == 0) + return (0); + if (ip->i_flag & IN_ACCESS) + ip->i_atime.ts_sec = ap->a_access->tv_sec; + if (ip->i_flag & IN_UPDATE) { + ip->i_mtime.ts_sec = ap->a_modify->tv_sec; + ip->i_modrev++; + } + if (ip->i_flag & IN_CHANGE) + ip->i_ctime.ts_sec = time.tv_sec; + ip->i_flag &= ~(IN_ACCESS | IN_CHANGE | IN_MODIFIED | IN_UPDATE); + fs = ip->i_fs; + /* + * Ensure that uid and gid are correct. This is a temporary + * fix until fsck has been changed to do the update. + */ + if (fs->fs_inodefmt < FS_44INODEFMT) { /* XXX */ + ip->i_din.di_ouid = ip->i_uid; /* XXX */ + ip->i_din.di_ogid = ip->i_gid; /* XXX */ + } /* XXX */ + if (error = bread(ip->i_devvp, + fsbtodb(fs, ino_to_fsba(fs, ip->i_number)), + (int)fs->fs_bsize, NOCRED, &bp)) { + brelse(bp); + return (error); + } + *((struct dinode *)bp->b_data + + ino_to_fsbo(fs, ip->i_number)) = ip->i_din; + if (ap->a_waitfor) + return (bwrite(bp)); + else { + bdwrite(bp); + return (0); + } +} + +#define SINGLE 0 /* index of single indirect block */ +#define DOUBLE 1 /* index of double indirect block */ +#define TRIPLE 2 /* index of triple indirect block */ +/* + * Truncate the inode oip to at most length size, freeing the + * disk blocks. + */ +ffs_truncate(ap) + struct vop_truncate_args /* { + struct vnode *a_vp; + off_t a_length; + int a_flags; + struct ucred *a_cred; + struct proc *a_p; + } */ *ap; +{ + register struct vnode *ovp = ap->a_vp; + register daddr_t lastblock; + register struct inode *oip; + daddr_t bn, lbn, lastiblock[NIADDR], indir_lbn[NIADDR]; + daddr_t oldblks[NDADDR + NIADDR], newblks[NDADDR + NIADDR]; + off_t length = ap->a_length; + register struct fs *fs; + struct buf *bp; + int offset, size, level; + long count, nblocks, vflags, blocksreleased = 0; + struct timeval tv; + register int i; + int aflags, error, allerror; + off_t osize; + + oip = VTOI(ovp); + tv = time; + if (ovp->v_type == VLNK && + oip->i_size < ovp->v_mount->mnt_maxsymlinklen) { +#ifdef DIAGNOSTIC + if (length != 0) + panic("ffs_truncate: partial truncate of symlink"); +#endif + bzero((char *)&oip->i_shortlink, (u_int)oip->i_size); + oip->i_size = 0; + oip->i_flag |= IN_CHANGE | IN_UPDATE; + return (VOP_UPDATE(ovp, &tv, &tv, 1)); + } + if (oip->i_size == length) { + oip->i_flag |= IN_CHANGE | IN_UPDATE; + return (VOP_UPDATE(ovp, &tv, &tv, 0)); + } +#ifdef QUOTA + if (error = getinoquota(oip)) + return (error); +#endif + vnode_pager_setsize(ovp, (u_long)length); + fs = oip->i_fs; + osize = oip->i_size; + /* + * Lengthen the size of the file. We must ensure that the + * last byte of the file is allocated. Since the smallest + * value of oszie is 0, length will be at least 1. 
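+ * For example, assuming an 8192 byte block size, growing a 100 byte file
+ * to length 8192 calls ffs_balloc() for logical block 0 with a size of
+ * blkoff(fs, 8191) + 1 = 8192, so the last byte of the new length is
+ * backed by real storage before i_size is changed.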
+ */ + if (osize < length) { + offset = blkoff(fs, length - 1); + lbn = lblkno(fs, length - 1); + aflags = B_CLRBUF; + if (ap->a_flags & IO_SYNC) + aflags |= B_SYNC; + if (error = ffs_balloc(oip, lbn, offset + 1, ap->a_cred, &bp, + aflags)) + return (error); + oip->i_size = length; + (void) vnode_pager_uncache(ovp); + if (aflags & IO_SYNC) + bwrite(bp); + else + bawrite(bp); + oip->i_flag |= IN_CHANGE | IN_UPDATE; + return (VOP_UPDATE(ovp, &tv, &tv, 1)); + } + /* + * Shorten the size of the file. If the file is not being + * truncated to a block boundry, the contents of the + * partial block following the end of the file must be + * zero'ed in case it ever become accessable again because + * of subsequent file growth. + */ + offset = blkoff(fs, length); + if (offset == 0) { + oip->i_size = length; + } else { + lbn = lblkno(fs, length); + aflags = B_CLRBUF; + if (ap->a_flags & IO_SYNC) + aflags |= B_SYNC; + if (error = ffs_balloc(oip, lbn, offset, ap->a_cred, &bp, + aflags)) + return (error); + oip->i_size = length; + size = blksize(fs, oip, lbn); + (void) vnode_pager_uncache(ovp); + bzero((char *)bp->b_data + offset, (u_int)(size - offset)); + allocbuf(bp, size); + if (aflags & IO_SYNC) + bwrite(bp); + else + bawrite(bp); + } + /* + * Calculate index into inode's block list of + * last direct and indirect blocks (if any) + * which we want to keep. Lastblock is -1 when + * the file is truncated to 0. + */ + lastblock = lblkno(fs, length + fs->fs_bsize - 1) - 1; + lastiblock[SINGLE] = lastblock - NDADDR; + lastiblock[DOUBLE] = lastiblock[SINGLE] - NINDIR(fs); + lastiblock[TRIPLE] = lastiblock[DOUBLE] - NINDIR(fs) * NINDIR(fs); + nblocks = btodb(fs->fs_bsize); + /* + * Update file and block pointers on disk before we start freeing + * blocks. If we crash before free'ing blocks below, the blocks + * will be returned to the free list. lastiblock values are also + * normalized to -1 for calls to ffs_indirtrunc below. + */ + bcopy((caddr_t)&oip->i_db[0], (caddr_t)oldblks, sizeof oldblks); + for (level = TRIPLE; level >= SINGLE; level--) + if (lastiblock[level] < 0) { + oip->i_ib[level] = 0; + lastiblock[level] = -1; + } + for (i = NDADDR - 1; i > lastblock; i--) + oip->i_db[i] = 0; + oip->i_flag |= IN_CHANGE | IN_UPDATE; + if (error = VOP_UPDATE(ovp, &tv, &tv, MNT_WAIT)) + allerror = error; + /* + * Having written the new inode to disk, save its new configuration + * and put back the old block pointers long enough to process them. + * Note that we save the new block configuration so we can check it + * when we are done. + */ + bcopy((caddr_t)&oip->i_db[0], (caddr_t)newblks, sizeof newblks); + bcopy((caddr_t)oldblks, (caddr_t)&oip->i_db[0], sizeof oldblks); + oip->i_size = osize; + vflags = ((length > 0) ? V_SAVE : 0) | V_SAVEMETA; + allerror = vinvalbuf(ovp, vflags, ap->a_cred, ap->a_p, 0, 0); + + /* + * Indirect blocks first. + */ + indir_lbn[SINGLE] = -NDADDR; + indir_lbn[DOUBLE] = indir_lbn[SINGLE] - NINDIR(fs) - 1; + indir_lbn[TRIPLE] = indir_lbn[DOUBLE] - NINDIR(fs) * NINDIR(fs) - 1; + for (level = TRIPLE; level >= SINGLE; level--) { + bn = oip->i_ib[level]; + if (bn != 0) { + error = ffs_indirtrunc(oip, indir_lbn[level], + fsbtodb(fs, bn), lastiblock[level], level, &count); + if (error) + allerror = error; + blocksreleased += count; + if (lastiblock[level] < 0) { + oip->i_ib[level] = 0; + ffs_blkfree(oip, bn, fs->fs_bsize); + blocksreleased += nblocks; + } + } + if (lastiblock[level] >= 0) + goto done; + } + + /* + * All whole direct blocks or frags. 
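+ * (These are freed last to first: with lastblock == -1, i.e. a truncation
+ * to length zero, the loop below releases i_db[NDADDR - 1] down through
+ * i_db[0], skipping entries that were never allocated.)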
+ */ + for (i = NDADDR - 1; i > lastblock; i--) { + register long bsize; + + bn = oip->i_db[i]; + if (bn == 0) + continue; + oip->i_db[i] = 0; + bsize = blksize(fs, oip, i); + ffs_blkfree(oip, bn, bsize); + blocksreleased += btodb(bsize); + } + if (lastblock < 0) + goto done; + + /* + * Finally, look for a change in size of the + * last direct block; release any frags. + */ + bn = oip->i_db[lastblock]; + if (bn != 0) { + long oldspace, newspace; + + /* + * Calculate amount of space we're giving + * back as old block size minus new block size. + */ + oldspace = blksize(fs, oip, lastblock); + oip->i_size = length; + newspace = blksize(fs, oip, lastblock); + if (newspace == 0) + panic("itrunc: newspace"); + if (oldspace - newspace > 0) { + /* + * Block number of space to be free'd is + * the old block # plus the number of frags + * required for the storage we're keeping. + */ + bn += numfrags(fs, newspace); + ffs_blkfree(oip, bn, oldspace - newspace); + blocksreleased += btodb(oldspace - newspace); + } + } +done: +#ifdef DIAGNOSTIC + for (level = SINGLE; level <= TRIPLE; level++) + if (newblks[NDADDR + level] != oip->i_ib[level]) + panic("itrunc1"); + for (i = 0; i < NDADDR; i++) + if (newblks[i] != oip->i_db[i]) + panic("itrunc2"); + if (length == 0 && + (ovp->v_dirtyblkhd.lh_first || ovp->v_cleanblkhd.lh_first)) + panic("itrunc3"); +#endif /* DIAGNOSTIC */ + /* + * Put back the real size. + */ + oip->i_size = length; + oip->i_blocks -= blocksreleased; + if (oip->i_blocks < 0) /* sanity */ + oip->i_blocks = 0; + oip->i_flag |= IN_CHANGE; +#ifdef QUOTA + (void) chkdq(oip, -blocksreleased, NOCRED, 0); +#endif + return (allerror); +} + +/* + * Release blocks associated with the inode ip and stored in the indirect + * block bn. Blocks are free'd in LIFO order up to (but not including) + * lastbn. If level is greater than SINGLE, the block is an indirect block + * and recursive calls to indirtrunc must be used to cleanse other indirect + * blocks. + * + * NB: triple indirect blocks are untested. + */ +static int +ffs_indirtrunc(ip, lbn, dbn, lastbn, level, countp) + register struct inode *ip; + daddr_t lbn, lastbn; + daddr_t dbn; + int level; + long *countp; +{ + register int i; + struct buf *bp; + register struct fs *fs = ip->i_fs; + register daddr_t *bap; + struct vnode *vp; + daddr_t *copy, nb, nlbn, last; + long blkcount, factor; + int nblocks, blocksreleased = 0; + int error = 0, allerror = 0; + + /* + * Calculate index in current block of last + * block to be kept. -1 indicates the entire + * block so we need not calculate the index. + */ + factor = 1; + for (i = SINGLE; i < level; i++) + factor *= NINDIR(fs); + last = lastbn; + if (lastbn > 0) + last /= factor; + nblocks = btodb(fs->fs_bsize); + /* + * Get buffer of block pointers, zero those entries corresponding + * to blocks to be free'd, and update on disk copy first. Since + * double(triple) indirect before single(double) indirect, calls + * to bmap on these blocks will fail. However, we already have + * the on disk address, so we have to set the b_blkno field + * explicitly instead of letting bread do everything for us. + */ + vp = ITOV(ip); + bp = getblk(vp, lbn, (int)fs->fs_bsize, 0, 0); + if (bp->b_flags & (B_DONE | B_DELWRI)) { + /* Braces must be here in case trace evaluates to nothing. 
*/ + trace(TR_BREADHIT, pack(vp, fs->fs_bsize), lbn); + } else { + trace(TR_BREADMISS, pack(vp, fs->fs_bsize), lbn); + curproc->p_stats->p_ru.ru_inblock++; /* pay for read */ + bp->b_flags |= B_READ; + if (bp->b_bcount > bp->b_bufsize) + panic("ffs_indirtrunc: bad buffer size"); + bp->b_blkno = dbn; + VOP_STRATEGY(bp); + error = biowait(bp); + } + if (error) { + brelse(bp); + *countp = 0; + return (error); + } + + bap = (daddr_t *)bp->b_data; + MALLOC(copy, daddr_t *, fs->fs_bsize, M_TEMP, M_WAITOK); + bcopy((caddr_t)bap, (caddr_t)copy, (u_int)fs->fs_bsize); + bzero((caddr_t)&bap[last + 1], + (u_int)(NINDIR(fs) - (last + 1)) * sizeof (daddr_t)); + if (last == -1) + bp->b_flags |= B_INVAL; + error = bwrite(bp); + if (error) + allerror = error; + bap = copy; + + /* + * Recursively free totally unused blocks. + */ + for (i = NINDIR(fs) - 1, nlbn = lbn + 1 - i * factor; i > last; + i--, nlbn += factor) { + nb = bap[i]; + if (nb == 0) + continue; + if (level > SINGLE) { + if (error = ffs_indirtrunc(ip, nlbn, + fsbtodb(fs, nb), (daddr_t)-1, level - 1, &blkcount)) + allerror = error; + blocksreleased += blkcount; + } + ffs_blkfree(ip, nb, fs->fs_bsize); + blocksreleased += nblocks; + } + + /* + * Recursively free last partial block. + */ + if (level > SINGLE && lastbn >= 0) { + last = lastbn % factor; + nb = bap[i]; + if (nb != 0) { + if (error = ffs_indirtrunc(ip, nlbn, fsbtodb(fs, nb), + last, level - 1, &blkcount)) + allerror = error; + blocksreleased += blkcount; + } + } + FREE(copy, M_TEMP); + *countp = blocksreleased; + return (allerror); +} diff --git a/sys/ufs/ffs/ffs_subr.c b/sys/ufs/ffs/ffs_subr.c new file mode 100644 index 0000000..c251b16 --- /dev/null +++ b/sys/ufs/ffs/ffs_subr.c @@ -0,0 +1,238 @@ +/* + * Copyright (c) 1982, 1986, 1989, 1993 + * The Regents of the University of California. All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * 3. All advertising materials mentioning features or use of this software + * must display the following acknowledgement: + * This product includes software developed by the University of + * California, Berkeley and its contributors. + * 4. Neither the name of the University nor the names of its contributors + * may be used to endorse or promote products derived from this software + * without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND + * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. 
IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE + * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL + * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS + * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) + * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT + * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY + * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF + * SUCH DAMAGE. + * + * @(#)ffs_subr.c 8.2 (Berkeley) 9/21/93 + */ + +#include <sys/param.h> +#include <ufs/ffs/fs.h> + +#ifdef KERNEL +#include <sys/systm.h> +#include <sys/vnode.h> +#include <ufs/ffs/ffs_extern.h> +#include <sys/buf.h> +#include <ufs/ufs/quota.h> +#include <ufs/ufs/inode.h> + +/* + * Return buffer with the contents of block "offset" from the beginning of + * directory "ip". If "res" is non-zero, fill it in with a pointer to the + * remaining space in the directory. + */ +int +ffs_blkatoff(ap) + struct vop_blkatoff_args /* { + struct vnode *a_vp; + off_t a_offset; + char **a_res; + struct buf **a_bpp; + } */ *ap; +{ + struct inode *ip; + register struct fs *fs; + struct buf *bp; + daddr_t lbn; + int bsize, error; + + ip = VTOI(ap->a_vp); + fs = ip->i_fs; + lbn = lblkno(fs, ap->a_offset); + bsize = blksize(fs, ip, lbn); + + *ap->a_bpp = NULL; + if (error = bread(ap->a_vp, lbn, bsize, NOCRED, &bp)) { + brelse(bp); + return (error); + } + if (ap->a_res) + *ap->a_res = (char *)bp->b_data + blkoff(fs, ap->a_offset); + *ap->a_bpp = bp; + return (0); +} +#endif + +/* + * Update the frsum fields to reflect addition or deletion + * of some frags. + */ +void +ffs_fragacct(fs, fragmap, fraglist, cnt) + struct fs *fs; + int fragmap; + long fraglist[]; + int cnt; +{ + int inblk; + register int field, subfield; + register int siz, pos; + + inblk = (int)(fragtbl[fs->fs_frag][fragmap]) << 1; + fragmap <<= 1; + for (siz = 1; siz < fs->fs_frag; siz++) { + if ((inblk & (1 << (siz + (fs->fs_frag % NBBY)))) == 0) + continue; + field = around[siz]; + subfield = inside[siz]; + for (pos = siz; pos <= fs->fs_frag; pos++) { + if ((fragmap & field) == subfield) { + fraglist[siz] += cnt; + pos += siz; + field <<= siz; + subfield <<= siz; + } + field <<= 1; + subfield <<= 1; + } + } +} + +#if defined(KERNEL) && defined(DIAGNOSTIC) +void +ffs_checkoverlap(bp, ip) + struct buf *bp; + struct inode *ip; +{ + register struct buf *ebp, *ep; + register daddr_t start, last; + struct vnode *vp; + + ebp = &buf[nbuf]; + start = bp->b_blkno; + last = start + btodb(bp->b_bcount) - 1; + for (ep = buf; ep < ebp; ep++) { + if (ep == bp || (ep->b_flags & B_INVAL) || + ep->b_vp == NULLVP) + continue; + if (VOP_BMAP(ep->b_vp, (daddr_t)0, &vp, (daddr_t)0, NULL)) + continue; + if (vp != ip->i_devvp) + continue; + /* look for overlap */ + if (ep->b_bcount == 0 || ep->b_blkno > last || + ep->b_blkno + btodb(ep->b_bcount) <= start) + continue; + vprint("Disk overlap", vp); + (void)printf("\tstart %d, end %d overlap start %d, end %d\n", + start, last, ep->b_blkno, + ep->b_blkno + btodb(ep->b_bcount) - 1); + panic("Disk buffer overlap"); + } +} +#endif /* DIAGNOSTIC */ + +/* + * block operations + * + * check if a block is available + */ +int +ffs_isblock(fs, cp, h) + struct fs *fs; + unsigned char *cp; + daddr_t h; +{ + unsigned char mask; + + switch ((int)fs->fs_frag) { + case 8: + return (cp[h] == 0xff); + case 4: + mask = 0x0f << ((h & 0x1) << 2); + return ((cp[h >> 1] & mask) == mask); + case 2: + mask = 0x03 << 
((h & 0x3) << 1); + return ((cp[h >> 2] & mask) == mask); + case 1: + mask = 0x01 << (h & 0x7); + return ((cp[h >> 3] & mask) == mask); + default: + panic("ffs_isblock"); + } +} + +/* + * take a block out of the map + */ +void +ffs_clrblock(fs, cp, h) + struct fs *fs; + u_char *cp; + daddr_t h; +{ + + switch ((int)fs->fs_frag) { + case 8: + cp[h] = 0; + return; + case 4: + cp[h >> 1] &= ~(0x0f << ((h & 0x1) << 2)); + return; + case 2: + cp[h >> 2] &= ~(0x03 << ((h & 0x3) << 1)); + return; + case 1: + cp[h >> 3] &= ~(0x01 << (h & 0x7)); + return; + default: + panic("ffs_clrblock"); + } +} + +/* + * put a block into the map + */ +void +ffs_setblock(fs, cp, h) + struct fs *fs; + unsigned char *cp; + daddr_t h; +{ + + switch ((int)fs->fs_frag) { + + case 8: + cp[h] = 0xff; + return; + case 4: + cp[h >> 1] |= (0x0f << ((h & 0x1) << 2)); + return; + case 2: + cp[h >> 2] |= (0x03 << ((h & 0x3) << 1)); + return; + case 1: + cp[h >> 3] |= (0x01 << (h & 0x7)); + return; + default: + panic("ffs_setblock"); + } +} diff --git a/sys/ufs/ffs/ffs_tables.c b/sys/ufs/ffs/ffs_tables.c new file mode 100644 index 0000000..8cf46b0 --- /dev/null +++ b/sys/ufs/ffs/ffs_tables.c @@ -0,0 +1,136 @@ +/* + * Copyright (c) 1982, 1986, 1993 + * The Regents of the University of California. All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * 3. All advertising materials mentioning features or use of this software + * must display the following acknowledgement: + * This product includes software developed by the University of + * California, Berkeley and its contributors. + * 4. Neither the name of the University nor the names of its contributors + * may be used to endorse or promote products derived from this software + * without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND + * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE + * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL + * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS + * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) + * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT + * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY + * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF + * SUCH DAMAGE. + * + * @(#)ffs_tables.c 8.1 (Berkeley) 6/11/93 + */ + +#include <sys/param.h> + +/* + * Bit patterns for identifying fragments in the block map + * used as ((map & around) == inside) + */ +int around[9] = { + 0x3, 0x7, 0xf, 0x1f, 0x3f, 0x7f, 0xff, 0x1ff, 0x3ff +}; +int inside[9] = { + 0x0, 0x2, 0x6, 0xe, 0x1e, 0x3e, 0x7e, 0xfe, 0x1fe +}; + +/* + * Given a block map bit pattern, the frag tables tell whether a + * particular size fragment is available. 
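+ * For example, fragtbl8[0x0f] is 0x08: a map byte whose low four
+ * fragments are free has a free fragment of size 4, i.e. bit
+ * (1 << (4 - 1)) is set, per the usage shown below.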
+ * + * used as: + * if ((1 << (size - 1)) & fragtbl[fs->fs_frag][map] { + * at least one fragment of the indicated size is available + * } + * + * These tables are used by the scanc instruction on the VAX to + * quickly find an appropriate fragment. + */ +u_char fragtbl124[256] = { + 0x00, 0x16, 0x16, 0x2a, 0x16, 0x16, 0x26, 0x4e, + 0x16, 0x16, 0x16, 0x3e, 0x2a, 0x3e, 0x4e, 0x8a, + 0x16, 0x16, 0x16, 0x3e, 0x16, 0x16, 0x36, 0x5e, + 0x16, 0x16, 0x16, 0x3e, 0x3e, 0x3e, 0x5e, 0x9e, + 0x16, 0x16, 0x16, 0x3e, 0x16, 0x16, 0x36, 0x5e, + 0x16, 0x16, 0x16, 0x3e, 0x3e, 0x3e, 0x5e, 0x9e, + 0x2a, 0x3e, 0x3e, 0x2a, 0x3e, 0x3e, 0x2e, 0x6e, + 0x3e, 0x3e, 0x3e, 0x3e, 0x2a, 0x3e, 0x6e, 0xaa, + 0x16, 0x16, 0x16, 0x3e, 0x16, 0x16, 0x36, 0x5e, + 0x16, 0x16, 0x16, 0x3e, 0x3e, 0x3e, 0x5e, 0x9e, + 0x16, 0x16, 0x16, 0x3e, 0x16, 0x16, 0x36, 0x5e, + 0x16, 0x16, 0x16, 0x3e, 0x3e, 0x3e, 0x5e, 0x9e, + 0x26, 0x36, 0x36, 0x2e, 0x36, 0x36, 0x26, 0x6e, + 0x36, 0x36, 0x36, 0x3e, 0x2e, 0x3e, 0x6e, 0xae, + 0x4e, 0x5e, 0x5e, 0x6e, 0x5e, 0x5e, 0x6e, 0x4e, + 0x5e, 0x5e, 0x5e, 0x7e, 0x6e, 0x7e, 0x4e, 0xce, + 0x16, 0x16, 0x16, 0x3e, 0x16, 0x16, 0x36, 0x5e, + 0x16, 0x16, 0x16, 0x3e, 0x3e, 0x3e, 0x5e, 0x9e, + 0x16, 0x16, 0x16, 0x3e, 0x16, 0x16, 0x36, 0x5e, + 0x16, 0x16, 0x16, 0x3e, 0x3e, 0x3e, 0x5e, 0x9e, + 0x16, 0x16, 0x16, 0x3e, 0x16, 0x16, 0x36, 0x5e, + 0x16, 0x16, 0x16, 0x3e, 0x3e, 0x3e, 0x5e, 0x9e, + 0x3e, 0x3e, 0x3e, 0x3e, 0x3e, 0x3e, 0x3e, 0x7e, + 0x3e, 0x3e, 0x3e, 0x3e, 0x3e, 0x3e, 0x7e, 0xbe, + 0x2a, 0x3e, 0x3e, 0x2a, 0x3e, 0x3e, 0x2e, 0x6e, + 0x3e, 0x3e, 0x3e, 0x3e, 0x2a, 0x3e, 0x6e, 0xaa, + 0x3e, 0x3e, 0x3e, 0x3e, 0x3e, 0x3e, 0x3e, 0x7e, + 0x3e, 0x3e, 0x3e, 0x3e, 0x3e, 0x3e, 0x7e, 0xbe, + 0x4e, 0x5e, 0x5e, 0x6e, 0x5e, 0x5e, 0x6e, 0x4e, + 0x5e, 0x5e, 0x5e, 0x7e, 0x6e, 0x7e, 0x4e, 0xce, + 0x8a, 0x9e, 0x9e, 0xaa, 0x9e, 0x9e, 0xae, 0xce, + 0x9e, 0x9e, 0x9e, 0xbe, 0xaa, 0xbe, 0xce, 0x8a, +}; + +u_char fragtbl8[256] = { + 0x00, 0x01, 0x01, 0x02, 0x01, 0x01, 0x02, 0x04, + 0x01, 0x01, 0x01, 0x03, 0x02, 0x03, 0x04, 0x08, + 0x01, 0x01, 0x01, 0x03, 0x01, 0x01, 0x03, 0x05, + 0x02, 0x03, 0x03, 0x02, 0x04, 0x05, 0x08, 0x10, + 0x01, 0x01, 0x01, 0x03, 0x01, 0x01, 0x03, 0x05, + 0x01, 0x01, 0x01, 0x03, 0x03, 0x03, 0x05, 0x09, + 0x02, 0x03, 0x03, 0x02, 0x03, 0x03, 0x02, 0x06, + 0x04, 0x05, 0x05, 0x06, 0x08, 0x09, 0x10, 0x20, + 0x01, 0x01, 0x01, 0x03, 0x01, 0x01, 0x03, 0x05, + 0x01, 0x01, 0x01, 0x03, 0x03, 0x03, 0x05, 0x09, + 0x01, 0x01, 0x01, 0x03, 0x01, 0x01, 0x03, 0x05, + 0x03, 0x03, 0x03, 0x03, 0x05, 0x05, 0x09, 0x11, + 0x02, 0x03, 0x03, 0x02, 0x03, 0x03, 0x02, 0x06, + 0x03, 0x03, 0x03, 0x03, 0x02, 0x03, 0x06, 0x0a, + 0x04, 0x05, 0x05, 0x06, 0x05, 0x05, 0x06, 0x04, + 0x08, 0x09, 0x09, 0x0a, 0x10, 0x11, 0x20, 0x40, + 0x01, 0x01, 0x01, 0x03, 0x01, 0x01, 0x03, 0x05, + 0x01, 0x01, 0x01, 0x03, 0x03, 0x03, 0x05, 0x09, + 0x01, 0x01, 0x01, 0x03, 0x01, 0x01, 0x03, 0x05, + 0x03, 0x03, 0x03, 0x03, 0x05, 0x05, 0x09, 0x11, + 0x01, 0x01, 0x01, 0x03, 0x01, 0x01, 0x03, 0x05, + 0x01, 0x01, 0x01, 0x03, 0x03, 0x03, 0x05, 0x09, + 0x03, 0x03, 0x03, 0x03, 0x03, 0x03, 0x03, 0x07, + 0x05, 0x05, 0x05, 0x07, 0x09, 0x09, 0x11, 0x21, + 0x02, 0x03, 0x03, 0x02, 0x03, 0x03, 0x02, 0x06, + 0x03, 0x03, 0x03, 0x03, 0x02, 0x03, 0x06, 0x0a, + 0x03, 0x03, 0x03, 0x03, 0x03, 0x03, 0x03, 0x07, + 0x02, 0x03, 0x03, 0x02, 0x06, 0x07, 0x0a, 0x12, + 0x04, 0x05, 0x05, 0x06, 0x05, 0x05, 0x06, 0x04, + 0x05, 0x05, 0x05, 0x07, 0x06, 0x07, 0x04, 0x0c, + 0x08, 0x09, 0x09, 0x0a, 0x09, 0x09, 0x0a, 0x0c, + 0x10, 0x11, 0x11, 0x12, 0x20, 0x21, 0x40, 0x80, +}; + +/* + * The actual fragtbl 
array. + */ +u_char *fragtbl[MAXFRAG + 1] = { + 0, fragtbl124, fragtbl124, 0, fragtbl124, 0, 0, 0, fragtbl8, +}; diff --git a/sys/ufs/ffs/ffs_vfsops.c b/sys/ufs/ffs/ffs_vfsops.c new file mode 100644 index 0000000..505dd5d --- /dev/null +++ b/sys/ufs/ffs/ffs_vfsops.c @@ -0,0 +1,843 @@ +/* + * Copyright (c) 1989, 1991, 1993, 1994 + * The Regents of the University of California. All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * 3. All advertising materials mentioning features or use of this software + * must display the following acknowledgement: + * This product includes software developed by the University of + * California, Berkeley and its contributors. + * 4. Neither the name of the University nor the names of its contributors + * may be used to endorse or promote products derived from this software + * without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND + * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE + * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL + * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS + * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) + * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT + * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY + * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF + * SUCH DAMAGE. + * + * @(#)ffs_vfsops.c 8.8 (Berkeley) 4/18/94 + */ + +#include <sys/param.h> +#include <sys/systm.h> +#include <sys/namei.h> +#include <sys/proc.h> +#include <sys/kernel.h> +#include <sys/vnode.h> +#include <sys/socket.h> +#include <sys/mount.h> +#include <sys/buf.h> +#include <sys/mbuf.h> +#include <sys/file.h> +#include <sys/disklabel.h> +#include <sys/ioctl.h> +#include <sys/errno.h> +#include <sys/malloc.h> + +#include <miscfs/specfs/specdev.h> + +#include <ufs/ufs/quota.h> +#include <ufs/ufs/ufsmount.h> +#include <ufs/ufs/inode.h> +#include <ufs/ufs/ufs_extern.h> + +#include <ufs/ffs/fs.h> +#include <ufs/ffs/ffs_extern.h> + +int ffs_sbupdate __P((struct ufsmount *, int)); + +struct vfsops ufs_vfsops = { + ffs_mount, + ufs_start, + ffs_unmount, + ufs_root, + ufs_quotactl, + ffs_statfs, + ffs_sync, + ffs_vget, + ffs_fhtovp, + ffs_vptofh, + ffs_init, +}; + +extern u_long nextgennumber; + +/* + * Called by main() when ufs is going to be mounted as root. + * + * Name is updated by mount(8) after booting. + */ +#define ROOTNAME "root_device" + +ffs_mountroot() +{ + extern struct vnode *rootvp; + register struct fs *fs; + register struct mount *mp; + struct proc *p = curproc; /* XXX */ + struct ufsmount *ump; + u_int size; + int error; + + /* + * Get vnodes for swapdev and rootdev. 
+ */ + if (bdevvp(swapdev, &swapdev_vp) || bdevvp(rootdev, &rootvp)) + panic("ffs_mountroot: can't setup bdevvp's"); + + mp = malloc((u_long)sizeof(struct mount), M_MOUNT, M_WAITOK); + bzero((char *)mp, (u_long)sizeof(struct mount)); + mp->mnt_op = &ufs_vfsops; + mp->mnt_flag = MNT_RDONLY; + if (error = ffs_mountfs(rootvp, mp, p)) { + free(mp, M_MOUNT); + return (error); + } + if (error = vfs_lock(mp)) { + (void)ffs_unmount(mp, 0, p); + free(mp, M_MOUNT); + return (error); + } + TAILQ_INSERT_TAIL(&mountlist, mp, mnt_list); + mp->mnt_flag |= MNT_ROOTFS; + mp->mnt_vnodecovered = NULLVP; + ump = VFSTOUFS(mp); + fs = ump->um_fs; + bzero(fs->fs_fsmnt, sizeof(fs->fs_fsmnt)); + fs->fs_fsmnt[0] = '/'; + bcopy((caddr_t)fs->fs_fsmnt, (caddr_t)mp->mnt_stat.f_mntonname, + MNAMELEN); + (void) copystr(ROOTNAME, mp->mnt_stat.f_mntfromname, MNAMELEN - 1, + &size); + bzero(mp->mnt_stat.f_mntfromname + size, MNAMELEN - size); + (void)ffs_statfs(mp, &mp->mnt_stat, p); + vfs_unlock(mp); + inittodr(fs->fs_time); + return (0); +} + +/* + * VFS Operations. + * + * mount system call + */ +int +ffs_mount(mp, path, data, ndp, p) + register struct mount *mp; + char *path; + caddr_t data; + struct nameidata *ndp; + struct proc *p; +{ + struct vnode *devvp; + struct ufs_args args; + struct ufsmount *ump; + register struct fs *fs; + u_int size; + int error, flags; + + if (error = copyin(data, (caddr_t)&args, sizeof (struct ufs_args))) + return (error); + /* + * If updating, check whether changing from read-only to + * read/write; if there is no device name, that's all we do. + */ + if (mp->mnt_flag & MNT_UPDATE) { + ump = VFSTOUFS(mp); + fs = ump->um_fs; + error = 0; + if (fs->fs_ronly == 0 && (mp->mnt_flag & MNT_RDONLY)) { + flags = WRITECLOSE; + if (mp->mnt_flag & MNT_FORCE) + flags |= FORCECLOSE; + if (vfs_busy(mp)) + return (EBUSY); + error = ffs_flushfiles(mp, flags, p); + vfs_unbusy(mp); + } + if (!error && (mp->mnt_flag & MNT_RELOAD)) + error = ffs_reload(mp, ndp->ni_cnd.cn_cred, p); + if (error) + return (error); + if (fs->fs_ronly && (mp->mnt_flag & MNT_WANTRDWR)) + fs->fs_ronly = 0; + if (args.fspec == 0) { + /* + * Process export requests. + */ + return (vfs_export(mp, &ump->um_export, &args.export)); + } + } + /* + * Not an update, or updating the name: look up the name + * and verify that it refers to a sensible block device. + */ + NDINIT(ndp, LOOKUP, FOLLOW, UIO_USERSPACE, args.fspec, p); + if (error = namei(ndp)) + return (error); + devvp = ndp->ni_vp; + + if (devvp->v_type != VBLK) { + vrele(devvp); + return (ENOTBLK); + } + if (major(devvp->v_rdev) >= nblkdev) { + vrele(devvp); + return (ENXIO); + } + if ((mp->mnt_flag & MNT_UPDATE) == 0) + error = ffs_mountfs(devvp, mp, p); + else { + if (devvp != ump->um_devvp) + error = EINVAL; /* needs translation */ + else + vrele(devvp); + } + if (error) { + vrele(devvp); + return (error); + } + ump = VFSTOUFS(mp); + fs = ump->um_fs; + (void) copyinstr(path, fs->fs_fsmnt, sizeof(fs->fs_fsmnt) - 1, &size); + bzero(fs->fs_fsmnt + size, sizeof(fs->fs_fsmnt) - size); + bcopy((caddr_t)fs->fs_fsmnt, (caddr_t)mp->mnt_stat.f_mntonname, + MNAMELEN); + (void) copyinstr(args.fspec, mp->mnt_stat.f_mntfromname, MNAMELEN - 1, + &size); + bzero(mp->mnt_stat.f_mntfromname + size, MNAMELEN - size); + (void)ffs_statfs(mp, &mp->mnt_stat, p); + return (0); +} + +/* + * Reload all incore data for a filesystem (used after running fsck on + * the root filesystem and finding things to fix). The filesystem must + * be mounted read-only. 
+ * + * Things to do to update the mount: + * 1) invalidate all cached meta-data. + * 2) re-read superblock from disk. + * 3) re-read summary information from disk. + * 4) invalidate all inactive vnodes. + * 5) invalidate all cached file data. + * 6) re-read inode data for all active vnodes. + */ +ffs_reload(mountp, cred, p) + register struct mount *mountp; + struct ucred *cred; + struct proc *p; +{ + register struct vnode *vp, *nvp, *devvp; + struct inode *ip; + struct csum *space; + struct buf *bp; + struct fs *fs; + int i, blks, size, error; + + if ((mountp->mnt_flag & MNT_RDONLY) == 0) + return (EINVAL); + /* + * Step 1: invalidate all cached meta-data. + */ + devvp = VFSTOUFS(mountp)->um_devvp; + if (vinvalbuf(devvp, 0, cred, p, 0, 0)) + panic("ffs_reload: dirty1"); + /* + * Step 2: re-read superblock from disk. + */ + if (error = bread(devvp, SBLOCK, SBSIZE, NOCRED, &bp)) + return (error); + fs = (struct fs *)bp->b_data; + if (fs->fs_magic != FS_MAGIC || fs->fs_bsize > MAXBSIZE || + fs->fs_bsize < sizeof(struct fs)) { + brelse(bp); + return (EIO); /* XXX needs translation */ + } + fs = VFSTOUFS(mountp)->um_fs; + bcopy(&fs->fs_csp[0], &((struct fs *)bp->b_data)->fs_csp[0], + sizeof(fs->fs_csp)); + bcopy(bp->b_data, fs, (u_int)fs->fs_sbsize); + if (fs->fs_sbsize < SBSIZE) + bp->b_flags |= B_INVAL; + brelse(bp); + ffs_oldfscompat(fs); + /* + * Step 3: re-read summary information from disk. + */ + blks = howmany(fs->fs_cssize, fs->fs_fsize); + space = fs->fs_csp[0]; + for (i = 0; i < blks; i += fs->fs_frag) { + size = fs->fs_bsize; + if (i + fs->fs_frag > blks) + size = (blks - i) * fs->fs_fsize; + if (error = bread(devvp, fsbtodb(fs, fs->fs_csaddr + i), size, + NOCRED, &bp)) + return (error); + bcopy(bp->b_data, fs->fs_csp[fragstoblks(fs, i)], (u_int)size); + brelse(bp); + } +loop: + for (vp = mountp->mnt_vnodelist.lh_first; vp != NULL; vp = nvp) { + nvp = vp->v_mntvnodes.le_next; + /* + * Step 4: invalidate all inactive vnodes. + */ + if (vp->v_usecount == 0) { + vgone(vp); + continue; + } + /* + * Step 5: invalidate all cached file data. + */ + if (vget(vp, 1)) + goto loop; + if (vinvalbuf(vp, 0, cred, p, 0, 0)) + panic("ffs_reload: dirty2"); + /* + * Step 6: re-read inode data for all active vnodes. + */ + ip = VTOI(vp); + if (error = + bread(devvp, fsbtodb(fs, ino_to_fsba(fs, ip->i_number)), + (int)fs->fs_bsize, NOCRED, &bp)) { + vput(vp); + return (error); + } + ip->i_din = *((struct dinode *)bp->b_data + + ino_to_fsbo(fs, ip->i_number)); + brelse(bp); + vput(vp); + if (vp->v_mount != mountp) + goto loop; + } + return (0); +} + +/* + * Common code for mount and mountroot + */ +int +ffs_mountfs(devvp, mp, p) + register struct vnode *devvp; + struct mount *mp; + struct proc *p; +{ + register struct ufsmount *ump; + struct buf *bp; + register struct fs *fs; + dev_t dev = devvp->v_rdev; + struct partinfo dpart; + caddr_t base, space; + int havepart = 0, blks; + int error, i, size; + int ronly; + extern struct vnode *rootvp; + + /* + * Disallow multiple mounts of the same device. + * Disallow mounting of a device that is currently in use + * (except for root, which might share swap device for miniroot). + * Flush out any old buffers remaining from a previous use. + */ + if (error = vfs_mountedon(devvp)) + return (error); + if (vcount(devvp) > 1 && devvp != rootvp) + return (EBUSY); + if (error = vinvalbuf(devvp, V_SAVE, p->p_ucred, p, 0, 0)) + return (error); + + ronly = (mp->mnt_flag & MNT_RDONLY) != 0; + if (error = VOP_OPEN(devvp, ronly ? 
FREAD : FREAD|FWRITE, FSCRED, p)) + return (error); + if (VOP_IOCTL(devvp, DIOCGPART, (caddr_t)&dpart, FREAD, NOCRED, p) != 0) + size = DEV_BSIZE; + else { + havepart = 1; + size = dpart.disklab->d_secsize; + } + + bp = NULL; + ump = NULL; + if (error = bread(devvp, SBLOCK, SBSIZE, NOCRED, &bp)) + goto out; + fs = (struct fs *)bp->b_data; + if (fs->fs_magic != FS_MAGIC || fs->fs_bsize > MAXBSIZE || + fs->fs_bsize < sizeof(struct fs)) { + error = EINVAL; /* XXX needs translation */ + goto out; + } + ump = malloc(sizeof *ump, M_UFSMNT, M_WAITOK); + bzero((caddr_t)ump, sizeof *ump); + ump->um_fs = malloc((u_long)fs->fs_sbsize, M_UFSMNT, + M_WAITOK); + bcopy(bp->b_data, ump->um_fs, (u_int)fs->fs_sbsize); + if (fs->fs_sbsize < SBSIZE) + bp->b_flags |= B_INVAL; + brelse(bp); + bp = NULL; + fs = ump->um_fs; + fs->fs_ronly = ronly; + if (ronly == 0) + fs->fs_fmod = 1; + blks = howmany(fs->fs_cssize, fs->fs_fsize); + base = space = malloc((u_long)fs->fs_cssize, M_UFSMNT, + M_WAITOK); + for (i = 0; i < blks; i += fs->fs_frag) { + size = fs->fs_bsize; + if (i + fs->fs_frag > blks) + size = (blks - i) * fs->fs_fsize; + error = bread(devvp, fsbtodb(fs, fs->fs_csaddr + i), size, + NOCRED, &bp); + if (error) { + free(base, M_UFSMNT); + goto out; + } + bcopy(bp->b_data, space, (u_int)size); + fs->fs_csp[fragstoblks(fs, i)] = (struct csum *)space; + space += size; + brelse(bp); + bp = NULL; + } + mp->mnt_data = (qaddr_t)ump; + mp->mnt_stat.f_fsid.val[0] = (long)dev; + mp->mnt_stat.f_fsid.val[1] = MOUNT_UFS; + mp->mnt_maxsymlinklen = fs->fs_maxsymlinklen; + mp->mnt_flag |= MNT_LOCAL; + ump->um_mountp = mp; + ump->um_dev = dev; + ump->um_devvp = devvp; + ump->um_nindir = fs->fs_nindir; + ump->um_bptrtodb = fs->fs_fsbtodb; + ump->um_seqinc = fs->fs_frag; + for (i = 0; i < MAXQUOTAS; i++) + ump->um_quotas[i] = NULLVP; + devvp->v_specflags |= SI_MOUNTEDON; + ffs_oldfscompat(fs); + return (0); +out: + if (bp) + brelse(bp); + (void)VOP_CLOSE(devvp, ronly ? FREAD : FREAD|FWRITE, NOCRED, p); + if (ump) { + free(ump->um_fs, M_UFSMNT); + free(ump, M_UFSMNT); + mp->mnt_data = (qaddr_t)0; + } + return (error); +} + +/* + * Sanity checks for old file systems. + * + * XXX - goes away some day. + */ +ffs_oldfscompat(fs) + struct fs *fs; +{ + int i; + + fs->fs_npsect = max(fs->fs_npsect, fs->fs_nsect); /* XXX */ + fs->fs_interleave = max(fs->fs_interleave, 1); /* XXX */ + if (fs->fs_postblformat == FS_42POSTBLFMT) /* XXX */ + fs->fs_nrpos = 8; /* XXX */ + if (fs->fs_inodefmt < FS_44INODEFMT) { /* XXX */ + quad_t sizepb = fs->fs_bsize; /* XXX */ + /* XXX */ + fs->fs_maxfilesize = fs->fs_bsize * NDADDR - 1; /* XXX */ + for (i = 0; i < NIADDR; i++) { /* XXX */ + sizepb *= NINDIR(fs); /* XXX */ + fs->fs_maxfilesize += sizepb; /* XXX */ + } /* XXX */ + fs->fs_qbmask = ~fs->fs_bmask; /* XXX */ + fs->fs_qfmask = ~fs->fs_fmask; /* XXX */ + } /* XXX */ + return (0); +} + +/* + * unmount system call + */ +int +ffs_unmount(mp, mntflags, p) + struct mount *mp; + int mntflags; + struct proc *p; +{ + register struct ufsmount *ump; + register struct fs *fs; + int error, flags, ronly; + + flags = 0; + if (mntflags & MNT_FORCE) { + if (mp->mnt_flag & MNT_ROOTFS) + return (EINVAL); + flags |= FORCECLOSE; + } + if (error = ffs_flushfiles(mp, flags, p)) + return (error); + ump = VFSTOUFS(mp); + fs = ump->um_fs; + ronly = !fs->fs_ronly; + ump->um_devvp->v_specflags &= ~SI_MOUNTEDON; + error = VOP_CLOSE(ump->um_devvp, ronly ? 
FREAD : FREAD|FWRITE, + NOCRED, p); + vrele(ump->um_devvp); + free(fs->fs_csp[0], M_UFSMNT); + free(fs, M_UFSMNT); + free(ump, M_UFSMNT); + mp->mnt_data = (qaddr_t)0; + mp->mnt_flag &= ~MNT_LOCAL; + return (error); +} + +/* + * Flush out all the files in a filesystem. + */ +ffs_flushfiles(mp, flags, p) + register struct mount *mp; + int flags; + struct proc *p; +{ + extern int doforce; + register struct ufsmount *ump; + int i, error; + + if (!doforce) + flags &= ~FORCECLOSE; + ump = VFSTOUFS(mp); +#ifdef QUOTA + if (mp->mnt_flag & MNT_QUOTA) { + if (error = vflush(mp, NULLVP, SKIPSYSTEM|flags)) + return (error); + for (i = 0; i < MAXQUOTAS; i++) { + if (ump->um_quotas[i] == NULLVP) + continue; + quotaoff(p, mp, i); + } + /* + * Here we fall through to vflush again to ensure + * that we have gotten rid of all the system vnodes. + */ + } +#endif + error = vflush(mp, NULLVP, flags); + return (error); +} + +/* + * Get file system statistics. + */ +int +ffs_statfs(mp, sbp, p) + struct mount *mp; + register struct statfs *sbp; + struct proc *p; +{ + register struct ufsmount *ump; + register struct fs *fs; + + ump = VFSTOUFS(mp); + fs = ump->um_fs; + if (fs->fs_magic != FS_MAGIC) + panic("ffs_statfs"); + sbp->f_type = MOUNT_UFS; + sbp->f_bsize = fs->fs_fsize; + sbp->f_iosize = fs->fs_bsize; + sbp->f_blocks = fs->fs_dsize; + sbp->f_bfree = fs->fs_cstotal.cs_nbfree * fs->fs_frag + + fs->fs_cstotal.cs_nffree; + sbp->f_bavail = (fs->fs_dsize * (100 - fs->fs_minfree) / 100) - + (fs->fs_dsize - sbp->f_bfree); + sbp->f_files = fs->fs_ncg * fs->fs_ipg - ROOTINO; + sbp->f_ffree = fs->fs_cstotal.cs_nifree; + if (sbp != &mp->mnt_stat) { + bcopy((caddr_t)mp->mnt_stat.f_mntonname, + (caddr_t)&sbp->f_mntonname[0], MNAMELEN); + bcopy((caddr_t)mp->mnt_stat.f_mntfromname, + (caddr_t)&sbp->f_mntfromname[0], MNAMELEN); + } + return (0); +} + +/* + * Go through the disk queues to initiate sandbagged IO; + * go through the inodes to write those that have been modified; + * initiate the writing of the super block if it has been modified. + * + * Note: we are always called with the filesystem marked `MPBUSY'. + */ +int +ffs_sync(mp, waitfor, cred, p) + struct mount *mp; + int waitfor; + struct ucred *cred; + struct proc *p; +{ + register struct vnode *vp; + register struct inode *ip; + register struct ufsmount *ump = VFSTOUFS(mp); + register struct fs *fs; + int error, allerror = 0; + + fs = ump->um_fs; + /* + * Write back modified superblock. + * Consistency check that the superblock + * is still in the buffer cache. + */ + if (fs->fs_fmod != 0) { + if (fs->fs_ronly != 0) { /* XXX */ + printf("fs = %s\n", fs->fs_fsmnt); + panic("update: rofs mod"); + } + fs->fs_fmod = 0; + fs->fs_time = time.tv_sec; + allerror = ffs_sbupdate(ump, waitfor); + } + /* + * Write back each (modified) inode. + */ +loop: + for (vp = mp->mnt_vnodelist.lh_first; + vp != NULL; + vp = vp->v_mntvnodes.le_next) { + /* + * If the vnode that we are about to sync is no longer + * associated with this mount point, start over. + */ + if (vp->v_mount != mp) + goto loop; + if (VOP_ISLOCKED(vp)) + continue; + ip = VTOI(vp); + if ((ip->i_flag & + (IN_ACCESS | IN_CHANGE | IN_MODIFIED | IN_UPDATE)) == 0 && + vp->v_dirtyblkhd.lh_first == NULL) + continue; + if (vget(vp, 1)) + goto loop; + if (error = VOP_FSYNC(vp, cred, waitfor, p)) + allerror = error; + vput(vp); + } + /* + * Force stale file system control information to be flushed. 
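+ * (um_devvp carries the buffers for inode and cylinder group blocks, so
+ * fsyncing it writes back metadata that is not attached to any file vnode.)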
+ */ + if (error = VOP_FSYNC(ump->um_devvp, cred, waitfor, p)) + allerror = error; +#ifdef QUOTA + qsync(mp); +#endif + return (allerror); +} + +/* + * Look up a FFS dinode number to find its incore vnode, otherwise read it + * in from disk. If it is in core, wait for the lock bit to clear, then + * return the inode locked. Detection and handling of mount points must be + * done by the calling routine. + */ +int +ffs_vget(mp, ino, vpp) + struct mount *mp; + ino_t ino; + struct vnode **vpp; +{ + register struct fs *fs; + register struct inode *ip; + struct ufsmount *ump; + struct buf *bp; + struct vnode *vp; + dev_t dev; + int i, type, error; + + ump = VFSTOUFS(mp); + dev = ump->um_dev; + if ((*vpp = ufs_ihashget(dev, ino)) != NULL) + return (0); + + /* Allocate a new vnode/inode. */ + if (error = getnewvnode(VT_UFS, mp, ffs_vnodeop_p, &vp)) { + *vpp = NULL; + return (error); + } + type = ump->um_devvp->v_tag == VT_MFS ? M_MFSNODE : M_FFSNODE; /* XXX */ + MALLOC(ip, struct inode *, sizeof(struct inode), type, M_WAITOK); + bzero((caddr_t)ip, sizeof(struct inode)); + vp->v_data = ip; + ip->i_vnode = vp; + ip->i_fs = fs = ump->um_fs; + ip->i_dev = dev; + ip->i_number = ino; +#ifdef QUOTA + for (i = 0; i < MAXQUOTAS; i++) + ip->i_dquot[i] = NODQUOT; +#endif + /* + * Put it onto its hash chain and lock it so that other requests for + * this inode will block if they arrive while we are sleeping waiting + * for old data structures to be purged or for the contents of the + * disk portion of this inode to be read. + */ + ufs_ihashins(ip); + + /* Read in the disk contents for the inode, copy into the inode. */ + if (error = bread(ump->um_devvp, fsbtodb(fs, ino_to_fsba(fs, ino)), + (int)fs->fs_bsize, NOCRED, &bp)) { + /* + * The inode does not contain anything useful, so it would + * be misleading to leave it on its hash chain. With mode + * still zero, it will be unlinked and returned to the free + * list by vput(). + */ + vput(vp); + brelse(bp); + *vpp = NULL; + return (error); + } + ip->i_din = *((struct dinode *)bp->b_data + ino_to_fsbo(fs, ino)); + brelse(bp); + + /* + * Initialize the vnode from the inode, check for aliases. + * Note that the underlying vnode may have changed. + */ + if (error = ufs_vinit(mp, ffs_specop_p, FFS_FIFOOPS, &vp)) { + vput(vp); + *vpp = NULL; + return (error); + } + /* + * Finish inode initialization now that aliasing has been resolved. + */ + ip->i_devvp = ump->um_devvp; + VREF(ip->i_devvp); + /* + * Set up a generation number for this inode if it does not + * already have one. This should only happen on old filesystems. + */ + if (ip->i_gen == 0) { + if (++nextgennumber < (u_long)time.tv_sec) + nextgennumber = time.tv_sec; + ip->i_gen = nextgennumber; + if ((vp->v_mount->mnt_flag & MNT_RDONLY) == 0) + ip->i_flag |= IN_MODIFIED; + } + /* + * Ensure that uid and gid are correct. This is a temporary + * fix until fsck has been changed to do the update. + */ + if (fs->fs_inodefmt < FS_44INODEFMT) { /* XXX */ + ip->i_uid = ip->i_din.di_ouid; /* XXX */ + ip->i_gid = ip->i_din.di_ogid; /* XXX */ + } /* XXX */ + + *vpp = vp; + return (0); +} + +/* + * File handle to vnode + * + * Have to be really careful about stale file handles: + * - check that the inode number is valid + * - call ffs_vget() to get the locked inode + * - check for an unallocated inode (i_mode == 0) + * - check that the given client host has export rights and return + * those rights via. 
exflagsp and credanonp + */ +int +ffs_fhtovp(mp, fhp, nam, vpp, exflagsp, credanonp) + register struct mount *mp; + struct fid *fhp; + struct mbuf *nam; + struct vnode **vpp; + int *exflagsp; + struct ucred **credanonp; +{ + register struct ufid *ufhp; + struct fs *fs; + + ufhp = (struct ufid *)fhp; + fs = VFSTOUFS(mp)->um_fs; + if (ufhp->ufid_ino < ROOTINO || + ufhp->ufid_ino >= fs->fs_ncg * fs->fs_ipg) + return (ESTALE); + return (ufs_check_export(mp, ufhp, nam, vpp, exflagsp, credanonp)); +} + +/* + * Vnode pointer to File handle + */ +/* ARGSUSED */ +ffs_vptofh(vp, fhp) + struct vnode *vp; + struct fid *fhp; +{ + register struct inode *ip; + register struct ufid *ufhp; + + ip = VTOI(vp); + ufhp = (struct ufid *)fhp; + ufhp->ufid_len = sizeof(struct ufid); + ufhp->ufid_ino = ip->i_number; + ufhp->ufid_gen = ip->i_gen; + return (0); +} + +/* + * Write a superblock and associated information back to disk. + */ +int +ffs_sbupdate(mp, waitfor) + struct ufsmount *mp; + int waitfor; +{ + register struct fs *fs = mp->um_fs; + register struct buf *bp; + int blks; + caddr_t space; + int i, size, error = 0; + + bp = getblk(mp->um_devvp, SBLOCK, (int)fs->fs_sbsize, 0, 0); + bcopy((caddr_t)fs, bp->b_data, (u_int)fs->fs_sbsize); + /* Restore compatibility to old file systems. XXX */ + if (fs->fs_postblformat == FS_42POSTBLFMT) /* XXX */ + ((struct fs *)bp->b_data)->fs_nrpos = -1; /* XXX */ + if (waitfor == MNT_WAIT) + error = bwrite(bp); + else + bawrite(bp); + blks = howmany(fs->fs_cssize, fs->fs_fsize); + space = (caddr_t)fs->fs_csp[0]; + for (i = 0; i < blks; i += fs->fs_frag) { + size = fs->fs_bsize; + if (i + fs->fs_frag > blks) + size = (blks - i) * fs->fs_fsize; + bp = getblk(mp->um_devvp, fsbtodb(fs, fs->fs_csaddr + i), + size, 0, 0); + bcopy(space, bp->b_data, (u_int)size); + space += size; + if (waitfor == MNT_WAIT) + error = bwrite(bp); + else + bawrite(bp); + } + return (error); +} diff --git a/sys/ufs/ffs/ffs_vnops.c b/sys/ufs/ffs/ffs_vnops.c new file mode 100644 index 0000000..59814f2 --- /dev/null +++ b/sys/ufs/ffs/ffs_vnops.c @@ -0,0 +1,288 @@ +/* + * Copyright (c) 1982, 1986, 1989, 1993 + * The Regents of the University of California. All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * 3. All advertising materials mentioning features or use of this software + * must display the following acknowledgement: + * This product includes software developed by the University of + * California, Berkeley and its contributors. + * 4. Neither the name of the University nor the names of its contributors + * may be used to endorse or promote products derived from this software + * without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND + * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. 
IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE + * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL + * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS + * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) + * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT + * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY + * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF + * SUCH DAMAGE. + * + * @(#)ffs_vnops.c 8.7 (Berkeley) 2/3/94 + */ + +#include <sys/param.h> +#include <sys/systm.h> +#include <sys/resourcevar.h> +#include <sys/kernel.h> +#include <sys/file.h> +#include <sys/stat.h> +#include <sys/buf.h> +#include <sys/proc.h> +#include <sys/conf.h> +#include <sys/mount.h> +#include <sys/vnode.h> +#include <sys/malloc.h> + +#include <vm/vm.h> + +#include <miscfs/specfs/specdev.h> +#include <miscfs/fifofs/fifo.h> + +#include <ufs/ufs/lockf.h> +#include <ufs/ufs/quota.h> +#include <ufs/ufs/inode.h> +#include <ufs/ufs/dir.h> +#include <ufs/ufs/ufs_extern.h> + +#include <ufs/ffs/fs.h> +#include <ufs/ffs/ffs_extern.h> + +/* Global vfs data structures for ufs. */ +int (**ffs_vnodeop_p)(); +struct vnodeopv_entry_desc ffs_vnodeop_entries[] = { + { &vop_default_desc, vn_default_error }, + { &vop_lookup_desc, ufs_lookup }, /* lookup */ + { &vop_create_desc, ufs_create }, /* create */ + { &vop_mknod_desc, ufs_mknod }, /* mknod */ + { &vop_open_desc, ufs_open }, /* open */ + { &vop_close_desc, ufs_close }, /* close */ + { &vop_access_desc, ufs_access }, /* access */ + { &vop_getattr_desc, ufs_getattr }, /* getattr */ + { &vop_setattr_desc, ufs_setattr }, /* setattr */ + { &vop_read_desc, ffs_read }, /* read */ + { &vop_write_desc, ffs_write }, /* write */ + { &vop_ioctl_desc, ufs_ioctl }, /* ioctl */ + { &vop_select_desc, ufs_select }, /* select */ + { &vop_mmap_desc, ufs_mmap }, /* mmap */ + { &vop_fsync_desc, ffs_fsync }, /* fsync */ + { &vop_seek_desc, ufs_seek }, /* seek */ + { &vop_remove_desc, ufs_remove }, /* remove */ + { &vop_link_desc, ufs_link }, /* link */ + { &vop_rename_desc, ufs_rename }, /* rename */ + { &vop_mkdir_desc, ufs_mkdir }, /* mkdir */ + { &vop_rmdir_desc, ufs_rmdir }, /* rmdir */ + { &vop_symlink_desc, ufs_symlink }, /* symlink */ + { &vop_readdir_desc, ufs_readdir }, /* readdir */ + { &vop_readlink_desc, ufs_readlink }, /* readlink */ + { &vop_abortop_desc, ufs_abortop }, /* abortop */ + { &vop_inactive_desc, ufs_inactive }, /* inactive */ + { &vop_reclaim_desc, ufs_reclaim }, /* reclaim */ + { &vop_lock_desc, ufs_lock }, /* lock */ + { &vop_unlock_desc, ufs_unlock }, /* unlock */ + { &vop_bmap_desc, ufs_bmap }, /* bmap */ + { &vop_strategy_desc, ufs_strategy }, /* strategy */ + { &vop_print_desc, ufs_print }, /* print */ + { &vop_islocked_desc, ufs_islocked }, /* islocked */ + { &vop_pathconf_desc, ufs_pathconf }, /* pathconf */ + { &vop_advlock_desc, ufs_advlock }, /* advlock */ + { &vop_blkatoff_desc, ffs_blkatoff }, /* blkatoff */ + { &vop_valloc_desc, ffs_valloc }, /* valloc */ + { &vop_reallocblks_desc, ffs_reallocblks }, /* reallocblks */ + { &vop_vfree_desc, ffs_vfree }, /* vfree */ + { &vop_truncate_desc, ffs_truncate }, /* truncate */ + { &vop_update_desc, ffs_update }, /* update */ + { &vop_bwrite_desc, vn_bwrite }, + { (struct vnodeop_desc*)NULL, (int(*)())NULL } +}; +struct vnodeopv_desc ffs_vnodeop_opv_desc = + { &ffs_vnodeop_p, ffs_vnodeop_entries }; + +int (**ffs_specop_p)(); +struct vnodeopv_entry_desc 
ffs_specop_entries[] = { + { &vop_default_desc, vn_default_error }, + { &vop_lookup_desc, spec_lookup }, /* lookup */ + { &vop_create_desc, spec_create }, /* create */ + { &vop_mknod_desc, spec_mknod }, /* mknod */ + { &vop_open_desc, spec_open }, /* open */ + { &vop_close_desc, ufsspec_close }, /* close */ + { &vop_access_desc, ufs_access }, /* access */ + { &vop_getattr_desc, ufs_getattr }, /* getattr */ + { &vop_setattr_desc, ufs_setattr }, /* setattr */ + { &vop_read_desc, ufsspec_read }, /* read */ + { &vop_write_desc, ufsspec_write }, /* write */ + { &vop_ioctl_desc, spec_ioctl }, /* ioctl */ + { &vop_select_desc, spec_select }, /* select */ + { &vop_mmap_desc, spec_mmap }, /* mmap */ + { &vop_fsync_desc, ffs_fsync }, /* fsync */ + { &vop_seek_desc, spec_seek }, /* seek */ + { &vop_remove_desc, spec_remove }, /* remove */ + { &vop_link_desc, spec_link }, /* link */ + { &vop_rename_desc, spec_rename }, /* rename */ + { &vop_mkdir_desc, spec_mkdir }, /* mkdir */ + { &vop_rmdir_desc, spec_rmdir }, /* rmdir */ + { &vop_symlink_desc, spec_symlink }, /* symlink */ + { &vop_readdir_desc, spec_readdir }, /* readdir */ + { &vop_readlink_desc, spec_readlink }, /* readlink */ + { &vop_abortop_desc, spec_abortop }, /* abortop */ + { &vop_inactive_desc, ufs_inactive }, /* inactive */ + { &vop_reclaim_desc, ufs_reclaim }, /* reclaim */ + { &vop_lock_desc, ufs_lock }, /* lock */ + { &vop_unlock_desc, ufs_unlock }, /* unlock */ + { &vop_bmap_desc, spec_bmap }, /* bmap */ + { &vop_strategy_desc, spec_strategy }, /* strategy */ + { &vop_print_desc, ufs_print }, /* print */ + { &vop_islocked_desc, ufs_islocked }, /* islocked */ + { &vop_pathconf_desc, spec_pathconf }, /* pathconf */ + { &vop_advlock_desc, spec_advlock }, /* advlock */ + { &vop_blkatoff_desc, spec_blkatoff }, /* blkatoff */ + { &vop_valloc_desc, spec_valloc }, /* valloc */ + { &vop_reallocblks_desc, spec_reallocblks }, /* reallocblks */ + { &vop_vfree_desc, ffs_vfree }, /* vfree */ + { &vop_truncate_desc, spec_truncate }, /* truncate */ + { &vop_update_desc, ffs_update }, /* update */ + { &vop_bwrite_desc, vn_bwrite }, + { (struct vnodeop_desc*)NULL, (int(*)())NULL } +}; +struct vnodeopv_desc ffs_specop_opv_desc = + { &ffs_specop_p, ffs_specop_entries }; + +#ifdef FIFO +int (**ffs_fifoop_p)(); +struct vnodeopv_entry_desc ffs_fifoop_entries[] = { + { &vop_default_desc, vn_default_error }, + { &vop_lookup_desc, fifo_lookup }, /* lookup */ + { &vop_create_desc, fifo_create }, /* create */ + { &vop_mknod_desc, fifo_mknod }, /* mknod */ + { &vop_open_desc, fifo_open }, /* open */ + { &vop_close_desc, ufsfifo_close }, /* close */ + { &vop_access_desc, ufs_access }, /* access */ + { &vop_getattr_desc, ufs_getattr }, /* getattr */ + { &vop_setattr_desc, ufs_setattr }, /* setattr */ + { &vop_read_desc, ufsfifo_read }, /* read */ + { &vop_write_desc, ufsfifo_write }, /* write */ + { &vop_ioctl_desc, fifo_ioctl }, /* ioctl */ + { &vop_select_desc, fifo_select }, /* select */ + { &vop_mmap_desc, fifo_mmap }, /* mmap */ + { &vop_fsync_desc, ffs_fsync }, /* fsync */ + { &vop_seek_desc, fifo_seek }, /* seek */ + { &vop_remove_desc, fifo_remove }, /* remove */ + { &vop_link_desc, fifo_link }, /* link */ + { &vop_rename_desc, fifo_rename }, /* rename */ + { &vop_mkdir_desc, fifo_mkdir }, /* mkdir */ + { &vop_rmdir_desc, fifo_rmdir }, /* rmdir */ + { &vop_symlink_desc, fifo_symlink }, /* symlink */ + { &vop_readdir_desc, fifo_readdir }, /* readdir */ + { &vop_readlink_desc, fifo_readlink }, /* readlink */ + { &vop_abortop_desc, fifo_abortop }, /* abortop 
*/ + { &vop_inactive_desc, ufs_inactive }, /* inactive */ + { &vop_reclaim_desc, ufs_reclaim }, /* reclaim */ + { &vop_lock_desc, ufs_lock }, /* lock */ + { &vop_unlock_desc, ufs_unlock }, /* unlock */ + { &vop_bmap_desc, fifo_bmap }, /* bmap */ + { &vop_strategy_desc, fifo_strategy }, /* strategy */ + { &vop_print_desc, ufs_print }, /* print */ + { &vop_islocked_desc, ufs_islocked }, /* islocked */ + { &vop_pathconf_desc, fifo_pathconf }, /* pathconf */ + { &vop_advlock_desc, fifo_advlock }, /* advlock */ + { &vop_blkatoff_desc, fifo_blkatoff }, /* blkatoff */ + { &vop_valloc_desc, fifo_valloc }, /* valloc */ + { &vop_reallocblks_desc, fifo_reallocblks }, /* reallocblks */ + { &vop_vfree_desc, ffs_vfree }, /* vfree */ + { &vop_truncate_desc, fifo_truncate }, /* truncate */ + { &vop_update_desc, ffs_update }, /* update */ + { &vop_bwrite_desc, vn_bwrite }, + { (struct vnodeop_desc*)NULL, (int(*)())NULL } +}; +struct vnodeopv_desc ffs_fifoop_opv_desc = + { &ffs_fifoop_p, ffs_fifoop_entries }; +#endif /* FIFO */ + +#ifdef DEBUG +/* + * Enabling cluster read/write operations. + */ +#include <sys/sysctl.h> +int doclusterread = 1; +struct ctldebug debug11 = { "doclusterread", &doclusterread }; +int doclusterwrite = 1; +struct ctldebug debug12 = { "doclusterwrite", &doclusterwrite }; +#else +/* XXX for ufs_readwrite */ +#define doclusterread 1 +#define doclusterwrite 1 +#endif + +#include <ufs/ufs/ufs_readwrite.c> + +/* + * Synch an open file. + */ +/* ARGSUSED */ +int +ffs_fsync(ap) + struct vop_fsync_args /* { + struct vnode *a_vp; + struct ucred *a_cred; + int a_waitfor; + struct proc *a_p; + } */ *ap; +{ + register struct vnode *vp = ap->a_vp; + register struct buf *bp; + struct timeval tv; + struct buf *nbp; + int s; + + /* + * Flush all dirty buffers associated with a vnode. + */ +loop: + s = splbio(); + for (bp = vp->v_dirtyblkhd.lh_first; bp; bp = nbp) { + nbp = bp->b_vnbufs.le_next; + if ((bp->b_flags & B_BUSY)) + continue; + if ((bp->b_flags & B_DELWRI) == 0) + panic("ffs_fsync: not dirty"); + bremfree(bp); + bp->b_flags |= B_BUSY; + splx(s); + /* + * Wait for I/O associated with indirect blocks to complete, + * since there is no way to quickly wait for them below. + */ + if (bp->b_vp == vp || ap->a_waitfor == MNT_NOWAIT) + (void) bawrite(bp); + else + (void) bwrite(bp); + goto loop; + } + if (ap->a_waitfor == MNT_WAIT) { + while (vp->v_numoutput) { + vp->v_flag |= VBWAIT; + sleep((caddr_t)&vp->v_numoutput, PRIBIO + 1); + } +#ifdef DIAGNOSTIC + if (vp->v_dirtyblkhd.lh_first) { + vprint("ffs_fsync: dirty", vp); + goto loop; + } +#endif + } + splx(s); + tv = time; + return (VOP_UPDATE(ap->a_vp, &tv, &tv, ap->a_waitfor == MNT_WAIT)); +} diff --git a/sys/ufs/ffs/fs.h b/sys/ufs/ffs/fs.h new file mode 100644 index 0000000..bef052f --- /dev/null +++ b/sys/ufs/ffs/fs.h @@ -0,0 +1,489 @@ +/* + * Copyright (c) 1982, 1986, 1993 + * The Regents of the University of California. All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * 3. 
All advertising materials mentioning features or use of this software
+ * must display the following acknowledgement:
+ * This product includes software developed by the University of
+ * California, Berkeley and its contributors.
+ * 4. Neither the name of the University nor the names of its contributors
+ * may be used to endorse or promote products derived from this software
+ * without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ *
+ * @(#)fs.h 8.7 (Berkeley) 4/19/94
+ */
+
+/*
+ * Each disk drive contains some number of file systems.
+ * A file system consists of a number of cylinder groups.
+ * Each cylinder group has inodes and data.
+ *
+ * A file system is described by its super-block, which in turn
+ * describes the cylinder groups. The super-block is critical
+ * data and is replicated in each cylinder group to protect against
+ * catastrophic loss. This is done at `newfs' time and the critical
+ * super-block data does not change, so the copies need not be
+ * referenced further unless disaster strikes.
+ *
+ * For file system fs, the offsets of the various blocks of interest
+ * are given in the super block as:
+ * [fs->fs_sblkno] Super-block
+ * [fs->fs_cblkno] Cylinder group block
+ * [fs->fs_iblkno] Inode blocks
+ * [fs->fs_dblkno] Data blocks
+ * The beginning of cylinder group cg in fs is given by
+ * the ``cgbase(fs, cg)'' macro.
+ *
+ * The first boot and super blocks are given in absolute disk addresses.
+ * The byte-offset forms are preferred, as they don't imply a sector size.
+ */
+#define BBSIZE 8192
+#define SBSIZE 8192
+#define BBOFF ((off_t)(0))
+#define SBOFF ((off_t)(BBOFF + BBSIZE))
+#define BBLOCK ((daddr_t)(0))
+#define SBLOCK ((daddr_t)(BBLOCK + BBSIZE / DEV_BSIZE))
+
+/*
+ * Addresses stored in inodes are capable of addressing fragments
+ * of `blocks'. File system blocks of at most size MAXBSIZE can
+ * be optionally broken into 2, 4, or 8 pieces, each of which is
+ * addressable; these pieces may be DEV_BSIZE, or some multiple of
+ * a DEV_BSIZE unit.
+ *
+ * Large files consist exclusively of large data blocks. To avoid
+ * undue wasted disk space, the last data block of a small file may be
+ * allocated only as many fragments of a large block as are
+ * necessary. The file system format retains only a single pointer
+ * to such a fragment, which is a piece of a single large block that
+ * has been divided. The size of such a fragment is determinable from
+ * information in the inode, using the ``blksize(fs, ip, lbn)'' macro.
+ *
+ * The file system records space availability at the fragment level;
+ * to determine block availability, aligned fragments are examined.
+ */
+
+/*
+ * MINBSIZE is the smallest allowable block size.
+ * In order to insure that it is possible to create files of size
+ * 2^32 with only two levels of indirection, MINBSIZE is set to 4096.
+ * MINBSIZE must be big enough to hold a cylinder group block,
+ * thus changes to (struct cg) must keep its size within MINBSIZE.
+ * Note that super blocks are always of size SBSIZE,
+ * and that both SBSIZE and MAXBSIZE must be >= MINBSIZE.
+ */
+#define MINBSIZE 4096
+
+/*
+ * The path name on which the file system is mounted is maintained
+ * in fs_fsmnt. MAXMNTLEN defines the amount of space allocated in
+ * the super block for this name.
+ * The limit on the amount of summary information per file system
+ * is defined by MAXCSBUFS. It is currently parameterized for a
+ * maximum of two million cylinders.
+ */
+#define MAXMNTLEN 512
+#define MAXCSBUFS 32
+
+/*
+ * A summary of contiguous blocks of various sizes is maintained
+ * in each cylinder group. Normally this is set by the initial
+ * value of fs_maxcontig. To conserve space, a maximum summary size
+ * is set by FS_MAXCONTIG.
+ */
+#define FS_MAXCONTIG 16
+
+/*
+ * MINFREE gives the minimum acceptable percentage of file system
+ * blocks which may be free. If the freelist drops below this level
+ * only the superuser may continue to allocate blocks. This may
+ * be set to 0 if no reserve of free blocks is deemed necessary;
+ * however, throughput drops by fifty percent if the file system
+ * is run at between 95% and 100% full; thus the minimum default
+ * value of fs_minfree is 5%. However, to get good clustering
+ * performance, 10% is a better choice. Hence we use 10% as our
+ * default value. With 10% free space, fragmentation is not a
+ * problem, so we choose to optimize for time.
+ */
+#define MINFREE 5
+#define DEFAULTOPT FS_OPTTIME
+
+/*
+ * Per cylinder group information; summarized in blocks allocated
+ * from first cylinder group data blocks. These blocks have to be
+ * read in from fs_csaddr (size fs_cssize) in addition to the
+ * super block.
+ *
+ * N.B. sizeof(struct csum) must be a power of two in order for
+ * the ``fs_cs'' macro to work (see below).
+ */
+struct csum {
+ long cs_ndir; /* number of directories */
+ long cs_nbfree; /* number of free blocks */
+ long cs_nifree; /* number of free inodes */
+ long cs_nffree; /* number of free frags */
+};
+
+/*
+ * Super block for a file system.
+ */ +struct fs { + struct fs *fs_link; /* linked list of file systems */ + struct fs *fs_rlink; /* used for incore super blocks */ + daddr_t fs_sblkno; /* addr of super-block in filesys */ + daddr_t fs_cblkno; /* offset of cyl-block in filesys */ + daddr_t fs_iblkno; /* offset of inode-blocks in filesys */ + daddr_t fs_dblkno; /* offset of first data after cg */ + long fs_cgoffset; /* cylinder group offset in cylinder */ + long fs_cgmask; /* used to calc mod fs_ntrak */ + time_t fs_time; /* last time written */ + long fs_size; /* number of blocks in fs */ + long fs_dsize; /* number of data blocks in fs */ + long fs_ncg; /* number of cylinder groups */ + long fs_bsize; /* size of basic blocks in fs */ + long fs_fsize; /* size of frag blocks in fs */ + long fs_frag; /* number of frags in a block in fs */ +/* these are configuration parameters */ + long fs_minfree; /* minimum percentage of free blocks */ + long fs_rotdelay; /* num of ms for optimal next block */ + long fs_rps; /* disk revolutions per second */ +/* these fields can be computed from the others */ + long fs_bmask; /* ``blkoff'' calc of blk offsets */ + long fs_fmask; /* ``fragoff'' calc of frag offsets */ + long fs_bshift; /* ``lblkno'' calc of logical blkno */ + long fs_fshift; /* ``numfrags'' calc number of frags */ +/* these are configuration parameters */ + long fs_maxcontig; /* max number of contiguous blks */ + long fs_maxbpg; /* max number of blks per cyl group */ +/* these fields can be computed from the others */ + long fs_fragshift; /* block to frag shift */ + long fs_fsbtodb; /* fsbtodb and dbtofsb shift constant */ + long fs_sbsize; /* actual size of super block */ + long fs_csmask; /* csum block offset */ + long fs_csshift; /* csum block number */ + long fs_nindir; /* value of NINDIR */ + long fs_inopb; /* value of INOPB */ + long fs_nspf; /* value of NSPF */ +/* yet another configuration parameter */ + long fs_optim; /* optimization preference, see below */ +/* these fields are derived from the hardware */ + long fs_npsect; /* # sectors/track including spares */ + long fs_interleave; /* hardware sector interleave */ + long fs_trackskew; /* sector 0 skew, per track */ + long fs_headswitch; /* head switch time, usec */ + long fs_trkseek; /* track-to-track seek, usec */ +/* sizes determined by number of cylinder groups and their sizes */ + daddr_t fs_csaddr; /* blk addr of cyl grp summary area */ + long fs_cssize; /* size of cyl grp summary area */ + long fs_cgsize; /* cylinder group size */ +/* these fields are derived from the hardware */ + long fs_ntrak; /* tracks per cylinder */ + long fs_nsect; /* sectors per track */ + long fs_spc; /* sectors per cylinder */ +/* this comes from the disk driver partitioning */ + long fs_ncyl; /* cylinders in file system */ +/* these fields can be computed from the others */ + long fs_cpg; /* cylinders per group */ + long fs_ipg; /* inodes per group */ + long fs_fpg; /* blocks per group * fs_frag */ +/* this data must be re-computed after crashes */ + struct csum fs_cstotal; /* cylinder summary information */ +/* these fields are cleared at mount time */ + char fs_fmod; /* super block modified flag */ + char fs_clean; /* file system is clean flag */ + char fs_ronly; /* mounted read-only flag */ + char fs_flags; /* currently unused flag */ + char fs_fsmnt[MAXMNTLEN]; /* name mounted on */ +/* these fields retain the current block allocation info */ + long fs_cgrotor; /* last cg searched */ + struct csum *fs_csp[MAXCSBUFS];/* list of fs_cs info buffers */ + long fs_cpc; /* cyl per 
cycle in postbl */
+ short fs_opostbl[16][8]; /* old rotation block list head */
+ long fs_sparecon[50]; /* reserved for future constants */
+ long fs_contigsumsize; /* size of cluster summary array */
+ long fs_maxsymlinklen; /* max length of an internal symlink */
+ long fs_inodefmt; /* format of on-disk inodes */
+ u_quad_t fs_maxfilesize; /* maximum representable file size */
+ quad_t fs_qbmask; /* ~fs_bmask - for use with quad size */
+ quad_t fs_qfmask; /* ~fs_fmask - for use with quad size */
+ long fs_state; /* validate fs_clean field */
+ long fs_postblformat; /* format of positional layout tables */
+ long fs_nrpos; /* number of rotational positions */
+ long fs_postbloff; /* (short) rotation block list head */
+ long fs_rotbloff; /* (u_char) blocks for each rotation */
+ long fs_magic; /* magic number */
+ u_char fs_space[1]; /* list of blocks for each rotation */
+/* actually longer */
+};
+/*
+ * Filesystem identification
+ */
+#define FS_MAGIC 0x011954 /* the fast filesystem magic number */
+#define FS_OKAY 0x7c269d38 /* superblock checksum */
+#define FS_42INODEFMT -1 /* 4.2BSD inode format */
+#define FS_44INODEFMT 2 /* 4.4BSD inode format */
+/*
+ * Preference for optimization.
+ */
+#define FS_OPTTIME 0 /* minimize allocation time */
+#define FS_OPTSPACE 1 /* minimize disk fragmentation */
+
+/*
+ * Rotational layout table format types
+ */
+#define FS_42POSTBLFMT -1 /* 4.2BSD rotational table format */
+#define FS_DYNAMICPOSTBLFMT 1 /* dynamic rotational table format */
+/*
+ * Macros for access to superblock array structures
+ */
+#define fs_postbl(fs, cylno) \
+ (((fs)->fs_postblformat == FS_42POSTBLFMT) \
+ ? ((fs)->fs_opostbl[cylno]) \
+ : ((short *)((char *)(fs) + (fs)->fs_postbloff) + (cylno) * (fs)->fs_nrpos))
+#define fs_rotbl(fs) \
+ (((fs)->fs_postblformat == FS_42POSTBLFMT) \
+ ? ((fs)->fs_space) \
+ : ((u_char *)((char *)(fs) + (fs)->fs_rotbloff)))
+
+/*
+ * The size of a cylinder group is calculated by CGSIZE. The maximum size
+ * is limited by the fact that cylinder groups are at most one block.
+ * Its size is derived from the size of the maps maintained in the
+ * cylinder group and the (struct cg) size.
+ */
+#define CGSIZE(fs) \
+ /* base cg */ (sizeof(struct cg) + sizeof(long) + \
+ /* blktot size */ (fs)->fs_cpg * sizeof(long) + \
+ /* blks size */ (fs)->fs_cpg * (fs)->fs_nrpos * sizeof(short) + \
+ /* inode map */ howmany((fs)->fs_ipg, NBBY) + \
+ /* block map */ howmany((fs)->fs_cpg * (fs)->fs_spc / NSPF(fs), NBBY) +\
+ /* if present */ ((fs)->fs_contigsumsize <= 0 ? 0 : \
+ /* cluster sum */ (fs)->fs_contigsumsize * sizeof(long) + \
+ /* cluster map */ howmany((fs)->fs_cpg * (fs)->fs_spc / NSPB(fs), NBBY)))
+
+/*
+ * Convert cylinder group to base address of its global summary info.
+ *
+ * N.B. This macro assumes that sizeof(struct csum) is a power of two.
+ */
+#define fs_cs(fs, indx) \
+ fs_csp[(indx) >> (fs)->fs_csshift][(indx) & ~(fs)->fs_csmask]
+
+/*
+ * Cylinder group block for a file system.
+ */ +#define CG_MAGIC 0x090255 +struct cg { + struct cg *cg_link; /* linked list of cyl groups */ + long cg_magic; /* magic number */ + time_t cg_time; /* time last written */ + long cg_cgx; /* we are the cgx'th cylinder group */ + short cg_ncyl; /* number of cyl's this cg */ + short cg_niblk; /* number of inode blocks this cg */ + long cg_ndblk; /* number of data blocks this cg */ + struct csum cg_cs; /* cylinder summary information */ + long cg_rotor; /* position of last used block */ + long cg_frotor; /* position of last used frag */ + long cg_irotor; /* position of last used inode */ + long cg_frsum[MAXFRAG]; /* counts of available frags */ + long cg_btotoff; /* (long) block totals per cylinder */ + long cg_boff; /* (short) free block positions */ + long cg_iusedoff; /* (char) used inode map */ + long cg_freeoff; /* (u_char) free block map */ + long cg_nextfreeoff; /* (u_char) next available space */ + long cg_clustersumoff; /* (long) counts of avail clusters */ + long cg_clusteroff; /* (char) free cluster map */ + long cg_nclusterblks; /* number of clusters this cg */ + long cg_sparecon[13]; /* reserved for future use */ + u_char cg_space[1]; /* space for cylinder group maps */ +/* actually longer */ +}; +/* + * Macros for access to cylinder group array structures + */ +#define cg_blktot(cgp) \ + (((cgp)->cg_magic != CG_MAGIC) \ + ? (((struct ocg *)(cgp))->cg_btot) \ + : ((long *)((char *)(cgp) + (cgp)->cg_btotoff))) +#define cg_blks(fs, cgp, cylno) \ + (((cgp)->cg_magic != CG_MAGIC) \ + ? (((struct ocg *)(cgp))->cg_b[cylno]) \ + : ((short *)((char *)(cgp) + (cgp)->cg_boff) + (cylno) * (fs)->fs_nrpos)) +#define cg_inosused(cgp) \ + (((cgp)->cg_magic != CG_MAGIC) \ + ? (((struct ocg *)(cgp))->cg_iused) \ + : ((char *)((char *)(cgp) + (cgp)->cg_iusedoff))) +#define cg_blksfree(cgp) \ + (((cgp)->cg_magic != CG_MAGIC) \ + ? (((struct ocg *)(cgp))->cg_free) \ + : ((u_char *)((char *)(cgp) + (cgp)->cg_freeoff))) +#define cg_chkmagic(cgp) \ + ((cgp)->cg_magic == CG_MAGIC || ((struct ocg *)(cgp))->cg_magic == CG_MAGIC) +#define cg_clustersfree(cgp) \ + ((u_char *)((char *)(cgp) + (cgp)->cg_clusteroff)) +#define cg_clustersum(cgp) \ + ((long *)((char *)(cgp) + (cgp)->cg_clustersumoff)) + +/* + * The following structure is defined + * for compatibility with old file systems. + */ +struct ocg { + struct ocg *cg_link; /* linked list of cyl groups */ + struct ocg *cg_rlink; /* used for incore cyl groups */ + time_t cg_time; /* time last written */ + long cg_cgx; /* we are the cgx'th cylinder group */ + short cg_ncyl; /* number of cyl's this cg */ + short cg_niblk; /* number of inode blocks this cg */ + long cg_ndblk; /* number of data blocks this cg */ + struct csum cg_cs; /* cylinder summary information */ + long cg_rotor; /* position of last used block */ + long cg_frotor; /* position of last used frag */ + long cg_irotor; /* position of last used inode */ + long cg_frsum[8]; /* counts of available frags */ + long cg_btot[32]; /* block totals per cylinder */ + short cg_b[32][8]; /* positions of free blocks */ + char cg_iused[256]; /* used inode map */ + long cg_magic; /* magic number */ + u_char cg_free[1]; /* free block map */ +/* actually longer */ +}; + +/* + * Turn file system block numbers into disk block addresses. + * This maps file system blocks to device size blocks. + */ +#define fsbtodb(fs, b) ((b) << (fs)->fs_fsbtodb) +#define dbtofsb(fs, b) ((b) >> (fs)->fs_fsbtodb) + +/* + * Cylinder group macros to locate things in cylinder groups. 
+ * They calc file system addresses of cylinder group data structures. + */ +#define cgbase(fs, c) ((daddr_t)((fs)->fs_fpg * (c))) +#define cgdmin(fs, c) (cgstart(fs, c) + (fs)->fs_dblkno) /* 1st data */ +#define cgimin(fs, c) (cgstart(fs, c) + (fs)->fs_iblkno) /* inode blk */ +#define cgsblock(fs, c) (cgstart(fs, c) + (fs)->fs_sblkno) /* super blk */ +#define cgtod(fs, c) (cgstart(fs, c) + (fs)->fs_cblkno) /* cg block */ +#define cgstart(fs, c) \ + (cgbase(fs, c) + (fs)->fs_cgoffset * ((c) & ~((fs)->fs_cgmask))) + +/* + * Macros for handling inode numbers: + * inode number to file system block offset. + * inode number to cylinder group number. + * inode number to file system block address. + */ +#define ino_to_cg(fs, x) ((x) / (fs)->fs_ipg) +#define ino_to_fsba(fs, x) \ + ((daddr_t)(cgimin(fs, ino_to_cg(fs, x)) + \ + (blkstofrags((fs), (((x) % (fs)->fs_ipg) / INOPB(fs)))))) +#define ino_to_fsbo(fs, x) ((x) % INOPB(fs)) + +/* + * Give cylinder group number for a file system block. + * Give cylinder group block number for a file system block. + */ +#define dtog(fs, d) ((d) / (fs)->fs_fpg) +#define dtogd(fs, d) ((d) % (fs)->fs_fpg) + +/* + * Extract the bits for a block from a map. + * Compute the cylinder and rotational position of a cyl block addr. + */ +#define blkmap(fs, map, loc) \ + (((map)[(loc) / NBBY] >> ((loc) % NBBY)) & (0xff >> (NBBY - (fs)->fs_frag))) +#define cbtocylno(fs, bno) \ + ((bno) * NSPF(fs) / (fs)->fs_spc) +#define cbtorpos(fs, bno) \ + (((bno) * NSPF(fs) % (fs)->fs_spc / (fs)->fs_nsect * (fs)->fs_trackskew + \ + (bno) * NSPF(fs) % (fs)->fs_spc % (fs)->fs_nsect * (fs)->fs_interleave) % \ + (fs)->fs_nsect * (fs)->fs_nrpos / (fs)->fs_npsect) + +/* + * The following macros optimize certain frequently calculated + * quantities by using shifts and masks in place of divisions + * modulos and multiplications. + */ +#define blkoff(fs, loc) /* calculates (loc % fs->fs_bsize) */ \ + ((loc) & (fs)->fs_qbmask) +#define fragoff(fs, loc) /* calculates (loc % fs->fs_fsize) */ \ + ((loc) & (fs)->fs_qfmask) +#define lblktosize(fs, blk) /* calculates (blk * fs->fs_bsize) */ \ + ((blk) << (fs)->fs_bshift) +#define lblkno(fs, loc) /* calculates (loc / fs->fs_bsize) */ \ + ((loc) >> (fs)->fs_bshift) +#define numfrags(fs, loc) /* calculates (loc / fs->fs_fsize) */ \ + ((loc) >> (fs)->fs_fshift) +#define blkroundup(fs, size) /* calculates roundup(size, fs->fs_bsize) */ \ + (((size) + (fs)->fs_qbmask) & (fs)->fs_bmask) +#define fragroundup(fs, size) /* calculates roundup(size, fs->fs_fsize) */ \ + (((size) + (fs)->fs_qfmask) & (fs)->fs_fmask) +#define fragstoblks(fs, frags) /* calculates (frags / fs->fs_frag) */ \ + ((frags) >> (fs)->fs_fragshift) +#define blkstofrags(fs, blks) /* calculates (blks * fs->fs_frag) */ \ + ((blks) << (fs)->fs_fragshift) +#define fragnum(fs, fsb) /* calculates (fsb % fs->fs_frag) */ \ + ((fsb) & ((fs)->fs_frag - 1)) +#define blknum(fs, fsb) /* calculates rounddown(fsb, fs->fs_frag) */ \ + ((fsb) &~ ((fs)->fs_frag - 1)) + +/* + * Determine the number of available frags given a + * percentage to hold in reserve + */ +#define freespace(fs, percentreserved) \ + (blkstofrags((fs), (fs)->fs_cstotal.cs_nbfree) + \ + (fs)->fs_cstotal.cs_nffree - ((fs)->fs_dsize * (percentreserved) / 100)) + +/* + * Determining the size of a file block in the file system. + */ +#define blksize(fs, ip, lbn) \ + (((lbn) >= NDADDR || (ip)->i_size >= ((lbn) + 1) << (fs)->fs_bshift) \ + ? 
(fs)->fs_bsize \ + : (fragroundup(fs, blkoff(fs, (ip)->i_size)))) +#define dblksize(fs, dip, lbn) \ + (((lbn) >= NDADDR || (dip)->di_size >= ((lbn) + 1) << (fs)->fs_bshift) \ + ? (fs)->fs_bsize \ + : (fragroundup(fs, blkoff(fs, (dip)->di_size)))) + +/* + * Number of disk sectors per block; assumes DEV_BSIZE byte sector size. + */ +#define NSPB(fs) ((fs)->fs_nspf << (fs)->fs_fragshift) +#define NSPF(fs) ((fs)->fs_nspf) + +/* + * INOPB is the number of inodes in a secondary storage block. + */ +#define INOPB(fs) ((fs)->fs_inopb) +#define INOPF(fs) ((fs)->fs_inopb >> (fs)->fs_fragshift) + +/* + * NINDIR is the number of indirects in a file system block. + */ +#define NINDIR(fs) ((fs)->fs_nindir) + +extern int inside[], around[]; +extern u_char *fragtbl[]; diff --git a/sys/ufs/lfs/README b/sys/ufs/lfs/README new file mode 100644 index 0000000..724b18f --- /dev/null +++ b/sys/ufs/lfs/README @@ -0,0 +1,139 @@ +# @(#)README 8.1 (Berkeley) 6/11/93 + +The file system is reasonably stable, but incomplete. There are +places where cleaning performance can be improved dramatically (see +comments in lfs_syscalls.c). For details on the implementation, +performance and why garbage collection always wins, see Dr. Margo +Seltzer's thesis available for anonymous ftp from toe.cs.berkeley.edu, +in the directory pub/personal/margo/thesis.ps.Z, or the January 1993 +USENIX paper. + +Missing Functionality: + Multiple block sizes and/or fragments are not yet implemented. + +---------- +The disk is laid out in segments. The first segment starts 8K into the +disk (the first 8K is used for boot information). Each segment is composed +of the following: + + An optional super block + One or more groups of: + segment summary + 0 or more data blocks + 0 or more inode blocks + +The segment summary and inode/data blocks start after the super block (if +present), and grow toward the end of the segment. + + _______________________________________________ + | | | | | + | summary | data/inode | summary | data/inode | + | block | blocks | block | blocks | ... + |_________|____________|_________|____________| + +The data/inode blocks following a summary block are described by the +summary block. In order to permit the segment to be written in any order +and in a forward direction only, a checksum is calculated across the +blocks described by the summary. Additionally, the summary is checksummed +and timestamped. Both of these are intended for recovery; the former is +to make it easy to determine that it *is* a summary block and the latter +is to make it easy to determine when recovery is finished for partially +written segments. These checksums are also used by the cleaner. + + Summary block (detail) + ________________ + | sum cksum | + | data cksum | + | next segment | + | timestamp | + | FINFO count | + | inode count | + | flags | + |______________| + | FINFO-1 | 0 or more file info structures, identifying the + | . | blocks in the segment. + | . | + | . | + | FINFO-N | + | inode-N | + | . | + | . | + | . | 0 or more inode daddr_t's, identifying the inode + | inode-1 | blocks in the segment. + |______________| + +Inode blocks are blocks of on-disk inodes in the same format as those in +the FFS. However, spare[0] contains the inode number of the inode so we +can find a particular inode on a page. They are packed page_size / +sizeof(inode) to a block. Data blocks are exactly as in the FFS. Both +inodes and data blocks move around the file system at will. 
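As a sketch of what that lookup looks like (illustrative only, not part of the
imported sources: the helper name is hypothetical, but di_inumber is the field
the allocation code in lfs_alloc.c fills in, and INOPB(fs) inodes are packed
per block as described above).  The scan runs from the back of the block so
that, if the same inode was written twice into one block, the most recently
written copy is found first:

	/* Assumes <sys/param.h>, <ufs/ufs/dinode.h> and <ufs/lfs/lfs.h> are in scope. */
	struct dinode *
	find_dinode(struct lfs *fs, ino_t ino, struct dinode *page)
	{
		struct dinode *dip;

		/* Search backwards; a later copy supersedes an earlier one. */
		for (dip = page + INOPB(fs) - 1; dip >= page; --dip)
			if (dip->di_inumber == ino)
				return (dip);
		return (NULL);		/* not in this inode block */
	}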
+ +The file system is described by a super-block which is replicated and +occurs as the first block of the first and other segments. (The maximum +number of super-blocks is MAXNUMSB). Each super-block maintains a list +of the disk addresses of all the super-blocks. The super-block maintains +a small amount of checkpoint information, essentially just enough to find +the inode for the IFILE (fs->lfs_idaddr). + +The IFILE is visible in the file system, as inode number IFILE_INUM. It +contains information shared between the kernel and various user processes. + + Ifile (detail) + ________________ + | cleaner info | Cleaner information per file system. (Page + | | granularity.) + |______________| + | segment | Space available and last modified times per + | usage table | segment. (Page granularity.) + |______________| + | IFILE-1 | Per inode status information: current version #, + | . | if currently allocated, last access time and + | . | current disk address of containing inode block. + | . | If current disk address is LFS_UNUSED_DADDR, the + | IFILE-N | inode is not in use, and it's on the free list. + |______________| + + +First Segment at Creation Time: +_____________________________________________________________ +| | | | | | | | +| 8K pad | Super | summary | inode | ifile | root | l + f | +| | block | | block | | dir | dir | +|________|_______|_________|_______|_______|_______|_______| + ^ + Segment starts here. + +Some differences from the Sprite LFS implementation. + +1. The LFS implementation placed the ifile metadata and the super block + at fixed locations. This implementation replicates the super block + and puts each at a fixed location. The checkpoint data is divided into + two parts -- just enough information to find the IFILE is stored in + two of the super blocks, although it is not toggled between them as in + the Sprite implementation. (This was deliberate, to avoid a single + point of failure.) The remaining checkpoint information is treated as + a regular file, which means that the cleaner info, the segment usage + table and the ifile meta-data are stored in normal log segments. + (Tastes great, less filling...) + +2. The segment layout is radically different in Sprite; this implementation + uses something a lot like network framing, where data/inode blocks are + written asynchronously, and a checksum is used to validate any set of + summary and data/inode blocks. Sprite writes summary blocks synchronously + after the data/inode blocks have been written and the existence of the + summary block validates the data/inode blocks. This permits us to write + everything contiguously, even partial segments and their summaries, whereas + Sprite is forced to seek (from the end of the data inode to the summary + which lives at the end of the segment). Additionally, writing the summary + synchronously should cost about 1/2 a rotation per summary. + +3. Sprite LFS distinguishes between different types of blocks in the segment. + Other than inode blocks and data blocks, we don't. + +4. Sprite LFS traverses the IFILE looking for free blocks. We maintain a + free list threaded through the IFILE entries. + +5. The cleaner runs in user space, as opposed to kernel space. It shares + information with the kernel by reading/writing the IFILE and through + cleaner specific system calls. 
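To make the user/kernel split in point 5 concrete, here is a minimal sketch
(illustrative only, not part of the imported sources: it assumes the cleaner
already knows a path by which the ifile can be opened, and that the
cleaner-information counters sit at the very start of the file, as in the
Ifile detail above) of a user-space cleaner peeking at the clean/dirty
segment counts:

	#include <sys/types.h>
	#include <fcntl.h>
	#include <unistd.h>

	struct cleaner_info {		/* mirrors the kernel's CLEANERINFO */
		u_long	clean;		/* number of clean segments */
		u_long	dirty;		/* number of dirty segments */
	};

	/* Fill *ci from the start of the ifile; 0 on success, -1 on error. */
	int
	read_cleaner_info(const char *ifile_path, struct cleaner_info *ci)
	{
		int fd, ret = -1;

		if ((fd = open(ifile_path, O_RDONLY)) < 0)
			return (-1);
		if (read(fd, ci, sizeof(*ci)) == (ssize_t)sizeof(*ci))
			ret = 0;
		(void)close(fd);
		return (ret);
	}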
+ diff --git a/sys/ufs/lfs/TODO b/sys/ufs/lfs/TODO new file mode 100644 index 0000000..ace8f5e --- /dev/null +++ b/sys/ufs/lfs/TODO @@ -0,0 +1,116 @@ +# @(#)TODO 8.1 (Berkeley) 6/11/93 + +NOTE: Changed the lookup on a page of inodes to search from the back +in case the same inode gets written twice on the same page. + +Make sure that if you are writing a file, but not all the blocks +make it into a single segment, that you do not write the inode in +that segment. + +Keith: + Why not delete the lfs_bmapv call, just mark everything dirty + that isn't deleted/truncated? Get some numbers about + what percentage of the stuff that the cleaner thinks + might be live is live. If it's high, get rid of lfs_bmapv. + + There is a nasty problem in that it may take *more* room to write + the data to clean a segment than is returned by the new segment + because of indirect blocks in segment 2 being dirtied by the data + being copied into the log from segment 1. The suggested solution + at this point is to detect it when we have no space left on the + filesystem, write the extra data into the last segment (leaving + no clean ones), make it a checkpoint and shut down the file system + for fixing by a utility reading the raw partition. Argument is + that this should never happen and is practically impossible to fix + since the cleaner would have to theoretically build a model of the + entire filesystem in memory to detect the condition occurring. + A file coalescing cleaner will help avoid the problem, and one + that reads/writes from the raw disk could fix it. + +DONE Currently, inodes are being flushed to disk synchronously upon + creation -- see ufs_makeinode. However, only the inode + is flushed, the directory "name" is written using VOP_BWRITE, + so it's not synchronous. Possible solutions: 1: get some + ordering in the writes so that inode/directory entries get + stuffed into the same segment. 2: do both synchronously + 3: add Mendel's information into the stream so we log + creation/deletion of inodes. 4: do some form of partial + segment when changing the inode (creation/deletion/rename). +DONE Fix i_block increment for indirect blocks. + If the file system is tar'd, extracted on top of another LFS, the + IFILE ain't worth diddly. Is the cleaner writing the IFILE? + If not, let's make it read-only. +DONE Delete unnecessary source from utils in main-line source tree. +DONE Make sure that we're counting meta blocks in the inode i_block count. + Overlap the version and nextfree fields in the IFILE +DONE Vinvalbuf (Kirk): + Why writing blocks that are no longer useful? + Are the semantics of close such that blocks have to be flushed? + How specify in the buf chain the blocks that don't need + to be written? (Different numbering of indirect blocks.) + +Margo: + Change so that only search one sector of inode block file for the + inode by using sector addresses in the ifile instead of + logical disk addresses. + Fix the use of the ifile version field to use the generation + number instead. +DONE Unmount; not doing a bgetvp (VHOLD) in lfs_newbuf call. +DONE Document in the README file where the checkpoint information is + on disk. + Variable block sizes (Margo/Keith). + Switch the byte accounting to sector accounting. +DONE Check lfs.h and make sure that the #defines/structures are all + actually needed. +DONE Add a check in lfs_segment.c so that if the segment is empty, + we don't write it. + Need to keep vnode v_numoutput up to date for pending writes? +DONE USENIX paper (Carl/Margo). 
+ + +Evelyn: + lfsck: If delete a file that's being executed, the version number + isn't updated, and lfsck has to figure this out; case is the same as if have an inode that no directory references, + so the file should be reattached into lost+found. + Recovery/fsck. + +Carl: + Investigate: clustering of reads (if blocks in the segment are ordered, + should read them all) and writes (McVoy paper). + Investigate: should the access time be part of the IFILE: + pro: theoretically, saves disk writes + con: cacheing inodes should obviate this advantage + the IFILE is already humongous + Cleaner. + Port to OSF/1 (Carl/Keith). + Currently there's no notion of write error checking. + + Failed data/inode writes should be rescheduled (kernel level + bad blocking). + + Failed superblock writes should cause selection of new + superblock for checkpointing. + +FUTURE FANTASIES: ============ + ++ unrm, versioning ++ transactions ++ extended cleaner policies (hot/cold data, data placement) + +============================== +Problem with the concept of multiple buffer headers referencing the segment: +Positives: + Don't lock down 1 segment per file system of physical memory. + Don't copy from buffers to segment memory. + Don't tie down the bus to transfer 1M. + Works on controllers supporting less than large transfers. + Disk can start writing immediately instead of waiting 1/2 rotation + and the full transfer. +Negatives: + Have to do segment write then segment summary write, since the latter + is what verifies that the segment is okay. (Is there another way + to do this?) +============================== + +The algorithm for selecting the disk addresses of the super-blocks +has to be available to the user program which checks the file system. + +(Currently in newfs, becomes a common subroutine.) diff --git a/sys/ufs/lfs/lfs.h b/sys/ufs/lfs/lfs.h new file mode 100644 index 0000000..87b8c22 --- /dev/null +++ b/sys/ufs/lfs/lfs.h @@ -0,0 +1,353 @@ +/*- + * Copyright (c) 1991, 1993 + * The Regents of the University of California. All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * 3. All advertising materials mentioning features or use of this software + * must display the following acknowledgement: + * This product includes software developed by the University of + * California, Berkeley and its contributors. + * 4. Neither the name of the University nor the names of its contributors + * may be used to endorse or promote products derived from this software + * without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND + * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. 
IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE + * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL + * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS + * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) + * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT + * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY + * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF + * SUCH DAMAGE. + * + * @(#)lfs.h 8.3 (Berkeley) 9/23/93 + */ + +#define LFS_LABELPAD 8192 /* LFS label size */ +#define LFS_SBPAD 8192 /* LFS superblock size */ + +/* + * XXX + * This is a kluge and NEEDS to go away. + * + * Right now, ufs code handles most of the calls for directory operations + * such as create, mkdir, link, etc. As a result VOP_UPDATE is being + * called with waitfor set (since ffs does these things synchronously). + * Since LFS does not want to do these synchronously, we treat the last + * argument to lfs_update as a set of flags. If LFS_SYNC is set, then + * the update should be synchronous, if not, do it asynchronously. + * Unfortunately, this means that LFS won't work with NFS yet because + * NFS goes through paths that will make normal calls to ufs which will + * call lfs with a last argument of 1. + */ +#define LFS_SYNC 0x02 + +/* On-disk and in-memory checkpoint segment usage structure. */ +typedef struct segusage SEGUSE; +struct segusage { + u_long su_nbytes; /* number of live bytes */ + u_long su_lastmod; /* SEGUSE last modified timestamp */ + u_short su_nsums; /* number of summaries in segment */ + u_short su_ninos; /* number of inode blocks in seg */ +#define SEGUSE_ACTIVE 0x1 /* segment is currently being written */ +#define SEGUSE_DIRTY 0x2 /* segment has data in it */ +#define SEGUSE_SUPERBLOCK 0x4 /* segment contains a superblock */ + u_long su_flags; +}; + +#define SEGUPB(fs) (1 << (fs)->lfs_sushift) +#define SEGTABSIZE_SU(fs) \ + (((fs)->lfs_nseg + SEGUPB(fs) - 1) >> (fs)->lfs_sushift) + +/* On-disk file information. One per file with data blocks in the segment. */ +typedef struct finfo FINFO; +struct finfo { + u_long fi_nblocks; /* number of blocks */ + u_long fi_version; /* version number */ + u_long fi_ino; /* inode number */ + long fi_blocks[1]; /* array of logical block numbers */ +}; + +/* On-disk and in-memory super block. */ +struct lfs { +#define LFS_MAGIC 0x070162 + u_long lfs_magic; /* magic number */ +#define LFS_VERSION 1 + u_long lfs_version; /* version number */ + + u_long lfs_size; /* number of blocks in fs */ + u_long lfs_ssize; /* number of blocks per segment */ + u_long lfs_dsize; /* number of disk blocks in fs */ + u_long lfs_bsize; /* file system block size */ + u_long lfs_fsize; /* size of frag blocks in fs */ + u_long lfs_frag; /* number of frags in a block in fs */ + +/* Checkpoint region. 
*/ + ino_t lfs_free; /* start of the free list */ + u_long lfs_bfree; /* number of free disk blocks */ + u_long lfs_nfiles; /* number of allocated inodes */ + long lfs_avail; /* blocks available for writing */ + u_long lfs_uinodes; /* inodes in cache not yet on disk */ + daddr_t lfs_idaddr; /* inode file disk address */ + ino_t lfs_ifile; /* inode file inode number */ + daddr_t lfs_lastseg; /* address of last segment written */ + daddr_t lfs_nextseg; /* address of next segment to write */ + daddr_t lfs_curseg; /* current segment being written */ + daddr_t lfs_offset; /* offset in curseg for next partial */ + daddr_t lfs_lastpseg; /* address of last partial written */ + u_long lfs_tstamp; /* time stamp */ + +/* These are configuration parameters. */ + u_long lfs_minfree; /* minimum percentage of free blocks */ + +/* These fields can be computed from the others. */ + u_quad_t lfs_maxfilesize; /* maximum representable file size */ + u_long lfs_dbpseg; /* disk blocks per segment */ + u_long lfs_inopb; /* inodes per block */ + u_long lfs_ifpb; /* IFILE entries per block */ + u_long lfs_sepb; /* SEGUSE entries per block */ + u_long lfs_nindir; /* indirect pointers per block */ + u_long lfs_nseg; /* number of segments */ + u_long lfs_nspf; /* number of sectors per fragment */ + u_long lfs_cleansz; /* cleaner info size in blocks */ + u_long lfs_segtabsz; /* segment table size in blocks */ + + u_long lfs_segmask; /* calculate offset within a segment */ + u_long lfs_segshift; /* fast mult/div for segments */ + u_long lfs_bmask; /* calc block offset from file offset */ + u_long lfs_bshift; /* calc block number from file offset */ + u_long lfs_ffmask; /* calc frag offset from file offset */ + u_long lfs_ffshift; /* fast mult/div for frag from file */ + u_long lfs_fbmask; /* calc frag offset from block offset */ + u_long lfs_fbshift; /* fast mult/div for frag from block */ + u_long lfs_fsbtodb; /* fsbtodb and dbtofsb shift constant */ + u_long lfs_sushift; /* fast mult/div for segusage table */ + +#define LFS_MIN_SBINTERVAL 5 /* minimum superblock segment spacing */ +#define LFS_MAXNUMSB 10 /* superblock disk offsets */ + daddr_t lfs_sboffs[LFS_MAXNUMSB]; + +/* These fields are set at mount time and are meaningless on disk. */ + struct segment *lfs_sp; /* current segment being written */ + struct vnode *lfs_ivnode; /* vnode for the ifile */ + u_long lfs_seglock; /* single-thread the segment writer */ + pid_t lfs_lockpid; /* pid of lock holder */ + u_long lfs_iocount; /* number of ios pending */ + u_long lfs_writer; /* don't allow any dirops to start */ + u_long lfs_dirops; /* count of active directory ops */ + u_long lfs_doifile; /* Write ifile blocks on next write */ + u_long lfs_nactive; /* Number of segments since last ckp */ + u_char lfs_fmod; /* super block modified flag */ + u_char lfs_clean; /* file system is clean flag */ + u_char lfs_ronly; /* mounted read-only flag */ + u_char lfs_flags; /* currently unused flag */ + u_char lfs_fsmnt[MNAMELEN]; /* name mounted on */ + u_char pad[3]; /* long-align */ + +/* Checksum; valid on disk. */ + u_long lfs_cksum; /* checksum for superblock checking */ +}; + +/* + * Inode 0 is the out-of-band inode number, inode 1 is the inode number for + * the IFILE, the root inode is 2 and the lost+found inode is 3. + */ + +/* Fixed inode numbers. 
*/ +#define LFS_UNUSED_INUM 0 /* out of band inode number */ +#define LFS_IFILE_INUM 1 /* IFILE inode number */ +#define LOSTFOUNDINO 3 /* lost+found inode number */ +#define LFS_FIRST_INUM 4 /* first free inode number */ + +/* Address calculations for metadata located in the inode */ +#define S_INDIR(fs) -NDADDR +#define D_INDIR(fs) (S_INDIR(fs) - NINDIR(fs) - 1) +#define T_INDIR(fs) (D_INDIR(fs) - NINDIR(fs) * NINDIR(fs) - 1) + +/* Unassigned disk address. */ +#define UNASSIGNED -1 + +/* Unused logical block number */ +#define LFS_UNUSED_LBN -1 + +typedef struct ifile IFILE; +struct ifile { + u_long if_version; /* inode version number */ +#define LFS_UNUSED_DADDR 0 /* out-of-band daddr */ + daddr_t if_daddr; /* inode disk address */ + ino_t if_nextfree; /* next-unallocated inode */ +}; + +/* + * Cleaner information structure. This resides in the ifile and is used + * to pass information between the cleaner and the kernel. + */ +typedef struct _cleanerinfo { + u_long clean; /* K: number of clean segments */ + u_long dirty; /* K: number of dirty segments */ +} CLEANERINFO; + +#define CLEANSIZE_SU(fs) \ + ((sizeof(CLEANERINFO) + (fs)->lfs_bsize - 1) >> (fs)->lfs_bshift) + +/* + * All summary blocks are the same size, so we can always read a summary + * block easily from a segment. + */ +#define LFS_SUMMARY_SIZE 512 + +/* On-disk segment summary information */ +typedef struct segsum SEGSUM; +struct segsum { + u_long ss_sumsum; /* check sum of summary block */ + u_long ss_datasum; /* check sum of data */ + daddr_t ss_next; /* next segment */ + u_long ss_create; /* creation time stamp */ + u_short ss_nfinfo; /* number of file info structures */ + u_short ss_ninos; /* number of inodes in summary */ +#define SS_DIROP 0x01 /* segment begins a dirop */ +#define SS_CONT 0x02 /* more partials to finish this write*/ + u_short ss_flags; /* used for directory operations */ + u_short ss_pad; /* extra space */ + /* FINFO's and inode daddr's... */ +}; + +/* NINDIR is the number of indirects in a file system block. */ +#define NINDIR(fs) ((fs)->lfs_nindir) + +/* INOPB is the number of inodes in a secondary storage block. */ +#define INOPB(fs) ((fs)->lfs_inopb) + +#define blksize(fs) ((fs)->lfs_bsize) +#define blkoff(fs, loc) ((loc) & (fs)->lfs_bmask) +#define fsbtodb(fs, b) ((b) << (fs)->lfs_fsbtodb) +#define dbtofsb(fs, b) ((b) >> (fs)->lfs_fsbtodb) +#define lblkno(fs, loc) ((loc) >> (fs)->lfs_bshift) +#define lblktosize(fs, blk) ((blk) << (fs)->lfs_bshift) +#define numfrags(fs, loc) /* calculates (loc / fs->fs_fsize) */ \ + ((loc) >> (fs)->lfs_bshift) + +#define datosn(fs, daddr) /* disk address to segment number */ \ + (((daddr) - (fs)->lfs_sboffs[0]) / fsbtodb((fs), (fs)->lfs_ssize)) +#define sntoda(fs, sn) /* segment number to disk address */ \ + ((daddr_t)((sn) * ((fs)->lfs_ssize << (fs)->lfs_fsbtodb) + \ + (fs)->lfs_sboffs[0])) + +/* Read in the block with the cleaner info from the ifile. */ +#define LFS_CLEANERINFO(CP, F, BP) { \ + VTOI((F)->lfs_ivnode)->i_flag |= IN_ACCESS; \ + if (bread((F)->lfs_ivnode, \ + (daddr_t)0, (F)->lfs_bsize, NOCRED, &(BP))) \ + panic("lfs: ifile read"); \ + (CP) = (CLEANERINFO *)(BP)->b_data; \ +} + +/* Read in the block with a specific inode from the ifile. 
*/ +#define LFS_IENTRY(IP, F, IN, BP) { \ + int _e; \ + VTOI((F)->lfs_ivnode)->i_flag |= IN_ACCESS; \ + if (_e = bread((F)->lfs_ivnode, \ + (IN) / (F)->lfs_ifpb + (F)->lfs_cleansz + (F)->lfs_segtabsz,\ + (F)->lfs_bsize, NOCRED, &(BP))) \ + panic("lfs: ifile read %d", _e); \ + (IP) = (IFILE *)(BP)->b_data + (IN) % (F)->lfs_ifpb; \ +} + +/* Read in the block with a specific segment usage entry from the ifile. */ +#define LFS_SEGENTRY(SP, F, IN, BP) { \ + int _e; \ + VTOI((F)->lfs_ivnode)->i_flag |= IN_ACCESS; \ + if (_e = bread((F)->lfs_ivnode, \ + ((IN) >> (F)->lfs_sushift) + (F)->lfs_cleansz, \ + (F)->lfs_bsize, NOCRED, &(BP))) \ + panic("lfs: ifile read: %d", _e); \ + (SP) = (SEGUSE *)(BP)->b_data + ((IN) & (F)->lfs_sepb - 1); \ +} + +/* + * Determine if there is enough room currently available to write db + * disk blocks. We need enough blocks for the new blocks, the current, + * inode blocks, a summary block, plus potentially the ifile inode and + * the segment usage table, plus an ifile page. + */ +#define LFS_FITS(fs, db) \ + ((long)((db + ((fs)->lfs_uinodes + INOPB((fs))) / INOPB((fs)) + \ + fsbtodb(fs, 1) + LFS_SUMMARY_SIZE / DEV_BSIZE + \ + (fs)->lfs_segtabsz)) < (fs)->lfs_avail) + +/* Determine if a buffer belongs to the ifile */ +#define IS_IFILE(bp) (VTOI(bp->b_vp)->i_number == LFS_IFILE_INUM) + +/* + * Structures used by lfs_bmapv and lfs_markv to communicate information + * about inodes and data blocks. + */ +typedef struct block_info { + ino_t bi_inode; /* inode # */ + daddr_t bi_lbn; /* logical block w/in file */ + daddr_t bi_daddr; /* disk address of block */ + time_t bi_segcreate; /* origin segment create time */ + int bi_version; /* file version number */ + void *bi_bp; /* data buffer */ +} BLOCK_INFO; + +/* In-memory description of a segment about to be written. 
*/ +struct segment { + struct lfs *fs; /* file system pointer */ + struct buf **bpp; /* pointer to buffer array */ + struct buf **cbpp; /* pointer to next available bp */ + struct buf **start_bpp; /* pointer to first bp in this set */ + struct buf *ibp; /* buffer pointer to inode page */ + struct finfo *fip; /* current fileinfo pointer */ + struct vnode *vp; /* vnode being gathered */ + void *segsum; /* segment summary info */ + u_long ninodes; /* number of inodes in this segment */ + u_long seg_bytes_left; /* bytes left in segment */ + u_long sum_bytes_left; /* bytes left in summary block */ + u_long seg_number; /* number of this segment */ + daddr_t *start_lbp; /* beginning lbn for this set */ +#define SEGM_CKP 0x01 /* doing a checkpoint */ +#define SEGM_CLEAN 0x02 /* cleaner call; don't sort */ +#define SEGM_SYNC 0x04 /* wait for segment */ + u_long seg_flags; /* run-time flags for this segment */ +}; + +#define ISSPACE(F, BB, C) \ + (((C)->cr_uid == 0 && (F)->lfs_bfree >= (BB)) || \ + ((C)->cr_uid != 0 && IS_FREESPACE(F, BB))) + +#define IS_FREESPACE(F, BB) \ + ((F)->lfs_bfree > ((F)->lfs_dsize * (F)->lfs_minfree / 100 + (BB))) + +#define ISSPACE_XXX(F, BB) \ + ((F)->lfs_bfree >= (BB)) + +#define DOSTATS +#ifdef DOSTATS +/* Statistics Counters */ +struct lfs_stats { + int segsused; + int psegwrites; + int psyncwrites; + int pcleanwrites; + int blocktot; + int cleanblocks; + int ncheckpoints; + int nwrites; + int nsync_writes; + int wait_exceeded; + int write_exceeded; + int flush_invoked; +}; +extern struct lfs_stats lfs_stats; +#endif diff --git a/sys/ufs/lfs/lfs_alloc.c b/sys/ufs/lfs/lfs_alloc.c new file mode 100644 index 0000000..3f06c81 --- /dev/null +++ b/sys/ufs/lfs/lfs_alloc.c @@ -0,0 +1,251 @@ +/* + * Copyright (c) 1991, 1993 + * The Regents of the University of California. All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * 3. All advertising materials mentioning features or use of this software + * must display the following acknowledgement: + * This product includes software developed by the University of + * California, Berkeley and its contributors. + * 4. Neither the name of the University nor the names of its contributors + * may be used to endorse or promote products derived from this software + * without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND + * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. 
IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE + * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL + * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS + * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) + * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT + * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY + * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF + * SUCH DAMAGE. + * + * @(#)lfs_alloc.c 8.4 (Berkeley) 1/4/94 + */ + +#include <sys/param.h> +#include <sys/kernel.h> +#include <sys/buf.h> +#include <sys/vnode.h> +#include <sys/syslog.h> +#include <sys/mount.h> +#include <sys/malloc.h> + +#include <vm/vm.h> + +#include <ufs/ufs/quota.h> +#include <ufs/ufs/inode.h> +#include <ufs/ufs/ufsmount.h> + +#include <ufs/lfs/lfs.h> +#include <ufs/lfs/lfs_extern.h> + +extern u_long nextgennumber; + +/* Allocate a new inode. */ +/* ARGSUSED */ +int +lfs_valloc(ap) + struct vop_valloc_args /* { + struct vnode *a_pvp; + int a_mode; + struct ucred *a_cred; + struct vnode **a_vpp; + } */ *ap; +{ + struct lfs *fs; + struct buf *bp; + struct ifile *ifp; + struct inode *ip; + struct vnode *vp; + daddr_t blkno; + ino_t new_ino; + u_long i, max; + int error; + + /* Get the head of the freelist. */ + fs = VTOI(ap->a_pvp)->i_lfs; + new_ino = fs->lfs_free; +#ifdef ALLOCPRINT + printf("lfs_ialloc: allocate inode %d\n", new_ino); +#endif + + /* + * Remove the inode from the free list and write the new start + * of the free list into the superblock. + */ + LFS_IENTRY(ifp, fs, new_ino, bp); + if (ifp->if_daddr != LFS_UNUSED_DADDR) + panic("lfs_ialloc: inuse inode on the free list"); + fs->lfs_free = ifp->if_nextfree; + brelse(bp); + + /* Extend IFILE so that the next lfs_valloc will succeed. */ + if (fs->lfs_free == LFS_UNUSED_INUM) { + vp = fs->lfs_ivnode; + ip = VTOI(vp); + blkno = lblkno(fs, ip->i_size); + lfs_balloc(vp, fs->lfs_bsize, blkno, &bp); + ip->i_size += fs->lfs_bsize; + vnode_pager_setsize(vp, (u_long)ip->i_size); + vnode_pager_uncache(vp); + + i = (blkno - fs->lfs_segtabsz - fs->lfs_cleansz) * + fs->lfs_ifpb; + fs->lfs_free = i; + max = i + fs->lfs_ifpb; + for (ifp = (struct ifile *)bp->b_data; i < max; ++ifp) { + ifp->if_version = 1; + ifp->if_daddr = LFS_UNUSED_DADDR; + ifp->if_nextfree = ++i; + } + ifp--; + ifp->if_nextfree = LFS_UNUSED_INUM; + if (error = VOP_BWRITE(bp)) + return (error); + } + + /* Create a vnode to associate with the inode. */ + if (error = lfs_vcreate(ap->a_pvp->v_mount, new_ino, &vp)) + return (error); + + + ip = VTOI(vp); + /* Zero out the direct and indirect block addresses. */ + bzero(&ip->i_din, sizeof(struct dinode)); + ip->i_din.di_inumber = new_ino; + + /* Set a new generation number for this inode. */ + if (++nextgennumber < (u_long)time.tv_sec) + nextgennumber = time.tv_sec; + ip->i_gen = nextgennumber; + + /* Insert into the inode hash table. */ + ufs_ihashins(ip); + + if (error = ufs_vinit(vp->v_mount, lfs_specop_p, LFS_FIFOOPS, &vp)) { + vput(vp); + *ap->a_vpp = NULL; + return (error); + } + + *ap->a_vpp = vp; + vp->v_flag |= VDIROP; + VREF(ip->i_devvp); + + /* Set superblock modified bit and increment file count. */ + fs->lfs_fmod = 1; + ++fs->lfs_nfiles; + return (0); +} + +/* Create a new vnode/inode pair and initialize what fields we can. 
*/ +int +lfs_vcreate(mp, ino, vpp) + struct mount *mp; + ino_t ino; + struct vnode **vpp; +{ + extern int (**lfs_vnodeop_p)(); + struct inode *ip; + struct ufsmount *ump; + int error, i; + + /* Create the vnode. */ + if (error = getnewvnode(VT_LFS, mp, lfs_vnodeop_p, vpp)) { + *vpp = NULL; + return (error); + } + + /* Get a pointer to the private mount structure. */ + ump = VFSTOUFS(mp); + + /* Initialize the inode. */ + MALLOC(ip, struct inode *, sizeof(struct inode), M_LFSNODE, M_WAITOK); + (*vpp)->v_data = ip; + ip->i_vnode = *vpp; + ip->i_devvp = ump->um_devvp; + ip->i_flag = IN_MODIFIED; + ip->i_dev = ump->um_dev; + ip->i_number = ip->i_din.di_inumber = ino; +ip->i_din.di_spare[0] = 0xdeadbeef; +ip->i_din.di_spare[1] = 0xdeadbeef; + ip->i_lfs = ump->um_lfs; +#ifdef QUOTA + for (i = 0; i < MAXQUOTAS; i++) + ip->i_dquot[i] = NODQUOT; +#endif + ip->i_lockf = 0; + ip->i_diroff = 0; + ip->i_mode = 0; + ip->i_size = 0; + ip->i_blocks = 0; + ++ump->um_lfs->lfs_uinodes; + return (0); +} + +/* Free an inode. */ +/* ARGUSED */ +int +lfs_vfree(ap) + struct vop_vfree_args /* { + struct vnode *a_pvp; + ino_t a_ino; + int a_mode; + } */ *ap; +{ + SEGUSE *sup; + struct buf *bp; + struct ifile *ifp; + struct inode *ip; + struct lfs *fs; + daddr_t old_iaddr; + ino_t ino; + + /* Get the inode number and file system. */ + ip = VTOI(ap->a_pvp); + fs = ip->i_lfs; + ino = ip->i_number; + if (ip->i_flag & IN_MODIFIED) { + --fs->lfs_uinodes; + ip->i_flag &= + ~(IN_ACCESS | IN_CHANGE | IN_MODIFIED | IN_UPDATE); + } + /* + * Set the ifile's inode entry to unused, increment its version number + * and link it into the free chain. + */ + LFS_IENTRY(ifp, fs, ino, bp); + old_iaddr = ifp->if_daddr; + ifp->if_daddr = LFS_UNUSED_DADDR; + ++ifp->if_version; + ifp->if_nextfree = fs->lfs_free; + fs->lfs_free = ino; + (void) VOP_BWRITE(bp); + + if (old_iaddr != LFS_UNUSED_DADDR) { + LFS_SEGENTRY(sup, fs, datosn(fs, old_iaddr), bp); +#ifdef DIAGNOSTIC + if (sup->su_nbytes < sizeof(struct dinode)) + panic("lfs_vfree: negative byte count (segment %d)\n", + datosn(fs, old_iaddr)); +#endif + sup->su_nbytes -= sizeof(struct dinode); + (void) VOP_BWRITE(bp); + } + + /* Set superblock modified bit and decrement file count. */ + fs->lfs_fmod = 1; + --fs->lfs_nfiles; + return (0); +} diff --git a/sys/ufs/lfs/lfs_balloc.c b/sys/ufs/lfs/lfs_balloc.c new file mode 100644 index 0000000..b56bc9e --- /dev/null +++ b/sys/ufs/lfs/lfs_balloc.c @@ -0,0 +1,136 @@ +/* + * Copyright (c) 1989, 1991, 1993 + * The Regents of the University of California. All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * 3. All advertising materials mentioning features or use of this software + * must display the following acknowledgement: + * This product includes software developed by the University of + * California, Berkeley and its contributors. + * 4. Neither the name of the University nor the names of its contributors + * may be used to endorse or promote products derived from this software + * without specific prior written permission. 
+ * + * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND + * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE + * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL + * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS + * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) + * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT + * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY + * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF + * SUCH DAMAGE. + * + * @(#)lfs_balloc.c 8.1 (Berkeley) 6/11/93 + */ +#include <sys/param.h> +#include <sys/buf.h> +#include <sys/proc.h> +#include <sys/vnode.h> +#include <sys/mount.h> +#include <sys/resourcevar.h> +#include <sys/trace.h> + +#include <miscfs/specfs/specdev.h> + +#include <ufs/ufs/quota.h> +#include <ufs/ufs/inode.h> +#include <ufs/ufs/ufsmount.h> + +#include <ufs/lfs/lfs.h> +#include <ufs/lfs/lfs_extern.h> + +int +lfs_balloc(vp, iosize, lbn, bpp) + struct vnode *vp; + u_long iosize; + daddr_t lbn; + struct buf **bpp; +{ + struct buf *ibp, *bp; + struct inode *ip; + struct lfs *fs; + struct indir indirs[NIADDR+2]; + daddr_t daddr; + int bb, error, i, num; + + ip = VTOI(vp); + fs = ip->i_lfs; + + /* + * Three cases: it's a block beyond the end of file, it's a block in + * the file that may or may not have been assigned a disk address or + * we're writing an entire block. Note, if the daddr is unassigned, + * the block might still have existed in the cache (if it was read + * or written earlier). If it did, make sure we don't count it as a + * new block or zero out its contents. If it did not, make sure + * we allocate any necessary indirect blocks. + */ + + *bpp = NULL; + if (error = ufs_bmaparray(vp, lbn, &daddr, &indirs[0], &num, NULL )) + return (error); + + *bpp = bp = getblk(vp, lbn, fs->lfs_bsize, 0, 0); + bb = VFSTOUFS(vp->v_mount)->um_seqinc; + if (daddr == UNASSIGNED) + /* May need to allocate indirect blocks */ + for (i = 1; i < num; ++i) + if (!indirs[i].in_exists) { + ibp = + getblk(vp, indirs[i].in_lbn, fs->lfs_bsize, + 0, 0); + if (!(ibp->b_flags & (B_DONE | B_DELWRI))) { + if (!ISSPACE(fs, bb, curproc->p_ucred)){ + ibp->b_flags |= B_INVAL; + brelse(ibp); + error = ENOSPC; + } else { + ip->i_blocks += bb; + ip->i_lfs->lfs_bfree -= bb; + clrbuf(ibp); + error = VOP_BWRITE(ibp); + } + } else + panic ("Indirect block should not exist"); + } + if (error) { + if (bp) + brelse(bp); + return(error); + } + + + /* Now, we may need to allocate the data block */ + if (!(bp->b_flags & (B_CACHE | B_DONE | B_DELWRI))) { + if (daddr == UNASSIGNED) + if (!ISSPACE(fs, bb, curproc->p_ucred)) { + bp->b_flags |= B_INVAL; + brelse(bp); + return(ENOSPC); + } else { + ip->i_blocks += bb; + ip->i_lfs->lfs_bfree -= bb; + if (iosize != fs->lfs_bsize) + clrbuf(bp); + } + else if (iosize == fs->lfs_bsize) + bp->b_blkno = daddr; /* Skip the I/O */ + else { + bp->b_blkno = daddr; + bp->b_flags |= B_READ; + VOP_STRATEGY(bp); + return(biowait(bp)); + } + } + return (error); +} diff --git a/sys/ufs/lfs/lfs_bio.c b/sys/ufs/lfs/lfs_bio.c new file mode 100644 index 0000000..0f021f1 --- /dev/null +++ b/sys/ufs/lfs/lfs_bio.c @@ -0,0 +1,187 @@ +/* + * Copyright (c) 1991, 1993 + * The Regents of the University of California. 
All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * 3. All advertising materials mentioning features or use of this software + * must display the following acknowledgement: + * This product includes software developed by the University of + * California, Berkeley and its contributors. + * 4. Neither the name of the University nor the names of its contributors + * may be used to endorse or promote products derived from this software + * without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND + * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE + * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL + * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS + * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) + * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT + * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY + * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF + * SUCH DAMAGE. + * + * @(#)lfs_bio.c 8.4 (Berkeley) 12/30/93 + */ + +#include <sys/param.h> +#include <sys/proc.h> +#include <sys/buf.h> +#include <sys/vnode.h> +#include <sys/resourcevar.h> +#include <sys/mount.h> +#include <sys/kernel.h> + +#include <ufs/ufs/quota.h> +#include <ufs/ufs/inode.h> +#include <ufs/ufs/ufsmount.h> + +#include <ufs/lfs/lfs.h> +#include <ufs/lfs/lfs_extern.h> + +/* + * LFS block write function. + * + * XXX + * No write cost accounting is done. + * This is almost certainly wrong for synchronous operations and NFS. + */ +int lfs_allclean_wakeup; /* Cleaner wakeup address. */ +int locked_queue_count; /* XXX Count of locked-down buffers. */ +int lfs_writing; /* Set if already kicked off a writer + because of buffer space */ +/* +#define WRITE_THRESHHOLD ((nbuf >> 2) - 10) +#define WAIT_THRESHHOLD ((nbuf >> 1) - 10) +*/ +#define WAIT_THRESHHOLD (nbuf - (nbuf >> 2) - 10) +#define WRITE_THRESHHOLD ((nbuf >> 1) - 10) +#define LFS_BUFWAIT 2 + +int +lfs_bwrite(ap) + struct vop_bwrite_args /* { + struct buf *a_bp; + } */ *ap; +{ + register struct buf *bp = ap->a_bp; + struct lfs *fs; + struct inode *ip; + int error, s; + + /* + * Set the delayed write flag and use reassignbuf to move the buffer + * from the clean list to the dirty one. + * + * Set the B_LOCKED flag and unlock the buffer, causing brelse to move + * the buffer onto the LOCKED free list. This is necessary, otherwise + * getnewbuf() would try to reclaim the buffers using bawrite, which + * isn't going to work. + * + * XXX we don't let meta-data writes run out of space because they can + * come from the segment writer. We need to make sure that there is + * enough space reserved so that there's room to write meta-data + * blocks. 
+ */ + if (!(bp->b_flags & B_LOCKED)) { + fs = VFSTOUFS(bp->b_vp->v_mount)->um_lfs; + while (!LFS_FITS(fs, fsbtodb(fs, 1)) && !IS_IFILE(bp) && + bp->b_lblkno > 0) { + /* Out of space, need cleaner to run */ + wakeup(&lfs_allclean_wakeup); + if (error = tsleep(&fs->lfs_avail, PCATCH | PUSER, + "cleaner", NULL)) { + brelse(bp); + return (error); + } + } + ip = VTOI((bp)->b_vp); + if (!(ip->i_flag & IN_MODIFIED)) + ++fs->lfs_uinodes; + ip->i_flag |= IN_CHANGE | IN_MODIFIED | IN_UPDATE; + fs->lfs_avail -= fsbtodb(fs, 1); + ++locked_queue_count; + bp->b_flags |= B_DELWRI | B_LOCKED; + bp->b_flags &= ~(B_READ | B_ERROR); + s = splbio(); + reassignbuf(bp, bp->b_vp); + splx(s); + } + brelse(bp); + return (0); +} + +/* + * XXX + * This routine flushes buffers out of the B_LOCKED queue when LFS has too + * many locked down. Eventually the pageout daemon will simply call LFS + * when pages need to be reclaimed. Note, we have one static count of locked + * buffers, so we can't have more than a single file system. To make this + * work for multiple file systems, put the count into the mount structure. + */ +void +lfs_flush() +{ + register struct mount *mp; + +#ifdef DOSTATS + ++lfs_stats.write_exceeded; +#endif + if (lfs_writing) + return; + lfs_writing = 1; + for (mp = mountlist.tqh_first; mp != NULL; mp = mp->mnt_list.tqe_next) { + /* The lock check below is to avoid races with unmount. */ + if (mp->mnt_stat.f_type == MOUNT_LFS && + (mp->mnt_flag & (MNT_MLOCK|MNT_RDONLY|MNT_UNMOUNT)) == 0 && + !((((struct ufsmount *)mp->mnt_data))->ufsmount_u.lfs)->lfs_dirops ) { + /* + * We set the queue to 0 here because we are about to + * write all the dirty buffers we have. If more come + * in while we're writing the segment, they may not + * get written, so we want the count to reflect these + * new writes after the segwrite completes. + */ +#ifdef DOSTATS + ++lfs_stats.flush_invoked; +#endif + lfs_segwrite(mp, 0); + } + } + lfs_writing = 0; +} + +int +lfs_check(vp, blkno) + struct vnode *vp; + daddr_t blkno; +{ + extern int lfs_allclean_wakeup; + int error; + + error = 0; + if (incore(vp, blkno)) + return (0); + if (locked_queue_count > WRITE_THRESHHOLD) + lfs_flush(); + + /* If out of buffers, wait on writer */ + while (locked_queue_count > WAIT_THRESHHOLD) { +#ifdef DOSTATS + ++lfs_stats.wait_exceeded; +#endif + error = tsleep(&locked_queue_count, PCATCH | PUSER, "buffers", + hz * LFS_BUFWAIT); + } + + return (error); +} diff --git a/sys/ufs/lfs/lfs_cksum.c b/sys/ufs/lfs/lfs_cksum.c new file mode 100644 index 0000000..77b011a --- /dev/null +++ b/sys/ufs/lfs/lfs_cksum.c @@ -0,0 +1,58 @@ +/*- + * Copyright (c) 1991, 1993 + * The Regents of the University of California. All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * 3. All advertising materials mentioning features or use of this software + * must display the following acknowledgement: + * This product includes software developed by the University of + * California, Berkeley and its contributors. + * 4. 
Neither the name of the University nor the names of its contributors + * may be used to endorse or promote products derived from this software + * without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND + * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE + * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL + * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS + * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) + * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT + * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY + * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF + * SUCH DAMAGE. + * + * @(#)lfs_cksum.c 8.1 (Berkeley) 6/11/93 + */ + +#include <sys/types.h> + +/* + * Simple, general purpose, fast checksum. Data must be short-aligned. + * Returns a u_long in case we ever want to do something more rigorous. + * + * XXX + * Use the TCP/IP checksum instead. + */ +u_long +cksum(str, len) + register void *str; + register size_t len; +{ + register u_long sum; + + len &= ~(sizeof(u_short) - 1); + for (sum = 0; len; len -= sizeof(u_short)) { + sum ^= *(u_short *)str; + ++(u_short *)str; + } + return (sum); +} diff --git a/sys/ufs/lfs/lfs_debug.c b/sys/ufs/lfs/lfs_debug.c new file mode 100644 index 0000000..cc28d60 --- /dev/null +++ b/sys/ufs/lfs/lfs_debug.c @@ -0,0 +1,137 @@ +/* + * Copyright (c) 1991, 1993 + * The Regents of the University of California. All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * 3. All advertising materials mentioning features or use of this software + * must display the following acknowledgement: + * This product includes software developed by the University of + * California, Berkeley and its contributors. + * 4. Neither the name of the University nor the names of its contributors + * may be used to endorse or promote products derived from this software + * without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND + * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE + * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL + * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS + * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) + * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT + * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY + * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF + * SUCH DAMAGE. 
+ * + * @(#)lfs_debug.c 8.1 (Berkeley) 6/11/93 + */ + +#ifdef DEBUG +#include <sys/param.h> +#include <sys/namei.h> +#include <sys/vnode.h> +#include <sys/mount.h> + +#include <ufs/ufs/quota.h> +#include <ufs/ufs/inode.h> +#include <ufs/lfs/lfs.h> +#include <ufs/lfs/lfs_extern.h> + +void +lfs_dump_super(lfsp) + struct lfs *lfsp; +{ + int i; + + (void)printf("%s%lx\t%s%lx\t%s%d\t%s%d\n", + "magic ", lfsp->lfs_magic, + "version ", lfsp->lfs_version, + "size ", lfsp->lfs_size, + "ssize ", lfsp->lfs_ssize); + (void)printf("%s%d\t%s%d\t%s%d\t%s%d\n", + "dsize ", lfsp->lfs_dsize, + "bsize ", lfsp->lfs_bsize, + "fsize ", lfsp->lfs_fsize, + "frag ", lfsp->lfs_frag); + + (void)printf("%s%d\t%s%d\t%s%d\t%s%d\n", + "minfree ", lfsp->lfs_minfree, + "inopb ", lfsp->lfs_inopb, + "ifpb ", lfsp->lfs_ifpb, + "nindir ", lfsp->lfs_nindir); + + (void)printf("%s%d\t%s%d\t%s%d\t%s%d\n", + "nseg ", lfsp->lfs_nseg, + "nspf ", lfsp->lfs_nspf, + "cleansz ", lfsp->lfs_cleansz, + "segtabsz ", lfsp->lfs_segtabsz); + + (void)printf("%s%lx\t%s%d\t%s%lx\t%s%d\n", + "segmask ", lfsp->lfs_segmask, + "segshift ", lfsp->lfs_segshift, + "bmask ", lfsp->lfs_bmask, + "bshift ", lfsp->lfs_bshift); + + (void)printf("%s%lx\t%s%d\t%s%lx\t%s%d\n", + "ffmask ", lfsp->lfs_ffmask, + "ffshift ", lfsp->lfs_ffshift, + "fbmask ", lfsp->lfs_fbmask, + "fbshift ", lfsp->lfs_fbshift); + + (void)printf("%s%d\t%s%d\t%s%lx\t%s%qx\n", + "sushift ", lfsp->lfs_sushift, + "fsbtodb ", lfsp->lfs_fsbtodb, + "cksum ", lfsp->lfs_cksum, + "maxfilesize ", lfsp->lfs_maxfilesize); + + (void)printf("Superblock disk addresses:"); + for (i = 0; i < LFS_MAXNUMSB; i++) + (void)printf(" %lx", lfsp->lfs_sboffs[i]); + (void)printf("\n"); + + (void)printf("Checkpoint Info\n"); + (void)printf("%s%d\t%s%lx\t%s%d\n", + "free ", lfsp->lfs_free, + "idaddr ", lfsp->lfs_idaddr, + "ifile ", lfsp->lfs_ifile); + (void)printf("%s%lx\t%s%d\t%s%lx\t%s%lx\t%s%lx\t%s%lx\n", + "bfree ", lfsp->lfs_bfree, + "nfiles ", lfsp->lfs_nfiles, + "lastseg ", lfsp->lfs_lastseg, + "nextseg ", lfsp->lfs_nextseg, + "curseg ", lfsp->lfs_curseg, + "offset ", lfsp->lfs_offset); + (void)printf("tstamp %lx\n", lfsp->lfs_tstamp); +} + +void +lfs_dump_dinode(dip) + struct dinode *dip; +{ + int i; + + (void)printf("%s%u\t%s%d\t%s%u\t%s%u\t%s%lu\n", + "mode ", dip->di_mode, + "nlink ", dip->di_nlink, + "uid ", dip->di_uid, + "gid ", dip->di_gid, + "size ", dip->di_size); + (void)printf("inum %ld\n", dip->di_inumber); + (void)printf("Direct Addresses\n"); + for (i = 0; i < NDADDR; i++) { + (void)printf("\t%lx", dip->di_db[i]); + if ((i % 6) == 5) + (void)printf("\n"); + } + for (i = 0; i < NIADDR; i++) + (void)printf("\t%lx", dip->di_ib[i]); + (void)printf("\n"); +} +#endif /* DEBUG */ diff --git a/sys/ufs/lfs/lfs_extern.h b/sys/ufs/lfs/lfs_extern.h new file mode 100644 index 0000000..c1157ad --- /dev/null +++ b/sys/ufs/lfs/lfs_extern.h @@ -0,0 +1,106 @@ +/*- + * Copyright (c) 1991, 1993, 1994 + * The Regents of the University of California. All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * 3. 
All advertising materials mentioning features or use of this software + * must display the following acknowledgement: + * This product includes software developed by the University of + * California, Berkeley and its contributors. + * 4. Neither the name of the University nor the names of its contributors + * may be used to endorse or promote products derived from this software + * without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND + * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE + * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL + * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS + * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) + * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT + * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY + * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF + * SUCH DAMAGE. + * + * @(#)lfs_extern.h 8.2 (Berkeley) 4/16/94 + */ + +struct fid; +struct mount; +struct nameidata; +struct proc; +struct statfs; +struct timeval; +struct inode; +struct uio; +struct mbuf; + +__BEGIN_DECLS +u_long cksum __P((void *, size_t)); /* XXX */ +int lfs_balloc __P((struct vnode *, u_long, daddr_t, struct buf **)); +int lfs_blkatoff __P((struct vop_blkatoff_args *)); +int lfs_bwrite __P((struct vop_bwrite_args *)); +int lfs_check __P((struct vnode *, daddr_t)); +int lfs_close __P((struct vop_close_args *)); +int lfs_create __P((struct vop_create_args *)); +int lfs_fhtovp __P((struct mount *, struct fid *, struct mbuf *, + struct vnode **, int *, struct ucred **)); +int lfs_fsync __P((struct vop_fsync_args *)); +int lfs_getattr __P((struct vop_getattr_args *)); +struct dinode * + lfs_ifind __P((struct lfs *, ino_t, struct dinode *)); +int lfs_inactive __P((struct vop_inactive_args *)); +int lfs_init __P((void)); +int lfs_initseg __P((struct lfs *)); +int lfs_link __P((struct vop_link_args *)); +int lfs_makeinode __P((int, struct nameidata *, struct inode **)); +int lfs_mkdir __P((struct vop_mkdir_args *)); +int lfs_mknod __P((struct vop_mknod_args *)); +int lfs_mount __P((struct mount *, + char *, caddr_t, struct nameidata *, struct proc *)); +int lfs_mountroot __P((void)); +struct buf * + lfs_newbuf __P((struct vnode *, daddr_t, size_t)); +int lfs_read __P((struct vop_read_args *)); +int lfs_remove __P((struct vop_remove_args *)); +int lfs_rmdir __P((struct vop_rmdir_args *)); +int lfs_rename __P((struct vop_rename_args *)); +void lfs_seglock __P((struct lfs *, unsigned long flags)); +void lfs_segunlock __P((struct lfs *)); +int lfs_segwrite __P((struct mount *, int)); +int lfs_statfs __P((struct mount *, struct statfs *, struct proc *)); +int lfs_symlink __P((struct vop_symlink_args *)); +int lfs_sync __P((struct mount *, int, struct ucred *, struct proc *)); +int lfs_truncate __P((struct vop_truncate_args *)); +int lfs_unmount __P((struct mount *, int, struct proc *)); +int lfs_update __P((struct vop_update_args *)); +int lfs_valloc __P((struct vop_valloc_args *)); +int lfs_vcreate __P((struct mount *, ino_t, struct vnode **)); +int lfs_vfree __P((struct vop_vfree_args *)); +int lfs_vflush __P((struct vnode *)); +int lfs_vget __P((struct mount *, ino_t, struct vnode **)); +int lfs_vptofh 
__P((struct vnode *, struct fid *)); +int lfs_vref __P((struct vnode *)); +void lfs_vunref __P((struct vnode *)); +int lfs_write __P((struct vop_write_args *)); +#ifdef DEBUG +void lfs_dump_dinode __P((struct dinode *)); +void lfs_dump_super __P((struct lfs *)); +#endif +__END_DECLS +extern int (**lfs_vnodeop_p)(); +extern int (**lfs_specop_p)(); +#ifdef FIFO +extern int (**lfs_fifoop_p)(); +#define LFS_FIFOOPS lfs_fifoop_p +#else +#define LFS_FIFOOPS NULL +#endif diff --git a/sys/ufs/lfs/lfs_inode.c b/sys/ufs/lfs/lfs_inode.c new file mode 100644 index 0000000..1a06aa2 --- /dev/null +++ b/sys/ufs/lfs/lfs_inode.c @@ -0,0 +1,359 @@ +/* + * Copyright (c) 1986, 1989, 1991, 1993 + * The Regents of the University of California. All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * 3. All advertising materials mentioning features or use of this software + * must display the following acknowledgement: + * This product includes software developed by the University of + * California, Berkeley and its contributors. + * 4. Neither the name of the University nor the names of its contributors + * may be used to endorse or promote products derived from this software + * without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND + * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE + * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL + * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS + * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) + * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT + * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY + * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF + * SUCH DAMAGE. + * + * @(#)lfs_inode.c 8.5 (Berkeley) 12/30/93 + */ + +#include <sys/param.h> +#include <sys/systm.h> +#include <sys/mount.h> +#include <sys/proc.h> +#include <sys/file.h> +#include <sys/buf.h> +#include <sys/vnode.h> +#include <sys/kernel.h> +#include <sys/malloc.h> + +#include <vm/vm.h> + +#include <ufs/ufs/quota.h> +#include <ufs/ufs/inode.h> +#include <ufs/ufs/ufsmount.h> +#include <ufs/ufs/ufs_extern.h> + +#include <ufs/lfs/lfs.h> +#include <ufs/lfs/lfs_extern.h> + +int +lfs_init() +{ + return (ufs_init()); +} + +/* Search a block for a specific dinode. 
*/ +struct dinode * +lfs_ifind(fs, ino, dip) + struct lfs *fs; + ino_t ino; + register struct dinode *dip; +{ + register int cnt; + register struct dinode *ldip; + + for (cnt = INOPB(fs), ldip = dip + (cnt - 1); cnt--; --ldip) + if (ldip->di_inumber == ino) + return (ldip); + + panic("lfs_ifind: dinode %u not found", ino); + /* NOTREACHED */ +} + +int +lfs_update(ap) + struct vop_update_args /* { + struct vnode *a_vp; + struct timeval *a_access; + struct timeval *a_modify; + int a_waitfor; + } */ *ap; +{ + struct vnode *vp = ap->a_vp; + struct inode *ip; + + if (vp->v_mount->mnt_flag & MNT_RDONLY) + return (0); + ip = VTOI(vp); + if ((ip->i_flag & + (IN_ACCESS | IN_CHANGE | IN_MODIFIED | IN_UPDATE)) == 0) + return (0); + if (ip->i_flag & IN_ACCESS) + ip->i_atime.ts_sec = ap->a_access->tv_sec; + if (ip->i_flag & IN_UPDATE) { + ip->i_mtime.ts_sec = ap->a_modify->tv_sec; + (ip)->i_modrev++; + } + if (ip->i_flag & IN_CHANGE) + ip->i_ctime.ts_sec = time.tv_sec; + ip->i_flag &= ~(IN_ACCESS | IN_CHANGE | IN_UPDATE); + + if (!(ip->i_flag & IN_MODIFIED)) + ++(VFSTOUFS(vp->v_mount)->um_lfs->lfs_uinodes); + ip->i_flag |= IN_MODIFIED; + + /* If sync, push back the vnode and any dirty blocks it may have. */ + return (ap->a_waitfor & LFS_SYNC ? lfs_vflush(vp) : 0); +} + +/* Update segment usage information when removing a block. */ +#define UPDATE_SEGUSE \ + if (lastseg != -1) { \ + LFS_SEGENTRY(sup, fs, lastseg, sup_bp); \ + if ((num << fs->lfs_bshift) > sup->su_nbytes) \ + panic("lfs_truncate: negative bytes in segment %d\n", \ + lastseg); \ + sup->su_nbytes -= num << fs->lfs_bshift; \ + e1 = VOP_BWRITE(sup_bp); \ + blocksreleased += num; \ + } + +#define SEGDEC { \ + if (daddr != 0) { \ + if (lastseg != (seg = datosn(fs, daddr))) { \ + UPDATE_SEGUSE; \ + num = 1; \ + lastseg = seg; \ + } else \ + ++num; \ + } \ +} + +/* + * Truncate the inode ip to at most length size. Update segment usage + * table information. + */ +/* ARGSUSED */ +int +lfs_truncate(ap) + struct vop_truncate_args /* { + struct vnode *a_vp; + off_t a_length; + int a_flags; + struct ucred *a_cred; + struct proc *a_p; + } */ *ap; +{ + register struct indir *inp; + register int i; + register daddr_t *daddrp; + register struct vnode *vp = ap->a_vp; + off_t length = ap->a_length; + struct buf *bp, *sup_bp; + struct timeval tv; + struct ifile *ifp; + struct inode *ip; + struct lfs *fs; + struct indir a[NIADDR + 2], a_end[NIADDR + 2]; + SEGUSE *sup; + daddr_t daddr, lastblock, lbn, olastblock; + long off, a_released, blocksreleased, i_released; + int e1, e2, depth, lastseg, num, offset, seg, size; + + ip = VTOI(vp); + tv = time; + if (vp->v_type == VLNK && vp->v_mount->mnt_maxsymlinklen > 0) { +#ifdef DIAGNOSTIC + if (length != 0) + panic("lfs_truncate: partial truncate of symlink"); +#endif + bzero((char *)&ip->i_shortlink, (u_int)ip->i_size); + ip->i_size = 0; + ip->i_flag |= IN_CHANGE | IN_UPDATE; + return (VOP_UPDATE(vp, &tv, &tv, 0)); + } + vnode_pager_setsize(vp, (u_long)length); + + fs = ip->i_lfs; + + /* If length is larger than the file, just update the times. */ + if (ip->i_size <= length) { + ip->i_flag |= IN_CHANGE | IN_UPDATE; + return (VOP_UPDATE(vp, &tv, &tv, 0)); + } + + /* + * Calculate index into inode's block list of last direct and indirect + * blocks (if any) which we want to keep. Lastblock is 0 when the + * file is truncated to 0. + */ + lastblock = lblkno(fs, length + fs->lfs_bsize - 1); + olastblock = lblkno(fs, ip->i_size + fs->lfs_bsize - 1) - 1; + + /* + * Update the size of the file. 
If the file is not being truncated to + * a block boundry, the contents of the partial block following the end + * of the file must be zero'ed in case it ever become accessable again + * because of subsequent file growth. + */ + offset = blkoff(fs, length); + if (offset == 0) + ip->i_size = length; + else { + lbn = lblkno(fs, length); +#ifdef QUOTA + if (e1 = getinoquota(ip)) + return (e1); +#endif + if (e1 = bread(vp, lbn, fs->lfs_bsize, NOCRED, &bp)) + return (e1); + ip->i_size = length; + size = blksize(fs); + (void)vnode_pager_uncache(vp); + bzero((char *)bp->b_data + offset, (u_int)(size - offset)); + allocbuf(bp, size); + if (e1 = VOP_BWRITE(bp)) + return (e1); + } + /* + * Modify sup->su_nbyte counters for each deleted block; keep track + * of number of blocks removed for ip->i_blocks. + */ + blocksreleased = 0; + num = 0; + lastseg = -1; + + for (lbn = olastblock; lbn >= lastblock;) { + /* XXX use run length from bmap array to make this faster */ + ufs_bmaparray(vp, lbn, &daddr, a, &depth, NULL); + if (lbn == olastblock) + for (i = NIADDR + 2; i--;) + a_end[i] = a[i]; + switch (depth) { + case 0: /* Direct block. */ + daddr = ip->i_db[lbn]; + SEGDEC; + ip->i_db[lbn] = 0; + --lbn; + break; +#ifdef DIAGNOSTIC + case 1: /* An indirect block. */ + panic("lfs_truncate: ufs_bmaparray returned depth 1"); + /* NOTREACHED */ +#endif + default: /* Chain of indirect blocks. */ + inp = a + --depth; + if (inp->in_off > 0 && lbn != lastblock) { + lbn -= inp->in_off < lbn - lastblock ? + inp->in_off : lbn - lastblock; + break; + } + for (; depth && (inp->in_off == 0 || lbn == lastblock); + --inp, --depth) { + if (bread(vp, + inp->in_lbn, fs->lfs_bsize, NOCRED, &bp)) + panic("lfs_truncate: bread bno %d", + inp->in_lbn); + daddrp = (daddr_t *)bp->b_data + inp->in_off; + for (i = inp->in_off; + i++ <= a_end[depth].in_off;) { + daddr = *daddrp++; + SEGDEC; + } + a_end[depth].in_off = NINDIR(fs) - 1; + if (inp->in_off == 0) + brelse (bp); + else { + bzero((daddr_t *)bp->b_data + + inp->in_off, fs->lfs_bsize - + inp->in_off * sizeof(daddr_t)); + if (e1 = VOP_BWRITE(bp)) + return (e1); + } + } + if (depth == 0 && a[1].in_off == 0) { + off = a[0].in_off; + daddr = ip->i_ib[off]; + SEGDEC; + ip->i_ib[off] = 0; + } + if (lbn == lastblock || lbn <= NDADDR) + --lbn; + else { + lbn -= NINDIR(fs); + if (lbn < lastblock) + lbn = lastblock; + } + } + } + UPDATE_SEGUSE; + + /* If truncating the file to 0, update the version number. */ + if (length == 0) { + LFS_IENTRY(ifp, fs, ip->i_number, bp); + ++ifp->if_version; + (void) VOP_BWRITE(bp); + } + +#ifdef DIAGNOSTIC + if (ip->i_blocks < fsbtodb(fs, blocksreleased)) { + printf("lfs_truncate: block count < 0\n"); + blocksreleased = ip->i_blocks; + } +#endif + ip->i_blocks -= fsbtodb(fs, blocksreleased); + fs->lfs_bfree += fsbtodb(fs, blocksreleased); + ip->i_flag |= IN_CHANGE | IN_UPDATE; + /* + * Traverse dirty block list counting number of dirty buffers + * that are being deleted out of the cache, so that the lfs_avail + * field can be updated. + */ + a_released = 0; + i_released = 0; + for (bp = vp->v_dirtyblkhd.lh_first; bp; bp = bp->b_vnbufs.le_next) + if (bp->b_flags & B_LOCKED) { + ++a_released; + /* + * XXX + * When buffers are created in the cache, their block + * number is set equal to their logical block number. + * If that is still true, we are assuming that the + * blocks are new (not yet on disk) and weren't + * counted above. 
However, there is a slight chance + * that a block's disk address is equal to its logical + * block number in which case, we'll get an overcounting + * here. + */ + if (bp->b_blkno == bp->b_lblkno) + ++i_released; + } + blocksreleased = fsbtodb(fs, i_released); +#ifdef DIAGNOSTIC + if (blocksreleased > ip->i_blocks) { + printf("lfs_inode: Warning! %s\n", + "more blocks released from inode than are in inode"); + blocksreleased = ip->i_blocks; + } +#endif + fs->lfs_bfree += blocksreleased; + ip->i_blocks -= blocksreleased; +#ifdef DIAGNOSTIC + if (length == 0 && ip->i_blocks != 0) + printf("lfs_inode: Warning! %s%d%s\n", + "Truncation to zero, but ", ip->i_blocks, + " blocks left on inode"); +#endif + fs->lfs_avail += fsbtodb(fs, a_released); + e1 = vinvalbuf(vp, (length > 0) ? V_SAVE : 0, ap->a_cred, ap->a_p, + 0, 0); + e2 = VOP_UPDATE(vp, &tv, &tv, 0); + return (e1 ? e1 : e2 ? e2 : 0); +} diff --git a/sys/ufs/lfs/lfs_segment.c b/sys/ufs/lfs/lfs_segment.c new file mode 100644 index 0000000..249d59d --- /dev/null +++ b/sys/ufs/lfs/lfs_segment.c @@ -0,0 +1,1111 @@ +/* + * Copyright (c) 1991, 1993 + * The Regents of the University of California. All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * 3. All advertising materials mentioning features or use of this software + * must display the following acknowledgement: + * This product includes software developed by the University of + * California, Berkeley and its contributors. + * 4. Neither the name of the University nor the names of its contributors + * may be used to endorse or promote products derived from this software + * without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND + * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE + * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL + * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS + * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) + * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT + * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY + * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF + * SUCH DAMAGE. 
+ * + * @(#)lfs_segment.c 8.5 (Berkeley) 1/4/94 + */ + +#include <sys/param.h> +#include <sys/systm.h> +#include <sys/namei.h> +#include <sys/kernel.h> +#include <sys/resourcevar.h> +#include <sys/file.h> +#include <sys/stat.h> +#include <sys/buf.h> +#include <sys/proc.h> +#include <sys/conf.h> +#include <sys/vnode.h> +#include <sys/malloc.h> +#include <sys/mount.h> + +#include <miscfs/specfs/specdev.h> +#include <miscfs/fifofs/fifo.h> + +#include <ufs/ufs/quota.h> +#include <ufs/ufs/inode.h> +#include <ufs/ufs/dir.h> +#include <ufs/ufs/ufsmount.h> +#include <ufs/ufs/ufs_extern.h> + +#include <ufs/lfs/lfs.h> +#include <ufs/lfs/lfs_extern.h> + +extern int count_lock_queue __P((void)); + +#define MAX_ACTIVE 10 +/* + * Determine if it's OK to start a partial in this segment, or if we need + * to go on to a new segment. + */ +#define LFS_PARTIAL_FITS(fs) \ + ((fs)->lfs_dbpseg - ((fs)->lfs_offset - (fs)->lfs_curseg) > \ + 1 << (fs)->lfs_fsbtodb) + +void lfs_callback __P((struct buf *)); +void lfs_gather __P((struct lfs *, struct segment *, + struct vnode *, int (*) __P((struct lfs *, struct buf *)))); +int lfs_gatherblock __P((struct segment *, struct buf *, int *)); +void lfs_iset __P((struct inode *, daddr_t, time_t)); +int lfs_match_data __P((struct lfs *, struct buf *)); +int lfs_match_dindir __P((struct lfs *, struct buf *)); +int lfs_match_indir __P((struct lfs *, struct buf *)); +int lfs_match_tindir __P((struct lfs *, struct buf *)); +void lfs_newseg __P((struct lfs *)); +void lfs_shellsort __P((struct buf **, daddr_t *, register int)); +void lfs_supercallback __P((struct buf *)); +void lfs_updatemeta __P((struct segment *)); +int lfs_vref __P((struct vnode *)); +void lfs_vunref __P((struct vnode *)); +void lfs_writefile __P((struct lfs *, struct segment *, struct vnode *)); +int lfs_writeinode __P((struct lfs *, struct segment *, struct inode *)); +int lfs_writeseg __P((struct lfs *, struct segment *)); +void lfs_writesuper __P((struct lfs *)); +void lfs_writevnodes __P((struct lfs *fs, struct mount *mp, + struct segment *sp, int dirops)); + +int lfs_allclean_wakeup; /* Cleaner wakeup address. */ + +/* Statistics Counters */ +#define DOSTATS +struct lfs_stats lfs_stats; + +/* op values to lfs_writevnodes */ +#define VN_REG 0 +#define VN_DIROP 1 +#define VN_EMPTY 2 + +/* + * Ifile and meta data blocks are not marked busy, so segment writes MUST be + * single threaded. Currently, there are two paths into lfs_segwrite, sync() + * and getnewbuf(). They both mark the file system busy. Lfs_vflush() + * explicitly marks the file system busy. So lfs_segwrite is safe. I think. 
+ */ + +int +lfs_vflush(vp) + struct vnode *vp; +{ + struct inode *ip; + struct lfs *fs; + struct segment *sp; + + fs = VFSTOUFS(vp->v_mount)->um_lfs; + if (fs->lfs_nactive > MAX_ACTIVE) + return(lfs_segwrite(vp->v_mount, SEGM_SYNC|SEGM_CKP)); + lfs_seglock(fs, SEGM_SYNC); + sp = fs->lfs_sp; + + + ip = VTOI(vp); + if (vp->v_dirtyblkhd.lh_first == NULL) + lfs_writevnodes(fs, vp->v_mount, sp, VN_EMPTY); + + do { + do { + if (vp->v_dirtyblkhd.lh_first != NULL) + lfs_writefile(fs, sp, vp); + } while (lfs_writeinode(fs, sp, ip)); + + } while (lfs_writeseg(fs, sp) && ip->i_number == LFS_IFILE_INUM); + +#ifdef DOSTATS + ++lfs_stats.nwrites; + if (sp->seg_flags & SEGM_SYNC) + ++lfs_stats.nsync_writes; + if (sp->seg_flags & SEGM_CKP) + ++lfs_stats.ncheckpoints; +#endif + lfs_segunlock(fs); + return (0); +} + +void +lfs_writevnodes(fs, mp, sp, op) + struct lfs *fs; + struct mount *mp; + struct segment *sp; + int op; +{ + struct inode *ip; + struct vnode *vp; + +loop: + for (vp = mp->mnt_vnodelist.lh_first; + vp != NULL; + vp = vp->v_mntvnodes.le_next) { + /* + * If the vnode that we are about to sync is no longer + * associated with this mount point, start over. + */ + if (vp->v_mount != mp) + goto loop; + + /* XXX ignore dirops for now + if (op == VN_DIROP && !(vp->v_flag & VDIROP) || + op != VN_DIROP && (vp->v_flag & VDIROP)) + continue; + */ + + if (op == VN_EMPTY && vp->v_dirtyblkhd.lh_first) + continue; + + if (vp->v_type == VNON) + continue; + + if (lfs_vref(vp)) + continue; + + /* + * Write the inode/file if dirty and it's not the + * the IFILE. + */ + ip = VTOI(vp); + if ((ip->i_flag & + (IN_ACCESS | IN_CHANGE | IN_MODIFIED | IN_UPDATE) || + vp->v_dirtyblkhd.lh_first != NULL) && + ip->i_number != LFS_IFILE_INUM) { + if (vp->v_dirtyblkhd.lh_first != NULL) + lfs_writefile(fs, sp, vp); + (void) lfs_writeinode(fs, sp, ip); + } + vp->v_flag &= ~VDIROP; + lfs_vunref(vp); + } +} + +int +lfs_segwrite(mp, flags) + struct mount *mp; + int flags; /* Do a checkpoint. */ +{ + struct buf *bp; + struct inode *ip; + struct lfs *fs; + struct segment *sp; + struct vnode *vp; + SEGUSE *segusep; + daddr_t ibno; + CLEANERINFO *cip; + int clean, do_ckp, error, i; + + fs = VFSTOUFS(mp)->um_lfs; + + /* + * If we have fewer than 2 clean segments, wait until cleaner + * writes. + */ + do { + LFS_CLEANERINFO(cip, fs, bp); + clean = cip->clean; + brelse(bp); + if (clean <= 2) { + printf ("segs clean: %d\n", clean); + wakeup(&lfs_allclean_wakeup); + if (error = tsleep(&fs->lfs_avail, PRIBIO + 1, + "lfs writer", 0)) + return (error); + } + } while (clean <= 2 ); + + /* + * Allocate a segment structure and enough space to hold pointers to + * the maximum possible number of buffers which can be described in a + * single summary block. + */ + do_ckp = flags & SEGM_CKP || fs->lfs_nactive > MAX_ACTIVE; + lfs_seglock(fs, flags | (do_ckp ? SEGM_CKP : 0)); + sp = fs->lfs_sp; + + lfs_writevnodes(fs, mp, sp, VN_REG); + + /* XXX ignore ordering of dirops for now */ + /* XXX + fs->lfs_writer = 1; + if (fs->lfs_dirops && (error = + tsleep(&fs->lfs_writer, PRIBIO + 1, "lfs writer", 0))) { + free(sp->bpp, M_SEGMENT); + free(sp, M_SEGMENT); + fs->lfs_writer = 0; + return (error); + } + + lfs_writevnodes(fs, mp, sp, VN_DIROP); + */ + + /* + * If we are doing a checkpoint, mark everything since the + * last checkpoint as no longer ACTIVE. 
+ */ + if (do_ckp) + for (ibno = fs->lfs_cleansz + fs->lfs_segtabsz; + --ibno >= fs->lfs_cleansz; ) { + if (bread(fs->lfs_ivnode, ibno, fs->lfs_bsize, + NOCRED, &bp)) + + panic("lfs: ifile read"); + segusep = (SEGUSE *)bp->b_data; + for (i = fs->lfs_sepb; i--; segusep++) + segusep->su_flags &= ~SEGUSE_ACTIVE; + + error = VOP_BWRITE(bp); + } + + if (do_ckp || fs->lfs_doifile) { +redo: + vp = fs->lfs_ivnode; + while (vget(vp, 1)); + ip = VTOI(vp); + if (vp->v_dirtyblkhd.lh_first != NULL) + lfs_writefile(fs, sp, vp); + (void)lfs_writeinode(fs, sp, ip); + vput(vp); + if (lfs_writeseg(fs, sp) && do_ckp) + goto redo; + } else + (void) lfs_writeseg(fs, sp); + + /* + * If the I/O count is non-zero, sleep until it reaches zero. At the + * moment, the user's process hangs around so we can sleep. + */ + /* XXX ignore dirops for now + fs->lfs_writer = 0; + fs->lfs_doifile = 0; + wakeup(&fs->lfs_dirops); + */ + +#ifdef DOSTATS + ++lfs_stats.nwrites; + if (sp->seg_flags & SEGM_SYNC) + ++lfs_stats.nsync_writes; + if (sp->seg_flags & SEGM_CKP) + ++lfs_stats.ncheckpoints; +#endif + lfs_segunlock(fs); + return (0); +} + +/* + * Write the dirty blocks associated with a vnode. + */ +void +lfs_writefile(fs, sp, vp) + struct lfs *fs; + struct segment *sp; + struct vnode *vp; +{ + struct buf *bp; + struct finfo *fip; + IFILE *ifp; + + if (sp->seg_bytes_left < fs->lfs_bsize || + sp->sum_bytes_left < sizeof(struct finfo)) + (void) lfs_writeseg(fs, sp); + + sp->sum_bytes_left -= sizeof(struct finfo) - sizeof(daddr_t); + ++((SEGSUM *)(sp->segsum))->ss_nfinfo; + + fip = sp->fip; + fip->fi_nblocks = 0; + fip->fi_ino = VTOI(vp)->i_number; + LFS_IENTRY(ifp, fs, fip->fi_ino, bp); + fip->fi_version = ifp->if_version; + brelse(bp); + + /* + * It may not be necessary to write the meta-data blocks at this point, + * as the roll-forward recovery code should be able to reconstruct the + * list. + */ + lfs_gather(fs, sp, vp, lfs_match_data); + lfs_gather(fs, sp, vp, lfs_match_indir); + lfs_gather(fs, sp, vp, lfs_match_dindir); +#ifdef TRIPLE + lfs_gather(fs, sp, vp, lfs_match_tindir); +#endif + + fip = sp->fip; + if (fip->fi_nblocks != 0) { + sp->fip = + (struct finfo *)((caddr_t)fip + sizeof(struct finfo) + + sizeof(daddr_t) * (fip->fi_nblocks - 1)); + sp->start_lbp = &sp->fip->fi_blocks[0]; + } else { + sp->sum_bytes_left += sizeof(struct finfo) - sizeof(daddr_t); + --((SEGSUM *)(sp->segsum))->ss_nfinfo; + } +} + +int +lfs_writeinode(fs, sp, ip) + struct lfs *fs; + struct segment *sp; + struct inode *ip; +{ + struct buf *bp, *ibp; + IFILE *ifp; + SEGUSE *sup; + daddr_t daddr; + ino_t ino; + int error, i, ndx; + int redo_ifile = 0; + + if (!(ip->i_flag & (IN_ACCESS | IN_CHANGE | IN_MODIFIED | IN_UPDATE))) + return(0); + + /* Allocate a new inode block if necessary. */ + if (sp->ibp == NULL) { + /* Allocate a new segment if necessary. */ + if (sp->seg_bytes_left < fs->lfs_bsize || + sp->sum_bytes_left < sizeof(daddr_t)) + (void) lfs_writeseg(fs, sp); + + /* Get next inode block. */ + daddr = fs->lfs_offset; + fs->lfs_offset += fsbtodb(fs, 1); + sp->ibp = *sp->cbpp++ = + lfs_newbuf(VTOI(fs->lfs_ivnode)->i_devvp, daddr, + fs->lfs_bsize); + /* Zero out inode numbers */ + for (i = 0; i < INOPB(fs); ++i) + ((struct dinode *)sp->ibp->b_data)[i].di_inumber = 0; + ++sp->start_bpp; + fs->lfs_avail -= fsbtodb(fs, 1); + /* Set remaining space counters. 
*/ + sp->seg_bytes_left -= fs->lfs_bsize; + sp->sum_bytes_left -= sizeof(daddr_t); + ndx = LFS_SUMMARY_SIZE / sizeof(daddr_t) - + sp->ninodes / INOPB(fs) - 1; + ((daddr_t *)(sp->segsum))[ndx] = daddr; + } + + /* Update the inode times and copy the inode onto the inode page. */ + if (ip->i_flag & IN_MODIFIED) + --fs->lfs_uinodes; + ITIMES(ip, &time, &time); + ip->i_flag &= ~(IN_ACCESS | IN_CHANGE | IN_MODIFIED | IN_UPDATE); + bp = sp->ibp; + ((struct dinode *)bp->b_data)[sp->ninodes % INOPB(fs)] = ip->i_din; + /* Increment inode count in segment summary block. */ + ++((SEGSUM *)(sp->segsum))->ss_ninos; + + /* If this page is full, set flag to allocate a new page. */ + if (++sp->ninodes % INOPB(fs) == 0) + sp->ibp = NULL; + + /* + * If updating the ifile, update the super-block. Update the disk + * address and access times for this inode in the ifile. + */ + ino = ip->i_number; + if (ino == LFS_IFILE_INUM) { + daddr = fs->lfs_idaddr; + fs->lfs_idaddr = bp->b_blkno; + } else { + LFS_IENTRY(ifp, fs, ino, ibp); + daddr = ifp->if_daddr; + ifp->if_daddr = bp->b_blkno; + error = VOP_BWRITE(ibp); + } + + /* + * No need to update segment usage if there was no former inode address + * or if the last inode address is in the current partial segment. + */ + if (daddr != LFS_UNUSED_DADDR && + !(daddr >= fs->lfs_lastpseg && daddr <= bp->b_blkno)) { + LFS_SEGENTRY(sup, fs, datosn(fs, daddr), bp); +#ifdef DIAGNOSTIC + if (sup->su_nbytes < sizeof(struct dinode)) { + /* XXX -- Change to a panic. */ + printf("lfs: negative bytes (segment %d)\n", + datosn(fs, daddr)); + panic("negative bytes"); + } +#endif + sup->su_nbytes -= sizeof(struct dinode); + redo_ifile = + (ino == LFS_IFILE_INUM && !(bp->b_flags & B_GATHERED)); + error = VOP_BWRITE(bp); + } + return (redo_ifile); +} + +int +lfs_gatherblock(sp, bp, sptr) + struct segment *sp; + struct buf *bp; + int *sptr; +{ + struct lfs *fs; + int version; + + /* + * If full, finish this segment. We may be doing I/O, so + * release and reacquire the splbio(). + */ +#ifdef DIAGNOSTIC + if (sp->vp == NULL) + panic ("lfs_gatherblock: Null vp in segment"); +#endif + fs = sp->fs; + if (sp->sum_bytes_left < sizeof(daddr_t) || + sp->seg_bytes_left < fs->lfs_bsize) { + if (sptr) + splx(*sptr); + lfs_updatemeta(sp); + + version = sp->fip->fi_version; + (void) lfs_writeseg(fs, sp); + + sp->fip->fi_version = version; + sp->fip->fi_ino = VTOI(sp->vp)->i_number; + /* Add the current file to the segment summary. */ + ++((SEGSUM *)(sp->segsum))->ss_nfinfo; + sp->sum_bytes_left -= + sizeof(struct finfo) - sizeof(daddr_t); + + if (sptr) + *sptr = splbio(); + return(1); + } + + /* Insert into the buffer list, update the FINFO block. 
*/ + bp->b_flags |= B_GATHERED; + *sp->cbpp++ = bp; + sp->fip->fi_blocks[sp->fip->fi_nblocks++] = bp->b_lblkno; + + sp->sum_bytes_left -= sizeof(daddr_t); + sp->seg_bytes_left -= fs->lfs_bsize; + return(0); +} + +void +lfs_gather(fs, sp, vp, match) + struct lfs *fs; + struct segment *sp; + struct vnode *vp; + int (*match) __P((struct lfs *, struct buf *)); +{ + struct buf *bp; + int s; + + sp->vp = vp; + s = splbio(); +loop: for (bp = vp->v_dirtyblkhd.lh_first; bp; bp = bp->b_vnbufs.le_next) { + if (bp->b_flags & B_BUSY || !match(fs, bp) || + bp->b_flags & B_GATHERED) + continue; +#ifdef DIAGNOSTIC + if (!(bp->b_flags & B_DELWRI)) + panic("lfs_gather: bp not B_DELWRI"); + if (!(bp->b_flags & B_LOCKED)) + panic("lfs_gather: bp not B_LOCKED"); +#endif + if (lfs_gatherblock(sp, bp, &s)) + goto loop; + } + splx(s); + lfs_updatemeta(sp); + sp->vp = NULL; +} + + +/* + * Update the metadata that points to the blocks listed in the FINFO + * array. + */ +void +lfs_updatemeta(sp) + struct segment *sp; +{ + SEGUSE *sup; + struct buf *bp; + struct lfs *fs; + struct vnode *vp; + struct indir a[NIADDR + 2], *ap; + struct inode *ip; + daddr_t daddr, lbn, off; + int db_per_fsb, error, i, nblocks, num; + + vp = sp->vp; + nblocks = &sp->fip->fi_blocks[sp->fip->fi_nblocks] - sp->start_lbp; + if (vp == NULL || nblocks == 0) + return; + + /* Sort the blocks. */ + if (!(sp->seg_flags & SEGM_CLEAN)) + lfs_shellsort(sp->start_bpp, sp->start_lbp, nblocks); + + /* + * Assign disk addresses, and update references to the logical + * block and the segment usage information. + */ + fs = sp->fs; + db_per_fsb = fsbtodb(fs, 1); + for (i = nblocks; i--; ++sp->start_bpp) { + lbn = *sp->start_lbp++; + (*sp->start_bpp)->b_blkno = off = fs->lfs_offset; + fs->lfs_offset += db_per_fsb; + + if (error = ufs_bmaparray(vp, lbn, &daddr, a, &num, NULL)) + panic("lfs_updatemeta: ufs_bmaparray %d", error); + ip = VTOI(vp); + switch (num) { + case 0: + ip->i_db[lbn] = off; + break; + case 1: + ip->i_ib[a[0].in_off] = off; + break; + default: + ap = &a[num - 1]; + if (bread(vp, ap->in_lbn, fs->lfs_bsize, NOCRED, &bp)) + panic("lfs_updatemeta: bread bno %d", + ap->in_lbn); + /* + * Bread may create a new indirect block which needs + * to get counted for the inode. + */ + if (bp->b_blkno == -1 && !(bp->b_flags & B_CACHE)) { +printf ("Updatemeta allocating indirect block: shouldn't happen\n"); + ip->i_blocks += btodb(fs->lfs_bsize); + fs->lfs_bfree -= btodb(fs->lfs_bsize); + } + ((daddr_t *)bp->b_data)[ap->in_off] = off; + VOP_BWRITE(bp); + } + + /* Update segment usage information. */ + if (daddr != UNASSIGNED && + !(daddr >= fs->lfs_lastpseg && daddr <= off)) { + LFS_SEGENTRY(sup, fs, datosn(fs, daddr), bp); +#ifdef DIAGNOSTIC + if (sup->su_nbytes < fs->lfs_bsize) { + /* XXX -- Change to a panic. */ + printf("lfs: negative bytes (segment %d)\n", + datosn(fs, daddr)); + panic ("Negative Bytes"); + } +#endif + sup->su_nbytes -= fs->lfs_bsize; + error = VOP_BWRITE(bp); + } + } +} + +/* + * Start a new segment. + */ +int +lfs_initseg(fs) + struct lfs *fs; +{ + struct segment *sp; + SEGUSE *sup; + SEGSUM *ssp; + struct buf *bp; + int repeat; + + sp = fs->lfs_sp; + + repeat = 0; + /* Advance to the next segment. */ + if (!LFS_PARTIAL_FITS(fs)) { + /* Wake up any cleaning procs waiting on this file system. 
*/ + wakeup(&lfs_allclean_wakeup); + + lfs_newseg(fs); + repeat = 1; + fs->lfs_offset = fs->lfs_curseg; + sp->seg_number = datosn(fs, fs->lfs_curseg); + sp->seg_bytes_left = fs->lfs_dbpseg * DEV_BSIZE; + + /* + * If the segment contains a superblock, update the offset + * and summary address to skip over it. + */ + LFS_SEGENTRY(sup, fs, sp->seg_number, bp); + if (sup->su_flags & SEGUSE_SUPERBLOCK) { + fs->lfs_offset += LFS_SBPAD / DEV_BSIZE; + sp->seg_bytes_left -= LFS_SBPAD; + } + brelse(bp); + } else { + sp->seg_number = datosn(fs, fs->lfs_curseg); + sp->seg_bytes_left = (fs->lfs_dbpseg - + (fs->lfs_offset - fs->lfs_curseg)) * DEV_BSIZE; + } + fs->lfs_lastpseg = fs->lfs_offset; + + sp->fs = fs; + sp->ibp = NULL; + sp->ninodes = 0; + + /* Get a new buffer for SEGSUM and enter it into the buffer list. */ + sp->cbpp = sp->bpp; + *sp->cbpp = lfs_newbuf(VTOI(fs->lfs_ivnode)->i_devvp, fs->lfs_offset, + LFS_SUMMARY_SIZE); + sp->segsum = (*sp->cbpp)->b_data; + bzero(sp->segsum, LFS_SUMMARY_SIZE); + sp->start_bpp = ++sp->cbpp; + fs->lfs_offset += LFS_SUMMARY_SIZE / DEV_BSIZE; + + /* Set point to SEGSUM, initialize it. */ + ssp = sp->segsum; + ssp->ss_next = fs->lfs_nextseg; + ssp->ss_nfinfo = ssp->ss_ninos = 0; + + /* Set pointer to first FINFO, initialize it. */ + sp->fip = (struct finfo *)(sp->segsum + sizeof(SEGSUM)); + sp->fip->fi_nblocks = 0; + sp->start_lbp = &sp->fip->fi_blocks[0]; + + sp->seg_bytes_left -= LFS_SUMMARY_SIZE; + sp->sum_bytes_left = LFS_SUMMARY_SIZE - sizeof(SEGSUM); + + return(repeat); +} + +/* + * Return the next segment to write. + */ +void +lfs_newseg(fs) + struct lfs *fs; +{ + CLEANERINFO *cip; + SEGUSE *sup; + struct buf *bp; + int curseg, isdirty, sn; + + LFS_SEGENTRY(sup, fs, datosn(fs, fs->lfs_nextseg), bp); + sup->su_flags |= SEGUSE_DIRTY | SEGUSE_ACTIVE; + sup->su_nbytes = 0; + sup->su_nsums = 0; + sup->su_ninos = 0; + (void) VOP_BWRITE(bp); + + LFS_CLEANERINFO(cip, fs, bp); + --cip->clean; + ++cip->dirty; + (void) VOP_BWRITE(bp); + + fs->lfs_lastseg = fs->lfs_curseg; + fs->lfs_curseg = fs->lfs_nextseg; + for (sn = curseg = datosn(fs, fs->lfs_curseg);;) { + sn = (sn + 1) % fs->lfs_nseg; + if (sn == curseg) + panic("lfs_nextseg: no clean segments"); + LFS_SEGENTRY(sup, fs, sn, bp); + isdirty = sup->su_flags & SEGUSE_DIRTY; + brelse(bp); + if (!isdirty) + break; + } + + ++fs->lfs_nactive; + fs->lfs_nextseg = sntoda(fs, sn); +#ifdef DOSTATS + ++lfs_stats.segsused; +#endif +} + +int +lfs_writeseg(fs, sp) + struct lfs *fs; + struct segment *sp; +{ + extern int locked_queue_count; + struct buf **bpp, *bp, *cbp; + SEGUSE *sup; + SEGSUM *ssp; + dev_t i_dev; + size_t size; + u_long *datap, *dp; + int ch_per_blk, do_again, i, nblocks, num, s; + int (*strategy)__P((struct vop_strategy_args *)); + struct vop_strategy_args vop_strategy_a; + u_short ninos; + char *p; + + /* + * If there are no buffers other than the segment summary to write + * and it is not a checkpoint, don't do anything. On a checkpoint, + * even if there aren't any buffers, you need to write the superblock. + */ + if ((nblocks = sp->cbpp - sp->bpp) == 1) + return (0); + + ssp = (SEGSUM *)sp->segsum; + + /* Update the segment usage information. 
*/ + LFS_SEGENTRY(sup, fs, sp->seg_number, bp); + ninos = (ssp->ss_ninos + INOPB(fs) - 1) / INOPB(fs); + sup->su_nbytes += nblocks - 1 - ninos << fs->lfs_bshift; + sup->su_nbytes += ssp->ss_ninos * sizeof(struct dinode); + sup->su_nbytes += LFS_SUMMARY_SIZE; + sup->su_lastmod = time.tv_sec; + sup->su_ninos += ninos; + ++sup->su_nsums; + do_again = !(bp->b_flags & B_GATHERED); + (void)VOP_BWRITE(bp); + /* + * Compute checksum across data and then across summary; the first + * block (the summary block) is skipped. Set the create time here + * so that it's guaranteed to be later than the inode mod times. + * + * XXX + * Fix this to do it inline, instead of malloc/copy. + */ + datap = dp = malloc(nblocks * sizeof(u_long), M_SEGMENT, M_WAITOK); + for (bpp = sp->bpp, i = nblocks - 1; i--;) { + if ((*++bpp)->b_flags & B_INVAL) { + if (copyin((*bpp)->b_saveaddr, dp++, sizeof(u_long))) + panic("lfs_writeseg: copyin failed"); + } else + *dp++ = ((u_long *)(*bpp)->b_data)[0]; + } + ssp->ss_create = time.tv_sec; + ssp->ss_datasum = cksum(datap, (nblocks - 1) * sizeof(u_long)); + ssp->ss_sumsum = + cksum(&ssp->ss_datasum, LFS_SUMMARY_SIZE - sizeof(ssp->ss_sumsum)); + free(datap, M_SEGMENT); +#ifdef DIAGNOSTIC + if (fs->lfs_bfree < fsbtodb(fs, ninos) + LFS_SUMMARY_SIZE / DEV_BSIZE) + panic("lfs_writeseg: No diskspace for summary"); +#endif + fs->lfs_bfree -= (fsbtodb(fs, ninos) + LFS_SUMMARY_SIZE / DEV_BSIZE); + + i_dev = VTOI(fs->lfs_ivnode)->i_dev; + strategy = VTOI(fs->lfs_ivnode)->i_devvp->v_op[VOFFSET(vop_strategy)]; + + /* + * When we simply write the blocks we lose a rotation for every block + * written. To avoid this problem, we allocate memory in chunks, copy + * the buffers into the chunk and write the chunk. MAXPHYS is the + * largest size I/O devices can handle. + * When the data is copied to the chunk, turn off the the B_LOCKED bit + * and brelse the buffer (which will move them to the LRU list). Add + * the B_CALL flag to the buffer header so we can count I/O's for the + * checkpoints and so we can release the allocated memory. + * + * XXX + * This should be removed if the new virtual memory system allows us to + * easily make the buffers contiguous in kernel memory and if that's + * fast enough. + */ + ch_per_blk = MAXPHYS / fs->lfs_bsize; + for (bpp = sp->bpp, i = nblocks; i;) { + num = ch_per_blk; + if (num > i) + num = i; + i -= num; + size = num * fs->lfs_bsize; + + cbp = lfs_newbuf(VTOI(fs->lfs_ivnode)->i_devvp, + (*bpp)->b_blkno, size); + cbp->b_dev = i_dev; + cbp->b_flags |= B_ASYNC | B_BUSY; + + s = splbio(); + ++fs->lfs_iocount; + for (p = cbp->b_data; num--;) { + bp = *bpp++; + /* + * Fake buffers from the cleaner are marked as B_INVAL. + * We need to copy the data from user space rather than + * from the buffer indicated. + * XXX == what do I do on an error? + */ + if (bp->b_flags & B_INVAL) { + if (copyin(bp->b_saveaddr, p, bp->b_bcount)) + panic("lfs_writeseg: copyin failed"); + } else + bcopy(bp->b_data, p, bp->b_bcount); + p += bp->b_bcount; + if (bp->b_flags & B_LOCKED) + --locked_queue_count; + bp->b_flags &= ~(B_ERROR | B_READ | B_DELWRI | + B_LOCKED | B_GATHERED); + if (bp->b_flags & B_CALL) { + /* if B_CALL, it was created with newbuf */ + brelvp(bp); + if (!(bp->b_flags & B_INVAL)) + free(bp->b_data, M_SEGMENT); + free(bp, M_SEGMENT); + } else { + bremfree(bp); + bp->b_flags |= B_DONE; + reassignbuf(bp, bp->b_vp); + brelse(bp); + } + } + ++cbp->b_vp->v_numoutput; + splx(s); + cbp->b_bcount = p - (char *)cbp->b_data; + /* + * XXXX This is a gross and disgusting hack. 
Since these + * buffers are physically addressed, they hang off the + * device vnode (devvp). As a result, they have no way + * of getting to the LFS superblock or lfs structure to + * keep track of the number of I/O's pending. So, I am + * going to stuff the fs into the saveaddr field of + * the buffer (yuk). + */ + cbp->b_saveaddr = (caddr_t)fs; + vop_strategy_a.a_desc = VDESC(vop_strategy); + vop_strategy_a.a_bp = cbp; + (strategy)(&vop_strategy_a); + } + /* + * XXX + * Vinvalbuf can move locked buffers off the locked queue + * and we have no way of knowing about this. So, after + * doing a big write, we recalculate how many bufers are + * really still left on the locked queue. + */ + locked_queue_count = count_lock_queue(); + wakeup(&locked_queue_count); +#ifdef DOSTATS + ++lfs_stats.psegwrites; + lfs_stats.blocktot += nblocks - 1; + if (fs->lfs_sp->seg_flags & SEGM_SYNC) + ++lfs_stats.psyncwrites; + if (fs->lfs_sp->seg_flags & SEGM_CLEAN) { + ++lfs_stats.pcleanwrites; + lfs_stats.cleanblocks += nblocks - 1; + } +#endif + return (lfs_initseg(fs) || do_again); +} + +void +lfs_writesuper(fs) + struct lfs *fs; +{ + struct buf *bp; + dev_t i_dev; + int (*strategy) __P((struct vop_strategy_args *)); + int s; + struct vop_strategy_args vop_strategy_a; + + i_dev = VTOI(fs->lfs_ivnode)->i_dev; + strategy = VTOI(fs->lfs_ivnode)->i_devvp->v_op[VOFFSET(vop_strategy)]; + + /* Checksum the superblock and copy it into a buffer. */ + fs->lfs_cksum = cksum(fs, sizeof(struct lfs) - sizeof(fs->lfs_cksum)); + bp = lfs_newbuf(VTOI(fs->lfs_ivnode)->i_devvp, fs->lfs_sboffs[0], + LFS_SBPAD); + *(struct lfs *)bp->b_data = *fs; + + /* XXX Toggle between first two superblocks; for now just write first */ + bp->b_dev = i_dev; + bp->b_flags |= B_BUSY | B_CALL | B_ASYNC; + bp->b_flags &= ~(B_DONE | B_ERROR | B_READ | B_DELWRI); + bp->b_iodone = lfs_supercallback; + vop_strategy_a.a_desc = VDESC(vop_strategy); + vop_strategy_a.a_bp = bp; + s = splbio(); + ++bp->b_vp->v_numoutput; + splx(s); + (strategy)(&vop_strategy_a); +} + +/* + * Logical block number match routines used when traversing the dirty block + * chain. + */ +int +lfs_match_data(fs, bp) + struct lfs *fs; + struct buf *bp; +{ + return (bp->b_lblkno >= 0); +} + +int +lfs_match_indir(fs, bp) + struct lfs *fs; + struct buf *bp; +{ + int lbn; + + lbn = bp->b_lblkno; + return (lbn < 0 && (-lbn - NDADDR) % NINDIR(fs) == 0); +} + +int +lfs_match_dindir(fs, bp) + struct lfs *fs; + struct buf *bp; +{ + int lbn; + + lbn = bp->b_lblkno; + return (lbn < 0 && (-lbn - NDADDR) % NINDIR(fs) == 1); +} + +int +lfs_match_tindir(fs, bp) + struct lfs *fs; + struct buf *bp; +{ + int lbn; + + lbn = bp->b_lblkno; + return (lbn < 0 && (-lbn - NDADDR) % NINDIR(fs) == 2); +} + +/* + * Allocate a new buffer header. 
+ */ +struct buf * +lfs_newbuf(vp, daddr, size) + struct vnode *vp; + daddr_t daddr; + size_t size; +{ + struct buf *bp; + size_t nbytes; + + nbytes = roundup(size, DEV_BSIZE); + bp = malloc(sizeof(struct buf), M_SEGMENT, M_WAITOK); + bzero(bp, sizeof(struct buf)); + if (nbytes) + bp->b_data = malloc(nbytes, M_SEGMENT, M_WAITOK); + bgetvp(vp, bp); + bp->b_bufsize = size; + bp->b_bcount = size; + bp->b_lblkno = daddr; + bp->b_blkno = daddr; + bp->b_error = 0; + bp->b_resid = 0; + bp->b_iodone = lfs_callback; + bp->b_flags |= B_BUSY | B_CALL | B_NOCACHE; + return (bp); +} + +void +lfs_callback(bp) + struct buf *bp; +{ + struct lfs *fs; + + fs = (struct lfs *)bp->b_saveaddr; +#ifdef DIAGNOSTIC + if (fs->lfs_iocount == 0) + panic("lfs_callback: zero iocount\n"); +#endif + if (--fs->lfs_iocount == 0) + wakeup(&fs->lfs_iocount); + + brelvp(bp); + free(bp->b_data, M_SEGMENT); + free(bp, M_SEGMENT); +} + +void +lfs_supercallback(bp) + struct buf *bp; +{ + brelvp(bp); + free(bp->b_data, M_SEGMENT); + free(bp, M_SEGMENT); +} + +/* + * Shellsort (diminishing increment sort) from Data Structures and + * Algorithms, Aho, Hopcraft and Ullman, 1983 Edition, page 290; + * see also Knuth Vol. 3, page 84. The increments are selected from + * formula (8), page 95. Roughly O(N^3/2). + */ +/* + * This is our own private copy of shellsort because we want to sort + * two parallel arrays (the array of buffer pointers and the array of + * logical block numbers) simultaneously. Note that we cast the array + * of logical block numbers to a unsigned in this routine so that the + * negative block numbers (meta data blocks) sort AFTER the data blocks. + */ +void +lfs_shellsort(bp_array, lb_array, nmemb) + struct buf **bp_array; + daddr_t *lb_array; + register int nmemb; +{ + static int __rsshell_increments[] = { 4, 1, 0 }; + register int incr, *incrp, t1, t2; + struct buf *bp_temp; + u_long lb_temp; + + for (incrp = __rsshell_increments; incr = *incrp++;) + for (t1 = incr; t1 < nmemb; ++t1) + for (t2 = t1 - incr; t2 >= 0;) + if (lb_array[t2] > lb_array[t2 + incr]) { + lb_temp = lb_array[t2]; + lb_array[t2] = lb_array[t2 + incr]; + lb_array[t2 + incr] = lb_temp; + bp_temp = bp_array[t2]; + bp_array[t2] = bp_array[t2 + incr]; + bp_array[t2 + incr] = bp_temp; + t2 -= incr; + } else + break; +} + +/* + * Check VXLOCK. Return 1 if the vnode is locked. Otherwise, vget it. + */ +lfs_vref(vp) + register struct vnode *vp; +{ + + if (vp->v_flag & VXLOCK) + return(1); + return (vget(vp, 0)); +} + +void +lfs_vunref(vp) + register struct vnode *vp; +{ + extern int lfs_no_inactive; + + /* + * This is vrele except that we do not want to VOP_INACTIVE + * this vnode. Rather than inline vrele here, we use a global + * flag to tell lfs_inactive not to run. Yes, its gross. + */ + lfs_no_inactive = 1; + vrele(vp); + lfs_no_inactive = 0; +} diff --git a/sys/ufs/lfs/lfs_subr.c b/sys/ufs/lfs/lfs_subr.c new file mode 100644 index 0000000..afcd8c2 --- /dev/null +++ b/sys/ufs/lfs/lfs_subr.c @@ -0,0 +1,184 @@ +/* + * Copyright (c) 1991, 1993 + * The Regents of the University of California. All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. 
Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * 3. All advertising materials mentioning features or use of this software + * must display the following acknowledgement: + * This product includes software developed by the University of + * California, Berkeley and its contributors. + * 4. Neither the name of the University nor the names of its contributors + * may be used to endorse or promote products derived from this software + * without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND + * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE + * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL + * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS + * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) + * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT + * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY + * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF + * SUCH DAMAGE. + * + * @(#)lfs_subr.c 8.2 (Berkeley) 9/21/93 + */ + +#include <sys/param.h> +#include <sys/namei.h> +#include <sys/vnode.h> +#include <sys/buf.h> +#include <sys/mount.h> +#include <sys/malloc.h> +#include <sys/proc.h> + +#include <ufs/ufs/quota.h> +#include <ufs/ufs/inode.h> +#include <ufs/lfs/lfs.h> +#include <ufs/lfs/lfs_extern.h> + +/* + * Return buffer with the contents of block "offset" from the beginning of + * directory "ip". If "res" is non-zero, fill it in with a pointer to the + * remaining space in the directory. + */ +int +lfs_blkatoff(ap) + struct vop_blkatoff_args /* { + struct vnode *a_vp; + off_t a_offset; + char **a_res; + struct buf **a_bpp; + } */ *ap; +{ + register struct lfs *fs; + struct inode *ip; + struct buf *bp; + daddr_t lbn; + int bsize, error; + + ip = VTOI(ap->a_vp); + fs = ip->i_lfs; + lbn = lblkno(fs, ap->a_offset); + bsize = blksize(fs); + + *ap->a_bpp = NULL; + if (error = bread(ap->a_vp, lbn, bsize, NOCRED, &bp)) { + brelse(bp); + return (error); + } + if (ap->a_res) + *ap->a_res = (char *)bp->b_data + blkoff(fs, ap->a_offset); + *ap->a_bpp = bp; + return (0); +} + + +/* + * lfs_seglock -- + * Single thread the segment writer. + */ +void +lfs_seglock(fs, flags) + struct lfs *fs; + unsigned long flags; +{ + struct segment *sp; + int s; + + if (fs->lfs_seglock) + if (fs->lfs_lockpid == curproc->p_pid) { + ++fs->lfs_seglock; + fs->lfs_sp->seg_flags |= flags; + return; + } else while (fs->lfs_seglock) + (void)tsleep(&fs->lfs_seglock, PRIBIO + 1, + "lfs seglock", 0); + + fs->lfs_seglock = 1; + fs->lfs_lockpid = curproc->p_pid; + + sp = fs->lfs_sp = malloc(sizeof(struct segment), M_SEGMENT, M_WAITOK); + sp->bpp = malloc(((LFS_SUMMARY_SIZE - sizeof(SEGSUM)) / + sizeof(daddr_t) + 1) * sizeof(struct buf *), M_SEGMENT, M_WAITOK); + sp->seg_flags = flags; + sp->vp = NULL; + (void) lfs_initseg(fs); + + /* + * Keep a cumulative count of the outstanding I/O operations. If the + * disk drive catches up with us it could go to zero before we finish, + * so we artificially increment it by one until we've scheduled all of + * the writes we intend to do. 
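+ *
+ * (Summary for the reader: lfs_seglock() takes this artificial hold,
+ * each completed write drops one count via lfs_callback() in
+ * lfs_segment.c, and lfs_segunlock() below releases the hold, sleeps
+ * on lfs_iocount for SEGM_SYNC segments until the writes drain, and
+ * only then writes the superblock when this was a checkpoint.)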
+ */ + s = splbio(); + ++fs->lfs_iocount; + splx(s); +} +/* + * lfs_segunlock -- + * Single thread the segment writer. + */ +void +lfs_segunlock(fs) + struct lfs *fs; +{ + struct segment *sp; + unsigned long sync, ckp; + int s; + + if (fs->lfs_seglock == 1) { + + sp = fs->lfs_sp; + sync = sp->seg_flags & SEGM_SYNC; + ckp = sp->seg_flags & SEGM_CKP; + if (sp->bpp != sp->cbpp) { + /* Free allocated segment summary */ + fs->lfs_offset -= LFS_SUMMARY_SIZE / DEV_BSIZE; + brelvp(*sp->bpp); + free((*sp->bpp)->b_data, M_SEGMENT); + free(*sp->bpp, M_SEGMENT); + } else + printf ("unlock to 0 with no summary"); + free(sp->bpp, M_SEGMENT); + free(sp, M_SEGMENT); + + /* + * If the I/O count is non-zero, sleep until it reaches zero. + * At the moment, the user's process hangs around so we can + * sleep. + */ + s = splbio(); + --fs->lfs_iocount; + /* + * We let checkpoints happen asynchronously. That means + * that during recovery, we have to roll forward between + * the two segments described by the first and second + * superblocks to make sure that the checkpoint described + * by a superblock completed. + */ + if (sync && fs->lfs_iocount) + (void)tsleep(&fs->lfs_iocount, PRIBIO + 1, "lfs vflush", 0); + splx(s); + if (ckp) { + fs->lfs_nactive = 0; + lfs_writesuper(fs); + } + --fs->lfs_seglock; + fs->lfs_lockpid = 0; + wakeup(&fs->lfs_seglock); + } else if (fs->lfs_seglock == 0) { + panic ("Seglock not held"); + } else { + --fs->lfs_seglock; + } +} diff --git a/sys/ufs/lfs/lfs_syscalls.c b/sys/ufs/lfs/lfs_syscalls.c new file mode 100644 index 0000000..666595e --- /dev/null +++ b/sys/ufs/lfs/lfs_syscalls.c @@ -0,0 +1,562 @@ +/*- + * Copyright (c) 1991, 1993, 1994 + * The Regents of the University of California. All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * 3. All advertising materials mentioning features or use of this software + * must display the following acknowledgement: + * This product includes software developed by the University of + * California, Berkeley and its contributors. + * 4. Neither the name of the University nor the names of its contributors + * may be used to endorse or promote products derived from this software + * without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND + * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE + * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL + * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS + * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) + * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT + * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY + * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF + * SUCH DAMAGE. 
+ * + * @(#)lfs_syscalls.c 8.5 (Berkeley) 4/20/94 + */ + +#include <sys/param.h> +#include <sys/proc.h> +#include <sys/buf.h> +#include <sys/mount.h> +#include <sys/vnode.h> +#include <sys/malloc.h> +#include <sys/kernel.h> + +#include <ufs/ufs/quota.h> +#include <ufs/ufs/inode.h> +#include <ufs/ufs/ufsmount.h> +#include <ufs/ufs/ufs_extern.h> + +#include <ufs/lfs/lfs.h> +#include <ufs/lfs/lfs_extern.h> +#define BUMP_FIP(SP) \ + (SP)->fip = (FINFO *) (&(SP)->fip->fi_blocks[(SP)->fip->fi_nblocks]) + +#define INC_FINFO(SP) ++((SEGSUM *)((SP)->segsum))->ss_nfinfo +#define DEC_FINFO(SP) --((SEGSUM *)((SP)->segsum))->ss_nfinfo + +/* + * Before committing to add something to a segment summary, make sure there + * is enough room. S is the bytes added to the summary. + */ +#define CHECK_SEG(s) \ +if (sp->sum_bytes_left < (s)) { \ + (void) lfs_writeseg(fs, sp); \ +} +struct buf *lfs_fakebuf __P((struct vnode *, int, size_t, caddr_t)); + +/* + * lfs_markv: + * + * This will mark inodes and blocks dirty, so they are written into the log. + * It will block until all the blocks have been written. The segment create + * time passed in the block_info and inode_info structures is used to decide + * if the data is valid for each block (in case some process dirtied a block + * or inode that is being cleaned between the determination that a block is + * live and the lfs_markv call). + * + * 0 on success + * -1/errno is return on error. + */ +struct lfs_markv_args { + fsid_t *fsidp; /* file system */ + BLOCK_INFO *blkiov; /* block array */ + int blkcnt; /* count of block array entries */ +}; +int +lfs_markv(p, uap, retval) + struct proc *p; + struct lfs_markv_args *uap; + int *retval; +{ + struct segment *sp; + BLOCK_INFO *blkp; + IFILE *ifp; + struct buf *bp, **bpp; + struct inode *ip; + struct lfs *fs; + struct mount *mntp; + struct vnode *vp; + fsid_t fsid; + void *start; + ino_t lastino; + daddr_t b_daddr, v_daddr; + u_long bsize; + int cnt, error; + + if (error = suser(p->p_ucred, &p->p_acflag)) + return (error); + + if (error = copyin(uap->fsidp, &fsid, sizeof(fsid_t))) + return (error); + if ((mntp = getvfs(&fsid)) == NULL) + return (EINVAL); + + cnt = uap->blkcnt; + start = malloc(cnt * sizeof(BLOCK_INFO), M_SEGMENT, M_WAITOK); + if (error = copyin(uap->blkiov, start, cnt * sizeof(BLOCK_INFO))) + goto err1; + + /* Mark blocks/inodes dirty. */ + fs = VFSTOUFS(mntp)->um_lfs; + bsize = fs->lfs_bsize; + error = 0; + + lfs_seglock(fs, SEGM_SYNC | SEGM_CLEAN); + sp = fs->lfs_sp; + for (v_daddr = LFS_UNUSED_DADDR, lastino = LFS_UNUSED_INUM, + blkp = start; cnt--; ++blkp) { + /* + * Get the IFILE entry (only once) and see if the file still + * exists. 
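+ *
+ * (That is: the IFILE entry gives the inode's current disk address; if
+ * it reads LFS_UNUSED_DADDR the file has since been removed and the
+ * cleaner's blocks for it are skipped. Individual blocks are likewise
+ * dropped further down when VOP_BMAP no longer returns the bi_daddr
+ * the cleaner recorded, i.e. the block has already been rewritten.)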
+ */ + if (lastino != blkp->bi_inode) { + if (lastino != LFS_UNUSED_INUM) { + /* Finish up last file */ + if (sp->fip->fi_nblocks == 0) { + DEC_FINFO(sp); + sp->sum_bytes_left += + sizeof(FINFO) - sizeof(daddr_t); + } else { + lfs_updatemeta(sp); + BUMP_FIP(sp); + } + + lfs_writeinode(fs, sp, ip); + lfs_vunref(vp); + } + + /* Start a new file */ + CHECK_SEG(sizeof(FINFO)); + sp->sum_bytes_left -= sizeof(FINFO) - sizeof(daddr_t); + INC_FINFO(sp); + sp->start_lbp = &sp->fip->fi_blocks[0]; + sp->vp = NULL; + sp->fip->fi_version = blkp->bi_version; + sp->fip->fi_nblocks = 0; + sp->fip->fi_ino = blkp->bi_inode; + lastino = blkp->bi_inode; + if (blkp->bi_inode == LFS_IFILE_INUM) + v_daddr = fs->lfs_idaddr; + else { + LFS_IENTRY(ifp, fs, blkp->bi_inode, bp); + v_daddr = ifp->if_daddr; + brelse(bp); + } + if (v_daddr == LFS_UNUSED_DADDR) + continue; + + /* Get the vnode/inode. */ + if (lfs_fastvget(mntp, blkp->bi_inode, v_daddr, &vp, + blkp->bi_lbn == LFS_UNUSED_LBN ? + blkp->bi_bp : NULL)) { +#ifdef DIAGNOSTIC + printf("lfs_markv: VFS_VGET failed (%d)\n", + blkp->bi_inode); +#endif + lastino = LFS_UNUSED_INUM; + v_daddr = LFS_UNUSED_DADDR; + continue; + } + sp->vp = vp; + ip = VTOI(vp); + } else if (v_daddr == LFS_UNUSED_DADDR) + continue; + + /* If this BLOCK_INFO didn't contain a block, keep going. */ + if (blkp->bi_lbn == LFS_UNUSED_LBN) + continue; + if (VOP_BMAP(vp, blkp->bi_lbn, NULL, &b_daddr, NULL) || + b_daddr != blkp->bi_daddr) + continue; + /* + * If we got to here, then we are keeping the block. If it + * is an indirect block, we want to actually put it in the + * buffer cache so that it can be updated in the finish_meta + * section. If it's not, we need to allocate a fake buffer + * so that writeseg can perform the copyin and write the buffer. + */ + if (blkp->bi_lbn >= 0) /* Data Block */ + bp = lfs_fakebuf(vp, blkp->bi_lbn, bsize, + blkp->bi_bp); + else { + bp = getblk(vp, blkp->bi_lbn, bsize, 0, 0); + if (!(bp->b_flags & (B_DELWRI | B_DONE | B_CACHE)) && + (error = copyin(blkp->bi_bp, bp->b_data, + bsize))) + goto err2; + if (error = VOP_BWRITE(bp)) + goto err2; + } + while (lfs_gatherblock(sp, bp, NULL)); + } + if (sp->vp) { + if (sp->fip->fi_nblocks == 0) { + DEC_FINFO(sp); + sp->sum_bytes_left += + sizeof(FINFO) - sizeof(daddr_t); + } else + lfs_updatemeta(sp); + + lfs_writeinode(fs, sp, ip); + lfs_vunref(vp); + } + (void) lfs_writeseg(fs, sp); + lfs_segunlock(fs); + free(start, M_SEGMENT); + return (error); + +/* + * XXX + * If we come in to error 2, we might have indirect blocks that were + * updated and now have bad block pointers. I don't know what to do + * about this. + */ + +err2: lfs_vunref(vp); + /* Free up fakebuffers */ + for (bpp = --sp->cbpp; bpp >= sp->bpp; --bpp) + if ((*bpp)->b_flags & B_CALL) { + brelvp(*bpp); + free(*bpp, M_SEGMENT); + } else + brelse(*bpp); + lfs_segunlock(fs); +err1: + free(start, M_SEGMENT); + return (error); +} + +/* + * lfs_bmapv: + * + * This will fill in the current disk address for arrays of blocks. + * + * 0 on success + * -1/errno is return on error. 
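+ *
+ * A rough caller sketch (illustrative only, not code from this file;
+ * the user-level lfs_bmapv wrapper and the fsid variable are assumed):
+ *
+ *	BLOCK_INFO bi[NB];
+ *	for (i = 0; i < NB; i++) {
+ *		bi[i].bi_inode = ino;
+ *		bi[i].bi_lbn = lbns[i];
+ *	}
+ *	if (lfs_bmapv(&fsid, bi, NB) == 0)
+ *		...each bi[i].bi_daddr now holds the current disk address,
+ *		or LFS_UNUSED_DADDR if the block could not be mapped.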
+ */ +struct lfs_bmapv_args { + fsid_t *fsidp; /* file system */ + BLOCK_INFO *blkiov; /* block array */ + int blkcnt; /* count of block array entries */ +}; +int +lfs_bmapv(p, uap, retval) + struct proc *p; + struct lfs_bmapv_args *uap; + int *retval; +{ + BLOCK_INFO *blkp; + struct mount *mntp; + struct vnode *vp; + fsid_t fsid; + void *start; + daddr_t daddr; + int cnt, error, step; + + if (error = suser(p->p_ucred, &p->p_acflag)) + return (error); + + if (error = copyin(uap->fsidp, &fsid, sizeof(fsid_t))) + return (error); + if ((mntp = getvfs(&fsid)) == NULL) + return (EINVAL); + + cnt = uap->blkcnt; + start = blkp = malloc(cnt * sizeof(BLOCK_INFO), M_SEGMENT, M_WAITOK); + if (error = copyin(uap->blkiov, blkp, cnt * sizeof(BLOCK_INFO))) { + free(blkp, M_SEGMENT); + return (error); + } + + for (step = cnt; step--; ++blkp) { + if (blkp->bi_lbn == LFS_UNUSED_LBN) + continue; + /* Could be a deadlock ? */ + if (VFS_VGET(mntp, blkp->bi_inode, &vp)) + daddr = LFS_UNUSED_DADDR; + else { + if (VOP_BMAP(vp, blkp->bi_lbn, NULL, &daddr, NULL)) + daddr = LFS_UNUSED_DADDR; + vput(vp); + } + blkp->bi_daddr = daddr; + } + copyout(start, uap->blkiov, cnt * sizeof(BLOCK_INFO)); + free(start, M_SEGMENT); + return (0); +} + +/* + * lfs_segclean: + * + * Mark the segment clean. + * + * 0 on success + * -1/errno is return on error. + */ +struct lfs_segclean_args { + fsid_t *fsidp; /* file system */ + u_long segment; /* segment number */ +}; +int +lfs_segclean(p, uap, retval) + struct proc *p; + struct lfs_segclean_args *uap; + int *retval; +{ + CLEANERINFO *cip; + SEGUSE *sup; + struct buf *bp; + struct mount *mntp; + struct lfs *fs; + fsid_t fsid; + int error; + + if (error = suser(p->p_ucred, &p->p_acflag)) + return (error); + + if (error = copyin(uap->fsidp, &fsid, sizeof(fsid_t))) + return (error); + if ((mntp = getvfs(&fsid)) == NULL) + return (EINVAL); + + fs = VFSTOUFS(mntp)->um_lfs; + + if (datosn(fs, fs->lfs_curseg) == uap->segment) + return (EBUSY); + + LFS_SEGENTRY(sup, fs, uap->segment, bp); + if (sup->su_flags & SEGUSE_ACTIVE) { + brelse(bp); + return (EBUSY); + } + fs->lfs_avail += fsbtodb(fs, fs->lfs_ssize) - 1; + fs->lfs_bfree += (sup->su_nsums * LFS_SUMMARY_SIZE / DEV_BSIZE) + + sup->su_ninos * btodb(fs->lfs_bsize); + sup->su_flags &= ~SEGUSE_DIRTY; + (void) VOP_BWRITE(bp); + + LFS_CLEANERINFO(cip, fs, bp); + ++cip->clean; + --cip->dirty; + (void) VOP_BWRITE(bp); + wakeup(&fs->lfs_avail); + return (0); +} + +/* + * lfs_segwait: + * + * This will block until a segment in file system fsid is written. A timeout + * in milliseconds may be specified which will awake the cleaner automatically. + * An fsid of -1 means any file system, and a timeout of 0 means forever. + * + * 0 on success + * 1 on timeout + * -1/errno is return on error. 
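+ *
+ * Typical use (an illustrative sketch, not code from this file): the
+ * cleaner blocks here between passes, for example
+ *
+ *	struct timeval tv;
+ *	tv.tv_sec = 5 * 60;	(a five-minute safety timeout)
+ *	tv.tv_usec = 0;
+ *	lfs_segwait(&fsid, &tv);
+ *
+ * and rescans the segment usage table when the call returns.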
+ */ +struct lfs_segwait_args { + fsid_t *fsidp; /* file system */ + struct timeval *tv; /* timeout */ +}; +int +lfs_segwait(p, uap, retval) + struct proc *p; + struct lfs_segwait_args *uap; + int *retval; +{ + extern int lfs_allclean_wakeup; + struct mount *mntp; + struct timeval atv; + fsid_t fsid; + void *addr; + u_long timeout; + int error, s; + + if (error = suser(p->p_ucred, &p->p_acflag)) { + return (error); +} +#ifdef WHEN_QUADS_WORK + if (error = copyin(uap->fsidp, &fsid, sizeof(fsid_t))) + return (error); + if (fsid == (fsid_t)-1) + addr = &lfs_allclean_wakeup; + else { + if ((mntp = getvfs(&fsid)) == NULL) + return (EINVAL); + addr = &VFSTOUFS(mntp)->um_lfs->lfs_nextseg; + } +#else + if (error = copyin(uap->fsidp, &fsid, sizeof(fsid_t))) + return (error); + if ((mntp = getvfs(&fsid)) == NULL) + addr = &lfs_allclean_wakeup; + else + addr = &VFSTOUFS(mntp)->um_lfs->lfs_nextseg; +#endif + + if (uap->tv) { + if (error = copyin(uap->tv, &atv, sizeof(struct timeval))) + return (error); + if (itimerfix(&atv)) + return (EINVAL); + s = splclock(); + timevaladd(&atv, (struct timeval *)&time); + timeout = hzto(&atv); + splx(s); + } else + timeout = 0; + + error = tsleep(addr, PCATCH | PUSER, "segment", timeout); + return (error == ERESTART ? EINTR : 0); +} + +/* + * VFS_VGET call specialized for the cleaner. The cleaner already knows the + * daddr from the ifile, so don't look it up again. If the cleaner is + * processing IINFO structures, it may have the ondisk inode already, so + * don't go retrieving it again. + */ +int +lfs_fastvget(mp, ino, daddr, vpp, dinp) + struct mount *mp; + ino_t ino; + daddr_t daddr; + struct vnode **vpp; + struct dinode *dinp; +{ + register struct inode *ip; + struct vnode *vp; + struct ufsmount *ump; + struct buf *bp; + dev_t dev; + int error; + + ump = VFSTOUFS(mp); + dev = ump->um_dev; + /* + * This is playing fast and loose. Someone may have the inode + * locked, in which case they are going to be distinctly unhappy + * if we trash something. + */ + if ((*vpp = ufs_ihashlookup(dev, ino)) != NULL) { + lfs_vref(*vpp); + if ((*vpp)->v_flag & VXLOCK) + printf ("Cleaned vnode VXLOCKED\n"); + ip = VTOI(*vpp); + if (ip->i_flags & IN_LOCKED) + printf("cleaned vnode locked\n"); + if (!(ip->i_flag & IN_MODIFIED)) { + ++ump->um_lfs->lfs_uinodes; + ip->i_flag |= IN_MODIFIED; + } + ip->i_flag |= IN_MODIFIED; + return (0); + } + + /* Allocate new vnode/inode. */ + if (error = lfs_vcreate(mp, ino, &vp)) { + *vpp = NULL; + return (error); + } + + /* + * Put it onto its hash chain and lock it so that other requests for + * this inode will block if they arrive while we are sleeping waiting + * for old data structures to be purged or for the contents of the + * disk portion of this inode to be read. + */ + ip = VTOI(vp); + ufs_ihashins(ip); + + /* + * XXX + * This may not need to be here, logically it should go down with + * the i_devvp initialization. + * Ask Kirk. + */ + ip->i_lfs = ump->um_lfs; + + /* Read in the disk contents for the inode, copy into the inode. */ + if (dinp) + if (error = copyin(dinp, &ip->i_din, sizeof(struct dinode))) + return (error); + else { + if (error = bread(ump->um_devvp, daddr, + (int)ump->um_lfs->lfs_bsize, NOCRED, &bp)) { + /* + * The inode does not contain anything useful, so it + * would be misleading to leave it on its hash chain. + * Iput() will return it to the free list. + */ + ufs_ihashrem(ip); + + /* Unlock and discard unneeded inode. 
*/ + lfs_vunref(vp); + brelse(bp); + *vpp = NULL; + return (error); + } + ip->i_din = + *lfs_ifind(ump->um_lfs, ino, (struct dinode *)bp->b_data); + brelse(bp); + } + + /* Inode was just read from user space or disk, make sure it's locked */ + ip->i_flag |= IN_LOCKED; + + /* + * Initialize the vnode from the inode, check for aliases. In all + * cases re-init ip, the underlying vnode/inode may have changed. + */ + if (error = ufs_vinit(mp, lfs_specop_p, LFS_FIFOOPS, &vp)) { + lfs_vunref(vp); + *vpp = NULL; + return (error); + } + /* + * Finish inode initialization now that aliasing has been resolved. + */ + ip->i_devvp = ump->um_devvp; + ip->i_flag |= IN_MODIFIED; + ++ump->um_lfs->lfs_uinodes; + VREF(ip->i_devvp); + *vpp = vp; + return (0); +} +struct buf * +lfs_fakebuf(vp, lbn, size, uaddr) + struct vnode *vp; + int lbn; + size_t size; + caddr_t uaddr; +{ + struct buf *bp; + + bp = lfs_newbuf(vp, lbn, 0); + bp->b_saveaddr = uaddr; + bp->b_bufsize = size; + bp->b_bcount = size; + bp->b_flags |= B_INVAL; + return (bp); +} diff --git a/sys/ufs/lfs/lfs_vfsops.c b/sys/ufs/lfs/lfs_vfsops.c new file mode 100644 index 0000000..0c8186e --- /dev/null +++ b/sys/ufs/lfs/lfs_vfsops.c @@ -0,0 +1,573 @@ +/* + * Copyright (c) 1989, 1991, 1993, 1994 + * The Regents of the University of California. All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * 3. All advertising materials mentioning features or use of this software + * must display the following acknowledgement: + * This product includes software developed by the University of + * California, Berkeley and its contributors. + * 4. Neither the name of the University nor the names of its contributors + * may be used to endorse or promote products derived from this software + * without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND + * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE + * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL + * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS + * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) + * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT + * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY + * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF + * SUCH DAMAGE. 
+ * + * @(#)lfs_vfsops.c 8.7 (Berkeley) 4/16/94 + */ + +#include <sys/param.h> +#include <sys/systm.h> +#include <sys/namei.h> +#include <sys/proc.h> +#include <sys/kernel.h> +#include <sys/vnode.h> +#include <sys/mount.h> +#include <sys/buf.h> +#include <sys/mbuf.h> +#include <sys/file.h> +#include <sys/disklabel.h> +#include <sys/ioctl.h> +#include <sys/errno.h> +#include <sys/malloc.h> +#include <sys/socket.h> + +#include <miscfs/specfs/specdev.h> + +#include <ufs/ufs/quota.h> +#include <ufs/ufs/inode.h> +#include <ufs/ufs/ufsmount.h> +#include <ufs/ufs/ufs_extern.h> + +#include <ufs/lfs/lfs.h> +#include <ufs/lfs/lfs_extern.h> + +int lfs_mountfs __P((struct vnode *, struct mount *, struct proc *)); + +struct vfsops lfs_vfsops = { + lfs_mount, + ufs_start, + lfs_unmount, + ufs_root, + ufs_quotactl, + lfs_statfs, + lfs_sync, + lfs_vget, + lfs_fhtovp, + lfs_vptofh, + lfs_init, +}; + +int +lfs_mountroot() +{ + panic("lfs_mountroot"); /* XXX -- implement */ +} + +/* + * VFS Operations. + * + * mount system call + */ +lfs_mount(mp, path, data, ndp, p) + register struct mount *mp; + char *path; + caddr_t data; + struct nameidata *ndp; + struct proc *p; +{ + struct vnode *devvp; + struct ufs_args args; + struct ufsmount *ump; + register struct lfs *fs; /* LFS */ + u_int size; + int error; + + if (error = copyin(data, (caddr_t)&args, sizeof (struct ufs_args))) + return (error); + + /* Until LFS can do NFS right. XXX */ + if (args.export.ex_flags & MNT_EXPORTED) + return (EINVAL); + + /* + * If updating, check whether changing from read-only to + * read/write; if there is no device name, that's all we do. + */ + if (mp->mnt_flag & MNT_UPDATE) { + ump = VFSTOUFS(mp); +#ifdef NOTLFS /* LFS */ + fs = ump->um_fs; + if (fs->fs_ronly && (mp->mnt_flag & MNT_RDONLY) == 0) + fs->fs_ronly = 0; +#else + fs = ump->um_lfs; + if (fs->lfs_ronly && (mp->mnt_flag & MNT_RDONLY) == 0) + fs->lfs_ronly = 0; +#endif + if (args.fspec == 0) { + /* + * Process export requests. + */ + return (vfs_export(mp, &ump->um_export, &args.export)); + } + } + /* + * Not an update, or updating the name: look up the name + * and verify that it refers to a sensible block device. 
+ */ + NDINIT(ndp, LOOKUP, FOLLOW, UIO_USERSPACE, args.fspec, p); + if (error = namei(ndp)) + return (error); + devvp = ndp->ni_vp; + if (devvp->v_type != VBLK) { + vrele(devvp); + return (ENOTBLK); + } + if (major(devvp->v_rdev) >= nblkdev) { + vrele(devvp); + return (ENXIO); + } + if ((mp->mnt_flag & MNT_UPDATE) == 0) + error = lfs_mountfs(devvp, mp, p); /* LFS */ + else { + if (devvp != ump->um_devvp) + error = EINVAL; /* needs translation */ + else + vrele(devvp); + } + if (error) { + vrele(devvp); + return (error); + } + ump = VFSTOUFS(mp); + fs = ump->um_lfs; /* LFS */ +#ifdef NOTLFS /* LFS */ + (void) copyinstr(path, fs->fs_fsmnt, sizeof(fs->fs_fsmnt) - 1, &size); + bzero(fs->fs_fsmnt + size, sizeof(fs->fs_fsmnt) - size); + bcopy((caddr_t)fs->fs_fsmnt, (caddr_t)mp->mnt_stat.f_mntonname, + MNAMELEN); + (void) copyinstr(args.fspec, mp->mnt_stat.f_mntfromname, MNAMELEN - 1, + &size); + bzero(mp->mnt_stat.f_mntfromname + size, MNAMELEN - size); + (void) ufs_statfs(mp, &mp->mnt_stat, p); +#else + (void)copyinstr(path, fs->lfs_fsmnt, sizeof(fs->lfs_fsmnt) - 1, &size); + bzero(fs->lfs_fsmnt + size, sizeof(fs->lfs_fsmnt) - size); + bcopy((caddr_t)fs->lfs_fsmnt, (caddr_t)mp->mnt_stat.f_mntonname, + MNAMELEN); + (void) copyinstr(args.fspec, mp->mnt_stat.f_mntfromname, MNAMELEN - 1, + &size); + bzero(mp->mnt_stat.f_mntfromname + size, MNAMELEN - size); + (void) lfs_statfs(mp, &mp->mnt_stat, p); +#endif + return (0); +} + +/* + * Common code for mount and mountroot + * LFS specific + */ +int +lfs_mountfs(devvp, mp, p) + register struct vnode *devvp; + struct mount *mp; + struct proc *p; +{ + extern struct vnode *rootvp; + register struct lfs *fs; + register struct ufsmount *ump; + struct vnode *vp; + struct buf *bp; + struct partinfo dpart; + dev_t dev; + int error, i, ronly, size; + + /* + * Disallow multiple mounts of the same device. + * Disallow mounting of a device that is currently in use + * (except for root, which might share swap device for miniroot). + * Flush out any old buffers remaining from a previous use. + */ + if (error = vfs_mountedon(devvp)) + return (error); + if (vcount(devvp) > 1 && devvp != rootvp) + return (EBUSY); + if (error = vinvalbuf(devvp, V_SAVE, p->p_ucred, p, 0, 0)) + return (error); + + ronly = (mp->mnt_flag & MNT_RDONLY) != 0; + if (error = VOP_OPEN(devvp, ronly ? FREAD : FREAD|FWRITE, FSCRED, p)) + return (error); + + if (VOP_IOCTL(devvp, DIOCGPART, (caddr_t)&dpart, FREAD, NOCRED, p) != 0) + size = DEV_BSIZE; + else { + size = dpart.disklab->d_secsize; +#ifdef NEVER_USED + dpart.part->p_fstype = FS_LFS; + dpart.part->p_fsize = fs->lfs_fsize; /* frag size */ + dpart.part->p_frag = fs->lfs_frag; /* frags per block */ + dpart.part->p_cpg = fs->lfs_segshift; /* segment shift */ +#endif + } + + /* Don't free random space on error. */ + bp = NULL; + ump = NULL; + + /* Read in the superblock. */ + if (error = bread(devvp, LFS_LABELPAD / size, LFS_SBPAD, NOCRED, &bp)) + goto out; + fs = (struct lfs *)bp->b_data; + + /* Check the basics. */ + if (fs->lfs_magic != LFS_MAGIC || fs->lfs_bsize > MAXBSIZE || + fs->lfs_bsize < sizeof(struct lfs)) { + error = EINVAL; /* XXX needs translation */ + goto out; + } + + /* Allocate the mount structure, copy the superblock into it. */ + ump = (struct ufsmount *)malloc(sizeof *ump, M_UFSMNT, M_WAITOK); + fs = ump->um_lfs = malloc(sizeof(struct lfs), M_UFSMNT, M_WAITOK); + bcopy(bp->b_data, fs, sizeof(struct lfs)); + if (sizeof(struct lfs) < LFS_SBPAD) /* XXX why? 
*/ + bp->b_flags |= B_INVAL; + brelse(bp); + bp = NULL; + + /* Set up the I/O information */ + fs->lfs_iocount = 0; + + /* Set up the ifile and lock aflags */ + fs->lfs_doifile = 0; + fs->lfs_writer = 0; + fs->lfs_dirops = 0; + fs->lfs_seglock = 0; + + /* Set the file system readonly/modify bits. */ + fs->lfs_ronly = ronly; + if (ronly == 0) + fs->lfs_fmod = 1; + + /* Initialize the mount structure. */ + dev = devvp->v_rdev; + mp->mnt_data = (qaddr_t)ump; + mp->mnt_stat.f_fsid.val[0] = (long)dev; + mp->mnt_stat.f_fsid.val[1] = MOUNT_LFS; + mp->mnt_flag |= MNT_LOCAL; + ump->um_mountp = mp; + ump->um_dev = dev; + ump->um_devvp = devvp; + ump->um_bptrtodb = 0; + ump->um_seqinc = 1 << fs->lfs_fsbtodb; + ump->um_nindir = fs->lfs_nindir; + for (i = 0; i < MAXQUOTAS; i++) + ump->um_quotas[i] = NULLVP; + devvp->v_specflags |= SI_MOUNTEDON; + + /* + * We use the ifile vnode for almost every operation. Instead of + * retrieving it from the hash table each time we retrieve it here, + * artificially increment the reference count and keep a pointer + * to it in the incore copy of the superblock. + */ + if (error = VFS_VGET(mp, LFS_IFILE_INUM, &vp)) + goto out; + fs->lfs_ivnode = vp; + VREF(vp); + vput(vp); + + return (0); +out: + if (bp) + brelse(bp); + (void)VOP_CLOSE(devvp, ronly ? FREAD : FREAD|FWRITE, NOCRED, p); + if (ump) { + free(ump->um_lfs, M_UFSMNT); + free(ump, M_UFSMNT); + mp->mnt_data = (qaddr_t)0; + } + return (error); +} + +/* + * unmount system call + */ +lfs_unmount(mp, mntflags, p) + struct mount *mp; + int mntflags; + struct proc *p; +{ + extern int doforce; + register struct ufsmount *ump; + register struct lfs *fs; + int i, error, flags, ronly; + + flags = 0; + if (mntflags & MNT_FORCE) { + if (!doforce || (mp->mnt_flag & MNT_ROOTFS)) + return (EINVAL); + flags |= FORCECLOSE; + } + + ump = VFSTOUFS(mp); + fs = ump->um_lfs; +#ifdef QUOTA + if (mp->mnt_flag & MNT_QUOTA) { + if (error = vflush(mp, fs->lfs_ivnode, SKIPSYSTEM|flags)) + return (error); + for (i = 0; i < MAXQUOTAS; i++) { + if (ump->um_quotas[i] == NULLVP) + continue; + quotaoff(p, mp, i); + } + /* + * Here we fall through to vflush again to ensure + * that we have gotten rid of all the system vnodes. + */ + } +#endif + if (error = vflush(mp, fs->lfs_ivnode, flags)) + return (error); + fs->lfs_clean = 1; + if (error = VFS_SYNC(mp, 1, p->p_ucred, p)) + return (error); + if (fs->lfs_ivnode->v_dirtyblkhd.lh_first) + panic("lfs_unmount: still dirty blocks on ifile vnode\n"); + vrele(fs->lfs_ivnode); + vgone(fs->lfs_ivnode); + + ronly = !fs->lfs_ronly; + ump->um_devvp->v_specflags &= ~SI_MOUNTEDON; + error = VOP_CLOSE(ump->um_devvp, + ronly ? FREAD : FREAD|FWRITE, NOCRED, p); + vrele(ump->um_devvp); + free(fs, M_UFSMNT); + free(ump, M_UFSMNT); + mp->mnt_data = (qaddr_t)0; + mp->mnt_flag &= ~MNT_LOCAL; + return (error); +} + +/* + * Get file system statistics. 
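+ *
+ * (Note on f_bavail below: lfs_minfree percent of lfs_dsize is treated
+ * as reserved space, so the "available" figure reported is
+ *
+ *	dsize * (100 - minfree) / 100 - (dsize - bfree)
+ *
+ * converted with dbtofsb(); used blocks are charged against the
+ * unreserved portion of the file system only.)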
+ */ +lfs_statfs(mp, sbp, p) + struct mount *mp; + register struct statfs *sbp; + struct proc *p; +{ + register struct lfs *fs; + register struct ufsmount *ump; + + ump = VFSTOUFS(mp); + fs = ump->um_lfs; + if (fs->lfs_magic != LFS_MAGIC) + panic("lfs_statfs: magic"); + sbp->f_type = MOUNT_LFS; + sbp->f_bsize = fs->lfs_bsize; + sbp->f_iosize = fs->lfs_bsize; + sbp->f_blocks = dbtofsb(fs,fs->lfs_dsize); + sbp->f_bfree = dbtofsb(fs, fs->lfs_bfree); + sbp->f_bavail = (fs->lfs_dsize * (100 - fs->lfs_minfree) / 100) - + (fs->lfs_dsize - fs->lfs_bfree); + sbp->f_bavail = dbtofsb(fs, sbp->f_bavail); + sbp->f_files = fs->lfs_nfiles; + sbp->f_ffree = sbp->f_bfree * INOPB(fs); + if (sbp != &mp->mnt_stat) { + bcopy((caddr_t)mp->mnt_stat.f_mntonname, + (caddr_t)&sbp->f_mntonname[0], MNAMELEN); + bcopy((caddr_t)mp->mnt_stat.f_mntfromname, + (caddr_t)&sbp->f_mntfromname[0], MNAMELEN); + } + return (0); +} + +/* + * Go through the disk queues to initiate sandbagged IO; + * go through the inodes to write those that have been modified; + * initiate the writing of the super block if it has been modified. + * + * Note: we are always called with the filesystem marked `MPBUSY'. + */ +lfs_sync(mp, waitfor, cred, p) + struct mount *mp; + int waitfor; + struct ucred *cred; + struct proc *p; +{ + int error; + + /* All syncs must be checkpoints until roll-forward is implemented. */ + error = lfs_segwrite(mp, SEGM_CKP | (waitfor ? SEGM_SYNC : 0)); +#ifdef QUOTA + qsync(mp); +#endif + return (error); +} + +/* + * Look up an LFS dinode number to find its incore vnode. If not already + * in core, read it in from the specified device. Return the inode locked. + * Detection and handling of mount points must be done by the calling routine. + */ +int +lfs_vget(mp, ino, vpp) + struct mount *mp; + ino_t ino; + struct vnode **vpp; +{ + register struct lfs *fs; + register struct inode *ip; + struct buf *bp; + struct ifile *ifp; + struct vnode *vp; + struct ufsmount *ump; + daddr_t daddr; + dev_t dev; + int error; + + ump = VFSTOUFS(mp); + dev = ump->um_dev; + if ((*vpp = ufs_ihashget(dev, ino)) != NULL) + return (0); + + /* Translate the inode number to a disk address. */ + fs = ump->um_lfs; + if (ino == LFS_IFILE_INUM) + daddr = fs->lfs_idaddr; + else { + LFS_IENTRY(ifp, fs, ino, bp); + daddr = ifp->if_daddr; + brelse(bp); + if (daddr == LFS_UNUSED_DADDR) + return (ENOENT); + } + + /* Allocate new vnode/inode. */ + if (error = lfs_vcreate(mp, ino, &vp)) { + *vpp = NULL; + return (error); + } + + /* + * Put it onto its hash chain and lock it so that other requests for + * this inode will block if they arrive while we are sleeping waiting + * for old data structures to be purged or for the contents of the + * disk portion of this inode to be read. + */ + ip = VTOI(vp); + ufs_ihashins(ip); + + /* + * XXX + * This may not need to be here, logically it should go down with + * the i_devvp initialization. + * Ask Kirk. + */ + ip->i_lfs = ump->um_lfs; + + /* Read in the disk contents for the inode, copy into the inode. */ + if (error = + bread(ump->um_devvp, daddr, (int)fs->lfs_bsize, NOCRED, &bp)) { + /* + * The inode does not contain anything useful, so it would + * be misleading to leave it on its hash chain. With mode + * still zero, it will be unlinked and returned to the free + * list by vput(). + */ + vput(vp); + brelse(bp); + *vpp = NULL; + return (error); + } + ip->i_din = *lfs_ifind(fs, ino, (struct dinode *)bp->b_data); + brelse(bp); + + /* + * Initialize the vnode from the inode, check for aliases. 
In all + * cases re-init ip, the underlying vnode/inode may have changed. + */ + if (error = ufs_vinit(mp, lfs_specop_p, LFS_FIFOOPS, &vp)) { + vput(vp); + *vpp = NULL; + return (error); + } + /* + * Finish inode initialization now that aliasing has been resolved. + */ + ip->i_devvp = ump->um_devvp; + VREF(ip->i_devvp); + *vpp = vp; + return (0); +} + +/* + * File handle to vnode + * + * Have to be really careful about stale file handles: + * - check that the inode number is valid + * - call lfs_vget() to get the locked inode + * - check for an unallocated inode (i_mode == 0) + * - check that the given client host has export rights and return + * those rights via. exflagsp and credanonp + * + * XXX + * use ifile to see if inode is allocated instead of reading off disk + * what is the relationship between my generational number and the NFS + * generational number. + */ +int +lfs_fhtovp(mp, fhp, nam, vpp, exflagsp, credanonp) + register struct mount *mp; + struct fid *fhp; + struct mbuf *nam; + struct vnode **vpp; + int *exflagsp; + struct ucred **credanonp; +{ + register struct ufid *ufhp; + + ufhp = (struct ufid *)fhp; + if (ufhp->ufid_ino < ROOTINO) + return (ESTALE); + return (ufs_check_export(mp, ufhp, nam, vpp, exflagsp, credanonp)); +} + +/* + * Vnode pointer to File handle + */ +/* ARGSUSED */ +lfs_vptofh(vp, fhp) + struct vnode *vp; + struct fid *fhp; +{ + register struct inode *ip; + register struct ufid *ufhp; + + ip = VTOI(vp); + ufhp = (struct ufid *)fhp; + ufhp->ufid_len = sizeof(struct ufid); + ufhp->ufid_ino = ip->i_number; + ufhp->ufid_gen = ip->i_gen; + return (0); +} diff --git a/sys/ufs/lfs/lfs_vnops.c b/sys/ufs/lfs/lfs_vnops.c new file mode 100644 index 0000000..fc6bd48 --- /dev/null +++ b/sys/ufs/lfs/lfs_vnops.c @@ -0,0 +1,487 @@ +/* + * Copyright (c) 1986, 1989, 1991, 1993 + * The Regents of the University of California. All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * 3. All advertising materials mentioning features or use of this software + * must display the following acknowledgement: + * This product includes software developed by the University of + * California, Berkeley and its contributors. + * 4. Neither the name of the University nor the names of its contributors + * may be used to endorse or promote products derived from this software + * without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND + * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. 
IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE + * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL + * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS + * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) + * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT + * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY + * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF + * SUCH DAMAGE. + * + * @(#)lfs_vnops.c 8.5 (Berkeley) 12/30/93 + */ + +#include <sys/param.h> +#include <sys/systm.h> +#include <sys/namei.h> +#include <sys/resourcevar.h> +#include <sys/kernel.h> +#include <sys/file.h> +#include <sys/stat.h> +#include <sys/buf.h> +#include <sys/proc.h> +#include <sys/conf.h> +#include <sys/mount.h> +#include <sys/vnode.h> +#include <sys/malloc.h> + +#include <vm/vm.h> + +#include <miscfs/specfs/specdev.h> +#include <miscfs/fifofs/fifo.h> + +#include <ufs/ufs/quota.h> +#include <ufs/ufs/inode.h> +#include <ufs/ufs/dir.h> +#include <ufs/ufs/ufsmount.h> +#include <ufs/ufs/ufs_extern.h> + +#include <ufs/lfs/lfs.h> +#include <ufs/lfs/lfs_extern.h> + +/* Global vfs data structures for lfs. */ +int (**lfs_vnodeop_p)(); +struct vnodeopv_entry_desc lfs_vnodeop_entries[] = { + { &vop_default_desc, vn_default_error }, + { &vop_lookup_desc, ufs_lookup }, /* lookup */ + { &vop_create_desc, ufs_create }, /* create */ + { &vop_mknod_desc, ufs_mknod }, /* mknod */ + { &vop_open_desc, ufs_open }, /* open */ + { &vop_close_desc, lfs_close }, /* close */ + { &vop_access_desc, ufs_access }, /* access */ + { &vop_getattr_desc, lfs_getattr }, /* getattr */ + { &vop_setattr_desc, ufs_setattr }, /* setattr */ + { &vop_read_desc, lfs_read }, /* read */ + { &vop_write_desc, lfs_write }, /* write */ + { &vop_ioctl_desc, ufs_ioctl }, /* ioctl */ + { &vop_select_desc, ufs_select }, /* select */ + { &vop_mmap_desc, ufs_mmap }, /* mmap */ + { &vop_fsync_desc, lfs_fsync }, /* fsync */ + { &vop_seek_desc, ufs_seek }, /* seek */ + { &vop_remove_desc, ufs_remove }, /* remove */ + { &vop_link_desc, ufs_link }, /* link */ + { &vop_rename_desc, ufs_rename }, /* rename */ + { &vop_mkdir_desc, ufs_mkdir }, /* mkdir */ + { &vop_rmdir_desc, ufs_rmdir }, /* rmdir */ + { &vop_symlink_desc, ufs_symlink }, /* symlink */ + { &vop_readdir_desc, ufs_readdir }, /* readdir */ + { &vop_readlink_desc, ufs_readlink }, /* readlink */ + { &vop_abortop_desc, ufs_abortop }, /* abortop */ + { &vop_inactive_desc, lfs_inactive }, /* inactive */ + { &vop_reclaim_desc, ufs_reclaim }, /* reclaim */ + { &vop_lock_desc, ufs_lock }, /* lock */ + { &vop_unlock_desc, ufs_unlock }, /* unlock */ + { &vop_bmap_desc, ufs_bmap }, /* bmap */ + { &vop_strategy_desc, ufs_strategy }, /* strategy */ + { &vop_print_desc, ufs_print }, /* print */ + { &vop_islocked_desc, ufs_islocked }, /* islocked */ + { &vop_pathconf_desc, ufs_pathconf }, /* pathconf */ + { &vop_advlock_desc, ufs_advlock }, /* advlock */ + { &vop_blkatoff_desc, lfs_blkatoff }, /* blkatoff */ + { &vop_valloc_desc, lfs_valloc }, /* valloc */ + { &vop_vfree_desc, lfs_vfree }, /* vfree */ + { &vop_truncate_desc, lfs_truncate }, /* truncate */ + { &vop_update_desc, lfs_update }, /* update */ + { &vop_bwrite_desc, lfs_bwrite }, /* bwrite */ + { (struct vnodeop_desc*)NULL, (int(*)())NULL } +}; +struct vnodeopv_desc lfs_vnodeop_opv_desc = + { &lfs_vnodeop_p, lfs_vnodeop_entries }; + +int (**lfs_specop_p)(); +struct vnodeopv_entry_desc lfs_specop_entries[] = { + { 
&vop_default_desc, vn_default_error }, + { &vop_lookup_desc, spec_lookup }, /* lookup */ + { &vop_create_desc, spec_create }, /* create */ + { &vop_mknod_desc, spec_mknod }, /* mknod */ + { &vop_open_desc, spec_open }, /* open */ + { &vop_close_desc, ufsspec_close }, /* close */ + { &vop_access_desc, ufs_access }, /* access */ + { &vop_getattr_desc, lfs_getattr }, /* getattr */ + { &vop_setattr_desc, ufs_setattr }, /* setattr */ + { &vop_read_desc, ufsspec_read }, /* read */ + { &vop_write_desc, ufsspec_write }, /* write */ + { &vop_ioctl_desc, spec_ioctl }, /* ioctl */ + { &vop_select_desc, spec_select }, /* select */ + { &vop_mmap_desc, spec_mmap }, /* mmap */ + { &vop_fsync_desc, spec_fsync }, /* fsync */ + { &vop_seek_desc, spec_seek }, /* seek */ + { &vop_remove_desc, spec_remove }, /* remove */ + { &vop_link_desc, spec_link }, /* link */ + { &vop_rename_desc, spec_rename }, /* rename */ + { &vop_mkdir_desc, spec_mkdir }, /* mkdir */ + { &vop_rmdir_desc, spec_rmdir }, /* rmdir */ + { &vop_symlink_desc, spec_symlink }, /* symlink */ + { &vop_readdir_desc, spec_readdir }, /* readdir */ + { &vop_readlink_desc, spec_readlink }, /* readlink */ + { &vop_abortop_desc, spec_abortop }, /* abortop */ + { &vop_inactive_desc, lfs_inactive }, /* inactive */ + { &vop_reclaim_desc, ufs_reclaim }, /* reclaim */ + { &vop_lock_desc, ufs_lock }, /* lock */ + { &vop_unlock_desc, ufs_unlock }, /* unlock */ + { &vop_bmap_desc, spec_bmap }, /* bmap */ + { &vop_strategy_desc, spec_strategy }, /* strategy */ + { &vop_print_desc, ufs_print }, /* print */ + { &vop_islocked_desc, ufs_islocked }, /* islocked */ + { &vop_pathconf_desc, spec_pathconf }, /* pathconf */ + { &vop_advlock_desc, spec_advlock }, /* advlock */ + { &vop_blkatoff_desc, spec_blkatoff }, /* blkatoff */ + { &vop_valloc_desc, spec_valloc }, /* valloc */ + { &vop_vfree_desc, lfs_vfree }, /* vfree */ + { &vop_truncate_desc, spec_truncate }, /* truncate */ + { &vop_update_desc, lfs_update }, /* update */ + { &vop_bwrite_desc, lfs_bwrite }, /* bwrite */ + { (struct vnodeop_desc*)NULL, (int(*)())NULL } +}; +struct vnodeopv_desc lfs_specop_opv_desc = + { &lfs_specop_p, lfs_specop_entries }; + +#ifdef FIFO +int (**lfs_fifoop_p)(); +struct vnodeopv_entry_desc lfs_fifoop_entries[] = { + { &vop_default_desc, vn_default_error }, + { &vop_lookup_desc, fifo_lookup }, /* lookup */ + { &vop_create_desc, fifo_create }, /* create */ + { &vop_mknod_desc, fifo_mknod }, /* mknod */ + { &vop_open_desc, fifo_open }, /* open */ + { &vop_close_desc, ufsfifo_close }, /* close */ + { &vop_access_desc, ufs_access }, /* access */ + { &vop_getattr_desc, lfs_getattr }, /* getattr */ + { &vop_setattr_desc, ufs_setattr }, /* setattr */ + { &vop_read_desc, ufsfifo_read }, /* read */ + { &vop_write_desc, ufsfifo_write }, /* write */ + { &vop_ioctl_desc, fifo_ioctl }, /* ioctl */ + { &vop_select_desc, fifo_select }, /* select */ + { &vop_mmap_desc, fifo_mmap }, /* mmap */ + { &vop_fsync_desc, fifo_fsync }, /* fsync */ + { &vop_seek_desc, fifo_seek }, /* seek */ + { &vop_remove_desc, fifo_remove }, /* remove */ + { &vop_link_desc, fifo_link }, /* link */ + { &vop_rename_desc, fifo_rename }, /* rename */ + { &vop_mkdir_desc, fifo_mkdir }, /* mkdir */ + { &vop_rmdir_desc, fifo_rmdir }, /* rmdir */ + { &vop_symlink_desc, fifo_symlink }, /* symlink */ + { &vop_readdir_desc, fifo_readdir }, /* readdir */ + { &vop_readlink_desc, fifo_readlink }, /* readlink */ + { &vop_abortop_desc, fifo_abortop }, /* abortop */ + { &vop_inactive_desc, lfs_inactive }, /* inactive */ + { 
&vop_reclaim_desc, ufs_reclaim }, /* reclaim */ + { &vop_lock_desc, ufs_lock }, /* lock */ + { &vop_unlock_desc, ufs_unlock }, /* unlock */ + { &vop_bmap_desc, fifo_bmap }, /* bmap */ + { &vop_strategy_desc, fifo_strategy }, /* strategy */ + { &vop_print_desc, ufs_print }, /* print */ + { &vop_islocked_desc, ufs_islocked }, /* islocked */ + { &vop_pathconf_desc, fifo_pathconf }, /* pathconf */ + { &vop_advlock_desc, fifo_advlock }, /* advlock */ + { &vop_blkatoff_desc, fifo_blkatoff }, /* blkatoff */ + { &vop_valloc_desc, fifo_valloc }, /* valloc */ + { &vop_vfree_desc, lfs_vfree }, /* vfree */ + { &vop_truncate_desc, fifo_truncate }, /* truncate */ + { &vop_update_desc, lfs_update }, /* update */ + { &vop_bwrite_desc, lfs_bwrite }, /* bwrite */ + { (struct vnodeop_desc*)NULL, (int(*)())NULL } +}; +struct vnodeopv_desc lfs_fifoop_opv_desc = + { &lfs_fifoop_p, lfs_fifoop_entries }; +#endif /* FIFO */ + +#define LFS_READWRITE +#include <ufs/ufs/ufs_readwrite.c> +#undef LFS_READWRITE + +/* + * Synch an open file. + */ +/* ARGSUSED */ +lfs_fsync(ap) + struct vop_fsync_args /* { + struct vnode *a_vp; + struct ucred *a_cred; + int a_waitfor; + struct proc *a_p; + } */ *ap; +{ + struct timeval tv; + + tv = time; + return (VOP_UPDATE(ap->a_vp, &tv, &tv, + ap->a_waitfor == MNT_WAIT ? LFS_SYNC : 0)); +} + +/* + * These macros are used to bracket UFS directory ops, so that we can + * identify all the pages touched during directory ops which need to + * be ordered and flushed atomically, so that they may be recovered. + */ +#define SET_DIROP(fs) { \ + if ((fs)->lfs_writer) \ + tsleep(&(fs)->lfs_dirops, PRIBIO + 1, "lfs_dirop", 0); \ + ++(fs)->lfs_dirops; \ + (fs)->lfs_doifile = 1; \ +} + +#define SET_ENDOP(fs) { \ + --(fs)->lfs_dirops; \ + if (!(fs)->lfs_dirops) \ + wakeup(&(fs)->lfs_writer); \ +} + +#define MARK_VNODE(dvp) (dvp)->v_flag |= VDIROP + +int +lfs_symlink(ap) + struct vop_symlink_args /* { + struct vnode *a_dvp; + struct vnode **a_vpp; + struct componentname *a_cnp; + struct vattr *a_vap; + char *a_target; + } */ *ap; +{ + int ret; + + SET_DIROP(VTOI(ap->a_dvp)->i_lfs); + MARK_VNODE(ap->a_dvp); + ret = ufs_symlink(ap); + SET_ENDOP(VTOI(ap->a_dvp)->i_lfs); + return (ret); +} + +int +lfs_mknod(ap) + struct vop_mknod_args /* { + struct vnode *a_dvp; + struct vnode **a_vpp; + struct componentname *a_cnp; + struct vattr *a_vap; + } */ *ap; +{ + int ret; + + SET_DIROP(VTOI(ap->a_dvp)->i_lfs); + MARK_VNODE(ap->a_dvp); + ret = ufs_mknod(ap); + SET_ENDOP(VTOI(ap->a_dvp)->i_lfs); + return (ret); +} + +int +lfs_create(ap) + struct vop_create_args /* { + struct vnode *a_dvp; + struct vnode **a_vpp; + struct componentname *a_cnp; + struct vattr *a_vap; + } */ *ap; +{ + int ret; + + SET_DIROP(VTOI(ap->a_dvp)->i_lfs); + MARK_VNODE(ap->a_dvp); + ret = ufs_create(ap); + SET_ENDOP(VTOI(ap->a_dvp)->i_lfs); + return (ret); +} + +int +lfs_mkdir(ap) + struct vop_mkdir_args /* { + struct vnode *a_dvp; + struct vnode **a_vpp; + struct componentname *a_cnp; + struct vattr *a_vap; + } */ *ap; +{ + int ret; + + SET_DIROP(VTOI(ap->a_dvp)->i_lfs); + MARK_VNODE(ap->a_dvp); + ret = ufs_mkdir(ap); + SET_ENDOP(VTOI(ap->a_dvp)->i_lfs); + return (ret); +} + +int +lfs_remove(ap) + struct vop_remove_args /* { + struct vnode *a_dvp; + struct vnode *a_vp; + struct componentname *a_cnp; + } */ *ap; +{ + int ret; + + SET_DIROP(VTOI(ap->a_dvp)->i_lfs); + MARK_VNODE(ap->a_dvp); + MARK_VNODE(ap->a_vp); + ret = ufs_remove(ap); + SET_ENDOP(VTOI(ap->a_dvp)->i_lfs); + return (ret); +} + +int +lfs_rmdir(ap) + struct vop_rmdir_args /* { + 
struct vnodeop_desc *a_desc; + struct vnode *a_dvp; + struct vnode *a_vp; + struct componentname *a_cnp; + } */ *ap; +{ + int ret; + + SET_DIROP(VTOI(ap->a_dvp)->i_lfs); + MARK_VNODE(ap->a_dvp); + MARK_VNODE(ap->a_vp); + ret = ufs_rmdir(ap); + SET_ENDOP(VTOI(ap->a_dvp)->i_lfs); + return (ret); +} + +int +lfs_link(ap) + struct vop_link_args /* { + struct vnode *a_vp; + struct vnode *a_tdvp; + struct componentname *a_cnp; + } */ *ap; +{ + int ret; + + SET_DIROP(VTOI(ap->a_vp)->i_lfs); + MARK_VNODE(ap->a_vp); + ret = ufs_link(ap); + SET_ENDOP(VTOI(ap->a_vp)->i_lfs); + return (ret); +} + +int +lfs_rename(ap) + struct vop_rename_args /* { + struct vnode *a_fdvp; + struct vnode *a_fvp; + struct componentname *a_fcnp; + struct vnode *a_tdvp; + struct vnode *a_tvp; + struct componentname *a_tcnp; + } */ *ap; +{ + int ret; + + SET_DIROP(VTOI(ap->a_fdvp)->i_lfs); + MARK_VNODE(ap->a_fdvp); + MARK_VNODE(ap->a_tdvp); + ret = ufs_rename(ap); + SET_ENDOP(VTOI(ap->a_fdvp)->i_lfs); + return (ret); +} +/* XXX hack to avoid calling ITIMES in getattr */ +int +lfs_getattr(ap) + struct vop_getattr_args /* { + struct vnode *a_vp; + struct vattr *a_vap; + struct ucred *a_cred; + struct proc *a_p; + } */ *ap; +{ + register struct vnode *vp = ap->a_vp; + register struct inode *ip = VTOI(vp); + register struct vattr *vap = ap->a_vap; + /* + * Copy from inode table + */ + vap->va_fsid = ip->i_dev; + vap->va_fileid = ip->i_number; + vap->va_mode = ip->i_mode & ~IFMT; + vap->va_nlink = ip->i_nlink; + vap->va_uid = ip->i_uid; + vap->va_gid = ip->i_gid; + vap->va_rdev = (dev_t)ip->i_rdev; + vap->va_size = ip->i_din.di_size; + vap->va_atime = ip->i_atime; + vap->va_mtime = ip->i_mtime; + vap->va_ctime = ip->i_ctime; + vap->va_flags = ip->i_flags; + vap->va_gen = ip->i_gen; + /* this doesn't belong here */ + if (vp->v_type == VBLK) + vap->va_blocksize = BLKDEV_IOSIZE; + else if (vp->v_type == VCHR) + vap->va_blocksize = MAXBSIZE; + else + vap->va_blocksize = vp->v_mount->mnt_stat.f_iosize; + vap->va_bytes = dbtob(ip->i_blocks); + vap->va_type = vp->v_type; + vap->va_filerev = ip->i_modrev; + return (0); +} +/* + * Close called + * + * XXX -- we were using ufs_close, but since it updates the + * times on the inode, we might need to bump the uinodes + * count. + */ +/* ARGSUSED */ +int +lfs_close(ap) + struct vop_close_args /* { + struct vnode *a_vp; + int a_fflag; + struct ucred *a_cred; + struct proc *a_p; + } */ *ap; +{ + register struct vnode *vp = ap->a_vp; + register struct inode *ip = VTOI(vp); + int mod; + + if (vp->v_usecount > 1 && !(ip->i_flag & IN_LOCKED)) { + mod = ip->i_flag & IN_MODIFIED; + ITIMES(ip, &time, &time); + if (!mod && ip->i_flag & IN_MODIFIED) + ip->i_lfs->lfs_uinodes++; + } + return (0); +} + +/* + * Stub inactive routine that avoid calling ufs_inactive in some cases. + */ +int lfs_no_inactive = 0; + +int +lfs_inactive(ap) + struct vop_inactive_args /* { + struct vnode *a_vp; + } */ *ap; +{ + + if (lfs_no_inactive) + return (0); + return (ufs_inactive(ap)); +} diff --git a/sys/ufs/mfs/mfs_extern.h b/sys/ufs/mfs/mfs_extern.h new file mode 100644 index 0000000..e357faf6 --- /dev/null +++ b/sys/ufs/mfs/mfs_extern.h @@ -0,0 +1,60 @@ +/*- + * Copyright (c) 1991, 1993 + * The Regents of the University of California. All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. 
Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * 3. All advertising materials mentioning features or use of this software + * must display the following acknowledgement: + * This product includes software developed by the University of + * California, Berkeley and its contributors. + * 4. Neither the name of the University nor the names of its contributors + * may be used to endorse or promote products derived from this software + * without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND + * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE + * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL + * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS + * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) + * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT + * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY + * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF + * SUCH DAMAGE. + * + * @(#)mfs_extern.h 8.1 (Berkeley) 6/11/93 + */ + +struct buf; +struct mount; +struct nameidata; +struct proc; +struct statfs; +struct ucred; +struct vnode; + +__BEGIN_DECLS +int mfs_badop __P((void)); +int mfs_bmap __P((struct vop_bmap_args *)); +int mfs_close __P((struct vop_close_args *)); +void mfs_doio __P((struct buf *bp, caddr_t base)); +int mfs_inactive __P((struct vop_inactive_args *)); /* XXX */ +int mfs_reclaim __P((struct vop_reclaim_args *)); /* XXX */ +int mfs_init __P((void)); +int mfs_ioctl __P((struct vop_ioctl_args *)); +int mfs_mount __P((struct mount *mp, + char *path, caddr_t data, struct nameidata *ndp, struct proc *p)); +int mfs_open __P((struct vop_open_args *)); +int mfs_print __P((struct vop_print_args *)); /* XXX */ +int mfs_start __P((struct mount *mp, int flags, struct proc *p)); +int mfs_statfs __P((struct mount *mp, struct statfs *sbp, struct proc *p)); +int mfs_strategy __P((struct vop_strategy_args *)); /* XXX */ +__END_DECLS diff --git a/sys/ufs/mfs/mfs_vfsops.c b/sys/ufs/mfs/mfs_vfsops.c new file mode 100644 index 0000000..3fcbdf3 --- /dev/null +++ b/sys/ufs/mfs/mfs_vfsops.c @@ -0,0 +1,307 @@ +/* + * Copyright (c) 1989, 1990, 1993, 1994 + * The Regents of the University of California. All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * 3. 
All advertising materials mentioning features or use of this software + * must display the following acknowledgement: + * This product includes software developed by the University of + * California, Berkeley and its contributors. + * 4. Neither the name of the University nor the names of its contributors + * may be used to endorse or promote products derived from this software + * without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND + * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE + * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL + * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS + * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) + * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT + * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY + * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF + * SUCH DAMAGE. + * + * @(#)mfs_vfsops.c 8.4 (Berkeley) 4/16/94 + */ + +#include <sys/param.h> +#include <sys/systm.h> +#include <sys/time.h> +#include <sys/kernel.h> +#include <sys/proc.h> +#include <sys/buf.h> +#include <sys/mount.h> +#include <sys/signalvar.h> +#include <sys/vnode.h> +#include <sys/malloc.h> + +#include <ufs/ufs/quota.h> +#include <ufs/ufs/inode.h> +#include <ufs/ufs/ufsmount.h> +#include <ufs/ufs/ufs_extern.h> + +#include <ufs/ffs/fs.h> +#include <ufs/ffs/ffs_extern.h> + +#include <ufs/mfs/mfsnode.h> +#include <ufs/mfs/mfs_extern.h> + +caddr_t mfs_rootbase; /* address of mini-root in kernel virtual memory */ +u_long mfs_rootsize; /* size of mini-root in bytes */ + +static int mfs_minor; /* used for building internal dev_t */ + +extern int (**mfs_vnodeop_p)(); + +/* + * mfs vfs operations. + */ +struct vfsops mfs_vfsops = { + mfs_mount, + mfs_start, + ffs_unmount, + ufs_root, + ufs_quotactl, + mfs_statfs, + ffs_sync, + ffs_vget, + ffs_fhtovp, + ffs_vptofh, + mfs_init, +}; + +/* + * Called by main() when mfs is going to be mounted as root. + * + * Name is updated by mount(8) after booting. + */ +#define ROOTNAME "mfs_root" + +mfs_mountroot() +{ + extern struct vnode *rootvp; + register struct fs *fs; + register struct mount *mp; + struct proc *p = curproc; /* XXX */ + struct ufsmount *ump; + struct mfsnode *mfsp; + u_int size; + int error; + + /* + * Get vnodes for swapdev and rootdev. 
+ */ + if (bdevvp(swapdev, &swapdev_vp) || bdevvp(rootdev, &rootvp)) + panic("mfs_mountroot: can't setup bdevvp's"); + + mp = malloc((u_long)sizeof(struct mount), M_MOUNT, M_WAITOK); + bzero((char *)mp, (u_long)sizeof(struct mount)); + mp->mnt_op = &mfs_vfsops; + mp->mnt_flag = MNT_RDONLY; + mfsp = malloc(sizeof *mfsp, M_MFSNODE, M_WAITOK); + rootvp->v_data = mfsp; + rootvp->v_op = mfs_vnodeop_p; + rootvp->v_tag = VT_MFS; + mfsp->mfs_baseoff = mfs_rootbase; + mfsp->mfs_size = mfs_rootsize; + mfsp->mfs_vnode = rootvp; + mfsp->mfs_pid = p->p_pid; + mfsp->mfs_buflist = (struct buf *)0; + if (error = ffs_mountfs(rootvp, mp, p)) { + free(mp, M_MOUNT); + free(mfsp, M_MFSNODE); + return (error); + } + if (error = vfs_lock(mp)) { + (void)ffs_unmount(mp, 0, p); + free(mp, M_MOUNT); + free(mfsp, M_MFSNODE); + return (error); + } + TAILQ_INSERT_TAIL(&mountlist, mp, mnt_list); + mp->mnt_flag |= MNT_ROOTFS; + mp->mnt_vnodecovered = NULLVP; + ump = VFSTOUFS(mp); + fs = ump->um_fs; + bzero(fs->fs_fsmnt, sizeof(fs->fs_fsmnt)); + fs->fs_fsmnt[0] = '/'; + bcopy((caddr_t)fs->fs_fsmnt, (caddr_t)mp->mnt_stat.f_mntonname, + MNAMELEN); + (void) copystr(ROOTNAME, mp->mnt_stat.f_mntfromname, MNAMELEN - 1, + &size); + bzero(mp->mnt_stat.f_mntfromname + size, MNAMELEN - size); + (void)ffs_statfs(mp, &mp->mnt_stat, p); + vfs_unlock(mp); + inittodr((time_t)0); + return (0); +} + +/* + * This is called early in boot to set the base address and size + * of the mini-root. + */ +mfs_initminiroot(base) + caddr_t base; +{ + struct fs *fs = (struct fs *)(base + SBOFF); + extern int (*mountroot)(); + + /* check for valid super block */ + if (fs->fs_magic != FS_MAGIC || fs->fs_bsize > MAXBSIZE || + fs->fs_bsize < sizeof(struct fs)) + return (0); + mountroot = mfs_mountroot; + mfs_rootbase = base; + mfs_rootsize = fs->fs_fsize * fs->fs_size; + rootdev = makedev(255, mfs_minor++); + return (mfs_rootsize); +} + +/* + * VFS Operations. + * + * mount system call + */ +/* ARGSUSED */ +int +mfs_mount(mp, path, data, ndp, p) + register struct mount *mp; + char *path; + caddr_t data; + struct nameidata *ndp; + struct proc *p; +{ + struct vnode *devvp; + struct mfs_args args; + struct ufsmount *ump; + register struct fs *fs; + register struct mfsnode *mfsp; + u_int size; + int flags, error; + + if (error = copyin(data, (caddr_t)&args, sizeof (struct mfs_args))) + return (error); + + /* + * If updating, check whether changing from read-only to + * read/write; if there is no device name, that's all we do. 
+ */ + if (mp->mnt_flag & MNT_UPDATE) { + ump = VFSTOUFS(mp); + fs = ump->um_fs; + if (fs->fs_ronly == 0 && (mp->mnt_flag & MNT_RDONLY)) { + flags = WRITECLOSE; + if (mp->mnt_flag & MNT_FORCE) + flags |= FORCECLOSE; + if (vfs_busy(mp)) + return (EBUSY); + error = ffs_flushfiles(mp, flags, p); + vfs_unbusy(mp); + if (error) + return (error); + } + if (fs->fs_ronly && (mp->mnt_flag & MNT_WANTRDWR)) + fs->fs_ronly = 0; +#ifdef EXPORTMFS + if (args.fspec == 0) + return (vfs_export(mp, &ump->um_export, &args.export)); +#endif + return (0); + } + error = getnewvnode(VT_MFS, (struct mount *)0, mfs_vnodeop_p, &devvp); + if (error) + return (error); + devvp->v_type = VBLK; + if (checkalias(devvp, makedev(255, mfs_minor++), (struct mount *)0)) + panic("mfs_mount: dup dev"); + mfsp = (struct mfsnode *)malloc(sizeof *mfsp, M_MFSNODE, M_WAITOK); + devvp->v_data = mfsp; + mfsp->mfs_baseoff = args.base; + mfsp->mfs_size = args.size; + mfsp->mfs_vnode = devvp; + mfsp->mfs_pid = p->p_pid; + mfsp->mfs_buflist = (struct buf *)0; + if (error = ffs_mountfs(devvp, mp, p)) { + mfsp->mfs_buflist = (struct buf *)-1; + vrele(devvp); + return (error); + } + ump = VFSTOUFS(mp); + fs = ump->um_fs; + (void) copyinstr(path, fs->fs_fsmnt, sizeof(fs->fs_fsmnt) - 1, &size); + bzero(fs->fs_fsmnt + size, sizeof(fs->fs_fsmnt) - size); + bcopy((caddr_t)fs->fs_fsmnt, (caddr_t)mp->mnt_stat.f_mntonname, + MNAMELEN); + (void) copyinstr(args.fspec, mp->mnt_stat.f_mntfromname, MNAMELEN - 1, + &size); + bzero(mp->mnt_stat.f_mntfromname + size, MNAMELEN - size); + (void) mfs_statfs(mp, &mp->mnt_stat, p); + return (0); +} + +int mfs_pri = PWAIT | PCATCH; /* XXX prob. temp */ + +/* + * Used to grab the process and keep it in the kernel to service + * memory filesystem I/O requests. + * + * Loop servicing I/O requests. + * Copy the requested data into or out of the memory filesystem + * address space. + */ +/* ARGSUSED */ +int +mfs_start(mp, flags, p) + struct mount *mp; + int flags; + struct proc *p; +{ + register struct vnode *vp = VFSTOUFS(mp)->um_devvp; + register struct mfsnode *mfsp = VTOMFS(vp); + register struct buf *bp; + register caddr_t base; + int error = 0; + + base = mfsp->mfs_baseoff; + while (mfsp->mfs_buflist != (struct buf *)(-1)) { + while (bp = mfsp->mfs_buflist) { + mfsp->mfs_buflist = bp->b_actf; + mfs_doio(bp, base); + wakeup((caddr_t)bp); + } + /* + * If a non-ignored signal is received, try to unmount. + * If that fails, clear the signal (it has been "processed"), + * otherwise we will loop here, as tsleep will always return + * EINTR/ERESTART. + */ + if (error = tsleep((caddr_t)vp, mfs_pri, "mfsidl", 0)) + if (dounmount(mp, 0, p) != 0) + CLRSIG(p, CURSIG(p)); + } + return (error); +} + +/* + * Get file system statistics. + */ +mfs_statfs(mp, sbp, p) + struct mount *mp; + struct statfs *sbp; + struct proc *p; +{ + int error; + + error = ffs_statfs(mp, sbp, p); + sbp->f_type = MOUNT_MFS; + return (error); +} diff --git a/sys/ufs/mfs/mfs_vnops.c b/sys/ufs/mfs/mfs_vnops.c new file mode 100644 index 0000000..71adf06 --- /dev/null +++ b/sys/ufs/mfs/mfs_vnops.c @@ -0,0 +1,432 @@ +/* + * Copyright (c) 1989, 1993 + * The Regents of the University of California. All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. 
Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * 3. All advertising materials mentioning features or use of this software + * must display the following acknowledgement: + * This product includes software developed by the University of + * California, Berkeley and its contributors. + * 4. Neither the name of the University nor the names of its contributors + * may be used to endorse or promote products derived from this software + * without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND + * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE + * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL + * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS + * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) + * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT + * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY + * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF + * SUCH DAMAGE. + * + * @(#)mfs_vnops.c 8.3 (Berkeley) 9/21/93 + */ + +#include <sys/param.h> +#include <sys/systm.h> +#include <sys/time.h> +#include <sys/kernel.h> +#include <sys/proc.h> +#include <sys/buf.h> +#include <sys/map.h> +#include <sys/vnode.h> +#include <sys/malloc.h> + +#include <miscfs/specfs/specdev.h> + +#include <machine/vmparam.h> + +#include <ufs/mfs/mfsnode.h> +#include <ufs/mfs/mfsiom.h> +#include <ufs/mfs/mfs_extern.h> + +#if !defined(hp300) && !defined(i386) && !defined(mips) && !defined(sparc) && !defined(luna68k) +static int mfsmap_want; /* 1 => need kernel I/O resources */ +struct map mfsmap[MFS_MAPSIZE]; +extern char mfsiobuf[]; +#endif + +/* + * mfs vnode operations. 
+ */ +int (**mfs_vnodeop_p)(); +struct vnodeopv_entry_desc mfs_vnodeop_entries[] = { + { &vop_default_desc, vn_default_error }, + { &vop_lookup_desc, mfs_lookup }, /* lookup */ + { &vop_create_desc, mfs_create }, /* create */ + { &vop_mknod_desc, mfs_mknod }, /* mknod */ + { &vop_open_desc, mfs_open }, /* open */ + { &vop_close_desc, mfs_close }, /* close */ + { &vop_access_desc, mfs_access }, /* access */ + { &vop_getattr_desc, mfs_getattr }, /* getattr */ + { &vop_setattr_desc, mfs_setattr }, /* setattr */ + { &vop_read_desc, mfs_read }, /* read */ + { &vop_write_desc, mfs_write }, /* write */ + { &vop_ioctl_desc, mfs_ioctl }, /* ioctl */ + { &vop_select_desc, mfs_select }, /* select */ + { &vop_mmap_desc, mfs_mmap }, /* mmap */ + { &vop_fsync_desc, spec_fsync }, /* fsync */ + { &vop_seek_desc, mfs_seek }, /* seek */ + { &vop_remove_desc, mfs_remove }, /* remove */ + { &vop_link_desc, mfs_link }, /* link */ + { &vop_rename_desc, mfs_rename }, /* rename */ + { &vop_mkdir_desc, mfs_mkdir }, /* mkdir */ + { &vop_rmdir_desc, mfs_rmdir }, /* rmdir */ + { &vop_symlink_desc, mfs_symlink }, /* symlink */ + { &vop_readdir_desc, mfs_readdir }, /* readdir */ + { &vop_readlink_desc, mfs_readlink }, /* readlink */ + { &vop_abortop_desc, mfs_abortop }, /* abortop */ + { &vop_inactive_desc, mfs_inactive }, /* inactive */ + { &vop_reclaim_desc, mfs_reclaim }, /* reclaim */ + { &vop_lock_desc, mfs_lock }, /* lock */ + { &vop_unlock_desc, mfs_unlock }, /* unlock */ + { &vop_bmap_desc, mfs_bmap }, /* bmap */ + { &vop_strategy_desc, mfs_strategy }, /* strategy */ + { &vop_print_desc, mfs_print }, /* print */ + { &vop_islocked_desc, mfs_islocked }, /* islocked */ + { &vop_pathconf_desc, mfs_pathconf }, /* pathconf */ + { &vop_advlock_desc, mfs_advlock }, /* advlock */ + { &vop_blkatoff_desc, mfs_blkatoff }, /* blkatoff */ + { &vop_valloc_desc, mfs_valloc }, /* valloc */ + { &vop_vfree_desc, mfs_vfree }, /* vfree */ + { &vop_truncate_desc, mfs_truncate }, /* truncate */ + { &vop_update_desc, mfs_update }, /* update */ + { &vop_bwrite_desc, mfs_bwrite }, /* bwrite */ + { (struct vnodeop_desc*)NULL, (int(*)())NULL } +}; +struct vnodeopv_desc mfs_vnodeop_opv_desc = + { &mfs_vnodeop_p, mfs_vnodeop_entries }; + +/* + * Vnode Operations. + * + * Open called to allow memory filesystem to initialize and + * validate before actual IO. Record our process identifier + * so we can tell when we are doing I/O to ourself. + */ +/* ARGSUSED */ +int +mfs_open(ap) + struct vop_open_args /* { + struct vnode *a_vp; + int a_mode; + struct ucred *a_cred; + struct proc *a_p; + } */ *ap; +{ + + if (ap->a_vp->v_type != VBLK) { + panic("mfs_ioctl not VBLK"); + /* NOTREACHED */ + } + return (0); +} + +/* + * Ioctl operation. + */ +/* ARGSUSED */ +int +mfs_ioctl(ap) + struct vop_ioctl_args /* { + struct vnode *a_vp; + int a_command; + caddr_t a_data; + int a_fflag; + struct ucred *a_cred; + struct proc *a_p; + } */ *ap; +{ + + return (ENOTTY); +} + +/* + * Pass I/O requests to the memory filesystem process. 
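+ * (In outline: requests against the mini-root, which has no serving + * process, are copied directly; requests issued by the serving process + * itself are handled inline with mfs_doio(); all other requests are + * queued on mfs_buflist and the idle server is woken up.)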
+ */ +int +mfs_strategy(ap) + struct vop_strategy_args /* { + struct buf *a_bp; + } */ *ap; +{ + register struct buf *bp = ap->a_bp; + register struct mfsnode *mfsp; + struct vnode *vp; + struct proc *p = curproc; /* XXX */ + + if (!vfinddev(bp->b_dev, VBLK, &vp) || vp->v_usecount == 0) + panic("mfs_strategy: bad dev"); + mfsp = VTOMFS(vp); + /* check for mini-root access */ + if (mfsp->mfs_pid == 0) { + caddr_t base; + + base = mfsp->mfs_baseoff + (bp->b_blkno << DEV_BSHIFT); + if (bp->b_flags & B_READ) + bcopy(base, bp->b_data, bp->b_bcount); + else + bcopy(bp->b_data, base, bp->b_bcount); + biodone(bp); + } else if (mfsp->mfs_pid == p->p_pid) { + mfs_doio(bp, mfsp->mfs_baseoff); + } else { + bp->b_actf = mfsp->mfs_buflist; + mfsp->mfs_buflist = bp; + wakeup((caddr_t)vp); + } + return (0); +} + +#if defined(vax) || defined(tahoe) +/* + * Memory file system I/O. + * + * Essentially play ubasetup() and disk interrupt service routine by + * doing the copies to or from the memfs process. If doing physio + * (i.e. pagein), we must map the I/O through the kernel virtual + * address space. + */ +void +mfs_doio(bp, base) + register struct buf *bp; + caddr_t base; +{ + register struct pte *pte, *ppte; + register caddr_t vaddr; + int off, npf, npf2, reg; + caddr_t kernaddr, offset; + + /* + * For phys I/O, map the b_data into kernel virtual space using + * the Mfsiomap pte's. + */ + if ((bp->b_flags & B_PHYS) == 0) { + kernaddr = bp->b_data; + } else { + if (bp->b_flags & (B_PAGET | B_UAREA | B_DIRTY)) + panic("swap on memfs?"); + off = (int)bp->b_data & PGOFSET; + npf = btoc(bp->b_bcount + off); + /* + * Get some mapping page table entries + */ + while ((reg = rmalloc(mfsmap, (long)npf)) == 0) { + mfsmap_want++; + sleep((caddr_t)&mfsmap_want, PZERO-1); + } + reg--; + pte = vtopte(bp->b_proc, btop(bp->b_data)); + /* + * Do vmaccess() but with the Mfsiomap page table. + */ + ppte = &Mfsiomap[reg]; + vaddr = &mfsiobuf[reg * NBPG]; + kernaddr = vaddr + off; + for (npf2 = npf; npf2; npf2--) { + mapin(ppte, (u_int)vaddr, pte->pg_pfnum, + (int)(PG_V|PG_KW)); +#if defined(tahoe) + if ((bp->b_flags & B_READ) == 0) + mtpr(P1DC, vaddr); +#endif + ppte++; + pte++; + vaddr += NBPG; + } + } + offset = base + (bp->b_blkno << DEV_BSHIFT); + if (bp->b_flags & B_READ) + bp->b_error = copyin(offset, kernaddr, bp->b_bcount); + else + bp->b_error = copyout(kernaddr, offset, bp->b_bcount); + if (bp->b_error) + bp->b_flags |= B_ERROR; + /* + * Release pte's used by physical I/O. + */ + if (bp->b_flags & B_PHYS) { + rmfree(mfsmap, (long)npf, (long)++reg); + if (mfsmap_want) { + mfsmap_want = 0; + wakeup((caddr_t)&mfsmap_want); + } + } + biodone(bp); +} +#endif /* vax || tahoe */ + +#if defined(hp300) || defined(i386) || defined(mips) || defined(sparc) || defined(luna68k) +/* + * Memory file system I/O. + * + * Trivial on the HP since buffer has already been mapped into KVA space. + */ +void +mfs_doio(bp, base) + register struct buf *bp; + caddr_t base; +{ + + base += (bp->b_blkno << DEV_BSHIFT); + if (bp->b_flags & B_READ) + bp->b_error = copyin(base, bp->b_data, bp->b_bcount); + else + bp->b_error = copyout(bp->b_data, base, bp->b_bcount); + if (bp->b_error) + bp->b_flags |= B_ERROR; + biodone(bp); +} +#endif + +/* + * This is a noop, simply returning what one has been given. 
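+ * (An MFS logical block already identifies an offset in the backing + * memory; mfs_doio() uses b_blkno directly, so no translation is done + * here.)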
+ */ +int +mfs_bmap(ap) + struct vop_bmap_args /* { + struct vnode *a_vp; + daddr_t a_bn; + struct vnode **a_vpp; + daddr_t *a_bnp; + int *a_runp; + } */ *ap; +{ + + if (ap->a_vpp != NULL) + *ap->a_vpp = ap->a_vp; + if (ap->a_bnp != NULL) + *ap->a_bnp = ap->a_bn; + return (0); +} + +/* + * Memory filesystem close routine + */ +/* ARGSUSED */ +int +mfs_close(ap) + struct vop_close_args /* { + struct vnode *a_vp; + int a_fflag; + struct ucred *a_cred; + struct proc *a_p; + } */ *ap; +{ + register struct vnode *vp = ap->a_vp; + register struct mfsnode *mfsp = VTOMFS(vp); + register struct buf *bp; + int error; + + /* + * Finish any pending I/O requests. + */ + while (bp = mfsp->mfs_buflist) { + mfsp->mfs_buflist = bp->b_actf; + mfs_doio(bp, mfsp->mfs_baseoff); + wakeup((caddr_t)bp); + } + /* + * On last close of a memory filesystem + * we must invalidate any in core blocks, so that + * we can, free up its vnode. + */ + if (error = vinvalbuf(vp, 1, ap->a_cred, ap->a_p, 0, 0)) + return (error); + /* + * There should be no way to have any more uses of this + * vnode, so if we find any other uses, it is a panic. + */ + if (vp->v_usecount > 1) + printf("mfs_close: ref count %d > 1\n", vp->v_usecount); + if (vp->v_usecount > 1 || mfsp->mfs_buflist) + panic("mfs_close"); + /* + * Send a request to the filesystem server to exit. + */ + mfsp->mfs_buflist = (struct buf *)(-1); + wakeup((caddr_t)vp); + return (0); +} + +/* + * Memory filesystem inactive routine + */ +/* ARGSUSED */ +int +mfs_inactive(ap) + struct vop_inactive_args /* { + struct vnode *a_vp; + } */ *ap; +{ + register struct mfsnode *mfsp = VTOMFS(ap->a_vp); + + if (mfsp->mfs_buflist && mfsp->mfs_buflist != (struct buf *)(-1)) + panic("mfs_inactive: not inactive (mfs_buflist %x)", + mfsp->mfs_buflist); + return (0); +} + +/* + * Reclaim a memory filesystem devvp so that it can be reused. + */ +int +mfs_reclaim(ap) + struct vop_reclaim_args /* { + struct vnode *a_vp; + } */ *ap; +{ + + FREE(ap->a_vp->v_data, M_MFSNODE); + ap->a_vp->v_data = NULL; + return (0); +} + +/* + * Print out the contents of an mfsnode. + */ +int +mfs_print(ap) + struct vop_print_args /* { + struct vnode *a_vp; + } */ *ap; +{ + register struct mfsnode *mfsp = VTOMFS(ap->a_vp); + + printf("tag VT_MFS, pid %d, base %d, size %d\n", mfsp->mfs_pid, + mfsp->mfs_baseoff, mfsp->mfs_size); + return (0); +} + +/* + * Block device bad operation + */ +int +mfs_badop() +{ + + panic("mfs_badop called\n"); + /* NOTREACHED */ +} + +/* + * Memory based filesystem initialization. + */ +mfs_init() +{ + +#if !defined(hp300) && !defined(i386) && !defined(mips) && !defined(sparc) && !defined(luna68k) + rminit(mfsmap, (long)MFS_MAPREG, (long)1, "mfs mapreg", MFS_MAPSIZE); +#endif +} diff --git a/sys/ufs/mfs/mfsiom.h b/sys/ufs/mfs/mfsiom.h new file mode 100644 index 0000000..98aca85 --- /dev/null +++ b/sys/ufs/mfs/mfsiom.h @@ -0,0 +1,37 @@ +/* + * Copyright (c) 1989, 1993 + * The Regents of the University of California. All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * 3. 
All advertising materials mentioning features or use of this software + * must display the following acknowledgement: + * This product includes software developed by the University of + * California, Berkeley and its contributors. + * 4. Neither the name of the University nor the names of its contributors + * may be used to endorse or promote products derived from this software + * without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND + * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE + * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL + * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS + * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) + * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT + * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY + * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF + * SUCH DAMAGE. + * + * @(#)mfsiom.h 8.1 (Berkeley) 6/11/93 + */ + +#define MFS_MAPREG (MAXPHYS/NBPG + 2) /* Kernel mapping pte's */ +#define MFS_MAPSIZE 10 /* Size of alloc map for pte's */ diff --git a/sys/ufs/mfs/mfsnode.h b/sys/ufs/mfs/mfsnode.h new file mode 100644 index 0000000..4480ab0 --- /dev/null +++ b/sys/ufs/mfs/mfsnode.h @@ -0,0 +1,86 @@ +/* + * Copyright (c) 1989, 1993 + * The Regents of the University of California. All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * 3. All advertising materials mentioning features or use of this software + * must display the following acknowledgement: + * This product includes software developed by the University of + * California, Berkeley and its contributors. + * 4. Neither the name of the University nor the names of its contributors + * may be used to endorse or promote products derived from this software + * without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND + * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE + * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL + * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS + * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) + * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT + * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY + * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF + * SUCH DAMAGE. + * + * @(#)mfsnode.h 8.2 (Berkeley) 8/11/93 + */ + +/* + * This structure defines the control data for the memory based file system. 
+ */ + +struct mfsnode { + struct vnode *mfs_vnode; /* vnode associated with this mfsnode */ + caddr_t mfs_baseoff; /* base of file system in memory */ + long mfs_size; /* size of memory file system */ + pid_t mfs_pid; /* supporting process pid */ + struct buf *mfs_buflist; /* list of I/O requests */ + long mfs_spare[4]; +}; + +/* + * Convert between mfsnode pointers and vnode pointers + */ +#define VTOMFS(vp) ((struct mfsnode *)(vp)->v_data) +#define MFSTOV(mfsp) ((mfsp)->mfs_vnode) + +/* Prototypes for MFS operations on vnodes. */ +#define mfs_lookup ((int (*) __P((struct vop_lookup_args *)))mfs_badop) +#define mfs_create ((int (*) __P((struct vop_create_args *)))mfs_badop) +#define mfs_mknod ((int (*) __P((struct vop_mknod_args *)))mfs_badop) +#define mfs_access ((int (*) __P((struct vop_access_args *)))mfs_badop) +#define mfs_getattr ((int (*) __P((struct vop_getattr_args *)))mfs_badop) +#define mfs_setattr ((int (*) __P((struct vop_setattr_args *)))mfs_badop) +#define mfs_read ((int (*) __P((struct vop_read_args *)))mfs_badop) +#define mfs_write ((int (*) __P((struct vop_write_args *)))mfs_badop) +#define mfs_select ((int (*) __P((struct vop_select_args *)))mfs_badop) +#define mfs_mmap ((int (*) __P((struct vop_mmap_args *)))mfs_badop) +#define mfs_seek ((int (*) __P((struct vop_seek_args *)))mfs_badop) +#define mfs_remove ((int (*) __P((struct vop_remove_args *)))mfs_badop) +#define mfs_link ((int (*) __P((struct vop_link_args *)))mfs_badop) +#define mfs_rename ((int (*) __P((struct vop_rename_args *)))mfs_badop) +#define mfs_mkdir ((int (*) __P((struct vop_mkdir_args *)))mfs_badop) +#define mfs_rmdir ((int (*) __P((struct vop_rmdir_args *)))mfs_badop) +#define mfs_symlink ((int (*) __P((struct vop_symlink_args *)))mfs_badop) +#define mfs_readdir ((int (*) __P((struct vop_readdir_args *)))mfs_badop) +#define mfs_readlink ((int (*) __P((struct vop_readlink_args *)))mfs_badop) +#define mfs_abortop ((int (*) __P((struct vop_abortop_args *)))mfs_badop) +#define mfs_lock ((int (*) __P((struct vop_lock_args *)))nullop) +#define mfs_unlock ((int (*) __P((struct vop_unlock_args *)))nullop) +#define mfs_islocked ((int (*) __P((struct vop_islocked_args *)))nullop) +#define mfs_pathconf ((int (*) __P((struct vop_pathconf_args *)))mfs_badop) +#define mfs_advlock ((int (*) __P((struct vop_advlock_args *)))mfs_badop) +#define mfs_blkatoff ((int (*) __P((struct vop_blkatoff_args *)))mfs_badop) +#define mfs_valloc ((int (*) __P((struct vop_valloc_args *)))mfs_badop) +#define mfs_vfree ((int (*) __P((struct vop_vfree_args *)))mfs_badop) +#define mfs_truncate ((int (*) __P((struct vop_truncate_args *)))mfs_badop) +#define mfs_update ((int (*) __P((struct vop_update_args *)))mfs_badop) +#define mfs_bwrite ((int (*) __P((struct vop_bwrite_args *)))vn_bwrite) diff --git a/sys/ufs/ufs/dinode.h b/sys/ufs/ufs/dinode.h new file mode 100644 index 0000000..5b9915d --- /dev/null +++ b/sys/ufs/ufs/dinode.h @@ -0,0 +1,108 @@ +/* + * Copyright (c) 1982, 1989, 1993 + * The Regents of the University of California. All rights reserved. + * (c) UNIX System Laboratories, Inc. + * All or some portions of this file are derived from material licensed + * to the University of California by American Telephone and Telegraph + * Co. or Unix System Laboratories, Inc. and are reproduced herein with + * the permission of UNIX System Laboratories, Inc. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. 
Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * 3. All advertising materials mentioning features or use of this software + * must display the following acknowledgement: + * This product includes software developed by the University of + * California, Berkeley and its contributors. + * 4. Neither the name of the University nor the names of its contributors + * may be used to endorse or promote products derived from this software + * without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND + * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE + * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL + * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS + * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) + * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT + * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY + * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF + * SUCH DAMAGE. + * + * @(#)dinode.h 8.3 (Berkeley) 1/21/94 + */ + +/* + * The root inode is the root of the file system. Inode 0 can't be used for + * normal purposes and historically bad blocks were linked to inode 1, thus + * the root inode is 2. (Inode 1 is no longer used for this purpose, however + * numerous dump tapes make this assumption, so we are stuck with it). + */ +#define ROOTINO ((ino_t)2) + +/* + * A dinode contains all the meta-data associated with a UFS file. + * This structure defines the on-disk format of a dinode. + */ + +#define NDADDR 12 /* Direct addresses in inode. */ +#define NIADDR 3 /* Indirect addresses in inode. */ + +struct dinode { + u_short di_mode; /* 0: IFMT and permissions. */ + short di_nlink; /* 2: File link count. */ + union { + u_short oldids[2]; /* 4: Ffs: old user and group ids. */ + ino_t inumber; /* 4: Lfs: inode number. */ + } di_u; + u_quad_t di_size; /* 8: File byte count. */ + struct timespec di_atime; /* 16: Last access time. */ + struct timespec di_mtime; /* 24: Last modified time. */ + struct timespec di_ctime; /* 32: Last inode change time. */ + daddr_t di_db[NDADDR]; /* 40: Direct disk blocks. */ + daddr_t di_ib[NIADDR]; /* 88: Indirect disk blocks. */ + u_long di_flags; /* 100: Status flags (chflags). */ + long di_blocks; /* 104: Blocks actually held. */ + long di_gen; /* 108: Generation number. */ + u_long di_uid; /* 112: File owner. */ + u_long di_gid; /* 116: File group. */ + long di_spare[2]; /* 120: Reserved; currently unused */ +}; + +/* + * The di_db fields may be overlaid with other information for + * file types that do not have associated disk storage. Block + * and character devices overlay the first data block with their + * dev_t value. Short symbolic links place their path in the + * di_db area. 
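+ * For example, a symbolic link whose path fits in MAXSYMLINKLEN bytes + * (the combined di_db and di_ib space, defined below) is stored entirely + * in the inode and needs no separate data blocks.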
+ */ +#define di_inumber di_u.inumber +#define di_ogid di_u.oldids[1] +#define di_ouid di_u.oldids[0] +#define di_rdev di_db[0] +#define di_shortlink di_db +#define MAXSYMLINKLEN ((NDADDR + NIADDR) * sizeof(daddr_t)) + +/* File modes. */ +#define IEXEC 0000100 /* Executable. */ +#define IWRITE 0000200 /* Writeable. */ +#define IREAD 0000400 /* Readable. */ +#define ISVTX 0001000 /* Sticky bit. */ +#define ISGID 0002000 /* Set-gid. */ +#define ISUID 0004000 /* Set-uid. */ + +/* File types. */ +#define IFMT 0170000 /* Mask of file type. */ +#define IFIFO 0010000 /* Named pipe (fifo). */ +#define IFCHR 0020000 /* Character device. */ +#define IFDIR 0040000 /* Directory file. */ +#define IFBLK 0060000 /* Block device. */ +#define IFREG 0100000 /* Regular file. */ +#define IFLNK 0120000 /* Symbolic link. */ +#define IFSOCK 0140000 /* UNIX domain socket. */ diff --git a/sys/ufs/ufs/dir.h b/sys/ufs/ufs/dir.h new file mode 100644 index 0000000..c51bd1c --- /dev/null +++ b/sys/ufs/ufs/dir.h @@ -0,0 +1,147 @@ +/* + * Copyright (c) 1982, 1986, 1989, 1993 + * The Regents of the University of California. All rights reserved. + * (c) UNIX System Laboratories, Inc. + * All or some portions of this file are derived from material licensed + * to the University of California by American Telephone and Telegraph + * Co. or Unix System Laboratories, Inc. and are reproduced herein with + * the permission of UNIX System Laboratories, Inc. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * 3. All advertising materials mentioning features or use of this software + * must display the following acknowledgement: + * This product includes software developed by the University of + * California, Berkeley and its contributors. + * 4. Neither the name of the University nor the names of its contributors + * may be used to endorse or promote products derived from this software + * without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND + * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE + * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL + * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS + * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) + * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT + * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY + * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF + * SUCH DAMAGE. + * + * @(#)dir.h 8.2 (Berkeley) 1/21/94 + */ + +#ifndef _DIR_H_ +#define _DIR_H_ + +/* + * A directory consists of some number of blocks of DIRBLKSIZ + * bytes, where DIRBLKSIZ is chosen such that it can be transferred + * to disk in a single atomic operation (e.g. 512 bytes on most machines). 
+ * + * Each DIRBLKSIZ byte block contains some number of directory entry + * structures, which are of variable length. Each directory entry has + * a struct direct at the front of it, containing its inode number, + * the length of the entry, and the length of the name contained in + * the entry. These are followed by the name padded to a 4 byte boundary + * with null bytes. All names are guaranteed null terminated. + * The maximum length of a name in a directory is MAXNAMLEN. + * + * The macro DIRSIZ(fmt, dp) gives the amount of space required to represent + * a directory entry. Free space in a directory is represented by + * entries which have dp->d_reclen > DIRSIZ(fmt, dp). All DIRBLKSIZ bytes + * in a directory block are claimed by the directory entries. This + * usually results in the last entry in a directory having a large + * dp->d_reclen. When entries are deleted from a directory, the + * space is returned to the previous entry in the same directory + * block by increasing its dp->d_reclen. If the first entry of + * a directory block is free, then its dp->d_ino is set to 0. + * Entries other than the first in a directory do not normally have + * dp->d_ino set to 0. + */ +#define DIRBLKSIZ DEV_BSIZE +#define MAXNAMLEN 255 + +struct direct { + u_long d_ino; /* inode number of entry */ + u_short d_reclen; /* length of this record */ + u_char d_type; /* file type, see below */ + u_char d_namlen; /* length of string in d_name */ + char d_name[MAXNAMLEN + 1]; /* name with length <= MAXNAMLEN */ +}; + +/* + * File types + */ +#define DT_UNKNOWN 0 +#define DT_FIFO 1 +#define DT_CHR 2 +#define DT_DIR 4 +#define DT_BLK 6 +#define DT_REG 8 +#define DT_LNK 10 +#define DT_SOCK 12 + +/* + * Convert between stat structure types and directory types. + */ +#define IFTODT(mode) (((mode) & 0170000) >> 12) +#define DTTOIF(dirtype) ((dirtype) << 12) + +/* + * The DIRSIZ macro gives the minimum record length which will hold + * the directory entry. This requires the amount of space in struct direct + * without the d_name field, plus enough space for the name with a terminating + * null byte (dp->d_namlen+1), rounded up to a 4 byte boundary. + */ +#if (BYTE_ORDER == LITTLE_ENDIAN) +#define DIRSIZ(oldfmt, dp) \ + ((oldfmt) ? \ + ((sizeof (struct direct) - (MAXNAMLEN+1)) + (((dp)->d_type+1 + 3) &~ 3)) : \ + ((sizeof (struct direct) - (MAXNAMLEN+1)) + (((dp)->d_namlen+1 + 3) &~ 3))) +#else +#define DIRSIZ(oldfmt, dp) \ + ((sizeof (struct direct) - (MAXNAMLEN+1)) + (((dp)->d_namlen+1 + 3) &~ 3)) +#endif +#define OLDDIRFMT 1 +#define NEWDIRFMT 0 + +/* + * Template for manipulating directories. + * Should use struct direct's, but the name field + * is MAXNAMLEN - 1, and this just won't do. + */ +struct dirtemplate { + u_long dot_ino; + short dot_reclen; + u_char dot_type; + u_char dot_namlen; + char dot_name[4]; /* must be multiple of 4 */ + u_long dotdot_ino; + short dotdot_reclen; + u_char dotdot_type; + u_char dotdot_namlen; + char dotdot_name[4]; /* ditto */ +}; + +/* + * This is the old format of directories, sans type element. 
+ */ +struct odirtemplate { + u_long dot_ino; + short dot_reclen; + u_short dot_namlen; + char dot_name[4]; /* must be multiple of 4 */ + u_long dotdot_ino; + short dotdot_reclen; + u_short dotdot_namlen; + char dotdot_name[4]; /* ditto */ +}; +#endif /* !_DIR_H_ */ diff --git a/sys/ufs/ufs/inode.h b/sys/ufs/ufs/inode.h new file mode 100644 index 0000000..df15596 --- /dev/null +++ b/sys/ufs/ufs/inode.h @@ -0,0 +1,162 @@ +/* + * Copyright (c) 1982, 1989, 1993 + * The Regents of the University of California. All rights reserved. + * (c) UNIX System Laboratories, Inc. + * All or some portions of this file are derived from material licensed + * to the University of California by American Telephone and Telegraph + * Co. or Unix System Laboratories, Inc. and are reproduced herein with + * the permission of UNIX System Laboratories, Inc. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * 3. All advertising materials mentioning features or use of this software + * must display the following acknowledgement: + * This product includes software developed by the University of + * California, Berkeley and its contributors. + * 4. Neither the name of the University nor the names of its contributors + * may be used to endorse or promote products derived from this software + * without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND + * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE + * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL + * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS + * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) + * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT + * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY + * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF + * SUCH DAMAGE. + * + * @(#)inode.h 8.4 (Berkeley) 1/21/94 + */ + +#include <ufs/ufs/dinode.h> + +/* + * Theoretically, directories can be more than 2Gb in length, however, in + * practice this seems unlikely. So, we define the type doff_t as a long + * to keep down the cost of doing lookup on a 32-bit machine. If you are + * porting to a 64-bit architecture, you should make doff_t the same as off_t. + */ +#define doff_t long + +/* + * The inode is used to describe each active (or recently active) + * file in the UFS filesystem. It is composed of two types of + * information. The first part is the information that is needed + * only while the file is active (such as the identity of the file + * and linkage to speed its lookup). The second part is the + * permanent meta-data associated with the file which is read + * in from the permanent dinode from long term storage when the + * file becomes active, and is put back when the file is no longer + * being used. 
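+ * (The permanent part is the struct dinode embedded below as i_din; the + * i_atime, i_mode, and related macros that follow simply select fields + * of i_din.)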
+ */ +struct inode { + struct inode *i_next; /* Hash chain forward. */ + struct inode **i_prev; /* Hash chain back. */ + struct vnode *i_vnode; /* Vnode associated with this inode. */ + struct vnode *i_devvp; /* Vnode for block I/O. */ + u_long i_flag; /* I* flags. */ + dev_t i_dev; /* Device associated with the inode. */ + ino_t i_number; /* The identity of the inode. */ + union { /* Associated filesystem. */ + struct fs *fs; /* FFS */ + struct lfs *lfs; /* LFS */ + } inode_u; +#define i_fs inode_u.fs +#define i_lfs inode_u.lfs + struct dquot *i_dquot[MAXQUOTAS]; /* Dquot structures. */ + u_quad_t i_modrev; /* Revision level for lease. */ + struct lockf *i_lockf; /* Head of byte-level lock list. */ + pid_t i_lockholder; /* DEBUG: holder of inode lock. */ + pid_t i_lockwaiter; /* DEBUG: latest blocked for inode lock. */ + /* + * Side effects; used during directory lookup. + */ + long i_count; /* Size of free slot in directory. */ + doff_t i_endoff; /* End of useful stuff in directory. */ + doff_t i_diroff; /* Offset in dir, where we found last entry. */ + doff_t i_offset; /* Offset of free space in directory. */ + ino_t i_ino; /* Inode number of found directory. */ + u_long i_reclen; /* Size of found directory entry. */ + long i_spare[11]; /* Spares to round up to 128 bytes. */ + /* + * The on-disk dinode itself. + */ + struct dinode i_din; /* 128 bytes of the on-disk dinode. */ +}; + +#define i_atime i_din.di_atime +#define i_blocks i_din.di_blocks +#define i_ctime i_din.di_ctime +#define i_db i_din.di_db +#define i_flags i_din.di_flags +#define i_gen i_din.di_gen +#define i_gid i_din.di_gid +#define i_ib i_din.di_ib +#define i_mode i_din.di_mode +#define i_mtime i_din.di_mtime +#define i_nlink i_din.di_nlink +#define i_rdev i_din.di_rdev +#define i_shortlink i_din.di_shortlink +#define i_size i_din.di_size +#define i_uid i_din.di_uid + +/* These flags are kept in i_flag. */ +#define IN_ACCESS 0x0001 /* Access time update request. */ +#define IN_CHANGE 0x0002 /* Inode change time update request. */ +#define IN_EXLOCK 0x0004 /* File has exclusive lock. */ +#define IN_LOCKED 0x0008 /* Inode lock. */ +#define IN_LWAIT 0x0010 /* Process waiting on file lock. */ +#define IN_MODIFIED 0x0020 /* Inode has been modified. */ +#define IN_RENAME 0x0040 /* Inode is being renamed. */ +#define IN_SHLOCK 0x0080 /* File has shared lock. */ +#define IN_UPDATE 0x0100 /* Modification time update request. */ +#define IN_WANTED 0x0200 /* Inode is wanted by a process. */ + +#ifdef KERNEL +/* + * Structure used to pass around logical block paths generated by + * ufs_getlbns and used by truncate and bmap code. + */ +struct indir { + daddr_t in_lbn; /* Logical block number. */ + int in_off; /* Offset in buffer. */ + int in_exists; /* Flag if the block exists. */ +}; + +/* Convert between inode pointers and vnode pointers. */ +#define VTOI(vp) ((struct inode *)(vp)->v_data) +#define ITOV(ip) ((ip)->i_vnode) + +#define ITIMES(ip, t1, t2) { \ + if ((ip)->i_flag & (IN_ACCESS | IN_CHANGE | IN_UPDATE)) { \ + (ip)->i_flag |= IN_MODIFIED; \ + if ((ip)->i_flag & IN_ACCESS) \ + (ip)->i_atime.ts_sec = (t1)->tv_sec; \ + if ((ip)->i_flag & IN_UPDATE) { \ + (ip)->i_mtime.ts_sec = (t2)->tv_sec; \ + (ip)->i_modrev++; \ + } \ + if ((ip)->i_flag & IN_CHANGE) \ + (ip)->i_ctime.ts_sec = time.tv_sec; \ + (ip)->i_flag &= ~(IN_ACCESS | IN_CHANGE | IN_UPDATE); \ + } \ +} + +/* This overlays the fid structure (see mount.h). */ +struct ufid { + u_short ufid_len; /* Length of structure. */ + u_short ufid_pad; /* Force long alignment. 
*/ + ino_t ufid_ino; /* File number (ino). */ + long ufid_gen; /* Generation number. */ +}; +#endif /* KERNEL */ diff --git a/sys/ufs/ufs/lockf.h b/sys/ufs/ufs/lockf.h new file mode 100644 index 0000000..0ec61db --- /dev/null +++ b/sys/ufs/ufs/lockf.h @@ -0,0 +1,79 @@ +/* + * Copyright (c) 1991, 1993 + * The Regents of the University of California. All rights reserved. + * + * This code is derived from software contributed to Berkeley by + * Scooter Morris at Genentech Inc. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * 3. All advertising materials mentioning features or use of this software + * must display the following acknowledgement: + * This product includes software developed by the University of + * California, Berkeley and its contributors. + * 4. Neither the name of the University nor the names of its contributors + * may be used to endorse or promote products derived from this software + * without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND + * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE + * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL + * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS + * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) + * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT + * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY + * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF + * SUCH DAMAGE. + * + * @(#)lockf.h 8.1 (Berkeley) 6/11/93 + */ + +/* + * The lockf structure is a kernel structure which contains the information + * associated with a byte range lock. The lockf structures are linked into + * the inode structure. Locks are sorted by the starting byte of the lock for + * efficiency. + */ +struct lockf { + short lf_flags; /* Lock semantics: F_POSIX, F_FLOCK, F_WAIT */ + short lf_type; /* Lock type: F_RDLCK, F_WRLCK */ + off_t lf_start; /* The byte # of the start of the lock */ + off_t lf_end; /* The byte # of the end of the lock (-1=EOF)*/ + caddr_t lf_id; /* The id of the resource holding the lock */ + struct inode *lf_inode; /* Back pointer to the inode */ + struct lockf *lf_next; /* A pointer to the next lock on this inode */ + struct lockf *lf_block; /* The list of blocked locks */ +}; + +/* Maximum length of sleep chains to traverse to try and detect deadlock. 
*/ +#define MAXDEPTH 50 + +__BEGIN_DECLS +void lf_addblock __P((struct lockf *, struct lockf *)); +int lf_clearlock __P((struct lockf *)); +int lf_findoverlap __P((struct lockf *, + struct lockf *, int, struct lockf ***, struct lockf **)); +struct lockf * + lf_getblock __P((struct lockf *)); +int lf_getlock __P((struct lockf *, struct flock *)); +int lf_setlock __P((struct lockf *)); +void lf_split __P((struct lockf *, struct lockf *)); +void lf_wakelock __P((struct lockf *)); +__END_DECLS + +#ifdef LOCKF_DEBUG +extern int lockf_debug; + +__BEGIN_DECLS +void lf_print __P((char *, struct lockf *)); +void lf_printlist __P((char *, struct lockf *)); +__END_DECLS +#endif diff --git a/sys/ufs/ufs/quota.h b/sys/ufs/ufs/quota.h new file mode 100644 index 0000000..11efb40 --- /dev/null +++ b/sys/ufs/ufs/quota.h @@ -0,0 +1,207 @@ +/* + * Copyright (c) 1982, 1986, 1993 + * The Regents of the University of California. All rights reserved. + * + * This code is derived from software contributed to Berkeley by + * Robert Elz at The University of Melbourne. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * 3. All advertising materials mentioning features or use of this software + * must display the following acknowledgement: + * This product includes software developed by the University of + * California, Berkeley and its contributors. + * 4. Neither the name of the University nor the names of its contributors + * may be used to endorse or promote products derived from this software + * without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND + * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE + * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL + * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS + * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) + * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT + * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY + * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF + * SUCH DAMAGE. + * + * @(#)quota.h 8.1 (Berkeley) 6/11/93 + */ + +#ifndef _QUOTA_ +#define _QUOTA_ + +/* + * Definitions for disk quotas imposed on the average user + * (big brother finally hits UNIX). + * + * The following constants define the amount of time given a user before the + * soft limits are treated as hard limits (usually resulting in an allocation + * failure). The timer is started when the user crosses their soft limit, it + * is reset when they go below their soft limit. + */ +#define MAX_IQ_TIME (7*24*60*60) /* 1 week */ +#define MAX_DQ_TIME (7*24*60*60) /* 1 week */ + +/* + * The following constants define the usage of the quota file array in the + * ufsmount structure and dquot array in the inode structure. 
The semantics + * of the elements of these arrays are defined in the routine getinoquota; + * the remainder of the quota code treats them generically and need not be + * inspected when changing the size of the array. + */ +#define MAXQUOTAS 2 +#define USRQUOTA 0 /* element used for user quotas */ +#define GRPQUOTA 1 /* element used for group quotas */ + +/* + * Definitions for the default names of the quotas files. + */ +#define INITQFNAMES { \ + "user", /* USRQUOTA */ \ + "group", /* GRPQUOTA */ \ + "undefined", \ +}; +#define QUOTAFILENAME "quota" +#define QUOTAGROUP "operator" + +/* + * Command definitions for the 'quotactl' system call. The commands are + * broken into a main command defined below and a subcommand that is used + * to convey the type of quota that is being manipulated (see above). + */ +#define SUBCMDMASK 0x00ff +#define SUBCMDSHIFT 8 +#define QCMD(cmd, type) (((cmd) << SUBCMDSHIFT) | ((type) & SUBCMDMASK)) + +#define Q_QUOTAON 0x0100 /* enable quotas */ +#define Q_QUOTAOFF 0x0200 /* disable quotas */ +#define Q_GETQUOTA 0x0300 /* get limits and usage */ +#define Q_SETQUOTA 0x0400 /* set limits and usage */ +#define Q_SETUSE 0x0500 /* set usage */ +#define Q_SYNC 0x0600 /* sync disk copy of a filesystems quotas */ + +/* + * The following structure defines the format of the disk quota file + * (as it appears on disk) - the file is an array of these structures + * indexed by user or group number. The setquota system call establishes + * the vnode for each quota file (a pointer is retained in the ufsmount + * structure). + */ +struct dqblk { + u_long dqb_bhardlimit; /* absolute limit on disk blks alloc */ + u_long dqb_bsoftlimit; /* preferred limit on disk blks */ + u_long dqb_curblocks; /* current block count */ + u_long dqb_ihardlimit; /* maximum # allocated inodes + 1 */ + u_long dqb_isoftlimit; /* preferred inode limit */ + u_long dqb_curinodes; /* current # allocated inodes */ + time_t dqb_btime; /* time limit for excessive disk use */ + time_t dqb_itime; /* time limit for excessive files */ +}; + +/* + * The following structure records disk usage for a user or group on a + * filesystem. There is one allocated for each quota that exists on any + * filesystem for the current user or group. A cache is kept of recently + * used entries. + */ +struct dquot { + struct dquot *dq_forw, **dq_back; /* hash list */ + struct dquot *dq_freef, **dq_freeb; /* free list */ + short dq_flags; /* flags, see below */ + short dq_cnt; /* count of active references */ + short dq_spare; /* unused spare padding */ + short dq_type; /* quota type of this dquot */ + u_long dq_id; /* identifier this applies to */ + struct ufsmount *dq_ump; /* filesystem that this is taken from */ + struct dqblk dq_dqb; /* actual usage & quotas */ +}; +/* + * Flag values. + */ +#define DQ_LOCK 0x01 /* this quota locked (no MODS) */ +#define DQ_WANT 0x02 /* wakeup on unlock */ +#define DQ_MOD 0x04 /* this quota modified since read */ +#define DQ_FAKE 0x08 /* no limits here, just usage */ +#define DQ_BLKS 0x10 /* has been warned about blk limit */ +#define DQ_INODS 0x20 /* has been warned about inode limit */ +/* + * Shorthand notation. 
+ */ +#define dq_bhardlimit dq_dqb.dqb_bhardlimit +#define dq_bsoftlimit dq_dqb.dqb_bsoftlimit +#define dq_curblocks dq_dqb.dqb_curblocks +#define dq_ihardlimit dq_dqb.dqb_ihardlimit +#define dq_isoftlimit dq_dqb.dqb_isoftlimit +#define dq_curinodes dq_dqb.dqb_curinodes +#define dq_btime dq_dqb.dqb_btime +#define dq_itime dq_dqb.dqb_itime + +/* + * If the system has never checked for a quota for this file, then it is set + * to NODQUOT. Once a write attempt is made the inode pointer is set to + * reference a dquot structure. + */ +#define NODQUOT ((struct dquot *) 0) + +/* + * Flags to chkdq() and chkiq() + */ +#define FORCE 0x01 /* force usage changes independent of limits */ +#define CHOWN 0x02 /* (advisory) change initiated by chown */ + +/* + * Macros to avoid subroutine calls to trivial functions. + */ +#ifdef DIAGNOSTIC +#define DQREF(dq) dqref(dq) +#else +#define DQREF(dq) (dq)->dq_cnt++ +#endif + +#include <sys/cdefs.h> + +struct dquot; +struct inode; +struct mount; +struct proc; +struct ucred; +struct ufsmount; +struct vnode; +__BEGIN_DECLS +int chkdq __P((struct inode *, long, struct ucred *, int)); +int chkdqchg __P((struct inode *, long, struct ucred *, int)); +int chkiq __P((struct inode *, long, struct ucred *, int)); +int chkiqchg __P((struct inode *, long, struct ucred *, int)); +void dqflush __P((struct vnode *)); +int dqget __P((struct vnode *, + u_long, struct ufsmount *, int, struct dquot **)); +void dqinit __P((void)); +void dqref __P((struct dquot *)); +void dqrele __P((struct vnode *, struct dquot *)); +int dqsync __P((struct vnode *, struct dquot *)); +int getinoquota __P((struct inode *)); +int getquota __P((struct mount *, u_long, int, caddr_t)); +int qsync __P((struct mount *mp)); +int quotaoff __P((struct proc *, struct mount *, int)); +int quotaon __P((struct proc *, struct mount *, int, caddr_t)); +int setquota __P((struct mount *, u_long, int, caddr_t)); +int setuse __P((struct mount *, u_long, int, caddr_t)); +int ufs_quotactl __P((struct mount *, int, uid_t, caddr_t, struct proc *)); +__END_DECLS + +#ifdef DIAGNOSTIC +__BEGIN_DECLS +void chkdquot __P((struct inode *)); +__END_DECLS +#endif + +#endif /* _QUOTA_ */ diff --git a/sys/ufs/ufs/ufs_bmap.c b/sys/ufs/ufs/ufs_bmap.c new file mode 100644 index 0000000..bcd838d --- /dev/null +++ b/sys/ufs/ufs/ufs_bmap.c @@ -0,0 +1,294 @@ +/* + * Copyright (c) 1989, 1991, 1993 + * The Regents of the University of California. All rights reserved. + * (c) UNIX System Laboratories, Inc. + * All or some portions of this file are derived from material licensed + * to the University of California by American Telephone and Telegraph + * Co. or Unix System Laboratories, Inc. and are reproduced herein with + * the permission of UNIX System Laboratories, Inc. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * 3. All advertising materials mentioning features or use of this software + * must display the following acknowledgement: + * This product includes software developed by the University of + * California, Berkeley and its contributors. + * 4. 
Neither the name of the University nor the names of its contributors + * may be used to endorse or promote products derived from this software + * without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND + * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE + * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL + * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS + * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) + * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT + * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY + * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF + * SUCH DAMAGE. + * + * @(#)ufs_bmap.c 8.6 (Berkeley) 1/21/94 + */ + +#include <sys/param.h> +#include <sys/buf.h> +#include <sys/proc.h> +#include <sys/vnode.h> +#include <sys/mount.h> +#include <sys/resourcevar.h> +#include <sys/trace.h> + +#include <miscfs/specfs/specdev.h> + +#include <ufs/ufs/quota.h> +#include <ufs/ufs/inode.h> +#include <ufs/ufs/ufsmount.h> +#include <ufs/ufs/ufs_extern.h> + +/* + * Bmap converts the logical block number of a file to its physical block + * number on the disk. The conversion is done by using the logical block + * number to index into the array of block pointers described by the dinode. + */ +int +ufs_bmap(ap) + struct vop_bmap_args /* { + struct vnode *a_vp; + daddr_t a_bn; + struct vnode **a_vpp; + daddr_t *a_bnp; + int *a_runp; + } */ *ap; +{ + /* + * Check for underlying vnode requests and ensure that logical + * to physical mapping is requested. + */ + if (ap->a_vpp != NULL) + *ap->a_vpp = VTOI(ap->a_vp)->i_devvp; + if (ap->a_bnp == NULL) + return (0); + + return (ufs_bmaparray(ap->a_vp, ap->a_bn, ap->a_bnp, NULL, NULL, + ap->a_runp)); +} + +/* + * Indirect blocks are now on the vnode for the file. They are given negative + * logical block numbers. Indirect blocks are addressed by the negative + * address of the first data block to which they point. Double indirect blocks + * are addressed by one less than the address of the first indirect block to + * which they point. Triple indirect blocks are addressed by one less than + * the address of the first double indirect block to which they point. + * + * ufs_bmaparray does the bmap conversion, and if requested returns the + * array of logical blocks which must be traversed to get to a block. + * Each entry contains the offset into that block that gets you to the + * next block and the disk address of the block (if it is assigned). 
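+ * + * For example, with NDADDR direct blocks and MNINDIR(ump) pointers per + * indirect block, the single indirect block is logical block -NDADDR and + * the double indirect block is -(NDADDR + MNINDIR(ump) + 1).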
+ */ + +int +ufs_bmaparray(vp, bn, bnp, ap, nump, runp) + struct vnode *vp; + register daddr_t bn; + daddr_t *bnp; + struct indir *ap; + int *nump; + int *runp; +{ + register struct inode *ip; + struct buf *bp; + struct ufsmount *ump; + struct mount *mp; + struct vnode *devvp; + struct indir a[NIADDR], *xap; + daddr_t daddr; + long metalbn; + int error, maxrun, num; + + ip = VTOI(vp); + mp = vp->v_mount; + ump = VFSTOUFS(mp); +#ifdef DIAGNOSTIC + if (ap != NULL && nump == NULL || ap == NULL && nump != NULL) + panic("ufs_bmaparray: invalid arguments"); +#endif + + if (runp) { + /* + * XXX + * If MAXBSIZE is the largest transfer the disks can handle, + * we probably want maxrun to be 1 block less so that we + * don't create a block larger than the device can handle. + */ + *runp = 0; + maxrun = MAXBSIZE / mp->mnt_stat.f_iosize - 1; + } + + xap = ap == NULL ? a : ap; + if (!nump) + nump = # + if (error = ufs_getlbns(vp, bn, xap, nump)) + return (error); + + num = *nump; + if (num == 0) { + *bnp = blkptrtodb(ump, ip->i_db[bn]); + if (*bnp == 0) + *bnp = -1; + else if (runp) + for (++bn; bn < NDADDR && *runp < maxrun && + is_sequential(ump, ip->i_db[bn - 1], ip->i_db[bn]); + ++bn, ++*runp); + return (0); + } + + + /* Get disk address out of indirect block array */ + daddr = ip->i_ib[xap->in_off]; + + devvp = VFSTOUFS(vp->v_mount)->um_devvp; + for (bp = NULL, ++xap; --num; ++xap) { + /* + * Exit the loop if there is no disk address assigned yet and + * the indirect block isn't in the cache, or if we were + * looking for an indirect block and we've found it. + */ + + metalbn = xap->in_lbn; + if (daddr == 0 && !incore(vp, metalbn) || metalbn == bn) + break; + /* + * If we get here, we've either got the block in the cache + * or we have a disk address for it, go fetch it. + */ + if (bp) + brelse(bp); + + xap->in_exists = 1; + bp = getblk(vp, metalbn, mp->mnt_stat.f_iosize, 0, 0); + if (bp->b_flags & (B_DONE | B_DELWRI)) { + trace(TR_BREADHIT, pack(vp, size), metalbn); + } +#ifdef DIAGNOSTIC + else if (!daddr) + panic("ufs_bmaparry: indirect block not in cache"); +#endif + else { + trace(TR_BREADMISS, pack(vp, size), metalbn); + bp->b_blkno = blkptrtodb(ump, daddr); + bp->b_flags |= B_READ; + VOP_STRATEGY(bp); + curproc->p_stats->p_ru.ru_inblock++; /* XXX */ + if (error = biowait(bp)) { + brelse(bp); + return (error); + } + } + + daddr = ((daddr_t *)bp->b_data)[xap->in_off]; + if (num == 1 && daddr && runp) + for (bn = xap->in_off + 1; + bn < MNINDIR(ump) && *runp < maxrun && + is_sequential(ump, ((daddr_t *)bp->b_data)[bn - 1], + ((daddr_t *)bp->b_data)[bn]); + ++bn, ++*runp); + } + if (bp) + brelse(bp); + + daddr = blkptrtodb(ump, daddr); + *bnp = daddr == 0 ? -1 : daddr; + return (0); +} + +/* + * Create an array of logical block number/offset pairs which represent the + * path of indirect blocks required to access a data block. The first "pair" + * contains the logical block number of the appropriate single, double or + * triple indirect block and the offset into the inode indirect block array. + * Note, the logical block number of the inode single/double/triple indirect + * block appears twice in the array, once with the offset into the i_ib and + * once with the offset into the page itself. 
+ */ +int +ufs_getlbns(vp, bn, ap, nump) + struct vnode *vp; + register daddr_t bn; + struct indir *ap; + int *nump; +{ + long metalbn, realbn; + struct ufsmount *ump; + int blockcnt, i, numlevels, off; + + ump = VFSTOUFS(vp->v_mount); + if (nump) + *nump = 0; + numlevels = 0; + realbn = bn; + if ((long)bn < 0) + bn = -(long)bn; + + /* The first NDADDR blocks are direct blocks. */ + if (bn < NDADDR) + return (0); + + /* + * Determine the number of levels of indirection. After this loop + * is done, blockcnt indicates the number of data blocks possible + * at the given level of indirection, and NIADDR - i is the number + * of levels of indirection needed to locate the requested block. + */ + for (blockcnt = 1, i = NIADDR, bn -= NDADDR;; i--, bn -= blockcnt) { + if (i == 0) + return (EFBIG); + blockcnt *= MNINDIR(ump); + if (bn < blockcnt) + break; + } + + /* Calculate the address of the first meta-block. */ + if (realbn >= 0) + metalbn = -(realbn - bn + NIADDR - i); + else + metalbn = -(-realbn - bn + NIADDR - i); + + /* + * At each iteration, off is the offset into the bap array which is + * an array of disk addresses at the current level of indirection. + * The logical block number and the offset in that block are stored + * into the argument array. + */ + ap->in_lbn = metalbn; + ap->in_off = off = NIADDR - i; + ap->in_exists = 0; + ap++; + for (++numlevels; i <= NIADDR; i++) { + /* If searching for a meta-data block, quit when found. */ + if (metalbn == realbn) + break; + + blockcnt /= MNINDIR(ump); + off = (bn / blockcnt) % MNINDIR(ump); + + ++numlevels; + ap->in_lbn = metalbn; + ap->in_off = off; + ap->in_exists = 0; + ++ap; + + metalbn -= -1 + off * blockcnt; + } + if (nump) + *nump = numlevels; + return (0); +} diff --git a/sys/ufs/ufs/ufs_disksubr.c b/sys/ufs/ufs/ufs_disksubr.c new file mode 100644 index 0000000..78dede4 --- /dev/null +++ b/sys/ufs/ufs/ufs_disksubr.c @@ -0,0 +1,364 @@ +/* + * Copyright (c) 1982, 1986, 1988, 1993 + * The Regents of the University of California. All rights reserved. + * (c) UNIX System Laboratories, Inc. + * All or some portions of this file are derived from material licensed + * to the University of California by American Telephone and Telegraph + * Co. or Unix System Laboratories, Inc. and are reproduced herein with + * the permission of UNIX System Laboratories, Inc. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * 3. All advertising materials mentioning features or use of this software + * must display the following acknowledgement: + * This product includes software developed by the University of + * California, Berkeley and its contributors. + * 4. Neither the name of the University nor the names of its contributors + * may be used to endorse or promote products derived from this software + * without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND + * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. 
IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE + * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL + * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS + * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) + * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT + * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY + * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF + * SUCH DAMAGE. + * + * @(#)ufs_disksubr.c 8.5 (Berkeley) 1/21/94 + */ + +#include <sys/param.h> +#include <sys/systm.h> +#include <sys/buf.h> +#include <sys/disklabel.h> +#include <sys/syslog.h> + +/* + * Seek sort for disks. We depend on the driver which calls us using b_resid + * as the current cylinder number. + * + * The argument ap structure holds a b_actf activity chain pointer on which we + * keep two queues, sorted in ascending cylinder order. The first queue holds + * those requests which are positioned after the current cylinder (in the first + * request); the second holds requests which came in after their cylinder number + * was passed. Thus we implement a one way scan, retracting after reaching the + * end of the drive to the first request on the second queue, at which time it + * becomes the first queue. + * + * A one-way scan is natural because of the way UNIX read-ahead blocks are + * allocated. + */ + +/* + * For portability with historic industry practice, the + * cylinder number has to be maintained in the `b_resid' + * field. + */ +#define b_cylinder b_resid + +void +disksort(ap, bp) + register struct buf *ap, *bp; +{ + register struct buf *bq; + + /* If the queue is empty, then it's easy. */ + if (ap->b_actf == NULL) { + bp->b_actf = NULL; + ap->b_actf = bp; + return; + } + + /* + * If we lie after the first (currently active) request, then we + * must locate the second request list and add ourselves to it. + */ + bq = ap->b_actf; + if (bp->b_cylinder < bq->b_cylinder) { + while (bq->b_actf) { + /* + * Check for an ``inversion'' in the normally ascending + * cylinder numbers, indicating the start of the second + * request list. + */ + if (bq->b_actf->b_cylinder < bq->b_cylinder) { + /* + * Search the second request list for the first + * request at a larger cylinder number. We go + * before that; if there is no such request, we + * go at end. + */ + do { + if (bp->b_cylinder < + bq->b_actf->b_cylinder) + goto insert; + if (bp->b_cylinder == + bq->b_actf->b_cylinder && + bp->b_blkno < bq->b_actf->b_blkno) + goto insert; + bq = bq->b_actf; + } while (bq->b_actf); + goto insert; /* after last */ + } + bq = bq->b_actf; + } + /* + * No inversions... we will go after the last, and + * be the first request in the second request list. + */ + goto insert; + } + /* + * Request is at/after the current request... + * sort in the first request list. + */ + while (bq->b_actf) { + /* + * We want to go after the current request if there is an + * inversion after it (i.e. it is the end of the first + * request list), or if the next request is a larger cylinder + * than our request. + */ + if (bq->b_actf->b_cylinder < bq->b_cylinder || + bp->b_cylinder < bq->b_actf->b_cylinder || + (bp->b_cylinder == bq->b_actf->b_cylinder && + bp->b_blkno < bq->b_actf->b_blkno)) + goto insert; + bq = bq->b_actf; + } + /* + * Neither a second list nor a larger request... we go at the end of + * the first list, which is the same as the end of the whole schebang. 
+ */ +insert: bp->b_actf = bq->b_actf; + bq->b_actf = bp; +} + +/* + * Attempt to read a disk label from a device using the indicated strategy + * routine. The label must be partly set up before this: secpercyl and + * anything required in the strategy routine (e.g., sector size) must be + * filled in before calling us. Returns NULL on success and an error + * string on failure. + */ +char * +readdisklabel(dev, strat, lp) + dev_t dev; + int (*strat)(); + register struct disklabel *lp; +{ + register struct buf *bp; + struct disklabel *dlp; + char *msg = NULL; + + if (lp->d_secperunit == 0) + lp->d_secperunit = 0x1fffffff; + lp->d_npartitions = 1; + if (lp->d_partitions[0].p_size == 0) + lp->d_partitions[0].p_size = 0x1fffffff; + lp->d_partitions[0].p_offset = 0; + + bp = geteblk((int)lp->d_secsize); + bp->b_dev = dev; + bp->b_blkno = LABELSECTOR; + bp->b_bcount = lp->d_secsize; + bp->b_flags = B_BUSY | B_READ; + bp->b_cylinder = LABELSECTOR / lp->d_secpercyl; + (*strat)(bp); + if (biowait(bp)) + msg = "I/O error"; + else for (dlp = (struct disklabel *)bp->b_data; + dlp <= (struct disklabel *)((char *)bp->b_data + + DEV_BSIZE - sizeof(*dlp)); + dlp = (struct disklabel *)((char *)dlp + sizeof(long))) { + if (dlp->d_magic != DISKMAGIC || dlp->d_magic2 != DISKMAGIC) { + if (msg == NULL) + msg = "no disk label"; + } else if (dlp->d_npartitions > MAXPARTITIONS || + dkcksum(dlp) != 0) + msg = "disk label corrupted"; + else { + *lp = *dlp; + msg = NULL; + break; + } + } + bp->b_flags = B_INVAL | B_AGE; + brelse(bp); + return (msg); +} + +/* + * Check new disk label for sensibility before setting it. + */ +int +setdisklabel(olp, nlp, openmask) + register struct disklabel *olp, *nlp; + u_long openmask; +{ + register i; + register struct partition *opp, *npp; + + if (nlp->d_magic != DISKMAGIC || nlp->d_magic2 != DISKMAGIC || + dkcksum(nlp) != 0) + return (EINVAL); + while ((i = ffs((long)openmask)) != 0) { + i--; + openmask &= ~(1 << i); + if (nlp->d_npartitions <= i) + return (EBUSY); + opp = &olp->d_partitions[i]; + npp = &nlp->d_partitions[i]; + if (npp->p_offset != opp->p_offset || npp->p_size < opp->p_size) + return (EBUSY); + /* + * Copy internally-set partition information + * if new label doesn't include it. XXX + */ + if (npp->p_fstype == FS_UNUSED && opp->p_fstype != FS_UNUSED) { + npp->p_fstype = opp->p_fstype; + npp->p_fsize = opp->p_fsize; + npp->p_frag = opp->p_frag; + npp->p_cpg = opp->p_cpg; + } + } + nlp->d_checksum = 0; + nlp->d_checksum = dkcksum(nlp); + *olp = *nlp; + return (0); +} + +/* encoding of disk minor numbers, should be elsewhere... */ +#define dkunit(dev) (minor(dev) >> 3) +#define dkpart(dev) (minor(dev) & 07) +#define dkminor(unit, part) (((unit) << 3) | (part)) + +/* + * Write disk label back to device after modification. 
+ */ +int +writedisklabel(dev, strat, lp) + dev_t dev; + int (*strat)(); + register struct disklabel *lp; +{ + struct buf *bp; + struct disklabel *dlp; + int labelpart; + int error = 0; + + labelpart = dkpart(dev); + if (lp->d_partitions[labelpart].p_offset != 0) { + if (lp->d_partitions[0].p_offset != 0) + return (EXDEV); /* not quite right */ + labelpart = 0; + } + bp = geteblk((int)lp->d_secsize); + bp->b_dev = makedev(major(dev), dkminor(dkunit(dev), labelpart)); + bp->b_blkno = LABELSECTOR; + bp->b_bcount = lp->d_secsize; + bp->b_flags = B_READ; + (*strat)(bp); + if (error = biowait(bp)) + goto done; + for (dlp = (struct disklabel *)bp->b_data; + dlp <= (struct disklabel *) + ((char *)bp->b_data + lp->d_secsize - sizeof(*dlp)); + dlp = (struct disklabel *)((char *)dlp + sizeof(long))) { + if (dlp->d_magic == DISKMAGIC && dlp->d_magic2 == DISKMAGIC && + dkcksum(dlp) == 0) { + *dlp = *lp; + bp->b_flags = B_WRITE; + (*strat)(bp); + error = biowait(bp); + goto done; + } + } + error = ESRCH; +done: + brelse(bp); + return (error); +} + +/* + * Compute checksum for disk label. + */ +dkcksum(lp) + register struct disklabel *lp; +{ + register u_short *start, *end; + register u_short sum = 0; + + start = (u_short *)lp; + end = (u_short *)&lp->d_partitions[lp->d_npartitions]; + while (start < end) + sum ^= *start++; + return (sum); +} + +/* + * Disk error is the preface to plaintive error messages + * about failing disk transfers. It prints messages of the form + +hp0g: hard error reading fsbn 12345 of 12344-12347 (hp0 bn %d cn %d tn %d sn %d) + + * if the offset of the error in the transfer and a disk label + * are both available. blkdone should be -1 if the position of the error + * is unknown; the disklabel pointer may be null from drivers that have not + * been converted to use them. The message is printed with printf + * if pri is LOG_PRINTF, otherwise it uses log at the specified priority. + * The message should be completed (with at least a newline) with printf + * or addlog, respectively. There is no trailing space. + */ +void +diskerr(bp, dname, what, pri, blkdone, lp) + register struct buf *bp; + char *dname, *what; + int pri, blkdone; + register struct disklabel *lp; +{ + int unit = dkunit(bp->b_dev), part = dkpart(bp->b_dev); + register void (*pr) __P((const char *, ...)); + char partname = 'a' + part; + int sn; + + if (pri != LOG_PRINTF) { + log(pri, ""); + pr = addlog; + } else + pr = printf; + (*pr)("%s%d%c: %s %sing fsbn ", dname, unit, partname, what, + bp->b_flags & B_READ ? "read" : "writ"); + sn = bp->b_blkno; + if (bp->b_bcount <= DEV_BSIZE) + (*pr)("%d", sn); + else { + if (blkdone >= 0) { + sn += blkdone; + (*pr)("%d of ", sn); + } + (*pr)("%d-%d", bp->b_blkno, + bp->b_blkno + (bp->b_bcount - 1) / DEV_BSIZE); + } + if (lp && (blkdone >= 0 || bp->b_bcount <= lp->d_secsize)) { +#ifdef tahoe + sn *= DEV_BSIZE / lp->d_secsize; /* XXX */ +#endif + sn += lp->d_partitions[part].p_offset; + (*pr)(" (%s%d bn %d; cn %d", dname, unit, sn, + sn / lp->d_secpercyl); + sn %= lp->d_secpercyl; + (*pr)(" tn %d sn %d)", sn / lp->d_nsectors, sn % lp->d_nsectors); + } +} diff --git a/sys/ufs/ufs/ufs_extern.h b/sys/ufs/ufs/ufs_extern.h new file mode 100644 index 0000000..e25923e --- /dev/null +++ b/sys/ufs/ufs/ufs_extern.h @@ -0,0 +1,125 @@ +/*- + * Copyright (c) 1991, 1993, 1994 + * The Regents of the University of California. All rights reserved. 
+ * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * 3. All advertising materials mentioning features or use of this software + * must display the following acknowledgement: + * This product includes software developed by the University of + * California, Berkeley and its contributors. + * 4. Neither the name of the University nor the names of its contributors + * may be used to endorse or promote products derived from this software + * without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND + * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE + * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL + * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS + * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) + * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT + * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY + * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF + * SUCH DAMAGE. + * + * @(#)ufs_extern.h 8.3 (Berkeley) 4/16/94 + */ + +struct buf; +struct direct; +struct disklabel; +struct fid; +struct flock; +struct inode; +struct mbuf; +struct mount; +struct nameidata; +struct proc; +struct ucred; +struct uio; +struct vattr; +struct vnode; +struct ufs_args; + +__BEGIN_DECLS +void diskerr + __P((struct buf *, char *, char *, int, int, struct disklabel *)); +void disksort __P((struct buf *, struct buf *)); +u_int dkcksum __P((struct disklabel *)); +char *readdisklabel __P((dev_t, int (*)(), struct disklabel *)); +int setdisklabel __P((struct disklabel *, struct disklabel *, u_long)); +int writedisklabel __P((dev_t, int (*)(), struct disklabel *)); + +int ufs_abortop __P((struct vop_abortop_args *)); +int ufs_access __P((struct vop_access_args *)); +int ufs_advlock __P((struct vop_advlock_args *)); +int ufs_bmap __P((struct vop_bmap_args *)); +int ufs_check_export __P((struct mount *, struct ufid *, struct mbuf *, + struct vnode **, int *exflagsp, struct ucred **)); +int ufs_checkpath __P((struct inode *, struct inode *, struct ucred *)); +int ufs_close __P((struct vop_close_args *)); +int ufs_create __P((struct vop_create_args *)); +void ufs_dirbad __P((struct inode *, doff_t, char *)); +int ufs_dirbadentry __P((struct vnode *, struct direct *, int)); +int ufs_dirempty __P((struct inode *, ino_t, struct ucred *)); +int ufs_direnter __P((struct inode *, struct vnode *,struct componentname *)); +int ufs_dirremove __P((struct vnode *, struct componentname*)); +int ufs_dirrewrite + __P((struct inode *, struct inode *, struct componentname *)); +int ufs_getattr __P((struct vop_getattr_args *)); +int ufs_getlbns __P((struct vnode *, daddr_t, struct indir *, int *)); +struct vnode * + ufs_ihashget __P((dev_t, ino_t)); +void ufs_ihashinit __P((void)); +void ufs_ihashins 
__P((struct inode *)); +struct vnode * + ufs_ihashlookup __P((dev_t, ino_t)); +void ufs_ihashrem __P((struct inode *)); +int ufs_inactive __P((struct vop_inactive_args *)); +int ufs_init __P((void)); +int ufs_ioctl __P((struct vop_ioctl_args *)); +int ufs_islocked __P((struct vop_islocked_args *)); +int ufs_link __P((struct vop_link_args *)); +int ufs_lock __P((struct vop_lock_args *)); +int ufs_lookup __P((struct vop_lookup_args *)); +int ufs_makeinode __P((int mode, struct vnode *, struct vnode **, struct componentname *)); +int ufs_mkdir __P((struct vop_mkdir_args *)); +int ufs_mknod __P((struct vop_mknod_args *)); +int ufs_mmap __P((struct vop_mmap_args *)); +int ufs_open __P((struct vop_open_args *)); +int ufs_pathconf __P((struct vop_pathconf_args *)); +int ufs_print __P((struct vop_print_args *)); +int ufs_readdir __P((struct vop_readdir_args *)); +int ufs_readlink __P((struct vop_readlink_args *)); +int ufs_reclaim __P((struct vop_reclaim_args *)); +int ufs_remove __P((struct vop_remove_args *)); +int ufs_rename __P((struct vop_rename_args *)); +int ufs_rmdir __P((struct vop_rmdir_args *)); +int ufs_root __P((struct mount *, struct vnode **)); +int ufs_seek __P((struct vop_seek_args *)); +int ufs_select __P((struct vop_select_args *)); +int ufs_setattr __P((struct vop_setattr_args *)); +int ufs_start __P((struct mount *, int, struct proc *)); +int ufs_strategy __P((struct vop_strategy_args *)); +int ufs_symlink __P((struct vop_symlink_args *)); +int ufs_unlock __P((struct vop_unlock_args *)); +int ufs_vinit __P((struct mount *, + int (**)(), int (**)(), struct vnode **)); +int ufsspec_close __P((struct vop_close_args *)); +int ufsspec_read __P((struct vop_read_args *)); +int ufsspec_write __P((struct vop_write_args *)); + +#ifdef FIFO +int ufsfifo_read __P((struct vop_read_args *)); +int ufsfifo_write __P((struct vop_write_args *)); +int ufsfifo_close __P((struct vop_close_args *)); +#endif +__END_DECLS diff --git a/sys/ufs/ufs/ufs_ihash.c b/sys/ufs/ufs/ufs_ihash.c new file mode 100644 index 0000000..4a37c90 --- /dev/null +++ b/sys/ufs/ufs/ufs_ihash.c @@ -0,0 +1,154 @@ +/* + * Copyright (c) 1982, 1986, 1989, 1991, 1993 + * The Regents of the University of California. All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * 3. All advertising materials mentioning features or use of this software + * must display the following acknowledgement: + * This product includes software developed by the University of + * California, Berkeley and its contributors. + * 4. Neither the name of the University nor the names of its contributors + * may be used to endorse or promote products derived from this software + * without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND + * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. 
IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE + * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL + * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS + * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) + * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT + * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY + * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF + * SUCH DAMAGE. + * + * @(#)ufs_ihash.c 8.4 (Berkeley) 12/30/93 + */ + +#include <sys/param.h> +#include <sys/systm.h> +#include <sys/vnode.h> +#include <sys/malloc.h> +#include <sys/proc.h> + +#include <ufs/ufs/quota.h> +#include <ufs/ufs/inode.h> +#include <ufs/ufs/ufs_extern.h> + +/* + * Structures associated with inode cacheing. + */ +struct inode **ihashtbl; +u_long ihash; /* size of hash table - 1 */ +#define INOHASH(device, inum) (((device) + (inum)) & ihash) + +/* + * Initialize inode hash table. + */ +void +ufs_ihashinit() +{ + + ihashtbl = hashinit(desiredvnodes, M_UFSMNT, &ihash); +} + +/* + * Use the device/inum pair to find the incore inode, and return a pointer + * to it. If it is in core, return it, even if it is locked. + */ +struct vnode * +ufs_ihashlookup(device, inum) + dev_t device; + ino_t inum; +{ + register struct inode *ip; + + for (ip = ihashtbl[INOHASH(device, inum)];; ip = ip->i_next) { + if (ip == NULL) + return (NULL); + if (inum == ip->i_number && device == ip->i_dev) + return (ITOV(ip)); + } + /* NOTREACHED */ +} + +/* + * Use the device/inum pair to find the incore inode, and return a pointer + * to it. If it is in core, but locked, wait for it. + */ +struct vnode * +ufs_ihashget(device, inum) + dev_t device; + ino_t inum; +{ + register struct inode *ip; + struct vnode *vp; + + for (;;) + for (ip = ihashtbl[INOHASH(device, inum)];; ip = ip->i_next) { + if (ip == NULL) + return (NULL); + if (inum == ip->i_number && device == ip->i_dev) { + if (ip->i_flag & IN_LOCKED) { + ip->i_flag |= IN_WANTED; + sleep(ip, PINOD); + break; + } + vp = ITOV(ip); + if (!vget(vp, 1)) + return (vp); + break; + } + } + /* NOTREACHED */ +} + +/* + * Insert the inode into the hash table, and return it locked. + */ +void +ufs_ihashins(ip) + struct inode *ip; +{ + struct inode **ipp, *iq; + + ipp = &ihashtbl[INOHASH(ip->i_dev, ip->i_number)]; + if (iq = *ipp) + iq->i_prev = &ip->i_next; + ip->i_next = iq; + ip->i_prev = ipp; + *ipp = ip; + if (ip->i_flag & IN_LOCKED) + panic("ufs_ihashins: already locked"); + if (curproc) + ip->i_lockholder = curproc->p_pid; + else + ip->i_lockholder = -1; + ip->i_flag |= IN_LOCKED; +} + +/* + * Remove the inode from the hash table. + */ +void +ufs_ihashrem(ip) + register struct inode *ip; +{ + register struct inode *iq; + + if (iq = ip->i_next) + iq->i_prev = ip->i_prev; + *ip->i_prev = iq; +#ifdef DIAGNOSTIC + ip->i_next = NULL; + ip->i_prev = NULL; +#endif +} diff --git a/sys/ufs/ufs/ufs_inode.c b/sys/ufs/ufs/ufs_inode.c new file mode 100644 index 0000000..ac876f9 --- /dev/null +++ b/sys/ufs/ufs/ufs_inode.c @@ -0,0 +1,188 @@ +/* + * Copyright (c) 1991, 1993 + * The Regents of the University of California. All rights reserved. + * (c) UNIX System Laboratories, Inc. + * All or some portions of this file are derived from material licensed + * to the University of California by American Telephone and Telegraph + * Co. or Unix System Laboratories, Inc. and are reproduced herein with + * the permission of UNIX System Laboratories, Inc. 
+ * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * 3. All advertising materials mentioning features or use of this software + * must display the following acknowledgement: + * This product includes software developed by the University of + * California, Berkeley and its contributors. + * 4. Neither the name of the University nor the names of its contributors + * may be used to endorse or promote products derived from this software + * without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND + * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE + * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL + * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS + * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) + * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT + * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY + * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF + * SUCH DAMAGE. + * + * @(#)ufs_inode.c 8.4 (Berkeley) 1/21/94 + */ + +#include <sys/param.h> +#include <sys/systm.h> +#include <sys/proc.h> +#include <sys/vnode.h> +#include <sys/mount.h> +#include <sys/kernel.h> +#include <sys/malloc.h> + +#include <ufs/ufs/quota.h> +#include <ufs/ufs/inode.h> +#include <ufs/ufs/ufsmount.h> +#include <ufs/ufs/ufs_extern.h> + +u_long nextgennumber; /* Next generation number to assign. */ +int prtactive = 0; /* 1 => print out reclaim of active vnodes */ + +int +ufs_init() +{ + static int first = 1; + + if (!first) + return (0); + first = 0; + +#ifdef DIAGNOSTIC + if ((sizeof(struct inode) - 1) & sizeof(struct inode)) + printf("ufs_init: bad size %d\n", sizeof(struct inode)); +#endif + ufs_ihashinit(); + dqinit(); + return (0); +} + +/* + * Last reference to an inode. If necessary, write or delete it. + */ +int +ufs_inactive(ap) + struct vop_inactive_args /* { + struct vnode *a_vp; + } */ *ap; +{ + register struct vnode *vp = ap->a_vp; + register struct inode *ip = VTOI(vp); + struct timeval tv; + int mode, error; + extern int prtactive; + + if (prtactive && vp->v_usecount != 0) + vprint("ffs_inactive: pushing active", vp); + + /* Get rid of inodes related to stale file handles. 
*/ + if (ip->i_mode == 0) { + if ((vp->v_flag & VXLOCK) == 0) + vgone(vp); + return (0); + } + + error = 0; +#ifdef DIAGNOSTIC + if (VOP_ISLOCKED(vp)) + panic("ffs_inactive: locked inode"); + if (curproc) + ip->i_lockholder = curproc->p_pid; + else + ip->i_lockholder = -1; +#endif + ip->i_flag |= IN_LOCKED; + if (ip->i_nlink <= 0 && (vp->v_mount->mnt_flag & MNT_RDONLY) == 0) { +#ifdef QUOTA + if (!getinoquota(ip)) + (void)chkiq(ip, -1, NOCRED, 0); +#endif + error = VOP_TRUNCATE(vp, (off_t)0, 0, NOCRED, NULL); + ip->i_rdev = 0; + mode = ip->i_mode; + ip->i_mode = 0; + ip->i_flag |= IN_CHANGE | IN_UPDATE; + VOP_VFREE(vp, ip->i_number, mode); + } + if (ip->i_flag & (IN_ACCESS | IN_CHANGE | IN_MODIFIED | IN_UPDATE)) { + tv = time; + VOP_UPDATE(vp, &tv, &tv, 0); + } + VOP_UNLOCK(vp); + /* + * If we are done with the inode, reclaim it + * so that it can be reused immediately. + */ + if (vp->v_usecount == 0 && ip->i_mode == 0) + vgone(vp); + return (error); +} + +/* + * Reclaim an inode so that it can be used for other purposes. + */ +int +ufs_reclaim(ap) + struct vop_reclaim_args /* { + struct vnode *a_vp; + } */ *ap; +{ + register struct vnode *vp = ap->a_vp; + register struct inode *ip; + int i, type; + + if (prtactive && vp->v_usecount != 0) + vprint("ufs_reclaim: pushing active", vp); + /* + * Remove the inode from its hash chain. + */ + ip = VTOI(vp); + ufs_ihashrem(ip); + /* + * Purge old data structures associated with the inode. + */ + cache_purge(vp); + if (ip->i_devvp) { + vrele(ip->i_devvp); + ip->i_devvp = 0; + } +#ifdef QUOTA + for (i = 0; i < MAXQUOTAS; i++) { + if (ip->i_dquot[i] != NODQUOT) { + dqrele(vp, ip->i_dquot[i]); + ip->i_dquot[i] = NODQUOT; + } + } +#endif + switch (vp->v_mount->mnt_stat.f_type) { + case MOUNT_UFS: + type = M_FFSNODE; + break; + case MOUNT_MFS: + type = M_MFSNODE; + break; + case MOUNT_LFS: + type = M_LFSNODE; + break; + default: + panic("ufs_reclaim: not ufs file"); + } + FREE(vp->v_data, type); + vp->v_data = NULL; + return (0); +} diff --git a/sys/ufs/ufs/ufs_lockf.c b/sys/ufs/ufs/ufs_lockf.c new file mode 100644 index 0000000..cb9a737 --- /dev/null +++ b/sys/ufs/ufs/ufs_lockf.c @@ -0,0 +1,707 @@ +/* + * Copyright (c) 1982, 1986, 1989, 1993 + * The Regents of the University of California. All rights reserved. + * + * This code is derived from software contributed to Berkeley by + * Scooter Morris at Genentech Inc. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * 3. All advertising materials mentioning features or use of this software + * must display the following acknowledgement: + * This product includes software developed by the University of + * California, Berkeley and its contributors. + * 4. Neither the name of the University nor the names of its contributors + * may be used to endorse or promote products derived from this software + * without specific prior written permission. 
+ * + * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND + * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE + * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL + * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS + * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) + * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT + * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY + * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF + * SUCH DAMAGE. + * + * @(#)ufs_lockf.c 8.3 (Berkeley) 1/6/94 + */ + +#include <sys/param.h> +#include <sys/systm.h> +#include <sys/kernel.h> +#include <sys/file.h> +#include <sys/proc.h> +#include <sys/vnode.h> +#include <sys/malloc.h> +#include <sys/fcntl.h> + +#include <ufs/ufs/lockf.h> +#include <ufs/ufs/quota.h> +#include <ufs/ufs/inode.h> +#include <ufs/ufs/ufs_extern.h> + +/* + * This variable controls the maximum number of processes that will + * be checked in doing deadlock detection. + */ +int maxlockdepth = MAXDEPTH; + +#ifdef LOCKF_DEBUG +int lockf_debug = 0; +#endif + +#define NOLOCKF (struct lockf *)0 +#define SELF 0x1 +#define OTHERS 0x2 + +/* + * Set a byte-range lock. + */ +int +lf_setlock(lock) + register struct lockf *lock; +{ + register struct lockf *block; + struct inode *ip = lock->lf_inode; + struct lockf **prev, *overlap, *ltmp; + static char lockstr[] = "lockf"; + int ovcase, priority, needtolink, error; + +#ifdef LOCKF_DEBUG + if (lockf_debug & 1) + lf_print("lf_setlock", lock); +#endif /* LOCKF_DEBUG */ + + /* + * Set the priority + */ + priority = PLOCK; + if (lock->lf_type == F_WRLCK) + priority += 4; + priority |= PCATCH; + /* + * Scan lock list for this file looking for locks that would block us. + */ + while (block = lf_getblock(lock)) { + /* + * Free the structure and return if nonblocking. + */ + if ((lock->lf_flags & F_WAIT) == 0) { + FREE(lock, M_LOCKF); + return (EAGAIN); + } + /* + * We are blocked. Since flock style locks cover + * the whole file, there is no chance for deadlock. + * For byte-range locks we must check for deadlock. + * + * Deadlock detection is done by looking through the + * wait channels to see if there are any cycles that + * involve us. MAXDEPTH is set just to make sure we + * do not go off into neverland. + */ + if ((lock->lf_flags & F_POSIX) && + (block->lf_flags & F_POSIX)) { + register struct proc *wproc; + register struct lockf *waitblock; + int i = 0; + + /* The block is waiting on something */ + wproc = (struct proc *)block->lf_id; + while (wproc->p_wchan && + (wproc->p_wmesg == lockstr) && + (i++ < maxlockdepth)) { + waitblock = (struct lockf *)wproc->p_wchan; + /* Get the owner of the blocking lock */ + waitblock = waitblock->lf_next; + if ((waitblock->lf_flags & F_POSIX) == 0) + break; + wproc = (struct proc *)waitblock->lf_id; + if (wproc == (struct proc *)lock->lf_id) { + free(lock, M_LOCKF); + return (EDEADLK); + } + } + } + /* + * For flock type locks, we must first remove + * any shared locks that we hold before we sleep + * waiting for an exclusive lock. 
+ */ + if ((lock->lf_flags & F_FLOCK) && + lock->lf_type == F_WRLCK) { + lock->lf_type = F_UNLCK; + (void) lf_clearlock(lock); + lock->lf_type = F_WRLCK; + } + /* + * Add our lock to the blocked list and sleep until we're free. + * Remember who blocked us (for deadlock detection). + */ + lock->lf_next = block; + lf_addblock(block, lock); +#ifdef LOCKF_DEBUG + if (lockf_debug & 1) { + lf_print("lf_setlock: blocking on", block); + lf_printlist("lf_setlock", block); + } +#endif /* LOCKF_DEBUG */ + if (error = tsleep((caddr_t)lock, priority, lockstr, 0)) { + /* + * Delete ourselves from the waiting to lock list. + */ + for (block = lock->lf_next; + block != NOLOCKF; + block = block->lf_block) { + if (block->lf_block != lock) + continue; + block->lf_block = block->lf_block->lf_block; + break; + } + /* + * If we did not find ourselves on the list, but + * are still linked onto a lock list, then something + * is very wrong. + */ + if (block == NOLOCKF && lock->lf_next != NOLOCKF) + panic("lf_setlock: lost lock"); + free(lock, M_LOCKF); + return (error); + } + } + /* + * No blocks!! Add the lock. Note that we will + * downgrade or upgrade any overlapping locks this + * process already owns. + * + * Skip over locks owned by other processes. + * Handle any locks that overlap and are owned by ourselves. + */ + prev = &ip->i_lockf; + block = ip->i_lockf; + needtolink = 1; + for (;;) { + if (ovcase = lf_findoverlap(block, lock, SELF, &prev, &overlap)) + block = overlap->lf_next; + /* + * Six cases: + * 0) no overlap + * 1) overlap == lock + * 2) overlap contains lock + * 3) lock contains overlap + * 4) overlap starts before lock + * 5) overlap ends after lock + */ + switch (ovcase) { + case 0: /* no overlap */ + if (needtolink) { + *prev = lock; + lock->lf_next = overlap; + } + break; + + case 1: /* overlap == lock */ + /* + * If downgrading lock, others may be + * able to acquire it. + */ + if (lock->lf_type == F_RDLCK && + overlap->lf_type == F_WRLCK) + lf_wakelock(overlap); + overlap->lf_type = lock->lf_type; + FREE(lock, M_LOCKF); + lock = overlap; /* for debug output below */ + break; + + case 2: /* overlap contains lock */ + /* + * Check for common starting point and different types. + */ + if (overlap->lf_type == lock->lf_type) { + free(lock, M_LOCKF); + lock = overlap; /* for debug output below */ + break; + } + if (overlap->lf_start == lock->lf_start) { + *prev = lock; + lock->lf_next = overlap; + overlap->lf_start = lock->lf_end + 1; + } else + lf_split(overlap, lock); + lf_wakelock(overlap); + break; + + case 3: /* lock contains overlap */ + /* + * If downgrading lock, others may be able to + * acquire it, otherwise take the list. + */ + if (lock->lf_type == F_RDLCK && + overlap->lf_type == F_WRLCK) { + lf_wakelock(overlap); + } else { + ltmp = lock->lf_block; + lock->lf_block = overlap->lf_block; + lf_addblock(lock, ltmp); + } + /* + * Add the new lock if necessary and delete the overlap. + */ + if (needtolink) { + *prev = lock; + lock->lf_next = overlap->lf_next; + prev = &lock->lf_next; + needtolink = 0; + } else + *prev = overlap->lf_next; + free(overlap, M_LOCKF); + continue; + + case 4: /* overlap starts before lock */ + /* + * Add lock after overlap on the list. + */ + lock->lf_next = overlap->lf_next; + overlap->lf_next = lock; + overlap->lf_end = lock->lf_start - 1; + prev = &lock->lf_next; + lf_wakelock(overlap); + needtolink = 0; + continue; + + case 5: /* overlap ends after lock */ + /* + * Add the new lock before overlap. 
+ */ + if (needtolink) { + *prev = lock; + lock->lf_next = overlap; + } + overlap->lf_start = lock->lf_end + 1; + lf_wakelock(overlap); + break; + } + break; + } +#ifdef LOCKF_DEBUG + if (lockf_debug & 1) { + lf_print("lf_setlock: got the lock", lock); + lf_printlist("lf_setlock", lock); + } +#endif /* LOCKF_DEBUG */ + return (0); +} + +/* + * Remove a byte-range lock on an inode. + * + * Generally, find the lock (or an overlap to that lock) + * and remove it (or shrink it), then wakeup anyone we can. + */ +int +lf_clearlock(unlock) + register struct lockf *unlock; +{ + struct inode *ip = unlock->lf_inode; + register struct lockf *lf = ip->i_lockf; + struct lockf *overlap, **prev; + int ovcase; + + if (lf == NOLOCKF) + return (0); +#ifdef LOCKF_DEBUG + if (unlock->lf_type != F_UNLCK) + panic("lf_clearlock: bad type"); + if (lockf_debug & 1) + lf_print("lf_clearlock", unlock); +#endif /* LOCKF_DEBUG */ + prev = &ip->i_lockf; + while (ovcase = lf_findoverlap(lf, unlock, SELF, &prev, &overlap)) { + /* + * Wakeup the list of locks to be retried. + */ + lf_wakelock(overlap); + + switch (ovcase) { + + case 1: /* overlap == lock */ + *prev = overlap->lf_next; + FREE(overlap, M_LOCKF); + break; + + case 2: /* overlap contains lock: split it */ + if (overlap->lf_start == unlock->lf_start) { + overlap->lf_start = unlock->lf_end + 1; + break; + } + lf_split(overlap, unlock); + overlap->lf_next = unlock->lf_next; + break; + + case 3: /* lock contains overlap */ + *prev = overlap->lf_next; + lf = overlap->lf_next; + free(overlap, M_LOCKF); + continue; + + case 4: /* overlap starts before lock */ + overlap->lf_end = unlock->lf_start - 1; + prev = &overlap->lf_next; + lf = overlap->lf_next; + continue; + + case 5: /* overlap ends after lock */ + overlap->lf_start = unlock->lf_end + 1; + break; + } + break; + } +#ifdef LOCKF_DEBUG + if (lockf_debug & 1) + lf_printlist("lf_clearlock", unlock); +#endif /* LOCKF_DEBUG */ + return (0); +} + +/* + * Check whether there is a blocking lock, + * and if so return its process identifier. + */ +int +lf_getlock(lock, fl) + register struct lockf *lock; + register struct flock *fl; +{ + register struct lockf *block; + +#ifdef LOCKF_DEBUG + if (lockf_debug & 1) + lf_print("lf_getlock", lock); +#endif /* LOCKF_DEBUG */ + + if (block = lf_getblock(lock)) { + fl->l_type = block->lf_type; + fl->l_whence = SEEK_SET; + fl->l_start = block->lf_start; + if (block->lf_end == -1) + fl->l_len = 0; + else + fl->l_len = block->lf_end - block->lf_start + 1; + if (block->lf_flags & F_POSIX) + fl->l_pid = ((struct proc *)(block->lf_id))->p_pid; + else + fl->l_pid = -1; + } else { + fl->l_type = F_UNLCK; + } + return (0); +} + +/* + * Walk the list of locks for an inode and + * return the first blocking lock. + */ +struct lockf * +lf_getblock(lock) + register struct lockf *lock; +{ + struct lockf **prev, *overlap, *lf = lock->lf_inode->i_lockf; + int ovcase; + + prev = &lock->lf_inode->i_lockf; + while (ovcase = lf_findoverlap(lf, lock, OTHERS, &prev, &overlap)) { + /* + * We've found an overlap, see if it blocks us + */ + if ((lock->lf_type == F_WRLCK || overlap->lf_type == F_WRLCK)) + return (overlap); + /* + * Nope, point to the next one on the list and + * see if it blocks us + */ + lf = overlap->lf_next; + } + return (NOLOCKF); +} + +/* + * Walk the list of locks for an inode to + * find an overlapping lock (if any). + * + * NOTE: this returns only the FIRST overlapping lock. There + * may be more than one. 
+ */ +int +lf_findoverlap(lf, lock, type, prev, overlap) + register struct lockf *lf; + struct lockf *lock; + int type; + struct lockf ***prev; + struct lockf **overlap; +{ + off_t start, end; + + *overlap = lf; + if (lf == NOLOCKF) + return (0); +#ifdef LOCKF_DEBUG + if (lockf_debug & 2) + lf_print("lf_findoverlap: looking for overlap in", lock); +#endif /* LOCKF_DEBUG */ + start = lock->lf_start; + end = lock->lf_end; + while (lf != NOLOCKF) { + if (((type & SELF) && lf->lf_id != lock->lf_id) || + ((type & OTHERS) && lf->lf_id == lock->lf_id)) { + *prev = &lf->lf_next; + *overlap = lf = lf->lf_next; + continue; + } +#ifdef LOCKF_DEBUG + if (lockf_debug & 2) + lf_print("\tchecking", lf); +#endif /* LOCKF_DEBUG */ + /* + * OK, check for overlap + * + * Six cases: + * 0) no overlap + * 1) overlap == lock + * 2) overlap contains lock + * 3) lock contains overlap + * 4) overlap starts before lock + * 5) overlap ends after lock + */ + if ((lf->lf_end != -1 && start > lf->lf_end) || + (end != -1 && lf->lf_start > end)) { + /* Case 0 */ +#ifdef LOCKF_DEBUG + if (lockf_debug & 2) + printf("no overlap\n"); +#endif /* LOCKF_DEBUG */ + if ((type & SELF) && end != -1 && lf->lf_start > end) + return (0); + *prev = &lf->lf_next; + *overlap = lf = lf->lf_next; + continue; + } + if ((lf->lf_start == start) && (lf->lf_end == end)) { + /* Case 1 */ +#ifdef LOCKF_DEBUG + if (lockf_debug & 2) + printf("overlap == lock\n"); +#endif /* LOCKF_DEBUG */ + return (1); + } + if ((lf->lf_start <= start) && + (end != -1) && + ((lf->lf_end >= end) || (lf->lf_end == -1))) { + /* Case 2 */ +#ifdef LOCKF_DEBUG + if (lockf_debug & 2) + printf("overlap contains lock\n"); +#endif /* LOCKF_DEBUG */ + return (2); + } + if (start <= lf->lf_start && + (end == -1 || + (lf->lf_end != -1 && end >= lf->lf_end))) { + /* Case 3 */ +#ifdef LOCKF_DEBUG + if (lockf_debug & 2) + printf("lock contains overlap\n"); +#endif /* LOCKF_DEBUG */ + return (3); + } + if ((lf->lf_start < start) && + ((lf->lf_end >= start) || (lf->lf_end == -1))) { + /* Case 4 */ +#ifdef LOCKF_DEBUG + if (lockf_debug & 2) + printf("overlap starts before lock\n"); +#endif /* LOCKF_DEBUG */ + return (4); + } + if ((lf->lf_start > start) && + (end != -1) && + ((lf->lf_end > end) || (lf->lf_end == -1))) { + /* Case 5 */ +#ifdef LOCKF_DEBUG + if (lockf_debug & 2) + printf("overlap ends after lock\n"); +#endif /* LOCKF_DEBUG */ + return (5); + } + panic("lf_findoverlap: default"); + } + return (0); +} + +/* + * Add a lock to the end of the blocked list. + */ +void +lf_addblock(lock, blocked) + struct lockf *lock; + struct lockf *blocked; +{ + register struct lockf *lf; + + if (blocked == NOLOCKF) + return; +#ifdef LOCKF_DEBUG + if (lockf_debug & 2) { + lf_print("addblock: adding", blocked); + lf_print("to blocked list of", lock); + } +#endif /* LOCKF_DEBUG */ + if ((lf = lock->lf_block) == NOLOCKF) { + lock->lf_block = blocked; + return; + } + while (lf->lf_block != NOLOCKF) + lf = lf->lf_block; + lf->lf_block = blocked; + return; +} + +/* + * Split a lock and a contained region into + * two or three locks as necessary. + */ +void +lf_split(lock1, lock2) + register struct lockf *lock1; + register struct lockf *lock2; +{ + register struct lockf *splitlock; + +#ifdef LOCKF_DEBUG + if (lockf_debug & 2) { + lf_print("lf_split", lock1); + lf_print("splitting from", lock2); + } +#endif /* LOCKF_DEBUG */ + /* + * Check to see if spliting into only two pieces. 
+ */ + if (lock1->lf_start == lock2->lf_start) { + lock1->lf_start = lock2->lf_end + 1; + lock2->lf_next = lock1; + return; + } + if (lock1->lf_end == lock2->lf_end) { + lock1->lf_end = lock2->lf_start - 1; + lock2->lf_next = lock1->lf_next; + lock1->lf_next = lock2; + return; + } + /* + * Make a new lock consisting of the last part of + * the encompassing lock + */ + MALLOC(splitlock, struct lockf *, sizeof *splitlock, M_LOCKF, M_WAITOK); + bcopy((caddr_t)lock1, (caddr_t)splitlock, sizeof *splitlock); + splitlock->lf_start = lock2->lf_end + 1; + splitlock->lf_block = NOLOCKF; + lock1->lf_end = lock2->lf_start - 1; + /* + * OK, now link it in + */ + splitlock->lf_next = lock1->lf_next; + lock2->lf_next = splitlock; + lock1->lf_next = lock2; +} + +/* + * Wakeup a blocklist + */ +void +lf_wakelock(listhead) + struct lockf *listhead; +{ + register struct lockf *blocklist, *wakelock; + + blocklist = listhead->lf_block; + listhead->lf_block = NOLOCKF; + while (blocklist != NOLOCKF) { + wakelock = blocklist; + blocklist = blocklist->lf_block; + wakelock->lf_block = NOLOCKF; + wakelock->lf_next = NOLOCKF; +#ifdef LOCKF_DEBUG + if (lockf_debug & 2) + lf_print("lf_wakelock: awakening", wakelock); +#endif /* LOCKF_DEBUG */ + wakeup((caddr_t)wakelock); + } +} + +#ifdef LOCKF_DEBUG +/* + * Print out a lock. + */ +void +lf_print(tag, lock) + char *tag; + register struct lockf *lock; +{ + + printf("%s: lock 0x%lx for ", tag, lock); + if (lock->lf_flags & F_POSIX) + printf("proc %d", ((struct proc *)(lock->lf_id))->p_pid); + else + printf("id 0x%x", lock->lf_id); + printf(" in ino %d on dev <%d, %d>, %s, start %d, end %d", + lock->lf_inode->i_number, + major(lock->lf_inode->i_dev), + minor(lock->lf_inode->i_dev), + lock->lf_type == F_RDLCK ? "shared" : + lock->lf_type == F_WRLCK ? "exclusive" : + lock->lf_type == F_UNLCK ? "unlock" : + "unknown", lock->lf_start, lock->lf_end); + if (lock->lf_block) + printf(" block 0x%x\n", lock->lf_block); + else + printf("\n"); +} + +void +lf_printlist(tag, lock) + char *tag; + struct lockf *lock; +{ + register struct lockf *lf; + + printf("%s: Lock list for ino %d on dev <%d, %d>:\n", + tag, lock->lf_inode->i_number, + major(lock->lf_inode->i_dev), + minor(lock->lf_inode->i_dev)); + for (lf = lock->lf_inode->i_lockf; lf; lf = lf->lf_next) { + printf("\tlock 0x%lx for ", lf); + if (lf->lf_flags & F_POSIX) + printf("proc %d", ((struct proc *)(lf->lf_id))->p_pid); + else + printf("id 0x%x", lf->lf_id); + printf(", %s, start %d, end %d", + lf->lf_type == F_RDLCK ? "shared" : + lf->lf_type == F_WRLCK ? "exclusive" : + lf->lf_type == F_UNLCK ? "unlock" : + "unknown", lf->lf_start, lf->lf_end); + if (lf->lf_block) + printf(" block 0x%x\n", lf->lf_block); + else + printf("\n"); + } +} +#endif /* LOCKF_DEBUG */ diff --git a/sys/ufs/ufs/ufs_lookup.c b/sys/ufs/ufs/ufs_lookup.c new file mode 100644 index 0000000..87c6802 --- /dev/null +++ b/sys/ufs/ufs/ufs_lookup.c @@ -0,0 +1,970 @@ +/* + * Copyright (c) 1989, 1993 + * The Regents of the University of California. All rights reserved. + * (c) UNIX System Laboratories, Inc. + * All or some portions of this file are derived from material licensed + * to the University of California by American Telephone and Telegraph + * Co. or Unix System Laboratories, Inc. and are reproduced herein with + * the permission of UNIX System Laboratories, Inc. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. 
Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * 3. All advertising materials mentioning features or use of this software + * must display the following acknowledgement: + * This product includes software developed by the University of + * California, Berkeley and its contributors. + * 4. Neither the name of the University nor the names of its contributors + * may be used to endorse or promote products derived from this software + * without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND + * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE + * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL + * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS + * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) + * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT + * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY + * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF + * SUCH DAMAGE. + * + * @(#)ufs_lookup.c 8.6 (Berkeley) 4/1/94 + */ + +#include <sys/param.h> +#include <sys/namei.h> +#include <sys/buf.h> +#include <sys/file.h> +#include <sys/mount.h> +#include <sys/vnode.h> + +#include <ufs/ufs/quota.h> +#include <ufs/ufs/inode.h> +#include <ufs/ufs/dir.h> +#include <ufs/ufs/ufsmount.h> +#include <ufs/ufs/ufs_extern.h> + +struct nchstats nchstats; +#ifdef DIAGNOSTIC +int dirchk = 1; +#else +int dirchk = 0; +#endif + +#define FSFMT(vp) ((vp)->v_mount->mnt_maxsymlinklen <= 0) + +/* + * Convert a component of a pathname into a pointer to a locked inode. + * This is a very central and rather complicated routine. + * If the file system is not maintained in a strict tree hierarchy, + * this can result in a deadlock situation (see comments in code below). + * + * The cnp->cn_nameiop argument is LOOKUP, CREATE, RENAME, or DELETE depending + * on whether the name is to be looked up, created, renamed, or deleted. + * When CREATE, RENAME, or DELETE is specified, information usable in + * creating, renaming, or deleting a directory entry may be calculated. + * If flag has LOCKPARENT or'ed into it and the target of the pathname + * exists, lookup returns both the target and its parent directory locked. + * When creating or renaming and LOCKPARENT is specified, the target may + * not be ".". When deleting and LOCKPARENT is specified, the target may + * be "."., but the caller must check to ensure it does a vrele and vput + * instead of two vputs. 
+ * + * Overall outline of ufs_lookup: + * + * check accessibility of directory + * look for name in cache, if found, then if at end of path + * and deleting or creating, drop it, else return name + * search for name in directory, to found or notfound + * notfound: + * if creating, return locked directory, leaving info on available slots + * else return error + * found: + * if at end of path and deleting, return information to allow delete + * if at end of path and rewriting (RENAME and LOCKPARENT), lock target + * inode and return info to allow rewrite + * if not at end, add name to cache; if at end and neither creating + * nor deleting, add name to cache + */ +int +ufs_lookup(ap) + struct vop_lookup_args /* { + struct vnode *a_dvp; + struct vnode **a_vpp; + struct componentname *a_cnp; + } */ *ap; +{ + register struct vnode *vdp; /* vnode for directory being searched */ + register struct inode *dp; /* inode for directory being searched */ + struct buf *bp; /* a buffer of directory entries */ + register struct direct *ep; /* the current directory entry */ + int entryoffsetinblock; /* offset of ep in bp's buffer */ + enum {NONE, COMPACT, FOUND} slotstatus; + doff_t slotoffset; /* offset of area with free space */ + int slotsize; /* size of area at slotoffset */ + int slotfreespace; /* amount of space free in slot */ + int slotneeded; /* size of the entry we're seeking */ + int numdirpasses; /* strategy for directory search */ + doff_t endsearch; /* offset to end directory search */ + doff_t prevoff; /* prev entry dp->i_offset */ + struct vnode *pdp; /* saved dp during symlink work */ + struct vnode *tdp; /* returned by VFS_VGET */ + doff_t enduseful; /* pointer past last used dir slot */ + u_long bmask; /* block offset mask */ + int lockparent; /* 1 => lockparent flag is set */ + int wantparent; /* 1 => wantparent or lockparent flag */ + int namlen, error; + struct vnode **vpp = ap->a_vpp; + struct componentname *cnp = ap->a_cnp; + struct ucred *cred = cnp->cn_cred; + int flags = cnp->cn_flags; + int nameiop = cnp->cn_nameiop; + + bp = NULL; + slotoffset = -1; + *vpp = NULL; + vdp = ap->a_dvp; + dp = VTOI(vdp); + lockparent = flags & LOCKPARENT; + wantparent = flags & (LOCKPARENT|WANTPARENT); + + /* + * Check accessibility of directory. + */ + if ((dp->i_mode & IFMT) != IFDIR) + return (ENOTDIR); + if (error = VOP_ACCESS(vdp, VEXEC, cred, cnp->cn_proc)) + return (error); + + /* + * We now have a segment name to search for, and a directory to search. + * + * Before tediously performing a linear scan of the directory, + * check the name cache to see if the directory/name pair + * we are looking for is known already. + */ + if (error = cache_lookup(vdp, vpp, cnp)) { + int vpid; /* capability number of vnode */ + + if (error == ENOENT) + return (error); + /* + * Get the next vnode in the path. + * See comment below starting `Step through' for + * an explanation of the locking protocol. + */ + pdp = vdp; + dp = VTOI(*vpp); + vdp = *vpp; + vpid = vdp->v_id; + if (pdp == vdp) { /* lookup on "." */ + VREF(vdp); + error = 0; + } else if (flags & ISDOTDOT) { + VOP_UNLOCK(pdp); + error = vget(vdp, 1); + if (!error && lockparent && (flags & ISLASTCN)) + error = VOP_LOCK(pdp); + } else { + error = vget(vdp, 1); + if (!lockparent || error || !(flags & ISLASTCN)) + VOP_UNLOCK(pdp); + } + /* + * Check that the capability number did not change + * while we were waiting for the lock. 
+ */ + if (!error) { + if (vpid == vdp->v_id) + return (0); + vput(vdp); + if (lockparent && pdp != vdp && (flags & ISLASTCN)) + VOP_UNLOCK(pdp); + } + if (error = VOP_LOCK(pdp)) + return (error); + vdp = pdp; + dp = VTOI(pdp); + *vpp = NULL; + } + + /* + * Suppress search for slots unless creating + * file and at end of pathname, in which case + * we watch for a place to put the new file in + * case it doesn't already exist. + */ + slotstatus = FOUND; + slotfreespace = slotsize = slotneeded = 0; + if ((nameiop == CREATE || nameiop == RENAME) && + (flags & ISLASTCN)) { + slotstatus = NONE; + slotneeded = (sizeof(struct direct) - MAXNAMLEN + + cnp->cn_namelen + 3) &~ 3; + } + + /* + * If there is cached information on a previous search of + * this directory, pick up where we last left off. + * We cache only lookups as these are the most common + * and have the greatest payoff. Caching CREATE has little + * benefit as it usually must search the entire directory + * to determine that the entry does not exist. Caching the + * location of the last DELETE or RENAME has not reduced + * profiling time and hence has been removed in the interest + * of simplicity. + */ + bmask = VFSTOUFS(vdp->v_mount)->um_mountp->mnt_stat.f_iosize - 1; + if (nameiop != LOOKUP || dp->i_diroff == 0 || + dp->i_diroff > dp->i_size) { + entryoffsetinblock = 0; + dp->i_offset = 0; + numdirpasses = 1; + } else { + dp->i_offset = dp->i_diroff; + if ((entryoffsetinblock = dp->i_offset & bmask) && + (error = VOP_BLKATOFF(vdp, (off_t)dp->i_offset, NULL, &bp))) + return (error); + numdirpasses = 2; + nchstats.ncs_2passes++; + } + prevoff = dp->i_offset; + endsearch = roundup(dp->i_size, DIRBLKSIZ); + enduseful = 0; + +searchloop: + while (dp->i_offset < endsearch) { + /* + * If necessary, get the next directory block. + */ + if ((dp->i_offset & bmask) == 0) { + if (bp != NULL) + brelse(bp); + if (error = + VOP_BLKATOFF(vdp, (off_t)dp->i_offset, NULL, &bp)) + return (error); + entryoffsetinblock = 0; + } + /* + * If still looking for a slot, and at a DIRBLKSIZE + * boundary, have to start looking for free space again. + */ + if (slotstatus == NONE && + (entryoffsetinblock & (DIRBLKSIZ - 1)) == 0) { + slotoffset = -1; + slotfreespace = 0; + } + /* + * Get pointer to next entry. + * Full validation checks are slow, so we only check + * enough to insure forward progress through the + * directory. Complete checks can be run by patching + * "dirchk" to be true. + */ + ep = (struct direct *)((char *)bp->b_data + entryoffsetinblock); + if (ep->d_reclen == 0 || + dirchk && ufs_dirbadentry(vdp, ep, entryoffsetinblock)) { + int i; + + ufs_dirbad(dp, dp->i_offset, "mangled entry"); + i = DIRBLKSIZ - (entryoffsetinblock & (DIRBLKSIZ - 1)); + dp->i_offset += i; + entryoffsetinblock += i; + continue; + } + + /* + * If an appropriate sized slot has not yet been found, + * check to see if one is available. Also accumulate space + * in the current block so that we can determine if + * compaction is viable. + */ + if (slotstatus != FOUND) { + int size = ep->d_reclen; + + if (ep->d_ino != 0) + size -= DIRSIZ(FSFMT(vdp), ep); + if (size > 0) { + if (size >= slotneeded) { + slotstatus = FOUND; + slotoffset = dp->i_offset; + slotsize = ep->d_reclen; + } else if (slotstatus == NONE) { + slotfreespace += size; + if (slotoffset == -1) + slotoffset = dp->i_offset; + if (slotfreespace >= slotneeded) { + slotstatus = COMPACT; + slotsize = dp->i_offset + + ep->d_reclen - slotoffset; + } + } + } + } + + /* + * Check for a name match. 
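
The slot-search arithmetic above sizes each candidate slot as the fixed directory-entry header plus the name and its terminating NUL, rounded up to a 4-byte boundary (the "&~ 3" expression). A small userland sketch of that calculation follows; struct dirhdr and dir_recsize are illustrative stand-ins, not names from this source.

#include <stdint.h>
#include <stdio.h>
#include <string.h>

/* Illustrative stand-in for the fixed part of a directory entry. */
struct dirhdr {
	uint32_t d_ino;		/* inode number, 0 means the slot is free */
	uint16_t d_reclen;	/* total length of this record */
	uint8_t  d_type;	/* file type */
	uint8_t  d_namlen;	/* length of the name that follows */
};

/*
 * Smallest record that can hold a name of the given length:
 * header + name + NUL, rounded up to a 4-byte boundary, which is
 * what the "(... + cn_namelen + 3) &~ 3" expression computes.
 */
static size_t
dir_recsize(size_t namelen)
{
	return ((sizeof(struct dirhdr) + namelen + 1 + 3) & ~(size_t)3);
}

int
main(void)
{
	const char *names[] = { "a", "passwd", "averylongfilename" };
	size_t i;

	for (i = 0; i < sizeof(names) / sizeof(names[0]); i++)
		printf("%-20s needs %zu bytes\n",
		    names[i], dir_recsize(strlen(names[i])));
	return (0);
}
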
+ */ + if (ep->d_ino) { +# if (BYTE_ORDER == LITTLE_ENDIAN) + if (vdp->v_mount->mnt_maxsymlinklen > 0) + namlen = ep->d_namlen; + else + namlen = ep->d_type; +# else + namlen = ep->d_namlen; +# endif + if (namlen == cnp->cn_namelen && + !bcmp(cnp->cn_nameptr, ep->d_name, + (unsigned)namlen)) { + /* + * Save directory entry's inode number and + * reclen in ndp->ni_ufs area, and release + * directory buffer. + */ + dp->i_ino = ep->d_ino; + dp->i_reclen = ep->d_reclen; + brelse(bp); + goto found; + } + } + prevoff = dp->i_offset; + dp->i_offset += ep->d_reclen; + entryoffsetinblock += ep->d_reclen; + if (ep->d_ino) + enduseful = dp->i_offset; + } +/* notfound: */ + /* + * If we started in the middle of the directory and failed + * to find our target, we must check the beginning as well. + */ + if (numdirpasses == 2) { + numdirpasses--; + dp->i_offset = 0; + endsearch = dp->i_diroff; + goto searchloop; + } + if (bp != NULL) + brelse(bp); + /* + * If creating, and at end of pathname and current + * directory has not been removed, then can consider + * allowing file to be created. + */ + if ((nameiop == CREATE || nameiop == RENAME) && + (flags & ISLASTCN) && dp->i_nlink != 0) { + /* + * Access for write is interpreted as allowing + * creation of files in the directory. + */ + if (error = VOP_ACCESS(vdp, VWRITE, cred, cnp->cn_proc)) + return (error); + /* + * Return an indication of where the new directory + * entry should be put. If we didn't find a slot, + * then set dp->i_count to 0 indicating + * that the new slot belongs at the end of the + * directory. If we found a slot, then the new entry + * can be put in the range from dp->i_offset to + * dp->i_offset + dp->i_count. + */ + if (slotstatus == NONE) { + dp->i_offset = roundup(dp->i_size, DIRBLKSIZ); + dp->i_count = 0; + enduseful = dp->i_offset; + } else { + dp->i_offset = slotoffset; + dp->i_count = slotsize; + if (enduseful < slotoffset + slotsize) + enduseful = slotoffset + slotsize; + } + dp->i_endoff = roundup(enduseful, DIRBLKSIZ); + dp->i_flag |= IN_CHANGE | IN_UPDATE; + /* + * We return with the directory locked, so that + * the parameters we set up above will still be + * valid if we actually decide to do a direnter(). + * We return ni_vp == NULL to indicate that the entry + * does not currently exist; we leave a pointer to + * the (locked) directory inode in ndp->ni_dvp. + * The pathname buffer is saved so that the name + * can be obtained later. + * + * NB - if the directory is unlocked, then this + * information cannot be used. + */ + cnp->cn_flags |= SAVENAME; + if (!lockparent) + VOP_UNLOCK(vdp); + return (EJUSTRETURN); + } + /* + * Insert name into cache (as non-existent) if appropriate. + */ + if ((cnp->cn_flags & MAKEENTRY) && nameiop != CREATE) + cache_enter(vdp, *vpp, cnp); + return (ENOENT); + +found: + if (numdirpasses == 2) + nchstats.ncs_pass2++; + /* + * Check that directory length properly reflects presence + * of this entry. + */ + if (entryoffsetinblock + DIRSIZ(FSFMT(vdp), ep) > dp->i_size) { + ufs_dirbad(dp, dp->i_offset, "i_size too small"); + dp->i_size = entryoffsetinblock + DIRSIZ(FSFMT(vdp), ep); + dp->i_flag |= IN_CHANGE | IN_UPDATE; + } + + /* + * Found component in pathname. + * If the final component of path name, save information + * in the cache as to where the entry was found. + */ + if ((flags & ISLASTCN) && nameiop == LOOKUP) + dp->i_diroff = dp->i_offset &~ (DIRBLKSIZ - 1); + + /* + * If deleting, and at end of pathname, return + * parameters which can be used to remove file. 
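
The BYTE_ORDER conditional above exists because the old (4.2BSD) directory format stored the name length as a 16-bit field in the bytes that the new format splits into d_type and d_namlen; on a little-endian machine the old length therefore shows up in d_type. A rough demonstration, using simplified header layouts that are assumptions for illustration only:

#include <stdint.h>
#include <stdio.h>
#include <string.h>

/* New format splits the old 16-bit name-length field into type + namlen. */
struct newhdr { uint32_t d_ino; uint16_t d_reclen; uint8_t d_type; uint8_t d_namlen; };
struct oldhdr { uint32_t d_ino; uint16_t d_reclen; uint16_t d_namlen; };

int
main(void)
{
	struct oldhdr o = { 2, 12, 5 };	/* old-format entry, name length 5 */
	struct newhdr n;

	memcpy(&n, &o, sizeof(n));
	/*
	 * On a little-endian host the old name length lands in d_type
	 * (prints "d_type=5 d_namlen=0"); on big-endian its zero high
	 * byte lands there instead, so d_namlen is still correct.
	 * That is what the #if above compensates for.
	 */
	printf("d_type=%u d_namlen=%u\n", n.d_type, n.d_namlen);
	return (0);
}
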
+ * If the wantparent flag isn't set, we return only + * the directory (in ndp->ni_dvp), otherwise we go + * on and lock the inode, being careful with ".". + */ + if (nameiop == DELETE && (flags & ISLASTCN)) { + /* + * Write access to directory required to delete files. + */ + if (error = VOP_ACCESS(vdp, VWRITE, cred, cnp->cn_proc)) + return (error); + /* + * Return pointer to current entry in dp->i_offset, + * and distance past previous entry (if there + * is a previous entry in this block) in dp->i_count. + * Save directory inode pointer in ndp->ni_dvp for dirremove(). + */ + if ((dp->i_offset & (DIRBLKSIZ - 1)) == 0) + dp->i_count = 0; + else + dp->i_count = dp->i_offset - prevoff; + if (dp->i_number == dp->i_ino) { + VREF(vdp); + *vpp = vdp; + return (0); + } + if (error = VFS_VGET(vdp->v_mount, dp->i_ino, &tdp)) + return (error); + /* + * If directory is "sticky", then user must own + * the directory, or the file in it, else she + * may not delete it (unless she's root). This + * implements append-only directories. + */ + if ((dp->i_mode & ISVTX) && + cred->cr_uid != 0 && + cred->cr_uid != dp->i_uid && + VTOI(tdp)->i_uid != cred->cr_uid) { + vput(tdp); + return (EPERM); + } + *vpp = tdp; + if (!lockparent) + VOP_UNLOCK(vdp); + return (0); + } + + /* + * If rewriting (RENAME), return the inode and the + * information required to rewrite the present directory + * Must get inode of directory entry to verify it's a + * regular file, or empty directory. + */ + if (nameiop == RENAME && wantparent && + (flags & ISLASTCN)) { + if (error = VOP_ACCESS(vdp, VWRITE, cred, cnp->cn_proc)) + return (error); + /* + * Careful about locking second inode. + * This can only occur if the target is ".". + */ + if (dp->i_number == dp->i_ino) + return (EISDIR); + if (error = VFS_VGET(vdp->v_mount, dp->i_ino, &tdp)) + return (error); + *vpp = tdp; + cnp->cn_flags |= SAVENAME; + if (!lockparent) + VOP_UNLOCK(vdp); + return (0); + } + + /* + * Step through the translation in the name. We do not `vput' the + * directory because we may need it again if a symbolic link + * is relative to the current directory. Instead we save it + * unlocked as "pdp". We must get the target inode before unlocking + * the directory to insure that the inode will not be removed + * before we get it. We prevent deadlock by always fetching + * inodes from the root, moving down the directory tree. Thus + * when following backward pointers ".." we must unlock the + * parent directory before getting the requested directory. + * There is a potential race condition here if both the current + * and parent directories are removed before the VFS_VGET for the + * inode associated with ".." returns. We hope that this occurs + * infrequently since we cannot avoid this race condition without + * implementing a sophisticated deadlock detection algorithm. + * Note also that this simple deadlock detection scheme will not + * work if the file system has any hard links other than ".." + * that point backwards in the directory structure. + */ + pdp = vdp; + if (flags & ISDOTDOT) { + VOP_UNLOCK(pdp); /* race to get the inode */ + if (error = VFS_VGET(vdp->v_mount, dp->i_ino, &tdp)) { + VOP_LOCK(pdp); + return (error); + } + if (lockparent && (flags & ISLASTCN) && + (error = VOP_LOCK(pdp))) { + vput(tdp); + return (error); + } + *vpp = tdp; + } else if (dp->i_number == dp->i_ino) { + VREF(vdp); /* we want ourself, ie "." 
*/ + *vpp = vdp; + } else { + if (error = VFS_VGET(vdp->v_mount, dp->i_ino, &tdp)) + return (error); + if (!lockparent || !(flags & ISLASTCN)) + VOP_UNLOCK(pdp); + *vpp = tdp; + } + + /* + * Insert name into cache if appropriate. + */ + if (cnp->cn_flags & MAKEENTRY) + cache_enter(vdp, *vpp, cnp); + return (0); +} + +void +ufs_dirbad(ip, offset, how) + struct inode *ip; + doff_t offset; + char *how; +{ + struct mount *mp; + + mp = ITOV(ip)->v_mount; + (void)printf("%s: bad dir ino %d at offset %d: %s\n", + mp->mnt_stat.f_mntonname, ip->i_number, offset, how); + if ((mp->mnt_stat.f_flags & MNT_RDONLY) == 0) + panic("bad dir"); +} + +/* + * Do consistency checking on a directory entry: + * record length must be multiple of 4 + * entry must fit in rest of its DIRBLKSIZ block + * record must be large enough to contain entry + * name is not longer than MAXNAMLEN + * name must be as long as advertised, and null terminated + */ +int +ufs_dirbadentry(dp, ep, entryoffsetinblock) + struct vnode *dp; + register struct direct *ep; + int entryoffsetinblock; +{ + register int i; + int namlen; + +# if (BYTE_ORDER == LITTLE_ENDIAN) + if (dp->v_mount->mnt_maxsymlinklen > 0) + namlen = ep->d_namlen; + else + namlen = ep->d_type; +# else + namlen = ep->d_namlen; +# endif + if ((ep->d_reclen & 0x3) != 0 || + ep->d_reclen > DIRBLKSIZ - (entryoffsetinblock & (DIRBLKSIZ - 1)) || + ep->d_reclen < DIRSIZ(FSFMT(dp), ep) || namlen > MAXNAMLEN) { + /*return (1); */ + printf("First bad\n"); + goto bad; + } + for (i = 0; i < namlen; i++) + if (ep->d_name[i] == '\0') { + /*return (1); */ + printf("Second bad\n"); + goto bad; + } + if (ep->d_name[i]) + goto bad; + return (ep->d_name[i]); +bad: + return(1); +} + +/* + * Write a directory entry after a call to namei, using the parameters + * that it left in nameidata. The argument ip is the inode which the new + * directory entry will refer to. Dvp is a pointer to the directory to + * be written, which was left locked by namei. Remaining parameters + * (dp->i_offset, dp->i_count) indicate how the space for the new + * entry is to be obtained. + */ +int +ufs_direnter(ip, dvp, cnp) + struct inode *ip; + struct vnode *dvp; + register struct componentname *cnp; +{ + register struct direct *ep, *nep; + register struct inode *dp; + struct buf *bp; + struct direct newdir; + struct iovec aiov; + struct uio auio; + u_int dsize; + int error, loc, newentrysize, spacefree; + char *dirbuf; + +#ifdef DIAGNOSTIC + if ((cnp->cn_flags & SAVENAME) == 0) + panic("direnter: missing name"); +#endif + dp = VTOI(dvp); + newdir.d_ino = ip->i_number; + newdir.d_namlen = cnp->cn_namelen; + bcopy(cnp->cn_nameptr, newdir.d_name, (unsigned)cnp->cn_namelen + 1); + if (dvp->v_mount->mnt_maxsymlinklen > 0) + newdir.d_type = IFTODT(ip->i_mode); + else { + newdir.d_type = 0; +# if (BYTE_ORDER == LITTLE_ENDIAN) + { u_char tmp = newdir.d_namlen; + newdir.d_namlen = newdir.d_type; + newdir.d_type = tmp; } +# endif + } + newentrysize = DIRSIZ(FSFMT(dvp), &newdir); + if (dp->i_count == 0) { + /* + * If dp->i_count is 0, then namei could find no + * space in the directory. Here, dp->i_offset will + * be on a directory block boundary and we will write the + * new entry into a fresh block. 
+ */ + if (dp->i_offset & (DIRBLKSIZ - 1)) + panic("ufs_direnter: newblk"); + auio.uio_offset = dp->i_offset; + newdir.d_reclen = DIRBLKSIZ; + auio.uio_resid = newentrysize; + aiov.iov_len = newentrysize; + aiov.iov_base = (caddr_t)&newdir; + auio.uio_iov = &aiov; + auio.uio_iovcnt = 1; + auio.uio_rw = UIO_WRITE; + auio.uio_segflg = UIO_SYSSPACE; + auio.uio_procp = (struct proc *)0; + error = VOP_WRITE(dvp, &auio, IO_SYNC, cnp->cn_cred); + if (DIRBLKSIZ > + VFSTOUFS(dvp->v_mount)->um_mountp->mnt_stat.f_bsize) + /* XXX should grow with balloc() */ + panic("ufs_direnter: frag size"); + else if (!error) { + dp->i_size = roundup(dp->i_size, DIRBLKSIZ); + dp->i_flag |= IN_CHANGE; + } + return (error); + } + + /* + * If dp->i_count is non-zero, then namei found space + * for the new entry in the range dp->i_offset to + * dp->i_offset + dp->i_count in the directory. + * To use this space, we may have to compact the entries located + * there, by copying them together towards the beginning of the + * block, leaving the free space in one usable chunk at the end. + */ + + /* + * Increase size of directory if entry eats into new space. + * This should never push the size past a new multiple of + * DIRBLKSIZE. + * + * N.B. - THIS IS AN ARTIFACT OF 4.2 AND SHOULD NEVER HAPPEN. + */ + if (dp->i_offset + dp->i_count > dp->i_size) + dp->i_size = dp->i_offset + dp->i_count; + /* + * Get the block containing the space for the new directory entry. + */ + if (error = VOP_BLKATOFF(dvp, (off_t)dp->i_offset, &dirbuf, &bp)) + return (error); + /* + * Find space for the new entry. In the simple case, the entry at + * offset base will have the space. If it does not, then namei + * arranged that compacting the region dp->i_offset to + * dp->i_offset + dp->i_count would yield the + * space. + */ + ep = (struct direct *)dirbuf; + dsize = DIRSIZ(FSFMT(dvp), ep); + spacefree = ep->d_reclen - dsize; + for (loc = ep->d_reclen; loc < dp->i_count; ) { + nep = (struct direct *)(dirbuf + loc); + if (ep->d_ino) { + /* trim the existing slot */ + ep->d_reclen = dsize; + ep = (struct direct *)((char *)ep + dsize); + } else { + /* overwrite; nothing there; header is ours */ + spacefree += dsize; + } + dsize = DIRSIZ(FSFMT(dvp), nep); + spacefree += nep->d_reclen - dsize; + loc += nep->d_reclen; + bcopy((caddr_t)nep, (caddr_t)ep, dsize); + } + /* + * Update the pointer fields in the previous entry (if any), + * copy in the new entry, and write out the block. + */ + if (ep->d_ino == 0) { + if (spacefree + dsize < newentrysize) + panic("ufs_direnter: compact1"); + newdir.d_reclen = spacefree + dsize; + } else { + if (spacefree < newentrysize) + panic("ufs_direnter: compact2"); + newdir.d_reclen = spacefree; + ep->d_reclen = dsize; + ep = (struct direct *)((char *)ep + dsize); + } + bcopy((caddr_t)&newdir, (caddr_t)ep, (u_int)newentrysize); + error = VOP_BWRITE(bp); + dp->i_flag |= IN_CHANGE | IN_UPDATE; + if (!error && dp->i_endoff && dp->i_endoff < dp->i_size) + error = VOP_TRUNCATE(dvp, (off_t)dp->i_endoff, IO_SYNC, + cnp->cn_cred, cnp->cn_proc); + return (error); +} + +/* + * Remove a directory entry after a call to namei, using + * the parameters which it left in nameidata. The entry + * dp->i_offset contains the offset into the directory of the + * entry to be eliminated. The dp->i_count field contains the + * size of the previous record in the directory. If this + * is 0, the first entry is being deleted, so we need only + * zero the inode number to mark the entry as free. 
If the + * entry is not the first in the directory, we must reclaim + * the space of the now empty record by adding the record size + * to the size of the previous entry. + */ +int +ufs_dirremove(dvp, cnp) + struct vnode *dvp; + struct componentname *cnp; +{ + register struct inode *dp; + struct direct *ep; + struct buf *bp; + int error; + + dp = VTOI(dvp); + if (dp->i_count == 0) { + /* + * First entry in block: set d_ino to zero. + */ + if (error = + VOP_BLKATOFF(dvp, (off_t)dp->i_offset, (char **)&ep, &bp)) + return (error); + ep->d_ino = 0; + error = VOP_BWRITE(bp); + dp->i_flag |= IN_CHANGE | IN_UPDATE; + return (error); + } + /* + * Collapse new free space into previous entry. + */ + if (error = VOP_BLKATOFF(dvp, (off_t)(dp->i_offset - dp->i_count), + (char **)&ep, &bp)) + return (error); + ep->d_reclen += dp->i_reclen; + error = VOP_BWRITE(bp); + dp->i_flag |= IN_CHANGE | IN_UPDATE; + return (error); +} + +/* + * Rewrite an existing directory entry to point at the inode + * supplied. The parameters describing the directory entry are + * set up by a call to namei. + */ +int +ufs_dirrewrite(dp, ip, cnp) + struct inode *dp, *ip; + struct componentname *cnp; +{ + struct buf *bp; + struct direct *ep; + struct vnode *vdp = ITOV(dp); + int error; + + if (error = VOP_BLKATOFF(vdp, (off_t)dp->i_offset, (char **)&ep, &bp)) + return (error); + ep->d_ino = ip->i_number; + if (vdp->v_mount->mnt_maxsymlinklen > 0) + ep->d_type = IFTODT(ip->i_mode); + error = VOP_BWRITE(bp); + dp->i_flag |= IN_CHANGE | IN_UPDATE; + return (error); +} + +/* + * Check if a directory is empty or not. + * Inode supplied must be locked. + * + * Using a struct dirtemplate here is not precisely + * what we want, but better than using a struct direct. + * + * NB: does not handle corrupted directories. + */ +int +ufs_dirempty(ip, parentino, cred) + register struct inode *ip; + ino_t parentino; + struct ucred *cred; +{ + register off_t off; + struct dirtemplate dbuf; + register struct direct *dp = (struct direct *)&dbuf; + int error, count, namlen; +#define MINDIRSIZ (sizeof (struct dirtemplate) / 2) + + for (off = 0; off < ip->i_size; off += dp->d_reclen) { + error = vn_rdwr(UIO_READ, ITOV(ip), (caddr_t)dp, MINDIRSIZ, off, + UIO_SYSSPACE, IO_NODELOCKED, cred, &count, (struct proc *)0); + /* + * Since we read MINDIRSIZ, residual must + * be 0 unless we're at end of file. + */ + if (error || count != 0) + return (0); + /* avoid infinite loops */ + if (dp->d_reclen == 0) + return (0); + /* skip empty entries */ + if (dp->d_ino == 0) + continue; + /* accept only "." and ".." */ +# if (BYTE_ORDER == LITTLE_ENDIAN) + if (ITOV(ip)->v_mount->mnt_maxsymlinklen > 0) + namlen = dp->d_namlen; + else + namlen = dp->d_type; +# else + namlen = dp->d_namlen; +# endif + if (namlen > 2) + return (0); + if (dp->d_name[0] != '.') + return (0); + /* + * At this point namlen must be 1 or 2. + * 1 implies ".", 2 implies ".." if second + * char is also "." + */ + if (namlen == 1) + continue; + if (dp->d_name[1] == '.' && dp->d_ino == parentino) + continue; + return (0); + } + return (1); +} + +/* + * Check if source directory is in the path of the target directory. + * Target is supplied locked, source is unlocked. + * The target is always vput before returning. 
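
ufs_dirremove above frees an entry in one of two ways: if it is first in its DIRBLKSIZ block it just clears d_ino, otherwise it folds the record length into the preceding entry. A minimal userland model of the same idea, with dir_remove and struct dirhdr as hypothetical names:

#include <stdint.h>
#include <stdio.h>
#include <string.h>

struct dirhdr {			/* illustrative fixed entry header */
	uint32_t d_ino;
	uint16_t d_reclen;
	uint8_t  d_type;
	uint8_t  d_namlen;
};

/*
 * Remove the entry at "offset" within one directory block.  "prevlen"
 * is the distance back to the previous entry (0 if this entry starts
 * the block), corresponding to dp->i_count in the code above.
 */
static void
dir_remove(char *block, unsigned offset, unsigned prevlen)
{
	struct dirhdr *ep = (struct dirhdr *)(block + offset);

	if (prevlen == 0) {
		ep->d_ino = 0;			/* first in block: mark free */
	} else {
		struct dirhdr *prev =
		    (struct dirhdr *)(block + offset - prevlen);
		prev->d_reclen += ep->d_reclen;	/* absorb the freed record */
	}
}

int
main(void)
{
	char block[512];
	struct dirhdr *e1 = (struct dirhdr *)block;
	struct dirhdr *e2 = (struct dirhdr *)(block + 16);

	memset(block, 0, sizeof(block));
	e1->d_ino = 2; e1->d_reclen = 16;
	e2->d_ino = 5; e2->d_reclen = sizeof(block) - 16;

	dir_remove(block, 16, e1->d_reclen);	/* delete the second entry */
	printf("first entry now spans %u bytes\n", e1->d_reclen);
	return (0);
}
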
+ */ +int +ufs_checkpath(source, target, cred) + struct inode *source, *target; + struct ucred *cred; +{ + struct vnode *vp; + int error, rootino, namlen; + struct dirtemplate dirbuf; + + vp = ITOV(target); + if (target->i_number == source->i_number) { + error = EEXIST; + goto out; + } + rootino = ROOTINO; + error = 0; + if (target->i_number == rootino) + goto out; + + for (;;) { + if (vp->v_type != VDIR) { + error = ENOTDIR; + break; + } + error = vn_rdwr(UIO_READ, vp, (caddr_t)&dirbuf, + sizeof (struct dirtemplate), (off_t)0, UIO_SYSSPACE, + IO_NODELOCKED, cred, (int *)0, (struct proc *)0); + if (error != 0) + break; +# if (BYTE_ORDER == LITTLE_ENDIAN) + if (vp->v_mount->mnt_maxsymlinklen > 0) + namlen = dirbuf.dotdot_namlen; + else + namlen = dirbuf.dotdot_type; +# else + namlen = dirbuf.dotdot_namlen; +# endif + if (namlen != 2 || + dirbuf.dotdot_name[0] != '.' || + dirbuf.dotdot_name[1] != '.') { + error = ENOTDIR; + break; + } + if (dirbuf.dotdot_ino == source->i_number) { + error = EINVAL; + break; + } + if (dirbuf.dotdot_ino == rootino) + break; + vput(vp); + if (error = VFS_VGET(vp->v_mount, dirbuf.dotdot_ino, &vp)) { + vp = NULL; + break; + } + } + +out: + if (error == ENOTDIR) + printf("checkpath: .. not a directory\n"); + if (vp != NULL) + vput(vp); + return (error); +} diff --git a/sys/ufs/ufs/ufs_quota.c b/sys/ufs/ufs/ufs_quota.c new file mode 100644 index 0000000..15cb1cf --- /dev/null +++ b/sys/ufs/ufs/ufs_quota.c @@ -0,0 +1,938 @@ +/* + * Copyright (c) 1982, 1986, 1990, 1993 + * The Regents of the University of California. All rights reserved. + * + * This code is derived from software contributed to Berkeley by + * Robert Elz at The University of Melbourne. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * 3. All advertising materials mentioning features or use of this software + * must display the following acknowledgement: + * This product includes software developed by the University of + * California, Berkeley and its contributors. + * 4. Neither the name of the University nor the names of its contributors + * may be used to endorse or promote products derived from this software + * without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND + * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE + * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL + * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS + * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) + * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT + * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY + * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF + * SUCH DAMAGE. 
+ * + * @(#)ufs_quota.c 8.2 (Berkeley) 12/30/93 + */ +#include <sys/param.h> +#include <sys/kernel.h> +#include <sys/systm.h> +#include <sys/namei.h> +#include <sys/malloc.h> +#include <sys/file.h> +#include <sys/proc.h> +#include <sys/vnode.h> +#include <sys/mount.h> + +#include <ufs/ufs/quota.h> +#include <ufs/ufs/inode.h> +#include <ufs/ufs/ufsmount.h> +#include <ufs/ufs/ufs_extern.h> + +/* + * Quota name to error message mapping. + */ +static char *quotatypes[] = INITQFNAMES; + +/* + * Set up the quotas for an inode. + * + * This routine completely defines the semantics of quotas. + * If other criterion want to be used to establish quotas, the + * MAXQUOTAS value in quotas.h should be increased, and the + * additional dquots set up here. + */ +int +getinoquota(ip) + register struct inode *ip; +{ + struct ufsmount *ump; + struct vnode *vp = ITOV(ip); + int error; + + ump = VFSTOUFS(vp->v_mount); + /* + * Set up the user quota based on file uid. + * EINVAL means that quotas are not enabled. + */ + if (ip->i_dquot[USRQUOTA] == NODQUOT && + (error = + dqget(vp, ip->i_uid, ump, USRQUOTA, &ip->i_dquot[USRQUOTA])) && + error != EINVAL) + return (error); + /* + * Set up the group quota based on file gid. + * EINVAL means that quotas are not enabled. + */ + if (ip->i_dquot[GRPQUOTA] == NODQUOT && + (error = + dqget(vp, ip->i_gid, ump, GRPQUOTA, &ip->i_dquot[GRPQUOTA])) && + error != EINVAL) + return (error); + return (0); +} + +/* + * Update disk usage, and take corrective action. + */ +int +chkdq(ip, change, cred, flags) + register struct inode *ip; + long change; + struct ucred *cred; + int flags; +{ + register struct dquot *dq; + register int i; + int ncurblocks, error; + +#ifdef DIAGNOSTIC + if ((flags & CHOWN) == 0) + chkdquot(ip); +#endif + if (change == 0) + return (0); + if (change < 0) { + for (i = 0; i < MAXQUOTAS; i++) { + if ((dq = ip->i_dquot[i]) == NODQUOT) + continue; + while (dq->dq_flags & DQ_LOCK) { + dq->dq_flags |= DQ_WANT; + sleep((caddr_t)dq, PINOD+1); + } + ncurblocks = dq->dq_curblocks + change; + if (ncurblocks >= 0) + dq->dq_curblocks = ncurblocks; + else + dq->dq_curblocks = 0; + dq->dq_flags &= ~DQ_BLKS; + dq->dq_flags |= DQ_MOD; + } + return (0); + } + if ((flags & FORCE) == 0 && cred->cr_uid != 0) { + for (i = 0; i < MAXQUOTAS; i++) { + if ((dq = ip->i_dquot[i]) == NODQUOT) + continue; + if (error = chkdqchg(ip, change, cred, i)) + return (error); + } + } + for (i = 0; i < MAXQUOTAS; i++) { + if ((dq = ip->i_dquot[i]) == NODQUOT) + continue; + while (dq->dq_flags & DQ_LOCK) { + dq->dq_flags |= DQ_WANT; + sleep((caddr_t)dq, PINOD+1); + } + dq->dq_curblocks += change; + dq->dq_flags |= DQ_MOD; + } + return (0); +} + +/* + * Check for a valid change to a users allocation. + * Issue an error message if appropriate. + */ +int +chkdqchg(ip, change, cred, type) + struct inode *ip; + long change; + struct ucred *cred; + int type; +{ + register struct dquot *dq = ip->i_dquot[type]; + long ncurblocks = dq->dq_curblocks + change; + + /* + * If user would exceed their hard limit, disallow space allocation. + */ + if (ncurblocks >= dq->dq_bhardlimit && dq->dq_bhardlimit) { + if ((dq->dq_flags & DQ_BLKS) == 0 && + ip->i_uid == cred->cr_uid) { + uprintf("\n%s: write failed, %s disk limit reached\n", + ITOV(ip)->v_mount->mnt_stat.f_mntonname, + quotatypes[type]); + dq->dq_flags |= DQ_BLKS; + } + return (EDQUOT); + } + /* + * If user is over their soft limit for too long, disallow space + * allocation. Reset time limit as they cross their soft limit. 
+ */ + if (ncurblocks >= dq->dq_bsoftlimit && dq->dq_bsoftlimit) { + if (dq->dq_curblocks < dq->dq_bsoftlimit) { + dq->dq_btime = time.tv_sec + + VFSTOUFS(ITOV(ip)->v_mount)->um_btime[type]; + if (ip->i_uid == cred->cr_uid) + uprintf("\n%s: warning, %s %s\n", + ITOV(ip)->v_mount->mnt_stat.f_mntonname, + quotatypes[type], "disk quota exceeded"); + return (0); + } + if (time.tv_sec > dq->dq_btime) { + if ((dq->dq_flags & DQ_BLKS) == 0 && + ip->i_uid == cred->cr_uid) { + uprintf("\n%s: write failed, %s %s\n", + ITOV(ip)->v_mount->mnt_stat.f_mntonname, + quotatypes[type], + "disk quota exceeded for too long"); + dq->dq_flags |= DQ_BLKS; + } + return (EDQUOT); + } + } + return (0); +} + +/* + * Check the inode limit, applying corrective action. + */ +int +chkiq(ip, change, cred, flags) + register struct inode *ip; + long change; + struct ucred *cred; + int flags; +{ + register struct dquot *dq; + register int i; + int ncurinodes, error; + +#ifdef DIAGNOSTIC + if ((flags & CHOWN) == 0) + chkdquot(ip); +#endif + if (change == 0) + return (0); + if (change < 0) { + for (i = 0; i < MAXQUOTAS; i++) { + if ((dq = ip->i_dquot[i]) == NODQUOT) + continue; + while (dq->dq_flags & DQ_LOCK) { + dq->dq_flags |= DQ_WANT; + sleep((caddr_t)dq, PINOD+1); + } + ncurinodes = dq->dq_curinodes + change; + if (ncurinodes >= 0) + dq->dq_curinodes = ncurinodes; + else + dq->dq_curinodes = 0; + dq->dq_flags &= ~DQ_INODS; + dq->dq_flags |= DQ_MOD; + } + return (0); + } + if ((flags & FORCE) == 0 && cred->cr_uid != 0) { + for (i = 0; i < MAXQUOTAS; i++) { + if ((dq = ip->i_dquot[i]) == NODQUOT) + continue; + if (error = chkiqchg(ip, change, cred, i)) + return (error); + } + } + for (i = 0; i < MAXQUOTAS; i++) { + if ((dq = ip->i_dquot[i]) == NODQUOT) + continue; + while (dq->dq_flags & DQ_LOCK) { + dq->dq_flags |= DQ_WANT; + sleep((caddr_t)dq, PINOD+1); + } + dq->dq_curinodes += change; + dq->dq_flags |= DQ_MOD; + } + return (0); +} + +/* + * Check for a valid change to a users allocation. + * Issue an error message if appropriate. + */ +int +chkiqchg(ip, change, cred, type) + struct inode *ip; + long change; + struct ucred *cred; + int type; +{ + register struct dquot *dq = ip->i_dquot[type]; + long ncurinodes = dq->dq_curinodes + change; + + /* + * If user would exceed their hard limit, disallow inode allocation. + */ + if (ncurinodes >= dq->dq_ihardlimit && dq->dq_ihardlimit) { + if ((dq->dq_flags & DQ_INODS) == 0 && + ip->i_uid == cred->cr_uid) { + uprintf("\n%s: write failed, %s inode limit reached\n", + ITOV(ip)->v_mount->mnt_stat.f_mntonname, + quotatypes[type]); + dq->dq_flags |= DQ_INODS; + } + return (EDQUOT); + } + /* + * If user is over their soft limit for too long, disallow inode + * allocation. Reset time limit as they cross their soft limit. 
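
Together, the hard-limit and soft-limit tests above implement the standard quota policy: a hard limit can never be exceeded, while a soft limit may be exceeded only until a grace timer, started the moment the limit is first crossed, runs out. A condensed model of that decision; struct quota, quota_chkblk and the field names are hypothetical, and the uprintf messages and flag handling of the real code are omitted:

#include <stdio.h>
#include <time.h>

struct quota {
	long	curblocks;	/* blocks currently charged */
	long	bsoftlimit;	/* soft limit, 0 means none */
	long	bhardlimit;	/* hard limit, 0 means none */
	time_t	btime;		/* when the grace period expires */
	long	grace;		/* grace period length in seconds */
};

/* Return 0 if "change" more blocks may be charged, -1 (EDQUOT) if not. */
static int
quota_chkblk(struct quota *q, long change, time_t now)
{
	long ncur = q->curblocks + change;

	if (q->bhardlimit && ncur >= q->bhardlimit)
		return (-1);			/* hard limit: always refuse */
	if (q->bsoftlimit && ncur >= q->bsoftlimit) {
		if (q->curblocks < q->bsoftlimit) {
			/* first crossing: start the grace clock, allow it */
			q->btime = now + q->grace;
			return (0);
		}
		if (now > q->btime)
			return (-1);		/* grace period expired */
	}
	return (0);
}

int
main(void)
{
	struct quota q = { 90, 100, 200, 0, 7 * 24 * 60 * 60 };
	time_t now = time(NULL);

	/* crossing the soft limit is allowed, but the grace clock starts */
	printf("charge 20: %s\n", quota_chkblk(&q, 20, now) ? "EDQUOT" : "ok");
	printf("charge 150: %s\n", quota_chkblk(&q, 150, now) ? "EDQUOT" : "ok");
	return (0);
}
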
+ */ + if (ncurinodes >= dq->dq_isoftlimit && dq->dq_isoftlimit) { + if (dq->dq_curinodes < dq->dq_isoftlimit) { + dq->dq_itime = time.tv_sec + + VFSTOUFS(ITOV(ip)->v_mount)->um_itime[type]; + if (ip->i_uid == cred->cr_uid) + uprintf("\n%s: warning, %s %s\n", + ITOV(ip)->v_mount->mnt_stat.f_mntonname, + quotatypes[type], "inode quota exceeded"); + return (0); + } + if (time.tv_sec > dq->dq_itime) { + if ((dq->dq_flags & DQ_INODS) == 0 && + ip->i_uid == cred->cr_uid) { + uprintf("\n%s: write failed, %s %s\n", + ITOV(ip)->v_mount->mnt_stat.f_mntonname, + quotatypes[type], + "inode quota exceeded for too long"); + dq->dq_flags |= DQ_INODS; + } + return (EDQUOT); + } + } + return (0); +} + +#ifdef DIAGNOSTIC +/* + * On filesystems with quotas enabled, it is an error for a file to change + * size and not to have a dquot structure associated with it. + */ +void +chkdquot(ip) + register struct inode *ip; +{ + struct ufsmount *ump = VFSTOUFS(ITOV(ip)->v_mount); + register int i; + + for (i = 0; i < MAXQUOTAS; i++) { + if (ump->um_quotas[i] == NULLVP || + (ump->um_qflags[i] & (QTF_OPENING|QTF_CLOSING))) + continue; + if (ip->i_dquot[i] == NODQUOT) { + vprint("chkdquot: missing dquot", ITOV(ip)); + panic("missing dquot"); + } + } +} +#endif + +/* + * Code to process quotactl commands. + */ + +/* + * Q_QUOTAON - set up a quota file for a particular file system. + */ +int +quotaon(p, mp, type, fname) + struct proc *p; + struct mount *mp; + register int type; + caddr_t fname; +{ + register struct ufsmount *ump = VFSTOUFS(mp); + register struct vnode *vp, **vpp; + struct vnode *nextvp; + struct dquot *dq; + int error; + struct nameidata nd; + + vpp = &ump->um_quotas[type]; + NDINIT(&nd, LOOKUP, FOLLOW, UIO_USERSPACE, fname, p); + if (error = vn_open(&nd, FREAD|FWRITE, 0)) + return (error); + vp = nd.ni_vp; + VOP_UNLOCK(vp); + if (vp->v_type != VREG) { + (void) vn_close(vp, FREAD|FWRITE, p->p_ucred, p); + return (EACCES); + } + if (vfs_busy(mp)) { + (void) vn_close(vp, FREAD|FWRITE, p->p_ucred, p); + return (EBUSY); + } + if (*vpp != vp) + quotaoff(p, mp, type); + ump->um_qflags[type] |= QTF_OPENING; + mp->mnt_flag |= MNT_QUOTA; + vp->v_flag |= VSYSTEM; + *vpp = vp; + /* + * Save the credential of the process that turned on quotas. + * Set up the time limits for this quota. + */ + crhold(p->p_ucred); + ump->um_cred[type] = p->p_ucred; + ump->um_btime[type] = MAX_DQ_TIME; + ump->um_itime[type] = MAX_IQ_TIME; + if (dqget(NULLVP, 0, ump, type, &dq) == 0) { + if (dq->dq_btime > 0) + ump->um_btime[type] = dq->dq_btime; + if (dq->dq_itime > 0) + ump->um_itime[type] = dq->dq_itime; + dqrele(NULLVP, dq); + } + /* + * Search vnodes associated with this mount point, + * adding references to quota file being opened. + * NB: only need to add dquot's for inodes being modified. + */ +again: + for (vp = mp->mnt_vnodelist.lh_first; vp != NULL; vp = nextvp) { + nextvp = vp->v_mntvnodes.le_next; + if (vp->v_writecount == 0) + continue; + if (vget(vp, 1)) + goto again; + if (error = getinoquota(VTOI(vp))) { + vput(vp); + break; + } + vput(vp); + if (vp->v_mntvnodes.le_next != nextvp || vp->v_mount != mp) + goto again; + } + ump->um_qflags[type] &= ~QTF_OPENING; + if (error) + quotaoff(p, mp, type); + vfs_unbusy(mp); + return (error); +} + +/* + * Q_QUOTAOFF - turn off disk quotas for a filesystem. 
+ */ +int +quotaoff(p, mp, type) + struct proc *p; + struct mount *mp; + register int type; +{ + register struct vnode *vp; + struct vnode *qvp, *nextvp; + struct ufsmount *ump = VFSTOUFS(mp); + register struct dquot *dq; + register struct inode *ip; + int error; + + if ((mp->mnt_flag & MNT_MPBUSY) == 0) + panic("quotaoff: not busy"); + if ((qvp = ump->um_quotas[type]) == NULLVP) + return (0); + ump->um_qflags[type] |= QTF_CLOSING; + /* + * Search vnodes associated with this mount point, + * deleting any references to quota file being closed. + */ +again: + for (vp = mp->mnt_vnodelist.lh_first; vp != NULL; vp = nextvp) { + nextvp = vp->v_mntvnodes.le_next; + if (vget(vp, 1)) + goto again; + ip = VTOI(vp); + dq = ip->i_dquot[type]; + ip->i_dquot[type] = NODQUOT; + dqrele(vp, dq); + vput(vp); + if (vp->v_mntvnodes.le_next != nextvp || vp->v_mount != mp) + goto again; + } + dqflush(qvp); + qvp->v_flag &= ~VSYSTEM; + error = vn_close(qvp, FREAD|FWRITE, p->p_ucred, p); + ump->um_quotas[type] = NULLVP; + crfree(ump->um_cred[type]); + ump->um_cred[type] = NOCRED; + ump->um_qflags[type] &= ~QTF_CLOSING; + for (type = 0; type < MAXQUOTAS; type++) + if (ump->um_quotas[type] != NULLVP) + break; + if (type == MAXQUOTAS) + mp->mnt_flag &= ~MNT_QUOTA; + return (error); +} + +/* + * Q_GETQUOTA - return current values in a dqblk structure. + */ +int +getquota(mp, id, type, addr) + struct mount *mp; + u_long id; + int type; + caddr_t addr; +{ + struct dquot *dq; + int error; + + if (error = dqget(NULLVP, id, VFSTOUFS(mp), type, &dq)) + return (error); + error = copyout((caddr_t)&dq->dq_dqb, addr, sizeof (struct dqblk)); + dqrele(NULLVP, dq); + return (error); +} + +/* + * Q_SETQUOTA - assign an entire dqblk structure. + */ +int +setquota(mp, id, type, addr) + struct mount *mp; + u_long id; + int type; + caddr_t addr; +{ + register struct dquot *dq; + struct dquot *ndq; + struct ufsmount *ump = VFSTOUFS(mp); + struct dqblk newlim; + int error; + + if (error = copyin(addr, (caddr_t)&newlim, sizeof (struct dqblk))) + return (error); + if (error = dqget(NULLVP, id, ump, type, &ndq)) + return (error); + dq = ndq; + while (dq->dq_flags & DQ_LOCK) { + dq->dq_flags |= DQ_WANT; + sleep((caddr_t)dq, PINOD+1); + } + /* + * Copy all but the current values. + * Reset time limit if previously had no soft limit or were + * under it, but now have a soft limit and are over it. + */ + newlim.dqb_curblocks = dq->dq_curblocks; + newlim.dqb_curinodes = dq->dq_curinodes; + if (dq->dq_id != 0) { + newlim.dqb_btime = dq->dq_btime; + newlim.dqb_itime = dq->dq_itime; + } + if (newlim.dqb_bsoftlimit && + dq->dq_curblocks >= newlim.dqb_bsoftlimit && + (dq->dq_bsoftlimit == 0 || dq->dq_curblocks < dq->dq_bsoftlimit)) + newlim.dqb_btime = time.tv_sec + ump->um_btime[type]; + if (newlim.dqb_isoftlimit && + dq->dq_curinodes >= newlim.dqb_isoftlimit && + (dq->dq_isoftlimit == 0 || dq->dq_curinodes < dq->dq_isoftlimit)) + newlim.dqb_itime = time.tv_sec + ump->um_itime[type]; + dq->dq_dqb = newlim; + if (dq->dq_curblocks < dq->dq_bsoftlimit) + dq->dq_flags &= ~DQ_BLKS; + if (dq->dq_curinodes < dq->dq_isoftlimit) + dq->dq_flags &= ~DQ_INODS; + if (dq->dq_isoftlimit == 0 && dq->dq_bsoftlimit == 0 && + dq->dq_ihardlimit == 0 && dq->dq_bhardlimit == 0) + dq->dq_flags |= DQ_FAKE; + else + dq->dq_flags &= ~DQ_FAKE; + dq->dq_flags |= DQ_MOD; + dqrele(NULLVP, dq); + return (0); +} + +/* + * Q_SETUSE - set current inode and block usage. 
+ */ +int +setuse(mp, id, type, addr) + struct mount *mp; + u_long id; + int type; + caddr_t addr; +{ + register struct dquot *dq; + struct ufsmount *ump = VFSTOUFS(mp); + struct dquot *ndq; + struct dqblk usage; + int error; + + if (error = copyin(addr, (caddr_t)&usage, sizeof (struct dqblk))) + return (error); + if (error = dqget(NULLVP, id, ump, type, &ndq)) + return (error); + dq = ndq; + while (dq->dq_flags & DQ_LOCK) { + dq->dq_flags |= DQ_WANT; + sleep((caddr_t)dq, PINOD+1); + } + /* + * Reset time limit if have a soft limit and were + * previously under it, but are now over it. + */ + if (dq->dq_bsoftlimit && dq->dq_curblocks < dq->dq_bsoftlimit && + usage.dqb_curblocks >= dq->dq_bsoftlimit) + dq->dq_btime = time.tv_sec + ump->um_btime[type]; + if (dq->dq_isoftlimit && dq->dq_curinodes < dq->dq_isoftlimit && + usage.dqb_curinodes >= dq->dq_isoftlimit) + dq->dq_itime = time.tv_sec + ump->um_itime[type]; + dq->dq_curblocks = usage.dqb_curblocks; + dq->dq_curinodes = usage.dqb_curinodes; + if (dq->dq_curblocks < dq->dq_bsoftlimit) + dq->dq_flags &= ~DQ_BLKS; + if (dq->dq_curinodes < dq->dq_isoftlimit) + dq->dq_flags &= ~DQ_INODS; + dq->dq_flags |= DQ_MOD; + dqrele(NULLVP, dq); + return (0); +} + +/* + * Q_SYNC - sync quota files to disk. + */ +int +qsync(mp) + struct mount *mp; +{ + struct ufsmount *ump = VFSTOUFS(mp); + register struct vnode *vp, *nextvp; + register struct dquot *dq; + register int i; + + /* + * Check if the mount point has any quotas. + * If not, simply return. + */ + if ((mp->mnt_flag & MNT_MPBUSY) == 0) + panic("qsync: not busy"); + for (i = 0; i < MAXQUOTAS; i++) + if (ump->um_quotas[i] != NULLVP) + break; + if (i == MAXQUOTAS) + return (0); + /* + * Search vnodes associated with this mount point, + * synchronizing any modified dquot structures. + */ +again: + for (vp = mp->mnt_vnodelist.lh_first; vp != NULL; vp = nextvp) { + nextvp = vp->v_mntvnodes.le_next; + if (VOP_ISLOCKED(vp)) + continue; + if (vget(vp, 1)) + goto again; + for (i = 0; i < MAXQUOTAS; i++) { + dq = VTOI(vp)->i_dquot[i]; + if (dq != NODQUOT && (dq->dq_flags & DQ_MOD)) + dqsync(vp, dq); + } + vput(vp); + if (vp->v_mntvnodes.le_next != nextvp || vp->v_mount != mp) + goto again; + } + return (0); +} + +/* + * Code pertaining to management of the in-core dquot data structures. + */ +struct dquot **dqhashtbl; +u_long dqhash; + +/* + * Dquot free list. + */ +#define DQUOTINC 5 /* minimum free dquots desired */ +struct dquot *dqfreel, **dqback = &dqfreel; +long numdquot, desireddquot = DQUOTINC; + +/* + * Initialize the quota system. + */ +void +dqinit() +{ + + dqhashtbl = hashinit(desiredvnodes, M_DQUOT, &dqhash); +} + +/* + * Obtain a dquot structure for the specified identifier and quota file + * reading the information from the file if necessary. + */ +int +dqget(vp, id, ump, type, dqp) + struct vnode *vp; + u_long id; + register struct ufsmount *ump; + register int type; + struct dquot **dqp; +{ + register struct dquot *dq, *dp, **dpp; + register struct vnode *dqvp; + struct iovec aiov; + struct uio auio; + int error; + + dqvp = ump->um_quotas[type]; + if (dqvp == NULLVP || (ump->um_qflags[type] & QTF_CLOSING)) { + *dqp = NODQUOT; + return (EINVAL); + } + /* + * Check the cache first. + */ + dpp = &dqhashtbl[((((int)(dqvp)) >> 8) + id) & dqhash]; + for (dq = *dpp; dq; dq = dq->dq_forw) { + if (dq->dq_id != id || + dq->dq_ump->um_quotas[dq->dq_type] != dqvp) + continue; + /* + * Cache hit with no references. Take + * the structure off the free list. 
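
The cache lookup above hashes on the pair (quota file vnode, id): the vnode pointer is shifted right to discard its low alignment bits, the id is added, and the sum is masked into a power-of-two bucket array (dqhash holds the table size minus one). A hypothetical restatement of just that bucket computation:

#include <stdint.h>
#include <stdio.h>

/*
 * Bucket index for a (quota file, id) pair.  "mask" is the table
 * size minus one, as produced by hashinit() in the code above.
 */
static unsigned long
dq_bucket(const void *qvp, unsigned long id, unsigned long mask)
{
	return ((((uintptr_t)qvp >> 8) + id) & mask);
}

int
main(void)
{
	int dummy_vnode;	/* stands in for the quota file's vnode */

	printf("bucket %lu\n", dq_bucket(&dummy_vnode, 1001, 255));
	return (0);
}
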
+ */ + if (dq->dq_cnt == 0) { + if ((dp = dq->dq_freef) != NODQUOT) + dp->dq_freeb = dq->dq_freeb; + else + dqback = dq->dq_freeb; + *dq->dq_freeb = dp; + } + DQREF(dq); + *dqp = dq; + return (0); + } + /* + * Not in cache, allocate a new one. + */ + if (dqfreel == NODQUOT && numdquot < MAXQUOTAS * desiredvnodes) + desireddquot += DQUOTINC; + if (numdquot < desireddquot) { + dq = (struct dquot *)malloc(sizeof *dq, M_DQUOT, M_WAITOK); + bzero((char *)dq, sizeof *dq); + numdquot++; + } else { + if ((dq = dqfreel) == NULL) { + tablefull("dquot"); + *dqp = NODQUOT; + return (EUSERS); + } + if (dq->dq_cnt || (dq->dq_flags & DQ_MOD)) + panic("free dquot isn't"); + if ((dp = dq->dq_freef) != NODQUOT) + dp->dq_freeb = &dqfreel; + else + dqback = &dqfreel; + dqfreel = dp; + dq->dq_freef = NULL; + dq->dq_freeb = NULL; + if (dp = dq->dq_forw) + dp->dq_back = dq->dq_back; + *dq->dq_back = dp; + } + /* + * Initialize the contents of the dquot structure. + */ + if (vp != dqvp) + VOP_LOCK(dqvp); + if (dp = *dpp) + dp->dq_back = &dq->dq_forw; + dq->dq_forw = dp; + dq->dq_back = dpp; + *dpp = dq; + DQREF(dq); + dq->dq_flags = DQ_LOCK; + dq->dq_id = id; + dq->dq_ump = ump; + dq->dq_type = type; + auio.uio_iov = &aiov; + auio.uio_iovcnt = 1; + aiov.iov_base = (caddr_t)&dq->dq_dqb; + aiov.iov_len = sizeof (struct dqblk); + auio.uio_resid = sizeof (struct dqblk); + auio.uio_offset = (off_t)(id * sizeof (struct dqblk)); + auio.uio_segflg = UIO_SYSSPACE; + auio.uio_rw = UIO_READ; + auio.uio_procp = (struct proc *)0; + error = VOP_READ(dqvp, &auio, 0, ump->um_cred[type]); + if (auio.uio_resid == sizeof(struct dqblk) && error == 0) + bzero((caddr_t)&dq->dq_dqb, sizeof(struct dqblk)); + if (vp != dqvp) + VOP_UNLOCK(dqvp); + if (dq->dq_flags & DQ_WANT) + wakeup((caddr_t)dq); + dq->dq_flags = 0; + /* + * I/O error in reading quota file, release + * quota structure and reflect problem to caller. + */ + if (error) { + if (dp = dq->dq_forw) + dp->dq_back = dq->dq_back; + *dq->dq_back = dp; + dq->dq_forw = NULL; + dq->dq_back = NULL; + dqrele(vp, dq); + *dqp = NODQUOT; + return (error); + } + /* + * Check for no limit to enforce. + * Initialize time values if necessary. + */ + if (dq->dq_isoftlimit == 0 && dq->dq_bsoftlimit == 0 && + dq->dq_ihardlimit == 0 && dq->dq_bhardlimit == 0) + dq->dq_flags |= DQ_FAKE; + if (dq->dq_id != 0) { + if (dq->dq_btime == 0) + dq->dq_btime = time.tv_sec + ump->um_btime[type]; + if (dq->dq_itime == 0) + dq->dq_itime = time.tv_sec + ump->um_itime[type]; + } + *dqp = dq; + return (0); +} + +/* + * Obtain a reference to a dquot. + */ +void +dqref(dq) + struct dquot *dq; +{ + + dq->dq_cnt++; +} + +/* + * Release a reference to a dquot. + */ +void +dqrele(vp, dq) + struct vnode *vp; + register struct dquot *dq; +{ + + if (dq == NODQUOT) + return; + if (dq->dq_cnt > 1) { + dq->dq_cnt--; + return; + } + if (dq->dq_flags & DQ_MOD) + (void) dqsync(vp, dq); + if (--dq->dq_cnt > 0) + return; + if (dqfreel != NODQUOT) { + *dqback = dq; + dq->dq_freeb = dqback; + } else { + dqfreel = dq; + dq->dq_freeb = &dqfreel; + } + dq->dq_freef = NODQUOT; + dqback = &dq->dq_freef; +} + +/* + * Update the disk quota in the quota file. 
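
dqget above also shows the quota file's on-disk layout: it is a flat array of fixed-size records indexed by uid or gid, so the record for id N starts at byte offset N times the record size, and a short read past end of file means no limits have been set yet. A userland sketch of that addressing; struct dqrec and read_quota are illustrative stand-ins rather than the real struct dqblk:

#include <fcntl.h>
#include <stdint.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <sys/types.h>
#include <unistd.h>

/* Illustrative stand-in for a fixed-size per-id quota record. */
struct dqrec {
	uint32_t bhardlimit, bsoftlimit, curblocks;
	uint32_t ihardlimit, isoftlimit, curinodes;
	int32_t	 btime, itime;
};

static int
read_quota(const char *qfile, unsigned long id, struct dqrec *out)
{
	int fd = open(qfile, O_RDONLY);
	ssize_t n;

	if (fd < 0)
		return (-1);
	/* The record for "id" starts at id * record-size. */
	n = pread(fd, out, sizeof(*out), (off_t)id * sizeof(*out));
	close(fd);
	if (n < 0)
		return (-1);
	if (n < (ssize_t)sizeof(*out))
		memset(out, 0, sizeof(*out));	/* past EOF: no quota yet */
	return (0);
}

int
main(int argc, char **argv)
{
	struct dqrec q;

	if (argc == 3 &&
	    read_quota(argv[1], strtoul(argv[2], NULL, 10), &q) == 0)
		printf("blocks %u, soft %u, hard %u\n",
		    q.curblocks, q.bsoftlimit, q.bhardlimit);
	return (0);
}
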
+ */ +int +dqsync(vp, dq) + struct vnode *vp; + register struct dquot *dq; +{ + struct vnode *dqvp; + struct iovec aiov; + struct uio auio; + int error; + + if (dq == NODQUOT) + panic("dqsync: dquot"); + if ((dq->dq_flags & DQ_MOD) == 0) + return (0); + if ((dqvp = dq->dq_ump->um_quotas[dq->dq_type]) == NULLVP) + panic("dqsync: file"); + if (vp != dqvp) + VOP_LOCK(dqvp); + while (dq->dq_flags & DQ_LOCK) { + dq->dq_flags |= DQ_WANT; + sleep((caddr_t)dq, PINOD+2); + if ((dq->dq_flags & DQ_MOD) == 0) { + if (vp != dqvp) + VOP_UNLOCK(dqvp); + return (0); + } + } + dq->dq_flags |= DQ_LOCK; + auio.uio_iov = &aiov; + auio.uio_iovcnt = 1; + aiov.iov_base = (caddr_t)&dq->dq_dqb; + aiov.iov_len = sizeof (struct dqblk); + auio.uio_resid = sizeof (struct dqblk); + auio.uio_offset = (off_t)(dq->dq_id * sizeof (struct dqblk)); + auio.uio_segflg = UIO_SYSSPACE; + auio.uio_rw = UIO_WRITE; + auio.uio_procp = (struct proc *)0; + error = VOP_WRITE(dqvp, &auio, 0, dq->dq_ump->um_cred[dq->dq_type]); + if (auio.uio_resid && error == 0) + error = EIO; + if (dq->dq_flags & DQ_WANT) + wakeup((caddr_t)dq); + dq->dq_flags &= ~(DQ_MOD|DQ_LOCK|DQ_WANT); + if (vp != dqvp) + VOP_UNLOCK(dqvp); + return (error); +} + +/* + * Flush all entries from the cache for a particular vnode. + */ +void +dqflush(vp) + register struct vnode *vp; +{ + register struct dquot *dq, *dp, **dpp, *nextdq; + + /* + * Move all dquot's that used to refer to this quota + * file off their hash chains (they will eventually + * fall off the head of the free list and be re-used). + */ + for (dpp = &dqhashtbl[dqhash]; dpp >= dqhashtbl; dpp--) { + for (dq = *dpp; dq; dq = nextdq) { + nextdq = dq->dq_forw; + if (dq->dq_ump->um_quotas[dq->dq_type] != vp) + continue; + if (dq->dq_cnt) + panic("dqflush: stray dquot"); + if (dp = dq->dq_forw) + dp->dq_back = dq->dq_back; + *dq->dq_back = dp; + dq->dq_forw = NULL; + dq->dq_back = NULL; + dq->dq_ump = (struct ufsmount *)0; + } + } +} diff --git a/sys/ufs/ufs/ufs_readwrite.c b/sys/ufs/ufs/ufs_readwrite.c new file mode 100644 index 0000000..5ead2c1 --- /dev/null +++ b/sys/ufs/ufs/ufs_readwrite.c @@ -0,0 +1,295 @@ +/*- + * Copyright (c) 1993 + * The Regents of the University of California. All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * 3. All advertising materials mentioning features or use of this software + * must display the following acknowledgement: + * This product includes software developed by the University of + * California, Berkeley and its contributors. + * 4. Neither the name of the University nor the names of its contributors + * may be used to endorse or promote products derived from this software + * without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND + * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. 
IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE + * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL + * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS + * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) + * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT + * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY + * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF + * SUCH DAMAGE. + * + * @(#)ufs_readwrite.c 8.7 (Berkeley) 1/21/94 + */ + +#ifdef LFS_READWRITE +#define BLKSIZE(a, b, c) blksize(a) +#define FS struct lfs +#define I_FS i_lfs +#define READ lfs_read +#define READ_S "lfs_read" +#define WRITE lfs_write +#define WRITE_S "lfs_write" +#define fs_bsize lfs_bsize +#define fs_maxfilesize lfs_maxfilesize +#else +#define BLKSIZE(a, b, c) blksize(a, b, c) +#define FS struct fs +#define I_FS i_fs +#define READ ffs_read +#define READ_S "ffs_read" +#define WRITE ffs_write +#define WRITE_S "ffs_write" +#endif + +/* + * Vnode op for reading. + */ +/* ARGSUSED */ +READ(ap) + struct vop_read_args /* { + struct vnode *a_vp; + struct uio *a_uio; + int a_ioflag; + struct ucred *a_cred; + } */ *ap; +{ + register struct vnode *vp; + register struct inode *ip; + register struct uio *uio; + register FS *fs; + struct buf *bp; + daddr_t lbn, nextlbn; + off_t bytesinfile; + long size, xfersize, blkoffset; + int error; + u_short mode; + + vp = ap->a_vp; + ip = VTOI(vp); + mode = ip->i_mode; + uio = ap->a_uio; + +#ifdef DIAGNOSTIC + if (uio->uio_rw != UIO_READ) + panic("%s: mode", READ_S); + + if (vp->v_type == VLNK) { + if ((int)ip->i_size < vp->v_mount->mnt_maxsymlinklen) + panic("%s: short symlink", READ_S); + } else if (vp->v_type != VREG && vp->v_type != VDIR) + panic("%s: type %d", READ_S, vp->v_type); +#endif + fs = ip->I_FS; + if ((u_quad_t)uio->uio_offset > fs->fs_maxfilesize) + return (EFBIG); + + for (error = 0, bp = NULL; uio->uio_resid > 0; bp = NULL) { + if ((bytesinfile = ip->i_size - uio->uio_offset) <= 0) + break; + lbn = lblkno(fs, uio->uio_offset); + nextlbn = lbn + 1; + size = BLKSIZE(fs, ip, lbn); + blkoffset = blkoff(fs, uio->uio_offset); + xfersize = fs->fs_bsize - blkoffset; + if (uio->uio_resid < xfersize) + xfersize = uio->uio_resid; + if (bytesinfile < xfersize) + xfersize = bytesinfile; + +#ifdef LFS_READWRITE + (void)lfs_check(vp, lbn); + error = cluster_read(vp, ip->i_size, lbn, size, NOCRED, &bp); +#else + if (lblktosize(fs, nextlbn) > ip->i_size) + error = bread(vp, lbn, size, NOCRED, &bp); + else if (doclusterread) + error = cluster_read(vp, + ip->i_size, lbn, size, NOCRED, &bp); + else if (lbn - 1 == vp->v_lastr) { + int nextsize = BLKSIZE(fs, ip, nextlbn); + error = breadn(vp, lbn, + size, &nextlbn, &nextsize, 1, NOCRED, &bp); + } else + error = bread(vp, lbn, size, NOCRED, &bp); +#endif + if (error) + break; + vp->v_lastr = lbn; + + /* + * We should only get non-zero b_resid when an I/O error + * has occurred, which should cause us to break above. + * However, if the short read did not cause an error, + * then we want to ensure that we do not uiomove bad + * or uninitialized data. 
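
The per-iteration arithmetic in the read loop above is plain block math: find the logical block holding the current offset, how far into that block the offset falls, and how many bytes can be copied without running past the block, the request, or the file. A standalone restatement with hypothetical names:

#include <stdio.h>

/*
 * Given a file size, a read offset, a request length and the
 * filesystem block size, compute the parameters of one pass of
 * the read loop above: logical block, offset within it, and the
 * number of bytes to copy out of that block.
 */
static void
read_step(long long fsize, long long off, long long resid, long bsize,
    long long *lbn, long *blkoff, long long *xfer)
{
	long long infile = fsize - off;

	*lbn = off / bsize;
	*blkoff = (long)(off % bsize);
	*xfer = bsize - *blkoff;
	if (*xfer > resid)
		*xfer = resid;
	if (*xfer > infile)
		*xfer = infile;
}

int
main(void)
{
	long long lbn, xfer;
	long blkoff;

	/* e.g. an 8192-byte-block filesystem, reading 100 bytes at 8200 */
	read_step(20000, 8200, 100, 8192, &lbn, &blkoff, &xfer);
	printf("lbn %lld, blkoff %ld, xfersize %lld\n", lbn, blkoff, xfer);
	return (0);
}
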
+ */ + size -= bp->b_resid; + if (size < xfersize) { + if (size == 0) + break; + xfersize = size; + } + if (error = + uiomove((char *)bp->b_data + blkoffset, (int)xfersize, uio)) + break; + + if (S_ISREG(mode) && (xfersize + blkoffset == fs->fs_bsize || + uio->uio_offset == ip->i_size)) + bp->b_flags |= B_AGE; + brelse(bp); + } + if (bp != NULL) + brelse(bp); + ip->i_flag |= IN_ACCESS; + return (error); +} + +/* + * Vnode op for writing. + */ +WRITE(ap) + struct vop_write_args /* { + struct vnode *a_vp; + struct uio *a_uio; + int a_ioflag; + struct ucred *a_cred; + } */ *ap; +{ + register struct vnode *vp; + register struct uio *uio; + register struct inode *ip; + register FS *fs; + struct buf *bp; + struct proc *p; + daddr_t lbn; + off_t osize; + int blkoffset, error, flags, ioflag, resid, size, xfersize; + + ioflag = ap->a_ioflag; + uio = ap->a_uio; + vp = ap->a_vp; + ip = VTOI(vp); + +#ifdef DIAGNOSTIC + if (uio->uio_rw != UIO_WRITE) + panic("%s: mode", WRITE_S); +#endif + + switch (vp->v_type) { + case VREG: + if (ioflag & IO_APPEND) + uio->uio_offset = ip->i_size; + if ((ip->i_flags & APPEND) && uio->uio_offset != ip->i_size) + return (EPERM); + /* FALLTHROUGH */ + case VLNK: + break; + case VDIR: + if ((ioflag & IO_SYNC) == 0) + panic("%s: nonsync dir write", WRITE_S); + break; + default: + panic("%s: type", WRITE_S); + } + + fs = ip->I_FS; + if (uio->uio_offset < 0 || + (u_quad_t)uio->uio_offset + uio->uio_resid > fs->fs_maxfilesize) + return (EFBIG); + /* + * Maybe this should be above the vnode op call, but so long as + * file servers have no limits, I don't think it matters. + */ + p = uio->uio_procp; + if (vp->v_type == VREG && p && + uio->uio_offset + uio->uio_resid > + p->p_rlimit[RLIMIT_FSIZE].rlim_cur) { + psignal(p, SIGXFSZ); + return (EFBIG); + } + + resid = uio->uio_resid; + osize = ip->i_size; + flags = ioflag & IO_SYNC ? B_SYNC : 0; + + for (error = 0; uio->uio_resid > 0;) { + lbn = lblkno(fs, uio->uio_offset); + blkoffset = blkoff(fs, uio->uio_offset); + xfersize = fs->fs_bsize - blkoffset; + if (uio->uio_resid < xfersize) + xfersize = uio->uio_resid; +#ifdef LFS_READWRITE + (void)lfs_check(vp, lbn); + error = lfs_balloc(vp, xfersize, lbn, &bp); +#else + if (fs->fs_bsize > xfersize) + flags |= B_CLRBUF; + else + flags &= ~B_CLRBUF; + + error = ffs_balloc(ip, + lbn, blkoffset + xfersize, ap->a_cred, &bp, flags); +#endif + if (error) + break; + if (uio->uio_offset + xfersize > ip->i_size) { + ip->i_size = uio->uio_offset + xfersize; + vnode_pager_setsize(vp, (u_long)ip->i_size); + } + (void)vnode_pager_uncache(vp); + + size = BLKSIZE(fs, ip, lbn) - bp->b_resid; + if (size < xfersize) + xfersize = size; + + error = + uiomove((char *)bp->b_data + blkoffset, (int)xfersize, uio); +#ifdef LFS_READWRITE + (void)VOP_BWRITE(bp); +#else + if (ioflag & IO_SYNC) + (void)bwrite(bp); + else if (xfersize + blkoffset == fs->fs_bsize) + if (doclusterwrite) + cluster_write(bp, ip->i_size); + else { + bp->b_flags |= B_AGE; + bawrite(bp); + } + else + bdwrite(bp); +#endif + if (error || xfersize == 0) + break; + ip->i_flag |= IN_CHANGE | IN_UPDATE; + } + /* + * If we successfully wrote any data, and we are not the superuser + * we clear the setuid and setgid bits as a precaution against + * tampering. 
+ */ + if (resid > uio->uio_resid && ap->a_cred && ap->a_cred->cr_uid != 0) + ip->i_mode &= ~(ISUID | ISGID); + if (error) { + if (ioflag & IO_UNIT) { + (void)VOP_TRUNCATE(vp, osize, + ioflag & IO_SYNC, ap->a_cred, uio->uio_procp); + uio->uio_offset -= resid - uio->uio_resid; + uio->uio_resid = resid; + } + } else if (resid > uio->uio_resid && (ioflag & IO_SYNC)) + error = VOP_UPDATE(vp, &time, &time, 1); + return (error); +} diff --git a/sys/ufs/ufs/ufs_vfsops.c b/sys/ufs/ufs/ufs_vfsops.c new file mode 100644 index 0000000..f806e0b --- /dev/null +++ b/sys/ufs/ufs/ufs_vfsops.c @@ -0,0 +1,206 @@ +/* + * Copyright (c) 1991, 1993, 1994 + * The Regents of the University of California. All rights reserved. + * (c) UNIX System Laboratories, Inc. + * All or some portions of this file are derived from material licensed + * to the University of California by American Telephone and Telegraph + * Co. or Unix System Laboratories, Inc. and are reproduced herein with + * the permission of UNIX System Laboratories, Inc. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * 3. All advertising materials mentioning features or use of this software + * must display the following acknowledgement: + * This product includes software developed by the University of + * California, Berkeley and its contributors. + * 4. Neither the name of the University nor the names of its contributors + * may be used to endorse or promote products derived from this software + * without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND + * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE + * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL + * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS + * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) + * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT + * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY + * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF + * SUCH DAMAGE. + * + * @(#)ufs_vfsops.c 8.4 (Berkeley) 4/16/94 + */ + +#include <sys/param.h> +#include <sys/mbuf.h> +#include <sys/mount.h> +#include <sys/proc.h> +#include <sys/buf.h> +#include <sys/vnode.h> +#include <sys/malloc.h> + +#include <miscfs/specfs/specdev.h> + +#include <ufs/ufs/quota.h> +#include <ufs/ufs/inode.h> +#include <ufs/ufs/ufsmount.h> +#include <ufs/ufs/ufs_extern.h> + +/* + * Flag to permit forcible unmounting. + */ +int doforce = 1; + +/* + * Make a filesystem operational. + * Nothing to do at the moment. + */ +/* ARGSUSED */ +int +ufs_start(mp, flags, p) + struct mount *mp; + int flags; + struct proc *p; +{ + + return (0); +} + +/* + * Return the root of a filesystem. 
+ */ +int +ufs_root(mp, vpp) + struct mount *mp; + struct vnode **vpp; +{ + struct vnode *nvp; + int error; + + if (error = VFS_VGET(mp, (ino_t)ROOTINO, &nvp)) + return (error); + *vpp = nvp; + return (0); +} + +/* + * Do operations associated with quotas + */ +int +ufs_quotactl(mp, cmds, uid, arg, p) + struct mount *mp; + int cmds; + uid_t uid; + caddr_t arg; + struct proc *p; +{ + int cmd, type, error; + +#ifndef QUOTA + return (EOPNOTSUPP); +#else + if (uid == -1) + uid = p->p_cred->p_ruid; + cmd = cmds >> SUBCMDSHIFT; + + switch (cmd) { + case Q_GETQUOTA: + case Q_SYNC: + if (uid == p->p_cred->p_ruid) + break; + /* fall through */ + default: + if (error = suser(p->p_ucred, &p->p_acflag)) + return (error); + } + + type = cmd & SUBCMDMASK; + if ((u_int)type >= MAXQUOTAS) + return (EINVAL); + + switch (cmd) { + + case Q_QUOTAON: + return (quotaon(p, mp, type, arg)); + + case Q_QUOTAOFF: + if (vfs_busy(mp)) + return (0); + error = quotaoff(p, mp, type); + vfs_unbusy(mp); + return (error); + + case Q_SETQUOTA: + return (setquota(mp, uid, type, arg)); + + case Q_SETUSE: + return (setuse(mp, uid, type, arg)); + + case Q_GETQUOTA: + return (getquota(mp, uid, type, arg)); + + case Q_SYNC: + if (vfs_busy(mp)) + return (0); + error = qsync(mp); + vfs_unbusy(mp); + return (error); + + default: + return (EINVAL); + } + /* NOTREACHED */ +#endif +} + +/* + * This is the generic part of fhtovp called after the underlying + * filesystem has validated the file handle. + * + * Verify that a host should have access to a filesystem, and if so + * return a vnode for the presented file handle. + */ +int +ufs_check_export(mp, ufhp, nam, vpp, exflagsp, credanonp) + register struct mount *mp; + struct ufid *ufhp; + struct mbuf *nam; + struct vnode **vpp; + int *exflagsp; + struct ucred **credanonp; +{ + register struct inode *ip; + register struct netcred *np; + register struct ufsmount *ump = VFSTOUFS(mp); + struct vnode *nvp; + int error; + + /* + * Get the export permission structure for this <mp, client> tuple. + */ + np = vfs_export_lookup(mp, &ump->um_export, nam); + if (np == NULL) + return (EACCES); + + if (error = VFS_VGET(mp, ufhp->ufid_ino, &nvp)) { + *vpp = NULLVP; + return (error); + } + ip = VTOI(nvp); + if (ip->i_mode == 0 || ip->i_gen != ufhp->ufid_gen) { + vput(nvp); + *vpp = NULLVP; + return (ESTALE); + } + *vpp = nvp; + *exflagsp = np->netc_exflags; + *credanonp = &np->netc_anon; + return (0); +} diff --git a/sys/ufs/ufs/ufs_vnops.c b/sys/ufs/ufs/ufs_vnops.c new file mode 100644 index 0000000..7b7c883 --- /dev/null +++ b/sys/ufs/ufs/ufs_vnops.c @@ -0,0 +1,2159 @@ +/* + * Copyright (c) 1982, 1986, 1989, 1993 + * The Regents of the University of California. All rights reserved. + * (c) UNIX System Laboratories, Inc. + * All or some portions of this file are derived from material licensed + * to the University of California by American Telephone and Telegraph + * Co. or Unix System Laboratories, Inc. and are reproduced herein with + * the permission of UNIX System Laboratories, Inc. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * 3. 
All advertising materials mentioning features or use of this software + * must display the following acknowledgement: + * This product includes software developed by the University of + * California, Berkeley and its contributors. + * 4. Neither the name of the University nor the names of its contributors + * may be used to endorse or promote products derived from this software + * without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND + * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE + * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL + * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS + * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) + * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT + * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY + * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF + * SUCH DAMAGE. + * + * @(#)ufs_vnops.c 8.10 (Berkeley) 4/1/94 + */ + +#include <sys/param.h> +#include <sys/systm.h> +#include <sys/namei.h> +#include <sys/resourcevar.h> +#include <sys/kernel.h> +#include <sys/file.h> +#include <sys/stat.h> +#include <sys/buf.h> +#include <sys/proc.h> +#include <sys/conf.h> +#include <sys/mount.h> +#include <sys/vnode.h> +#include <sys/malloc.h> +#include <sys/dirent.h> + +#include <vm/vm.h> + +#include <miscfs/specfs/specdev.h> + +#include <ufs/ufs/lockf.h> +#include <ufs/ufs/quota.h> +#include <ufs/ufs/inode.h> +#include <ufs/ufs/dir.h> +#include <ufs/ufs/ufsmount.h> +#include <ufs/ufs/ufs_extern.h> + +static int ufs_chmod __P((struct vnode *, int, struct ucred *, struct proc *)); +static int ufs_chown + __P((struct vnode *, uid_t, gid_t, struct ucred *, struct proc *)); + +union _qcvt { + quad_t qcvt; + long val[2]; +}; +#define SETHIGH(q, h) { \ + union _qcvt tmp; \ + tmp.qcvt = (q); \ + tmp.val[_QUAD_HIGHWORD] = (h); \ + (q) = tmp.qcvt; \ +} +#define SETLOW(q, l) { \ + union _qcvt tmp; \ + tmp.qcvt = (q); \ + tmp.val[_QUAD_LOWWORD] = (l); \ + (q) = tmp.qcvt; \ +} + +/* + * Create a regular file + */ +int +ufs_create(ap) + struct vop_create_args /* { + struct vnode *a_dvp; + struct vnode **a_vpp; + struct componentname *a_cnp; + struct vattr *a_vap; + } */ *ap; +{ + int error; + + if (error = + ufs_makeinode(MAKEIMODE(ap->a_vap->va_type, ap->a_vap->va_mode), + ap->a_dvp, ap->a_vpp, ap->a_cnp)) + return (error); + return (0); +} + +/* + * Mknod vnode call + */ +/* ARGSUSED */ +int +ufs_mknod(ap) + struct vop_mknod_args /* { + struct vnode *a_dvp; + struct vnode **a_vpp; + struct componentname *a_cnp; + struct vattr *a_vap; + } */ *ap; +{ + register struct vattr *vap = ap->a_vap; + register struct vnode **vpp = ap->a_vpp; + register struct inode *ip; + int error; + + if (error = + ufs_makeinode(MAKEIMODE(vap->va_type, vap->va_mode), + ap->a_dvp, vpp, ap->a_cnp)) + return (error); + ip = VTOI(*vpp); + ip->i_flag |= IN_ACCESS | IN_CHANGE | IN_UPDATE; + if (vap->va_rdev != VNOVAL) { + /* + * Want to be able to use this to make badblock + * inodes, so don't truncate the dev number. + */ + ip->i_rdev = vap->va_rdev; + } + /* + * Remove inode so that it will be reloaded by VFS_VGET and + * checked to see if it is an alias of an existing entry in + * the inode cache. 
+ */ + vput(*vpp); + (*vpp)->v_type = VNON; + vgone(*vpp); + *vpp = 0; + return (0); +} + +/* + * Open called. + * + * Nothing to do. + */ +/* ARGSUSED */ +int +ufs_open(ap) + struct vop_open_args /* { + struct vnode *a_vp; + int a_mode; + struct ucred *a_cred; + struct proc *a_p; + } */ *ap; +{ + + /* + * Files marked append-only must be opened for appending. + */ + if ((VTOI(ap->a_vp)->i_flags & APPEND) && + (ap->a_mode & (FWRITE | O_APPEND)) == FWRITE) + return (EPERM); + return (0); +} + +/* + * Close called. + * + * Update the times on the inode. + */ +/* ARGSUSED */ +int +ufs_close(ap) + struct vop_close_args /* { + struct vnode *a_vp; + int a_fflag; + struct ucred *a_cred; + struct proc *a_p; + } */ *ap; +{ + register struct vnode *vp = ap->a_vp; + register struct inode *ip = VTOI(vp); + + if (vp->v_usecount > 1 && !(ip->i_flag & IN_LOCKED)) + ITIMES(ip, &time, &time); + return (0); +} + +int +ufs_access(ap) + struct vop_access_args /* { + struct vnode *a_vp; + int a_mode; + struct ucred *a_cred; + struct proc *a_p; + } */ *ap; +{ + register struct vnode *vp = ap->a_vp; + register struct inode *ip = VTOI(vp); + register struct ucred *cred = ap->a_cred; + mode_t mask, mode = ap->a_mode; + register gid_t *gp; + int i, error; + +#ifdef DIAGNOSTIC + if (!VOP_ISLOCKED(vp)) { + vprint("ufs_access: not locked", vp); + panic("ufs_access: not locked"); + } +#endif +#ifdef QUOTA + if (mode & VWRITE) + switch (vp->v_type) { + case VDIR: + case VLNK: + case VREG: + if (error = getinoquota(ip)) + return (error); + break; + } +#endif + + /* If immutable bit set, nobody gets to write it. */ + if ((mode & VWRITE) && (ip->i_flags & IMMUTABLE)) + return (EPERM); + + /* Otherwise, user id 0 always gets access. */ + if (cred->cr_uid == 0) + return (0); + + mask = 0; + + /* Otherwise, check the owner. */ + if (cred->cr_uid == ip->i_uid) { + if (mode & VEXEC) + mask |= S_IXUSR; + if (mode & VREAD) + mask |= S_IRUSR; + if (mode & VWRITE) + mask |= S_IWUSR; + return ((ip->i_mode & mask) == mask ? 0 : EACCES); + } + + /* Otherwise, check the groups. */ + for (i = 0, gp = cred->cr_groups; i < cred->cr_ngroups; i++, gp++) + if (ip->i_gid == *gp) { + if (mode & VEXEC) + mask |= S_IXGRP; + if (mode & VREAD) + mask |= S_IRGRP; + if (mode & VWRITE) + mask |= S_IWGRP; + return ((ip->i_mode & mask) == mask ? 0 : EACCES); + } + + /* Otherwise, check everyone else. */ + if (mode & VEXEC) + mask |= S_IXOTH; + if (mode & VREAD) + mask |= S_IROTH; + if (mode & VWRITE) + mask |= S_IWOTH; + return ((ip->i_mode & mask) == mask ? 
0 : EACCES); +} + +/* ARGSUSED */ +int +ufs_getattr(ap) + struct vop_getattr_args /* { + struct vnode *a_vp; + struct vattr *a_vap; + struct ucred *a_cred; + struct proc *a_p; + } */ *ap; +{ + register struct vnode *vp = ap->a_vp; + register struct inode *ip = VTOI(vp); + register struct vattr *vap = ap->a_vap; + + ITIMES(ip, &time, &time); + /* + * Copy from inode table + */ + vap->va_fsid = ip->i_dev; + vap->va_fileid = ip->i_number; + vap->va_mode = ip->i_mode & ~IFMT; + vap->va_nlink = ip->i_nlink; + vap->va_uid = ip->i_uid; + vap->va_gid = ip->i_gid; + vap->va_rdev = (dev_t)ip->i_rdev; + vap->va_size = ip->i_din.di_size; + vap->va_atime = ip->i_atime; + vap->va_mtime = ip->i_mtime; + vap->va_ctime = ip->i_ctime; + vap->va_flags = ip->i_flags; + vap->va_gen = ip->i_gen; + /* this doesn't belong here */ + if (vp->v_type == VBLK) + vap->va_blocksize = BLKDEV_IOSIZE; + else if (vp->v_type == VCHR) + vap->va_blocksize = MAXBSIZE; + else + vap->va_blocksize = vp->v_mount->mnt_stat.f_iosize; + vap->va_bytes = dbtob(ip->i_blocks); + vap->va_type = vp->v_type; + vap->va_filerev = ip->i_modrev; + return (0); +} + +/* + * Set attribute vnode op. called from several syscalls + */ +int +ufs_setattr(ap) + struct vop_setattr_args /* { + struct vnode *a_vp; + struct vattr *a_vap; + struct ucred *a_cred; + struct proc *a_p; + } */ *ap; +{ + register struct vattr *vap = ap->a_vap; + register struct vnode *vp = ap->a_vp; + register struct inode *ip = VTOI(vp); + register struct ucred *cred = ap->a_cred; + register struct proc *p = ap->a_p; + struct timeval atimeval, mtimeval; + int error; + + /* + * Check for unsettable attributes. + */ + if ((vap->va_type != VNON) || (vap->va_nlink != VNOVAL) || + (vap->va_fsid != VNOVAL) || (vap->va_fileid != VNOVAL) || + (vap->va_blocksize != VNOVAL) || (vap->va_rdev != VNOVAL) || + ((int)vap->va_bytes != VNOVAL) || (vap->va_gen != VNOVAL)) { + return (EINVAL); + } + if (vap->va_flags != VNOVAL) { + if (cred->cr_uid != ip->i_uid && + (error = suser(cred, &p->p_acflag))) + return (error); + if (cred->cr_uid == 0) { + if ((ip->i_flags & (SF_IMMUTABLE | SF_APPEND)) && + securelevel > 0) + return (EPERM); + ip->i_flags = vap->va_flags; + } else { + if (ip->i_flags & (SF_IMMUTABLE | SF_APPEND)) + return (EPERM); + ip->i_flags &= SF_SETTABLE; + ip->i_flags |= (vap->va_flags & UF_SETTABLE); + } + ip->i_flag |= IN_CHANGE; + if (vap->va_flags & (IMMUTABLE | APPEND)) + return (0); + } + if (ip->i_flags & (IMMUTABLE | APPEND)) + return (EPERM); + /* + * Go through the fields and update iff not VNOVAL. 
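The setattr path applies only the fields the caller actually supplied; everything else arrives as VNOVAL and is skipped. A small sketch of that convention, using a hypothetical attribute struct and -1 as the "not supplied" sentinel in place of the real VNOVAL from the vnode headers:

#include <stdio.h>

#define ANOVAL	(-1)		/* assumed "field not supplied" sentinel */

struct fattr {
	long	uid, gid, size;
};

/* Copy only the fields the caller filled in, leaving the rest untouched. */
static void
apply_attrs(struct fattr *cur, const struct fattr *req)
{
	if (req->uid != ANOVAL)
		cur->uid = req->uid;
	if (req->gid != ANOVAL)
		cur->gid = req->gid;
	if (req->size != ANOVAL)
		cur->size = req->size;
}

int
main(void)
{
	struct fattr cur = { 100, 100, 4096 };
	struct fattr req = { ANOVAL, 20, ANOVAL };	/* change the group only */

	apply_attrs(&cur, &req);
	printf("uid %ld gid %ld size %ld\n", cur.uid, cur.gid, cur.size);
	return (0);
}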
+ */ + if (vap->va_uid != (uid_t)VNOVAL || vap->va_gid != (gid_t)VNOVAL) + if (error = ufs_chown(vp, vap->va_uid, vap->va_gid, cred, p)) + return (error); + if (vap->va_size != VNOVAL) { + if (vp->v_type == VDIR) + return (EISDIR); + if (error = VOP_TRUNCATE(vp, vap->va_size, 0, cred, p)) + return (error); + } + ip = VTOI(vp); + if (vap->va_atime.ts_sec != VNOVAL || vap->va_mtime.ts_sec != VNOVAL) { + if (cred->cr_uid != ip->i_uid && + (error = suser(cred, &p->p_acflag)) && + ((vap->va_vaflags & VA_UTIMES_NULL) == 0 || + (error = VOP_ACCESS(vp, VWRITE, cred, p)))) + return (error); + if (vap->va_atime.ts_sec != VNOVAL) + ip->i_flag |= IN_ACCESS; + if (vap->va_mtime.ts_sec != VNOVAL) + ip->i_flag |= IN_CHANGE | IN_UPDATE; + atimeval.tv_sec = vap->va_atime.ts_sec; + atimeval.tv_usec = vap->va_atime.ts_nsec / 1000; + mtimeval.tv_sec = vap->va_mtime.ts_sec; + mtimeval.tv_usec = vap->va_mtime.ts_nsec / 1000; + if (error = VOP_UPDATE(vp, &atimeval, &mtimeval, 1)) + return (error); + } + error = 0; + if (vap->va_mode != (mode_t)VNOVAL) + error = ufs_chmod(vp, (int)vap->va_mode, cred, p); + return (error); +} + +/* + * Change the mode on a file. + * Inode must be locked before calling. + */ +static int +ufs_chmod(vp, mode, cred, p) + register struct vnode *vp; + register int mode; + register struct ucred *cred; + struct proc *p; +{ + register struct inode *ip = VTOI(vp); + int error; + + if (cred->cr_uid != ip->i_uid && + (error = suser(cred, &p->p_acflag))) + return (error); + if (cred->cr_uid) { + if (vp->v_type != VDIR && (mode & S_ISTXT)) + return (EFTYPE); + if (!groupmember(ip->i_gid, cred) && (mode & ISGID)) + return (EPERM); + } + ip->i_mode &= ~ALLPERMS; + ip->i_mode |= (mode & ALLPERMS); + ip->i_flag |= IN_CHANGE; + if ((vp->v_flag & VTEXT) && (ip->i_mode & S_ISTXT) == 0) + (void) vnode_pager_uncache(vp); + return (0); +} + +/* + * Perform chown operation on inode ip; + * inode must be locked prior to call. + */ +static int +ufs_chown(vp, uid, gid, cred, p) + register struct vnode *vp; + uid_t uid; + gid_t gid; + struct ucred *cred; + struct proc *p; +{ + register struct inode *ip = VTOI(vp); + uid_t ouid; + gid_t ogid; + int error = 0; +#ifdef QUOTA + register int i; + long change; +#endif + + if (uid == (uid_t)VNOVAL) + uid = ip->i_uid; + if (gid == (gid_t)VNOVAL) + gid = ip->i_gid; + /* + * If we don't own the file, are trying to change the owner + * of the file, or are not a member of the target group, + * the caller must be superuser or the call fails. 
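The comment above spells out the non-superuser chown rule: the caller must already own the file, must not be giving it away, and must be a member of any new group. A hedged sketch of that predicate in isolation; in_groups() is a stand-in for the kernel's groupmember() scan of the credential's group list:

#include <stdbool.h>
#include <stdio.h>

/* Stand-in for groupmember(): is gid one of the caller's groups? */
static bool
in_groups(long gid, const long *groups, int ngroups)
{
	for (int i = 0; i < ngroups; i++)
		if (groups[i] == gid)
			return (true);
	return (false);
}

/*
 * Non-superuser chown is allowed only when the caller owns the file,
 * keeps the owner unchanged, and is a member of the target group.
 */
static bool
chown_allowed(long cr_uid, const long *cr_groups, int ngroups,
    long file_uid, long new_uid, long new_gid)
{
	if (cr_uid == 0)
		return (true);			/* superuser always wins */
	return (cr_uid == file_uid &&
	    new_uid == file_uid &&
	    in_groups(new_gid, cr_groups, ngroups));
}

int
main(void)
{
	long groups[] = { 20, 5 };

	printf("%d\n", chown_allowed(1001, groups, 2, 1001, 1001, 20)); /* 1 */
	printf("%d\n", chown_allowed(1001, groups, 2, 1001, 0, 20));    /* 0 */
	return (0);
}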
+ */ + if ((cred->cr_uid != ip->i_uid || uid != ip->i_uid || + !groupmember((gid_t)gid, cred)) && + (error = suser(cred, &p->p_acflag))) + return (error); + ogid = ip->i_gid; + ouid = ip->i_uid; +#ifdef QUOTA + if (error = getinoquota(ip)) + return (error); + if (ouid == uid) { + dqrele(vp, ip->i_dquot[USRQUOTA]); + ip->i_dquot[USRQUOTA] = NODQUOT; + } + if (ogid == gid) { + dqrele(vp, ip->i_dquot[GRPQUOTA]); + ip->i_dquot[GRPQUOTA] = NODQUOT; + } + change = ip->i_blocks; + (void) chkdq(ip, -change, cred, CHOWN); + (void) chkiq(ip, -1, cred, CHOWN); + for (i = 0; i < MAXQUOTAS; i++) { + dqrele(vp, ip->i_dquot[i]); + ip->i_dquot[i] = NODQUOT; + } +#endif + ip->i_gid = gid; + ip->i_uid = uid; +#ifdef QUOTA + if ((error = getinoquota(ip)) == 0) { + if (ouid == uid) { + dqrele(vp, ip->i_dquot[USRQUOTA]); + ip->i_dquot[USRQUOTA] = NODQUOT; + } + if (ogid == gid) { + dqrele(vp, ip->i_dquot[GRPQUOTA]); + ip->i_dquot[GRPQUOTA] = NODQUOT; + } + if ((error = chkdq(ip, change, cred, CHOWN)) == 0) { + if ((error = chkiq(ip, 1, cred, CHOWN)) == 0) + goto good; + else + (void) chkdq(ip, -change, cred, CHOWN|FORCE); + } + for (i = 0; i < MAXQUOTAS; i++) { + dqrele(vp, ip->i_dquot[i]); + ip->i_dquot[i] = NODQUOT; + } + } + ip->i_gid = ogid; + ip->i_uid = ouid; + if (getinoquota(ip) == 0) { + if (ouid == uid) { + dqrele(vp, ip->i_dquot[USRQUOTA]); + ip->i_dquot[USRQUOTA] = NODQUOT; + } + if (ogid == gid) { + dqrele(vp, ip->i_dquot[GRPQUOTA]); + ip->i_dquot[GRPQUOTA] = NODQUOT; + } + (void) chkdq(ip, change, cred, FORCE|CHOWN); + (void) chkiq(ip, 1, cred, FORCE|CHOWN); + (void) getinoquota(ip); + } + return (error); +good: + if (getinoquota(ip)) + panic("chown: lost quota"); +#endif /* QUOTA */ + if (ouid != uid || ogid != gid) + ip->i_flag |= IN_CHANGE; + if (ouid != uid && cred->cr_uid != 0) + ip->i_mode &= ~ISUID; + if (ogid != gid && cred->cr_uid != 0) + ip->i_mode &= ~ISGID; + return (0); +} + +/* ARGSUSED */ +int +ufs_ioctl(ap) + struct vop_ioctl_args /* { + struct vnode *a_vp; + int a_command; + caddr_t a_data; + int a_fflag; + struct ucred *a_cred; + struct proc *a_p; + } */ *ap; +{ + + return (ENOTTY); +} + +/* ARGSUSED */ +int +ufs_select(ap) + struct vop_select_args /* { + struct vnode *a_vp; + int a_which; + int a_fflags; + struct ucred *a_cred; + struct proc *a_p; + } */ *ap; +{ + + /* + * We should really check to see if I/O is possible. + */ + return (1); +} + +/* + * Mmap a file + * + * NB Currently unsupported. + */ +/* ARGSUSED */ +int +ufs_mmap(ap) + struct vop_mmap_args /* { + struct vnode *a_vp; + int a_fflags; + struct ucred *a_cred; + struct proc *a_p; + } */ *ap; +{ + + return (EINVAL); +} + +/* + * Seek on a file + * + * Nothing to do, so just return. 
+ */ +/* ARGSUSED */ +int +ufs_seek(ap) + struct vop_seek_args /* { + struct vnode *a_vp; + off_t a_oldoff; + off_t a_newoff; + struct ucred *a_cred; + } */ *ap; +{ + + return (0); +} + +int +ufs_remove(ap) + struct vop_remove_args /* { + struct vnode *a_dvp; + struct vnode *a_vp; + struct componentname *a_cnp; + } */ *ap; +{ + register struct inode *ip; + register struct vnode *vp = ap->a_vp; + register struct vnode *dvp = ap->a_dvp; + int error; + + ip = VTOI(vp); + if ((ip->i_flags & (IMMUTABLE | APPEND)) || + (VTOI(dvp)->i_flags & APPEND)) { + error = EPERM; + goto out; + } + if ((error = ufs_dirremove(dvp, ap->a_cnp)) == 0) { + ip->i_nlink--; + ip->i_flag |= IN_CHANGE; + } +out: + if (dvp == vp) + vrele(vp); + else + vput(vp); + vput(dvp); + return (error); +} + +/* + * link vnode call + */ +int +ufs_link(ap) + struct vop_link_args /* { + struct vnode *a_vp; + struct vnode *a_tdvp; + struct componentname *a_cnp; + } */ *ap; +{ + register struct vnode *vp = ap->a_vp; + register struct vnode *tdvp = ap->a_tdvp; + register struct componentname *cnp = ap->a_cnp; + register struct inode *ip; + struct timeval tv; + int error; + +#ifdef DIAGNOSTIC + if ((cnp->cn_flags & HASBUF) == 0) + panic("ufs_link: no name"); +#endif + if (vp->v_mount != tdvp->v_mount) { + VOP_ABORTOP(vp, cnp); + error = EXDEV; + goto out2; + } + if (vp != tdvp && (error = VOP_LOCK(tdvp))) { + VOP_ABORTOP(vp, cnp); + goto out2; + } + ip = VTOI(tdvp); + if ((nlink_t)ip->i_nlink >= LINK_MAX) { + VOP_ABORTOP(vp, cnp); + error = EMLINK; + goto out1; + } + if (ip->i_flags & (IMMUTABLE | APPEND)) { + VOP_ABORTOP(vp, cnp); + error = EPERM; + goto out1; + } + ip->i_nlink++; + ip->i_flag |= IN_CHANGE; + tv = time; + error = VOP_UPDATE(tdvp, &tv, &tv, 1); + if (!error) + error = ufs_direnter(ip, vp, cnp); + if (error) { + ip->i_nlink--; + ip->i_flag |= IN_CHANGE; + } + FREE(cnp->cn_pnbuf, M_NAMEI); +out1: + if (vp != tdvp) + VOP_UNLOCK(tdvp); +out2: + vput(vp); + return (error); +} + + + +/* + * relookup - lookup a path name component + * Used by lookup to re-aquire things. + */ +int +relookup(dvp, vpp, cnp) + struct vnode *dvp, **vpp; + struct componentname *cnp; +{ + register struct vnode *dp = 0; /* the directory we are searching */ + int docache; /* == 0 do not cache last component */ + int wantparent; /* 1 => wantparent or lockparent flag */ + int rdonly; /* lookup read-only flag bit */ + int error = 0; +#ifdef NAMEI_DIAGNOSTIC + int newhash; /* DEBUG: check name hash */ + char *cp; /* DEBUG: check name ptr/len */ +#endif + + /* + * Setup: break out flag bits into variables. + */ + wantparent = cnp->cn_flags & (LOCKPARENT|WANTPARENT); + docache = (cnp->cn_flags & NOCACHE) ^ NOCACHE; + if (cnp->cn_nameiop == DELETE || + (wantparent && cnp->cn_nameiop != CREATE)) + docache = 0; + rdonly = cnp->cn_flags & RDONLY; + cnp->cn_flags &= ~ISSYMLINK; + dp = dvp; + VOP_LOCK(dp); + +/* dirloop: */ + /* + * Search a new directory. + * + * The cn_hash value is for use by vfs_cache. + * The last component of the filename is left accessible via + * cnp->cn_nameptr for callers that need the name. Callers needing + * the name set the SAVENAME flag. When done, they assume + * responsibility for freeing the pathname buffer. 
+ */ +#ifdef NAMEI_DIAGNOSTIC + for (newhash = 0, cp = cnp->cn_nameptr; *cp != 0 && *cp != '/'; cp++) + newhash += (unsigned char)*cp; + if (newhash != cnp->cn_hash) + panic("relookup: bad hash"); + if (cnp->cn_namelen != cp - cnp->cn_nameptr) + panic ("relookup: bad len"); + if (*cp != 0) + panic("relookup: not last component"); + printf("{%s}: ", cnp->cn_nameptr); +#endif + + /* + * Check for degenerate name (e.g. / or "") + * which is a way of talking about a directory, + * e.g. like "/." or ".". + */ + if (cnp->cn_nameptr[0] == '\0') { + if (cnp->cn_nameiop != LOOKUP || wantparent) { + error = EISDIR; + goto bad; + } + if (dp->v_type != VDIR) { + error = ENOTDIR; + goto bad; + } + if (!(cnp->cn_flags & LOCKLEAF)) + VOP_UNLOCK(dp); + *vpp = dp; + if (cnp->cn_flags & SAVESTART) + panic("lookup: SAVESTART"); + return (0); + } + + if (cnp->cn_flags & ISDOTDOT) + panic ("relookup: lookup on dot-dot"); + + /* + * We now have a segment name to search for, and a directory to search. + */ + if (error = VOP_LOOKUP(dp, vpp, cnp)) { +#ifdef DIAGNOSTIC + if (*vpp != NULL) + panic("leaf should be empty"); +#endif + if (error != EJUSTRETURN) + goto bad; + /* + * If creating and at end of pathname, then can consider + * allowing file to be created. + */ + if (rdonly || (dvp->v_mount->mnt_flag & MNT_RDONLY)) { + error = EROFS; + goto bad; + } + /* ASSERT(dvp == ndp->ni_startdir) */ + if (cnp->cn_flags & SAVESTART) + VREF(dvp); + /* + * We return with ni_vp NULL to indicate that the entry + * doesn't currently exist, leaving a pointer to the + * (possibly locked) directory inode in ndp->ni_dvp. + */ + return (0); + } + dp = *vpp; + +#ifdef DIAGNOSTIC + /* + * Check for symbolic link + */ + if (dp->v_type == VLNK && (cnp->cn_flags & FOLLOW)) + panic ("relookup: symlink found.\n"); +#endif + + /* + * Check for read-only file systems. + */ + if (cnp->cn_nameiop == DELETE || cnp->cn_nameiop == RENAME) { + /* + * Disallow directory write attempts on read-only + * file systems. + */ + if (rdonly || (dp->v_mount->mnt_flag & MNT_RDONLY) || + (wantparent && + (dvp->v_mount->mnt_flag & MNT_RDONLY))) { + error = EROFS; + goto bad2; + } + } + /* ASSERT(dvp == ndp->ni_startdir) */ + if (cnp->cn_flags & SAVESTART) + VREF(dvp); + + if (!wantparent) + vrele(dvp); + if ((cnp->cn_flags & LOCKLEAF) == 0) + VOP_UNLOCK(dp); + return (0); + +bad2: + if ((cnp->cn_flags & LOCKPARENT) && (cnp->cn_flags & ISLASTCN)) + VOP_UNLOCK(dvp); + vrele(dvp); +bad: + vput(dp); + *vpp = NULL; + return (error); +} + + +/* + * Rename system call. + * rename("foo", "bar"); + * is essentially + * unlink("bar"); + * link("foo", "bar"); + * unlink("foo"); + * but ``atomically''. Can't do full commit without saving state in the + * inode on disk which isn't feasible at this time. Best we can do is + * always guarantee the target exists. + * + * Basic algorithm is: + * + * 1) Bump link count on source while we're linking it to the + * target. This also ensure the inode won't be deleted out + * from underneath us while we work (it may be truncated by + * a concurrent `trunc' or `open' for creation). + * 2) Link source to destination. If destination already exists, + * delete it first. + * 3) Unlink source reference to inode if still around. If a + * directory was moved and the parent of the destination + * is different from the source, patch the ".." entry in the + * directory. 
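The rename comment above describes rename("foo", "bar") as unlink("bar"); link("foo", "bar"); unlink("foo"), performed so that the target name never disappears. A userland illustration of the non-atomic emulation the filesystem avoids, using two scratch names in the current directory:

#include <stdio.h>
#include <unistd.h>
#include <fcntl.h>

/*
 * Naive, non-atomic emulation of rename(from, to): between the unlink()
 * and the link() the name `to' does not exist, which is exactly the
 * window the in-kernel rename closes.
 */
static int
naive_rename(const char *from, const char *to)
{
	(void)unlink(to);		/* ignore "did not exist" */
	if (link(from, to) == -1)
		return (-1);
	return (unlink(from));
}

int
main(void)
{
	int fd = open("foo.tmp", O_CREAT | O_WRONLY, 0644);

	if (fd == -1)
		return (1);
	close(fd);
	if (naive_rename("foo.tmp", "bar.tmp") == -1)
		perror("naive_rename");
	/* rename("foo.tmp", "bar.tmp") would have done this in one step. */
	unlink("bar.tmp");
	return (0);
}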
+ */ +int +ufs_rename(ap) + struct vop_rename_args /* { + struct vnode *a_fdvp; + struct vnode *a_fvp; + struct componentname *a_fcnp; + struct vnode *a_tdvp; + struct vnode *a_tvp; + struct componentname *a_tcnp; + } */ *ap; +{ + struct vnode *tvp = ap->a_tvp; + register struct vnode *tdvp = ap->a_tdvp; + struct vnode *fvp = ap->a_fvp; + register struct vnode *fdvp = ap->a_fdvp; + register struct componentname *tcnp = ap->a_tcnp; + register struct componentname *fcnp = ap->a_fcnp; + register struct inode *ip, *xp, *dp; + struct dirtemplate dirbuf; + struct timeval tv; + int doingdirectory = 0, oldparent = 0, newparent = 0; + int error = 0; + u_char namlen; + +#ifdef DIAGNOSTIC + if ((tcnp->cn_flags & HASBUF) == 0 || + (fcnp->cn_flags & HASBUF) == 0) + panic("ufs_rename: no name"); +#endif + /* + * Check for cross-device rename. + */ + if ((fvp->v_mount != tdvp->v_mount) || + (tvp && (fvp->v_mount != tvp->v_mount))) { + error = EXDEV; +abortit: + VOP_ABORTOP(tdvp, tcnp); /* XXX, why not in NFS? */ + if (tdvp == tvp) + vrele(tdvp); + else + vput(tdvp); + if (tvp) + vput(tvp); + VOP_ABORTOP(fdvp, fcnp); /* XXX, why not in NFS? */ + vrele(fdvp); + vrele(fvp); + return (error); + } + + /* + * Check if just deleting a link name. + */ + if (tvp && ((VTOI(tvp)->i_flags & (IMMUTABLE | APPEND)) || + (VTOI(tdvp)->i_flags & APPEND))) { + error = EPERM; + goto abortit; + } + if (fvp == tvp) { + if (fvp->v_type == VDIR) { + error = EINVAL; + goto abortit; + } + VOP_ABORTOP(fdvp, fcnp); + vrele(fdvp); + vrele(fvp); + vput(tdvp); + vput(tvp); + tcnp->cn_flags &= ~MODMASK; + tcnp->cn_flags |= LOCKPARENT | LOCKLEAF; + if ((tcnp->cn_flags & SAVESTART) == 0) + panic("ufs_rename: lost from startdir"); + tcnp->cn_nameiop = DELETE; + (void) relookup(tdvp, &tvp, tcnp); + return (VOP_REMOVE(tdvp, tvp, tcnp)); + } + if (error = VOP_LOCK(fvp)) + goto abortit; + dp = VTOI(fdvp); + ip = VTOI(fvp); + if ((ip->i_flags & (IMMUTABLE | APPEND)) || (dp->i_flags & APPEND)) { + VOP_UNLOCK(fvp); + error = EPERM; + goto abortit; + } + if ((ip->i_mode & IFMT) == IFDIR) { + /* + * Avoid ".", "..", and aliases of "." for obvious reasons. + */ + if ((fcnp->cn_namelen == 1 && fcnp->cn_nameptr[0] == '.') || + dp == ip || (fcnp->cn_flags&ISDOTDOT) || + (ip->i_flag & IN_RENAME)) { + VOP_UNLOCK(fvp); + error = EINVAL; + goto abortit; + } + ip->i_flag |= IN_RENAME; + oldparent = dp->i_number; + doingdirectory++; + } + vrele(fdvp); + + /* + * When the target exists, both the directory + * and target vnodes are returned locked. + */ + dp = VTOI(tdvp); + xp = NULL; + if (tvp) + xp = VTOI(tvp); + + /* + * 1) Bump link count while we're moving stuff + * around. If we crash somewhere before + * completing our work, the link count + * may be wrong, but correctable. + */ + ip->i_nlink++; + ip->i_flag |= IN_CHANGE; + tv = time; + if (error = VOP_UPDATE(fvp, &tv, &tv, 1)) { + VOP_UNLOCK(fvp); + goto bad; + } + + /* + * If ".." must be changed (ie the directory gets a new + * parent) then the source directory must not be in the + * directory heirarchy above the target, as this would + * orphan everything below the source directory. Also + * the user must have write permission in the source so + * as to be able to change "..". We must repeat the call + * to namei, as the parent directory is unlocked by the + * call to checkpath(). 
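ufs_checkpath() enforces the rule just described: when a directory gains a new parent, the source directory must not sit above the target, or the subtree would be orphaned. A rough userland analogue walks ".." from the target and compares device/inode pairs against the source; it assumes both paths exist and are directories:

#include <stdio.h>
#include <stdbool.h>
#include <string.h>
#include <sys/stat.h>

/* Is `src' the same directory as, or an ancestor of, `dst'? */
static bool
is_ancestor(const char *src, const char *dst)
{
	struct stat ss, ds, root;
	char path[4096];

	if (stat(src, &ss) == -1 || stat(dst, &ds) == -1 ||
	    stat("/", &root) == -1)
		return (false);
	snprintf(path, sizeof(path), "%s", dst);
	for (;;) {
		if (ds.st_dev == ss.st_dev && ds.st_ino == ss.st_ino)
			return (true);		/* reached the source */
		if (ds.st_dev == root.st_dev && ds.st_ino == root.st_ino)
			return (false);		/* reached the root */
		strncat(path, "/..", sizeof(path) - strlen(path) - 1);
		if (stat(path, &ds) == -1)
			return (false);
	}
}

int
main(void)
{
	printf("%d\n", is_ancestor("/usr", "/usr/share/man"));	/* 1 */
	printf("%d\n", is_ancestor("/usr/share/man", "/usr"));	/* 0 */
	return (0);
}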
+ */ + error = VOP_ACCESS(fvp, VWRITE, tcnp->cn_cred, tcnp->cn_proc); + VOP_UNLOCK(fvp); + if (oldparent != dp->i_number) + newparent = dp->i_number; + if (doingdirectory && newparent) { + if (error) /* write access check above */ + goto bad; + if (xp != NULL) + vput(tvp); + if (error = ufs_checkpath(ip, dp, tcnp->cn_cred)) + goto out; + if ((tcnp->cn_flags & SAVESTART) == 0) + panic("ufs_rename: lost to startdir"); + if (error = relookup(tdvp, &tvp, tcnp)) + goto out; + dp = VTOI(tdvp); + xp = NULL; + if (tvp) + xp = VTOI(tvp); + } + /* + * 2) If target doesn't exist, link the target + * to the source and unlink the source. + * Otherwise, rewrite the target directory + * entry to reference the source inode and + * expunge the original entry's existence. + */ + if (xp == NULL) { + if (dp->i_dev != ip->i_dev) + panic("rename: EXDEV"); + /* + * Account for ".." in new directory. + * When source and destination have the same + * parent we don't fool with the link count. + */ + if (doingdirectory && newparent) { + if ((nlink_t)dp->i_nlink >= LINK_MAX) { + error = EMLINK; + goto bad; + } + dp->i_nlink++; + dp->i_flag |= IN_CHANGE; + if (error = VOP_UPDATE(tdvp, &tv, &tv, 1)) + goto bad; + } + if (error = ufs_direnter(ip, tdvp, tcnp)) { + if (doingdirectory && newparent) { + dp->i_nlink--; + dp->i_flag |= IN_CHANGE; + (void)VOP_UPDATE(tdvp, &tv, &tv, 1); + } + goto bad; + } + vput(tdvp); + } else { + if (xp->i_dev != dp->i_dev || xp->i_dev != ip->i_dev) + panic("rename: EXDEV"); + /* + * Short circuit rename(foo, foo). + */ + if (xp->i_number == ip->i_number) + panic("rename: same file"); + /* + * If the parent directory is "sticky", then the user must + * own the parent directory, or the destination of the rename, + * otherwise the destination may not be changed (except by + * root). This implements append-only directories. + */ + if ((dp->i_mode & S_ISTXT) && tcnp->cn_cred->cr_uid != 0 && + tcnp->cn_cred->cr_uid != dp->i_uid && + xp->i_uid != tcnp->cn_cred->cr_uid) { + error = EPERM; + goto bad; + } + /* + * Target must be empty if a directory and have no links + * to it. Also, ensure source and target are compatible + * (both directories, or both not directories). + */ + if ((xp->i_mode&IFMT) == IFDIR) { + if (!ufs_dirempty(xp, dp->i_number, tcnp->cn_cred) || + xp->i_nlink > 2) { + error = ENOTEMPTY; + goto bad; + } + if (!doingdirectory) { + error = ENOTDIR; + goto bad; + } + cache_purge(tdvp); + } else if (doingdirectory) { + error = EISDIR; + goto bad; + } + if (error = ufs_dirrewrite(dp, ip, tcnp)) + goto bad; + /* + * If the target directory is in the same + * directory as the source directory, + * decrement the link count on the parent + * of the target directory. + */ + if (doingdirectory && !newparent) { + dp->i_nlink--; + dp->i_flag |= IN_CHANGE; + } + vput(tdvp); + /* + * Adjust the link count of the target to + * reflect the dirrewrite above. If this is + * a directory it is empty and there are + * no links to it, so we can squash the inode and + * any space associated with it. We disallowed + * renaming over top of a directory with links to + * it above, as the remaining link would point to + * a directory without "." or ".." entries. + */ + xp->i_nlink--; + if (doingdirectory) { + if (--xp->i_nlink != 0) + panic("rename: linked directory"); + error = VOP_TRUNCATE(tvp, (off_t)0, IO_SYNC, + tcnp->cn_cred, tcnp->cn_proc); + } + xp->i_flag |= IN_CHANGE; + vput(tvp); + xp = NULL; + } + + /* + * 3) Unlink the source. 
+ */ + fcnp->cn_flags &= ~MODMASK; + fcnp->cn_flags |= LOCKPARENT | LOCKLEAF; + if ((fcnp->cn_flags & SAVESTART) == 0) + panic("ufs_rename: lost from startdir"); + (void) relookup(fdvp, &fvp, fcnp); + if (fvp != NULL) { + xp = VTOI(fvp); + dp = VTOI(fdvp); + } else { + /* + * From name has disappeared. + */ + if (doingdirectory) + panic("rename: lost dir entry"); + vrele(ap->a_fvp); + return (0); + } + /* + * Ensure that the directory entry still exists and has not + * changed while the new name has been entered. If the source is + * a file then the entry may have been unlinked or renamed. In + * either case there is no further work to be done. If the source + * is a directory then it cannot have been rmdir'ed; its link + * count of three would cause a rmdir to fail with ENOTEMPTY. + * The IRENAME flag ensures that it cannot be moved by another + * rename. + */ + if (xp != ip) { + if (doingdirectory) + panic("rename: lost dir entry"); + } else { + /* + * If the source is a directory with a + * new parent, the link count of the old + * parent directory must be decremented + * and ".." set to point to the new parent. + */ + if (doingdirectory && newparent) { + dp->i_nlink--; + dp->i_flag |= IN_CHANGE; + error = vn_rdwr(UIO_READ, fvp, (caddr_t)&dirbuf, + sizeof (struct dirtemplate), (off_t)0, + UIO_SYSSPACE, IO_NODELOCKED, + tcnp->cn_cred, (int *)0, (struct proc *)0); + if (error == 0) { +# if (BYTE_ORDER == LITTLE_ENDIAN) + if (fvp->v_mount->mnt_maxsymlinklen <= 0) + namlen = dirbuf.dotdot_type; + else + namlen = dirbuf.dotdot_namlen; +# else + namlen = dirbuf.dotdot_namlen; +# endif + if (namlen != 2 || + dirbuf.dotdot_name[0] != '.' || + dirbuf.dotdot_name[1] != '.') { + ufs_dirbad(xp, (doff_t)12, + "rename: mangled dir"); + } else { + dirbuf.dotdot_ino = newparent; + (void) vn_rdwr(UIO_WRITE, fvp, + (caddr_t)&dirbuf, + sizeof (struct dirtemplate), + (off_t)0, UIO_SYSSPACE, + IO_NODELOCKED|IO_SYNC, + tcnp->cn_cred, (int *)0, + (struct proc *)0); + cache_purge(fdvp); + } + } + } + error = ufs_dirremove(fdvp, fcnp); + if (!error) { + xp->i_nlink--; + xp->i_flag |= IN_CHANGE; + } + xp->i_flag &= ~IN_RENAME; + } + if (dp) + vput(fdvp); + if (xp) + vput(fvp); + vrele(ap->a_fvp); + return (error); + +bad: + if (xp) + vput(ITOV(xp)); + vput(ITOV(dp)); +out: + if (VOP_LOCK(fvp) == 0) { + ip->i_nlink--; + ip->i_flag |= IN_CHANGE; + vput(fvp); + } else + vrele(fvp); + return (error); +} + +/* + * A virgin directory (no blushing please). + */ +static struct dirtemplate mastertemplate = { + 0, 12, DT_DIR, 1, ".", + 0, DIRBLKSIZ - 12, DT_DIR, 2, ".." +}; +static struct odirtemplate omastertemplate = { + 0, 12, 1, ".", + 0, DIRBLKSIZ - 12, 2, ".." +}; + +/* + * Mkdir system call + */ +int +ufs_mkdir(ap) + struct vop_mkdir_args /* { + struct vnode *a_dvp; + struct vnode **a_vpp; + struct componentname *a_cnp; + struct vattr *a_vap; + } */ *ap; +{ + register struct vnode *dvp = ap->a_dvp; + register struct vattr *vap = ap->a_vap; + register struct componentname *cnp = ap->a_cnp; + register struct inode *ip, *dp; + struct vnode *tvp; + struct dirtemplate dirtemplate, *dtp; + struct timeval tv; + int error, dmode; + +#ifdef DIAGNOSTIC + if ((cnp->cn_flags & HASBUF) == 0) + panic("ufs_mkdir: no name"); +#endif + dp = VTOI(dvp); + if ((nlink_t)dp->i_nlink >= LINK_MAX) { + error = EMLINK; + goto out; + } + dmode = vap->va_mode & 0777; + dmode |= IFDIR; + /* + * Must simulate part of ufs_makeinode here to acquire the inode, + * but not have it entered in the parent directory. 
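mastertemplate above is the image written into a brand-new directory: a "." entry occupying a 12-byte record and a ".." entry whose record length is padded out to the end of the DIRBLKSIZ block, so the two entries claim the whole block. A small sketch of that layout, assuming a 512-byte directory block and hypothetical inode numbers:

#include <stdio.h>

#define DIRBLK	512		/* assumed directory block size */

/* Simplified fixed-name directory entry, after the dirtemplate above. */
struct tmpl_entry {
	unsigned long	ino;	/* inode number */
	unsigned short	reclen;	/* bytes this record occupies */
	unsigned char	type;
	unsigned char	namlen;
	char		name[4];
};

int
main(void)
{
	struct tmpl_entry dot    = { 2, 12,          4 /* DT_DIR */, 1, "." };
	struct tmpl_entry dotdot = { 5, DIRBLK - 12, 4 /* DT_DIR */, 2, ".." };

	/* "." takes 12 bytes; ".." is padded so the block is fully claimed. */
	printf("total %d of %d bytes\n", dot.reclen + dotdot.reclen, DIRBLK);
	return (0);
}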
The entry is + * made later after writing "." and ".." entries. + */ + if (error = VOP_VALLOC(dvp, dmode, cnp->cn_cred, &tvp)) + goto out; + ip = VTOI(tvp); + ip->i_uid = cnp->cn_cred->cr_uid; + ip->i_gid = dp->i_gid; +#ifdef QUOTA + if ((error = getinoquota(ip)) || + (error = chkiq(ip, 1, cnp->cn_cred, 0))) { + free(cnp->cn_pnbuf, M_NAMEI); + VOP_VFREE(tvp, ip->i_number, dmode); + vput(tvp); + vput(dvp); + return (error); + } +#endif + ip->i_flag |= IN_ACCESS | IN_CHANGE | IN_UPDATE; + ip->i_mode = dmode; + tvp->v_type = VDIR; /* Rest init'd in getnewvnode(). */ + ip->i_nlink = 2; + tv = time; + error = VOP_UPDATE(tvp, &tv, &tv, 1); + + /* + * Bump link count in parent directory + * to reflect work done below. Should + * be done before reference is created + * so reparation is possible if we crash. + */ + dp->i_nlink++; + dp->i_flag |= IN_CHANGE; + if (error = VOP_UPDATE(dvp, &tv, &tv, 1)) + goto bad; + + /* Initialize directory with "." and ".." from static template. */ + if (dvp->v_mount->mnt_maxsymlinklen > 0) + dtp = &mastertemplate; + else + dtp = (struct dirtemplate *)&omastertemplate; + dirtemplate = *dtp; + dirtemplate.dot_ino = ip->i_number; + dirtemplate.dotdot_ino = dp->i_number; + error = vn_rdwr(UIO_WRITE, tvp, (caddr_t)&dirtemplate, + sizeof (dirtemplate), (off_t)0, UIO_SYSSPACE, + IO_NODELOCKED|IO_SYNC, cnp->cn_cred, (int *)0, (struct proc *)0); + if (error) { + dp->i_nlink--; + dp->i_flag |= IN_CHANGE; + goto bad; + } + if (DIRBLKSIZ > VFSTOUFS(dvp->v_mount)->um_mountp->mnt_stat.f_bsize) + panic("ufs_mkdir: blksize"); /* XXX should grow with balloc() */ + else { + ip->i_size = DIRBLKSIZ; + ip->i_flag |= IN_CHANGE; + } + + /* Directory set up, now install it's entry in the parent directory. */ + if (error = ufs_direnter(ip, dvp, cnp)) { + dp->i_nlink--; + dp->i_flag |= IN_CHANGE; + } +bad: + /* + * No need to do an explicit VOP_TRUNCATE here, vrele will do this + * for us because we set the link count to 0. + */ + if (error) { + ip->i_nlink = 0; + ip->i_flag |= IN_CHANGE; + vput(tvp); + } else + *ap->a_vpp = tvp; +out: + FREE(cnp->cn_pnbuf, M_NAMEI); + vput(dvp); + return (error); +} + +/* + * Rmdir system call. + */ +int +ufs_rmdir(ap) + struct vop_rmdir_args /* { + struct vnode *a_dvp; + struct vnode *a_vp; + struct componentname *a_cnp; + } */ *ap; +{ + register struct vnode *vp = ap->a_vp; + register struct vnode *dvp = ap->a_dvp; + register struct componentname *cnp = ap->a_cnp; + register struct inode *ip, *dp; + int error; + + ip = VTOI(vp); + dp = VTOI(dvp); + /* + * No rmdir "." please. + */ + if (dp == ip) { + vrele(dvp); + vput(vp); + return (EINVAL); + } + /* + * Verify the directory is empty (and valid). + * (Rmdir ".." won't be valid since + * ".." will contain a reference to + * the current directory and thus be + * non-empty.) + */ + error = 0; + if (ip->i_nlink != 2 || + !ufs_dirempty(ip, dp->i_number, cnp->cn_cred)) { + error = ENOTEMPTY; + goto out; + } + if ((dp->i_flags & APPEND) || (ip->i_flags & (IMMUTABLE | APPEND))) { + error = EPERM; + goto out; + } + /* + * Delete reference to directory before purging + * inode. If we crash in between, the directory + * will be reattached to lost+found, + */ + if (error = ufs_dirremove(dvp, cnp)) + goto out; + dp->i_nlink--; + dp->i_flag |= IN_CHANGE; + cache_purge(dvp); + vput(dvp); + dvp = NULL; + /* + * Truncate inode. The only stuff left + * in the directory is "." and "..". The + * "." reference is inconsequential since + * we're quashing it. The ".." reference + * has already been adjusted above. 
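The link-count bookkeeping in mkdir and rmdir follows the classic rule: an empty directory has two links ("." plus its name in the parent), each subdirectory adds one more to the parent through its "..", and rmdir undoes both. This is observable from userland with stat(2):

#include <stdio.h>
#include <unistd.h>
#include <sys/stat.h>
#include <sys/types.h>

static void
show_nlink(const char *path)
{
	struct stat st;

	if (stat(path, &st) == 0)
		printf("%s: nlink %ld\n", path, (long)st.st_nlink);
}

int
main(void)
{
	mkdir("d", 0755);		/* fresh directory: nlink 2 */
	show_nlink("d");
	mkdir("d/sub", 0755);		/* child's ".." bumps the parent */
	show_nlink("d");
	rmdir("d/sub");			/* and rmdir drops it again */
	show_nlink("d");
	rmdir("d");
	return (0);
}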
We've + * removed the "." reference and the reference + * in the parent directory, but there may be + * other hard links so decrement by 2 and + * worry about them later. + */ + ip->i_nlink -= 2; + error = VOP_TRUNCATE(vp, (off_t)0, IO_SYNC, cnp->cn_cred, + cnp->cn_proc); + cache_purge(ITOV(ip)); +out: + if (dvp) + vput(dvp); + vput(vp); + return (error); +} + +/* + * symlink -- make a symbolic link + */ +int +ufs_symlink(ap) + struct vop_symlink_args /* { + struct vnode *a_dvp; + struct vnode **a_vpp; + struct componentname *a_cnp; + struct vattr *a_vap; + char *a_target; + } */ *ap; +{ + register struct vnode *vp, **vpp = ap->a_vpp; + register struct inode *ip; + int len, error; + + if (error = ufs_makeinode(IFLNK | ap->a_vap->va_mode, ap->a_dvp, + vpp, ap->a_cnp)) + return (error); + vp = *vpp; + len = strlen(ap->a_target); + if (len < vp->v_mount->mnt_maxsymlinklen) { + ip = VTOI(vp); + bcopy(ap->a_target, (char *)ip->i_shortlink, len); + ip->i_size = len; + ip->i_flag |= IN_CHANGE | IN_UPDATE; + } else + error = vn_rdwr(UIO_WRITE, vp, ap->a_target, len, (off_t)0, + UIO_SYSSPACE, IO_NODELOCKED, ap->a_cnp->cn_cred, (int *)0, + (struct proc *)0); + vput(vp); + return (error); +} + +/* + * Vnode op for reading directories. + * + * The routine below assumes that the on-disk format of a directory + * is the same as that defined by <sys/dirent.h>. If the on-disk + * format changes, then it will be necessary to do a conversion + * from the on-disk format that read returns to the format defined + * by <sys/dirent.h>. + */ +int +ufs_readdir(ap) + struct vop_readdir_args /* { + struct vnode *a_vp; + struct uio *a_uio; + struct ucred *a_cred; + } */ *ap; +{ + register struct uio *uio = ap->a_uio; + int count, lost, error; + + count = uio->uio_resid; + count &= ~(DIRBLKSIZ - 1); + lost = uio->uio_resid - count; + if (count < DIRBLKSIZ || (uio->uio_offset & (DIRBLKSIZ -1))) + return (EINVAL); + uio->uio_resid = count; + uio->uio_iov->iov_len = count; +# if (BYTE_ORDER == LITTLE_ENDIAN) + if (ap->a_vp->v_mount->mnt_maxsymlinklen > 0) { + error = VOP_READ(ap->a_vp, uio, 0, ap->a_cred); + } else { + struct dirent *dp, *edp; + struct uio auio; + struct iovec aiov; + caddr_t dirbuf; + int readcnt; + u_char tmp; + + auio = *uio; + auio.uio_iov = &aiov; + auio.uio_iovcnt = 1; + auio.uio_segflg = UIO_SYSSPACE; + aiov.iov_len = count; + MALLOC(dirbuf, caddr_t, count, M_TEMP, M_WAITOK); + aiov.iov_base = dirbuf; + error = VOP_READ(ap->a_vp, &auio, 0, ap->a_cred); + if (error == 0) { + readcnt = count - auio.uio_resid; + edp = (struct dirent *)&dirbuf[readcnt]; + for (dp = (struct dirent *)dirbuf; dp < edp; ) { + tmp = dp->d_namlen; + dp->d_namlen = dp->d_type; + dp->d_type = tmp; + if (dp->d_reclen > 0) { + dp = (struct dirent *) + ((char *)dp + dp->d_reclen); + } else { + error = EIO; + break; + } + } + if (dp >= edp) + error = uiomove(dirbuf, readcnt, uio); + } + FREE(dirbuf, M_TEMP); + } +# else + error = VOP_READ(ap->a_vp, uio, 0, ap->a_cred); +# endif + uio->uio_resid += lost; + return (error); +} + +/* + * Return target name of a symbolic link + */ +int +ufs_readlink(ap) + struct vop_readlink_args /* { + struct vnode *a_vp; + struct uio *a_uio; + struct ucred *a_cred; + } */ *ap; +{ + register struct vnode *vp = ap->a_vp; + register struct inode *ip = VTOI(vp); + int isize; + + isize = ip->i_size; + if (isize < vp->v_mount->mnt_maxsymlinklen) { + uiomove((char *)ip->i_shortlink, isize, ap->a_uio); + return (0); + } + return (VOP_READ(vp, ap->a_uio, 0, ap->a_cred)); +} + +/* + * Ufs abort op, called 
after namei() when a CREATE/DELETE isn't actually + * done. If a buffer has been saved in anticipation of a CREATE, delete it. + */ +/* ARGSUSED */ +int +ufs_abortop(ap) + struct vop_abortop_args /* { + struct vnode *a_dvp; + struct componentname *a_cnp; + } */ *ap; +{ + if ((ap->a_cnp->cn_flags & (HASBUF | SAVESTART)) == HASBUF) + FREE(ap->a_cnp->cn_pnbuf, M_NAMEI); + return (0); +} + +/* + * Lock an inode. If its already locked, set the WANT bit and sleep. + */ +int +ufs_lock(ap) + struct vop_lock_args /* { + struct vnode *a_vp; + } */ *ap; +{ + register struct vnode *vp = ap->a_vp; + register struct inode *ip; + struct proc *p = curproc; /* XXX */ + +start: + while (vp->v_flag & VXLOCK) { + vp->v_flag |= VXWANT; + sleep((caddr_t)vp, PINOD); + } + if (vp->v_tag == VT_NON) + return (ENOENT); + ip = VTOI(vp); + if (ip->i_flag & IN_LOCKED) { + ip->i_flag |= IN_WANTED; +#ifdef DIAGNOSTIC + if (p) { + if (p->p_pid == ip->i_lockholder) + panic("locking against myself"); + ip->i_lockwaiter = p->p_pid; + } else + ip->i_lockwaiter = -1; +#endif + (void) sleep((caddr_t)ip, PINOD); + goto start; + } +#ifdef DIAGNOSTIC + ip->i_lockwaiter = 0; + if (ip->i_lockholder != 0) + panic("lockholder (%d) != 0", ip->i_lockholder); + if (p && p->p_pid == 0) + printf("locking by process 0\n"); + if (p) + ip->i_lockholder = p->p_pid; + else + ip->i_lockholder = -1; +#endif + ip->i_flag |= IN_LOCKED; + return (0); +} + +/* + * Unlock an inode. If WANT bit is on, wakeup. + */ +int lockcount = 90; +int +ufs_unlock(ap) + struct vop_unlock_args /* { + struct vnode *a_vp; + } */ *ap; +{ + register struct inode *ip = VTOI(ap->a_vp); + struct proc *p = curproc; /* XXX */ + +#ifdef DIAGNOSTIC + if ((ip->i_flag & IN_LOCKED) == 0) { + vprint("ufs_unlock: unlocked inode", ap->a_vp); + panic("ufs_unlock NOT LOCKED"); + } + if (p && p->p_pid != ip->i_lockholder && p->p_pid > -1 && + ip->i_lockholder > -1 && lockcount++ < 100) + panic("unlocker (%d) != lock holder (%d)", + p->p_pid, ip->i_lockholder); + ip->i_lockholder = 0; +#endif + ip->i_flag &= ~IN_LOCKED; + if (ip->i_flag & IN_WANTED) { + ip->i_flag &= ~IN_WANTED; + wakeup((caddr_t)ip); + } + return (0); +} + +/* + * Check for a locked inode. + */ +int +ufs_islocked(ap) + struct vop_islocked_args /* { + struct vnode *a_vp; + } */ *ap; +{ + + if (VTOI(ap->a_vp)->i_flag & IN_LOCKED) + return (1); + return (0); +} + +/* + * Calculate the logical to physical mapping if not done already, + * then call the device strategy routine. + */ +int +ufs_strategy(ap) + struct vop_strategy_args /* { + struct buf *a_bp; + } */ *ap; +{ + register struct buf *bp = ap->a_bp; + register struct vnode *vp = bp->b_vp; + register struct inode *ip; + int error; + + ip = VTOI(vp); + if (vp->v_type == VBLK || vp->v_type == VCHR) + panic("ufs_strategy: spec"); + if (bp->b_blkno == bp->b_lblkno) { + if (error = + VOP_BMAP(vp, bp->b_lblkno, NULL, &bp->b_blkno, NULL)) { + bp->b_error = error; + bp->b_flags |= B_ERROR; + biodone(bp); + return (error); + } + if ((long)bp->b_blkno == -1) + clrbuf(bp); + } + if ((long)bp->b_blkno == -1) { + biodone(bp); + return (0); + } + vp = ip->i_devvp; + bp->b_dev = vp->v_rdev; + VOCALL (vp->v_op, VOFFSET(vop_strategy), ap); + return (0); +} + +/* + * Print out the contents of an inode. 
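ufs_symlink above stores very short targets directly in the inode (i_shortlink) when the length is under the mount's mnt_maxsymlinklen, and ufs_readlink hands them back without any block I/O. A sketch of that decision, with a made-up 60-byte inline limit standing in for the mount's real threshold:

#include <stdio.h>
#include <string.h>

#define INLINE_MAX	60	/* assumed stand-in for mnt_maxsymlinklen */

struct fake_inode {
	long	size;
	char	shortlink[INLINE_MAX];	/* reuses the block-pointer area */
};

/* Store a symlink target, inline when it fits, via data blocks otherwise. */
static void
store_symlink(struct fake_inode *ip, const char *target)
{
	size_t len = strlen(target);

	ip->size = (long)len;
	if (len < INLINE_MAX) {
		memcpy(ip->shortlink, target, len);	/* no data block needed */
		printf("inline:  %.*s\n", (int)len, ip->shortlink);
	} else {
		printf("on disk: %zu bytes via the file's data blocks\n", len);
	}
}

int
main(void)
{
	struct fake_inode ino = { 0, "" };

	store_symlink(&ino, "../include");
	store_symlink(&ino,
	    "a/deliberately/very/long/target/path/that/will/not/fit/inline/anywhere");
	return (0);
}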
+ */ +int +ufs_print(ap) + struct vop_print_args /* { + struct vnode *a_vp; + } */ *ap; +{ + register struct vnode *vp = ap->a_vp; + register struct inode *ip = VTOI(vp); + + printf("tag VT_UFS, ino %d, on dev %d, %d", ip->i_number, + major(ip->i_dev), minor(ip->i_dev)); +#ifdef FIFO + if (vp->v_type == VFIFO) + fifo_printinfo(vp); +#endif /* FIFO */ + printf("%s\n", (ip->i_flag & IN_LOCKED) ? " (LOCKED)" : ""); + if (ip->i_lockholder == 0) + return (0); + printf("\towner pid %d", ip->i_lockholder); + if (ip->i_lockwaiter) + printf(" waiting pid %d", ip->i_lockwaiter); + printf("\n"); + return (0); +} + +/* + * Read wrapper for special devices. + */ +int +ufsspec_read(ap) + struct vop_read_args /* { + struct vnode *a_vp; + struct uio *a_uio; + int a_ioflag; + struct ucred *a_cred; + } */ *ap; +{ + + /* + * Set access flag. + */ + VTOI(ap->a_vp)->i_flag |= IN_ACCESS; + return (VOCALL (spec_vnodeop_p, VOFFSET(vop_read), ap)); +} + +/* + * Write wrapper for special devices. + */ +int +ufsspec_write(ap) + struct vop_write_args /* { + struct vnode *a_vp; + struct uio *a_uio; + int a_ioflag; + struct ucred *a_cred; + } */ *ap; +{ + + /* + * Set update and change flags. + */ + VTOI(ap->a_vp)->i_flag |= IN_CHANGE | IN_UPDATE; + return (VOCALL (spec_vnodeop_p, VOFFSET(vop_write), ap)); +} + +/* + * Close wrapper for special devices. + * + * Update the times on the inode then do device close. + */ +int +ufsspec_close(ap) + struct vop_close_args /* { + struct vnode *a_vp; + int a_fflag; + struct ucred *a_cred; + struct proc *a_p; + } */ *ap; +{ + register struct inode *ip = VTOI(ap->a_vp); + + if (ap->a_vp->v_usecount > 1 && !(ip->i_flag & IN_LOCKED)) + ITIMES(ip, &time, &time); + return (VOCALL (spec_vnodeop_p, VOFFSET(vop_close), ap)); +} + +#ifdef FIFO +/* + * Read wrapper for fifo's + */ +int +ufsfifo_read(ap) + struct vop_read_args /* { + struct vnode *a_vp; + struct uio *a_uio; + int a_ioflag; + struct ucred *a_cred; + } */ *ap; +{ + extern int (**fifo_vnodeop_p)(); + + /* + * Set access flag. + */ + VTOI(ap->a_vp)->i_flag |= IN_ACCESS; + return (VOCALL (fifo_vnodeop_p, VOFFSET(vop_read), ap)); +} + +/* + * Write wrapper for fifo's. + */ +int +ufsfifo_write(ap) + struct vop_write_args /* { + struct vnode *a_vp; + struct uio *a_uio; + int a_ioflag; + struct ucred *a_cred; + } */ *ap; +{ + extern int (**fifo_vnodeop_p)(); + + /* + * Set update and change flags. + */ + VTOI(ap->a_vp)->i_flag |= IN_CHANGE | IN_UPDATE; + return (VOCALL (fifo_vnodeop_p, VOFFSET(vop_write), ap)); +} + +/* + * Close wrapper for fifo's. + * + * Update the times on the inode then do device close. + */ +ufsfifo_close(ap) + struct vop_close_args /* { + struct vnode *a_vp; + int a_fflag; + struct ucred *a_cred; + struct proc *a_p; + } */ *ap; +{ + extern int (**fifo_vnodeop_p)(); + register struct inode *ip = VTOI(ap->a_vp); + + if (ap->a_vp->v_usecount > 1 && !(ip->i_flag & IN_LOCKED)) + ITIMES(ip, &time, &time); + return (VOCALL (fifo_vnodeop_p, VOFFSET(vop_close), ap)); +} +#endif /* FIFO */ + +/* + * Return POSIX pathconf information applicable to ufs filesystems. 
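The per-filesystem limits handed back below (maximum link count, name length, whether chown is restricted, and so on) are the values a process sees through pathconf(2). A quick userland query, assuming "/" sits on a filesystem that implements these names:

#include <stdio.h>
#include <unistd.h>

int
main(void)
{
	/* Each name maps onto one case of the filesystem's pathconf op. */
	printf("_PC_LINK_MAX          %ld\n", pathconf("/", _PC_LINK_MAX));
	printf("_PC_NAME_MAX          %ld\n", pathconf("/", _PC_NAME_MAX));
	printf("_PC_CHOWN_RESTRICTED  %ld\n", pathconf("/", _PC_CHOWN_RESTRICTED));
	printf("_PC_NO_TRUNC          %ld\n", pathconf("/", _PC_NO_TRUNC));
	return (0);
}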
+ */ +ufs_pathconf(ap) + struct vop_pathconf_args /* { + struct vnode *a_vp; + int a_name; + int *a_retval; + } */ *ap; +{ + + switch (ap->a_name) { + case _PC_LINK_MAX: + *ap->a_retval = LINK_MAX; + return (0); + case _PC_NAME_MAX: + *ap->a_retval = NAME_MAX; + return (0); + case _PC_PATH_MAX: + *ap->a_retval = PATH_MAX; + return (0); + case _PC_PIPE_BUF: + *ap->a_retval = PIPE_BUF; + return (0); + case _PC_CHOWN_RESTRICTED: + *ap->a_retval = 1; + return (0); + case _PC_NO_TRUNC: + *ap->a_retval = 1; + return (0); + default: + return (EINVAL); + } + /* NOTREACHED */ +} + +/* + * Advisory record locking support + */ +int +ufs_advlock(ap) + struct vop_advlock_args /* { + struct vnode *a_vp; + caddr_t a_id; + int a_op; + struct flock *a_fl; + int a_flags; + } */ *ap; +{ + register struct inode *ip = VTOI(ap->a_vp); + register struct flock *fl = ap->a_fl; + register struct lockf *lock; + off_t start, end; + int error; + + /* + * Avoid the common case of unlocking when inode has no locks. + */ + if (ip->i_lockf == (struct lockf *)0) { + if (ap->a_op != F_SETLK) { + fl->l_type = F_UNLCK; + return (0); + } + } + /* + * Convert the flock structure into a start and end. + */ + switch (fl->l_whence) { + + case SEEK_SET: + case SEEK_CUR: + /* + * Caller is responsible for adding any necessary offset + * when SEEK_CUR is used. + */ + start = fl->l_start; + break; + + case SEEK_END: + start = ip->i_size + fl->l_start; + break; + + default: + return (EINVAL); + } + if (start < 0) + return (EINVAL); + if (fl->l_len == 0) + end = -1; + else + end = start + fl->l_len - 1; + /* + * Create the lockf structure + */ + MALLOC(lock, struct lockf *, sizeof *lock, M_LOCKF, M_WAITOK); + lock->lf_start = start; + lock->lf_end = end; + lock->lf_id = ap->a_id; + lock->lf_inode = ip; + lock->lf_type = fl->l_type; + lock->lf_next = (struct lockf *)0; + lock->lf_block = (struct lockf *)0; + lock->lf_flags = ap->a_flags; + /* + * Do the requested operation. + */ + switch(ap->a_op) { + case F_SETLK: + return (lf_setlock(lock)); + + case F_UNLCK: + error = lf_clearlock(lock); + FREE(lock, M_LOCKF); + return (error); + + case F_GETLK: + error = lf_getlock(lock, fl); + FREE(lock, M_LOCKF); + return (error); + + default: + free(lock, M_LOCKF); + return (EINVAL); + } + /* NOTREACHED */ +} + +/* + * Initialize the vnode associated with a new inode, handle aliased + * vnodes. + */ +int +ufs_vinit(mntp, specops, fifoops, vpp) + struct mount *mntp; + int (**specops)(); + int (**fifoops)(); + struct vnode **vpp; +{ + struct inode *ip; + struct vnode *vp, *nvp; + + vp = *vpp; + ip = VTOI(vp); + switch(vp->v_type = IFTOVT(ip->i_mode)) { + case VCHR: + case VBLK: + vp->v_op = specops; + if (nvp = checkalias(vp, ip->i_rdev, mntp)) { + /* + * Discard unneeded vnode, but save its inode. + */ + ufs_ihashrem(ip); + VOP_UNLOCK(vp); + nvp->v_data = vp->v_data; + vp->v_data = NULL; + vp->v_op = spec_vnodeop_p; + vrele(vp); + vgone(vp); + /* + * Reinitialize aliased inode. + */ + vp = nvp; + ip->i_vnode = vp; + ufs_ihashins(ip); + } + break; + case VFIFO: +#ifdef FIFO + vp->v_op = fifoops; + break; +#else + return (EOPNOTSUPP); +#endif + } + if (ip->i_number == ROOTINO) + vp->v_flag |= VROOT; + /* + * Initialize modrev times + */ + SETHIGH(ip->i_modrev, mono_time.tv_sec); + SETLOW(ip->i_modrev, mono_time.tv_usec * 4294); + *vpp = vp; + return (0); +} + +/* + * Allocate a new inode. 
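ufs_advlock above turns the caller's struct flock into an absolute byte range: SEEK_SET and SEEK_CUR ranges start at l_start, SEEK_END ranges start at the file size plus l_start, and a zero l_len means "to end of file" (represented as end = -1). The same conversion in isolation, with the file size passed in rather than read from an inode:

#include <stdio.h>
#include <fcntl.h>
#include <unistd.h>

/* Convert an flock into [*startp, *endp]; *endp == -1 means "to EOF". */
static int
flock_range(const struct flock *fl, long long filesize,
    long long *startp, long long *endp)
{
	long long start;

	switch (fl->l_whence) {
	case SEEK_SET:
	case SEEK_CUR:		/* caller already folded in the file offset */
		start = fl->l_start;
		break;
	case SEEK_END:
		start = filesize + fl->l_start;
		break;
	default:
		return (-1);
	}
	if (start < 0)
		return (-1);
	*startp = start;
	*endp = (fl->l_len == 0) ? -1 : start + fl->l_len - 1;
	return (0);
}

int
main(void)
{
	struct flock fl = { .l_start = 100, .l_len = 50, .l_whence = SEEK_SET };
	long long s, e;

	if (flock_range(&fl, 8192, &s, &e) == 0)
		printf("lock covers [%lld, %lld]\n", s, e);	/* [100, 149] */
	return (0);
}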
+ */ +int +ufs_makeinode(mode, dvp, vpp, cnp) + int mode; + struct vnode *dvp; + struct vnode **vpp; + struct componentname *cnp; +{ + register struct inode *ip, *pdir; + struct timeval tv; + struct vnode *tvp; + int error; + + pdir = VTOI(dvp); +#ifdef DIAGNOSTIC + if ((cnp->cn_flags & HASBUF) == 0) + panic("ufs_makeinode: no name"); +#endif + *vpp = NULL; + if ((mode & IFMT) == 0) + mode |= IFREG; + + if (error = VOP_VALLOC(dvp, mode, cnp->cn_cred, &tvp)) { + free(cnp->cn_pnbuf, M_NAMEI); + vput(dvp); + return (error); + } + ip = VTOI(tvp); + ip->i_gid = pdir->i_gid; + if ((mode & IFMT) == IFLNK) + ip->i_uid = pdir->i_uid; + else + ip->i_uid = cnp->cn_cred->cr_uid; +#ifdef QUOTA + if ((error = getinoquota(ip)) || + (error = chkiq(ip, 1, cnp->cn_cred, 0))) { + free(cnp->cn_pnbuf, M_NAMEI); + VOP_VFREE(tvp, ip->i_number, mode); + vput(tvp); + vput(dvp); + return (error); + } +#endif + ip->i_flag |= IN_ACCESS | IN_CHANGE | IN_UPDATE; + ip->i_mode = mode; + tvp->v_type = IFTOVT(mode); /* Rest init'd in getnewvnode(). */ + ip->i_nlink = 1; + if ((ip->i_mode & ISGID) && !groupmember(ip->i_gid, cnp->cn_cred) && + suser(cnp->cn_cred, NULL)) + ip->i_mode &= ~ISGID; + + /* + * Make sure inode goes to disk before directory entry. + */ + tv = time; + if (error = VOP_UPDATE(tvp, &tv, &tv, 1)) + goto bad; + if (error = ufs_direnter(ip, dvp, cnp)) + goto bad; + if ((cnp->cn_flags & SAVESTART) == 0) + FREE(cnp->cn_pnbuf, M_NAMEI); + vput(dvp); + *vpp = tvp; + return (0); + +bad: + /* + * Write error occurred trying to update the inode + * or the directory so must deallocate the inode. + */ + free(cnp->cn_pnbuf, M_NAMEI); + vput(dvp); + ip->i_nlink = 0; + ip->i_flag |= IN_CHANGE; + vput(tvp); + return (error); +} diff --git a/sys/ufs/ufs/ufsmount.h b/sys/ufs/ufs/ufsmount.h new file mode 100644 index 0000000..237871f --- /dev/null +++ b/sys/ufs/ufs/ufsmount.h @@ -0,0 +1,83 @@ +/* + * Copyright (c) 1982, 1986, 1989, 1993 + * The Regents of the University of California. All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * 3. All advertising materials mentioning features or use of this software + * must display the following acknowledgement: + * This product includes software developed by the University of + * California, Berkeley and its contributors. + * 4. Neither the name of the University nor the names of its contributors + * may be used to endorse or promote products derived from this software + * without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND + * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. 
IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE + * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL + * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS + * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) + * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT + * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY + * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF + * SUCH DAMAGE. + * + * @(#)ufsmount.h 8.2 (Berkeley) 1/12/94 + */ + +struct buf; +struct inode; +struct nameidata; +struct timeval; +struct ucred; +struct uio; +struct vnode; +struct netexport; + +/* This structure describes the UFS specific mount structure data. */ +struct ufsmount { + struct mount *um_mountp; /* filesystem vfs structure */ + dev_t um_dev; /* device mounted */ + struct vnode *um_devvp; /* block device mounted vnode */ + union { /* pointer to superblock */ + struct lfs *lfs; /* LFS */ + struct fs *fs; /* FFS */ + } ufsmount_u; +#define um_fs ufsmount_u.fs +#define um_lfs ufsmount_u.lfs + struct vnode *um_quotas[MAXQUOTAS]; /* pointer to quota files */ + struct ucred *um_cred[MAXQUOTAS]; /* quota file access cred */ + u_long um_nindir; /* indirect ptrs per block */ + u_long um_bptrtodb; /* indir ptr to disk block */ + u_long um_seqinc; /* inc between seq blocks */ + time_t um_btime[MAXQUOTAS]; /* block quota time limit */ + time_t um_itime[MAXQUOTAS]; /* inode quota time limit */ + char um_qflags[MAXQUOTAS]; /* quota specific flags */ + struct netexport um_export; /* export information */ +}; +/* + * Flags describing the state of quotas. + */ +#define QTF_OPENING 0x01 /* Q_QUOTAON in progress */ +#define QTF_CLOSING 0x02 /* Q_QUOTAOFF in progress */ + +/* Convert mount ptr to ufsmount ptr. */ +#define VFSTOUFS(mp) ((struct ufsmount *)((mp)->mnt_data)) + +/* + * Macros to access file system parameters in the ufsmount structure. + * Used by ufs_bmap. + */ +#define blkptrtodb(ump, b) ((b) << (ump)->um_bptrtodb) +#define is_sequential(ump, a, b) ((b) == (a) + ump->um_seqinc) +#define MNINDIR(ump) ((ump)->um_nindir) + + |
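The macros at the end of ufsmount.h convert filesystem block pointers to device blocks with a plain shift and detect physically contiguous blocks for clustering. A hedged sketch of one plausible configuration, assuming 8K blocks, 1K fragments, and the traditional 512-byte device sector; the shift and increment values are illustrative, not taken from a real superblock:

#include <stdio.h>

/* Assumed geometry: 8K blocks, 1K fragments, 512-byte device sectors. */
#define DEV_BSIZE	512
#define FRAGSIZE	1024
#define BLKSIZE		8192

#define BPTRTODB	1			/* log2(FRAGSIZE / DEV_BSIZE) */
#define SEQINC		(BLKSIZE / FRAGSIZE)	/* pointer step per full block */

#define blkptrtodb(b)		((b) << BPTRTODB)
#define is_sequential(a, b)	((b) == (a) + SEQINC)

int
main(void)
{
	long a = 1000, b = 1008;	/* hypothetical block pointers */

	printf("ptr %ld -> device block %ld\n", a, blkptrtodb(a));
	printf("contiguous with next full block? %d\n", is_sequential(a, b));
	return (0);
}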